xref: /openbmc/linux/arch/parisc/kernel/pacache.S (revision 2f0f2441b4a10948e2ec042b48fef13680387f7c)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  PARISC TLB and cache flushing support
 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
 */
8
/*
 * NOTE: fdc, fic, and pdc instructions that use base register modification
 *       should only use index and base registers that are not shadowed,
 *       so that the fast path emulation in the non access miss handler
 *       can be used.
 */
15
/* Assembler architecture level: 2.0w for 64-bit kernels, 2.0 otherwise. */
#ifdef CONFIG_64BIT
	.level	2.0w
#else
	.level	2.0
#endif

#include <asm/psw.h>
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#include <linux/init.h>

	/* The routines that follow are placed in .text.hot, 16-byte aligned. */
	.section .text.hot
	.align	16
33
/*
 * flush_tlb_all_local
 *
 * Purge the whole instruction TLB (pitlbe) and then the whole data TLB
 * (pdtlbe), driven by the loop parameters stored in cache_info:
 * space-id base/stride/count, offset base/stride/count and the inner
 * loop count.  The purge loops run in real mode; the I-bit state found
 * on entry is saved in %r19 and OR-ed back into KERNEL_PSW on the way
 * out.
 *
 * Takes no register arguments.  Returns through %r2.
 * Writes: %r1, %r19-%r22, %r28, %r29, %r31, %arg0-%arg3, %sr1,
 *         %cr17, %cr18, %ipsw.
 */
ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1		/* physical address of label 1 below */
	nop
	nop
	nop
	nop
	nop

	/*
	 * Load the instruction address queues with the physical address of
	 * label 1 and REAL_MODE_PSW into %ipsw, then rfi to continue there.
	 */
	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl           %r1, %ipsw
	rfi
	nop

1:      load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20		/* increment space */

fitdone:

	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22,fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode: same queue-loading sequence as on
	 * entry, this time targeting the virtual address of label 2 with
	 * KERNEL_PSW.
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

2:      bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_tlb_all_local)
177
178	.import cache_info,data
179
/*
 * flush_instruction_cache_local
 *
 * Flush the entire instruction cache with fice, stepping by the
 * ICACHE_BASE / ICACHE_STRIDE / ICACHE_COUNT / ICACHE_LOOP values read
 * from cache_info.  The I-bit is cleared while the loops run; its
 * previous state is kept in %r22 and restored with mtsm.
 *
 * Labels 88 and 89 delimit the range named in the ALTERNATIVE() entry
 * (ALT_COND_NO_ICACHE, INSN_NOP) -- presumably so the range can be
 * patched to nops on machines without an I-cache; see asm/alternative.h.
 *
 * Takes no register arguments.  Returns through %r2.
 * Writes: %r1, %r22, %r31, %arg0-%arg3, %sr1.
 */
ENTRY_CFI(flush_instruction_cache_local)
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice            %r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

fioneloop1:					/* 16 fice per pass while count > 15 */
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:					/* remaining lines, one fice per pass */
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)
238
239
240	.import cache_info, data
/*
 * flush_data_cache_local
 *
 * Flush the entire data cache with fdce, stepping by the DCACHE_BASE /
 * DCACHE_STRIDE / DCACHE_COUNT / DCACHE_LOOP values read from
 * cache_info.  The I-bit is cleared while the loops run; its previous
 * state is kept in %r22 and restored with mtsm after syncdma + sync.
 *
 * Labels 88 and 89 delimit the range named in the ALTERNATIVE() entry
 * (ALT_COND_NO_DCACHE, INSN_NOP) -- presumably so the range can be
 * patched to nops on machines without a D-cache; see asm/alternative.h.
 *
 * Takes no register arguments.  Returns through %r2.
 * Writes: %r1, %r22, %r31, %arg0-%arg3, %sr1.
 */
ENTRY_CFI(flush_data_cache_local)
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

fdoneloop1:					/* 16 fdce per pass while count > 15 */
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:					/* remaining lines, one fdce per pass */
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	syncdma
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)
300
/*
 * clear_page_asm -- clear page using kernel mapping.
 *
 * In:     %r26 = address of the page to zero
 * Out:    nothing; returns through %r2
 * Writes: %r1 (loop counter), %r26 (advanced while storing)
 *
 * Stores zero (%r0) over PAGE_SIZE bytes: 128 bytes per iteration with
 * std on 64-bit kernels, 64 bytes per iteration with stw on 32-bit.
 */

ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Unroll the loop.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* delay slot: next 128-byte chunk */

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26		/* delay slot: next 64-byte chunk */
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)
363
/*
 * copy_page_asm -- copy page using kernel mapping.
 *
 * In:     %r26 = destination page address
 *         %r25 = source page address
 * Out:    nothing; returns through %r2
 * Writes: %r1 (loop counter), %r19-%r22 (data), %r25, %r26 (advanced)
 *
 * Copies PAGE_SIZE bytes: 128 bytes per iteration with ldd/std on
 * 64-bit kernels, 64 bytes per iteration with ldw/stw on 32-bit.
 */

ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25		/* advance source */
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* delay slot: advance destination */

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19		/* first word loaded ahead of the loop */
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25		/* advance source */
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26		/* advance destination */
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19		/* delay slot: first word of next chunk */
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)
474
/*
 * NOTE: Code in clear_user_page has a hard coded dependency on the
 *       maximum alias boundary being 4 Mb. We've been assured by the
 *       parisc chip designers that there will not ever be a parisc
 *       chip with a larger alias boundary (Never say never :-) ).
 *
 *       Subtle: the dtlb miss handlers support the temp alias region by
 *       "knowing" that if a dtlb miss happens within the temp alias
 *       region it must have occurred while in clear_user_page. Since
 *       this routine makes use of processor local translations, we
 *       don't want to insert them into the kernel page table. Instead,
 *       we load up some general registers (they need to be registers
 *       which aren't shadowed) with the physical page numbers (preshifted
 *       for tlb insertion) needed to insert the translations. When we
 *       miss on the translation, the dtlb miss handler inserts the
 *       translation into the tlb using these values:
 *
 *          %r26 physical page (shifted for tlb insert) of "to" translation
 *          %r23 physical page (shifted for tlb insert) of "from" translation
 */
495
/*
 * convert_phys_for_tlb_insert20 phys
 *
 * Drop prot bits and convert to page addr for iitlbt and idtlbt.
 * Rewrites register \phys in place: extrd,u extracts the page-number
 * field, and, when _PAGE_SIZE_ENCODING_DEFAULT is non-zero, depdi
 * deposits that page-size encoding into the low-order bits.
 * PAGE_ADD_SHIFT is how far PAGE_SHIFT exceeds 12.
 */
        #define PAGE_ADD_SHIFT  (PAGE_SHIFT-12)
        .macro          convert_phys_for_tlb_insert20  phys
        extrd,u         \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
#if _PAGE_SIZE_ENCODING_DEFAULT
        depdi           _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
#endif
	.endm
504
	/*
	 * copy_user_page_asm() performs a page copy using mappings
	 * equivalent to the user page mappings.  It can be used to
	 * implement copy_user_page() but unfortunately both the `from'
	 * and `to' pages need to be flushed through mappings equivalent
	 * to the user mappings after the copy because the kernel accesses
	 * the `from' page through the kmap kernel mapping and the `to'
	 * page needs to be flushed since code can be copied.  As a
	 * result, this implementation is less efficient than the simpler
	 * copy using the kernel mapping.  It only needs the `from' page
	 * to be flushed via the user mapping.  The kunmap routines handle
	 * the flushes needed for the kernel mapping.
	 *
	 * I'm still keeping this around because it may be possible to
	 * use it if more information is passed into copy_user_page().
	 * Have to do some measurements to see if it is worthwhile to
	 * lobby for such a change.
	 *
	 */
524
/*
 * copy_user_page_asm
 *
 * In:     %r26 = kernel virtual address of the `to' page
 *         %r25 = kernel virtual address of the `from' page
 *         %r24 = user virtual address (its low 22 bits select the alias)
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r19-%r23, %r26, %r28, %r29
 *
 * Builds two temp-alias virtual addresses inside TMPALIAS_MAP_START
 * (%r28 for `to', %r29 for `from'), purges any stale data TLB entries
 * for them, then copies PAGE_SIZE bytes from %r29 to %r28.  While the
 * copy runs, %r26 and %r23 hold the physical pages in tlb-insert
 * format.
 */
ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	convert_phys_for_tlb_insert20 %r23	/* convert phys addr to tlb insert format */
	depd		%r24,63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depdi		1, 41,1, %r29		/* Form aliased virtual address 'from' */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
	depw		%r24, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depwi		1, 9,1, %r29		/* Form aliased virtual address 'from' */
#endif

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19		/* first doubleword loaded ahead of the loop */
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1,1b
	ldo		64(%r29), %r29		/* delay slot: advance source */
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)
675
/*
 * clear_user_page_asm
 *
 * In:     %r26 = kernel virtual address of the page (converted with
 *                tophys_r1, then to tlb-insert format)
 *         %r25 = user virtual address (its low 22 bits select the alias)
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r26, %r28
 *
 * Forms a temp-alias virtual address in %r28 inside TMPALIAS_MAP_START,
 * purges any stale data TLB entry for it, then stores zero over
 * PAGE_SIZE bytes through that alias.
 */
ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28		/* delay slot: next 128-byte chunk */

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28		/* delay slot: next 64-byte chunk */
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)
753
/*
 * flush_dcache_page_asm
 *
 * In:     %r26 = physical address of the page (converted in place to
 *                tlb-insert format)
 *         %r25 = user virtual address (its low 22 bits select the alias)
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r25, %r26, %r28, %r31
 *
 * Forms a temp-alias virtual address in %r28, purges any stale data TLB
 * entry for it, then issues fdc for one page through the alias, stepping
 * by dcache_stride, 16 lines per loop pass.  %r25 is reused as the loop
 * bound: alias + PAGE_SIZE - stride.
 */
ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r28, %r25, %r25
	sub		%r25, r31, %r25

1:	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)
812
/*
 * purge_dcache_page_asm
 *
 * Same structure as the fdc-based page flush, but issues pdc (purge)
 * for each line of the page.
 *
 * In:     %r26 = physical address of the page (converted in place to
 *                tlb-insert format)
 *         %r25 = user virtual address (its low 22 bits select the alias)
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r25, %r26, %r28, %r31
 */
ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r28, %r25, %r25
	sub		%r25, r31, %r25		/* loop bound: alias + page size - stride */

1:      pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)
871
/*
 * flush_icache_page_asm
 *
 * In:     %r26 = physical address of the page (converted in place to
 *                tlb-insert format)
 *         %r25 = user virtual address (its low 22 bits select the alias)
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r25, %r26, %r28, %r31
 *
 * Forms a temp-alias virtual address in %r28, purges both the data and
 * instruction TLB entries for it, then issues fic for one page through
 * the alias, stepping by icache_stride, 16 lines per loop pass.
 */
ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l         %r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb           %r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25	/* loop bound: alias + page size - stride */

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
1:      fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)
940
/*
 * flush_kernel_dcache_page_asm
 *
 * In:     %r26 = address of the page, used directly as the fdc base
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r23 (dcache_stride), %r25 (loop bound), %r26 (advanced)
 *
 * Issues fdc for one page starting at %r26, stepping by dcache_stride,
 * 16 lines per loop pass, until %r26 reaches
 * start + PAGE_SIZE - stride.
 */
ENTRY_CFI(flush_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)
976
/*
 * purge_kernel_dcache_page_asm
 *
 * In:     %r26 = address of the page, used directly as the pdc base
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r23 (dcache_stride), %r25 (loop bound), %r26 (advanced)
 *
 * Issues pdc (purge) for one page starting at %r26, stepping by
 * dcache_stride, 16 lines per loop pass, until %r26 reaches
 * start + PAGE_SIZE - stride.
 */
ENTRY_CFI(purge_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)
1012
/*
 * flush_user_dcache_range_asm
 *
 * In:     %r26 = start address, %r25 = end address (addressed via %sr3)
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r21, %r22, %r23, %r26
 *
 * %r26 is rounded down to a dcache_stride boundary.  Loop 1 issues 16
 * fdc per pass for as long as a full 16-stride chunk fits below %r25
 * (unsigned compares); loop 2 then issues one fdc per pass while
 * %r26 is still below %r25.
 */
ENTRY_CFI(flush_user_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26	/* align start down to stride */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21	/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21	/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22	/* %r22 = end of first 16-line chunk */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)
1053
/*
 * flush_kernel_dcache_range_asm
 *
 * In:     %r26 = start address, %r25 = end address
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r21, %r22, %r23, %r26
 *
 * %r26 is rounded down to a dcache_stride boundary.  Loop 1 issues 16
 * fdc per pass for as long as a full 16-stride chunk fits below %r25
 * (unsigned compares); loop 2 then issues one fdc per pass while
 * %r26 is still below %r25.  Ends with sync and syncdma; note that the
 * sync sits inside the 88..89 ALTERNATIVE range here.
 */
ENTRY_CFI(flush_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26	/* align start down to stride */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21	/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21	/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22	/* %r22 = end of first 16-line chunk */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)
1095
/*
 * purge_kernel_dcache_range_asm
 *
 * In:     %r26 = start address, %r25 = end address
 * Out:    nothing; returns through %r2
 * Writes: %r1, %r21, %r22, %r23, %r26
 *
 * %r26 is rounded down to a dcache_stride boundary.  Loop 1 issues 16
 * pdc (purge) per pass for as long as a full 16-stride chunk fits below
 * %r25 (unsigned compares); loop 2 then issues one pdc per pass while
 * %r26 is still below %r25.  Ends with sync and syncdma; note that the
 * sync sits inside the 88..89 ALTERNATIVE range here.
 */
ENTRY_CFI(purge_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26	/* align start down to stride */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21	/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21	/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22	/* %r22 = end of first 16-line chunk */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)
1137
/*
 * flush_user_icache_range_asm
 *
 * Issue fic, through space register %sr3, on every icache_stride-sized
 * line from %r26 (rounded down to a stride boundary) for as long as
 * the line address is below %r25.
 *
 *   In:       %r26 = first address, %r25 = limit address,
 *             %r2  = return address; %sr3 selects the address space
 *   Scratch:  %r1, %r21, %r22, %r23; %r26 is advanced by fic,m
 *
 * NOTE(review): the 88:..89: span is handed to ALTERNATIVE with
 * ALT_COND_NO_ICACHE / INSN_NOP -- presumably it is replaced by nops
 * on machines without an instruction cache; confirm against the
 * ALTERNATIVE macro definition.  The trailing sync lies outside that
 * span.
 */
ENTRY_CFI(flush_user_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* align start down */

	/* %r21 = stride << 4, the bytes covered by one pass of loop 1 */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* end of first group */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */

	/*
	 * Unrolled loop, 16 lines per pass.  The add is also the delay
	 * slot of the branch above and is nullified when it is taken.
	 */
1:	add		%r22, %r21, %r22		/* end of next group */
	.rept		15
	fic,m		%r23(%sr3, %r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)		/* 16th line, delay slot */

	/* Tail: the delay-slot fic only executes while %r25 > %r26 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)
1178
/*
 * flush_kernel_icache_page
 *
 * Issue fic, through space register %sr4, on consecutive
 * icache_stride-sized lines starting at %r26 and covering
 * (1 << PAGE_SHIFT) bytes.
 *
 *   In:       %r26 = start address (used as given, no alignment is
 *                    applied here), %r2 = return address
 *   Scratch:  %r1, %r23, %r25; %r26 is advanced by fic,m
 *
 * The loop always issues lines in groups of 16 and has no partial
 * tail, so it relies on the page size being a whole number of such
 * groups -- NOTE(review): not checked in this routine.
 *
 * NOTE(review): the 88:..89: span is handed to ALTERNATIVE with
 * ALT_COND_NO_ICACHE / INSN_NOP -- presumably it is replaced by nops
 * on machines without an instruction cache; confirm against the
 * ALTERNATIVE macro definition.  The trailing sync lies outside that
 * span.
 */
ENTRY_CFI(flush_kernel_icache_page)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = stride */

	/* %r25 = 1 << PAGE_SHIFT */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25		/* one past the page */
	sub		%r25, %r23, %r25		/* address of last line */


	/*
	 * 16 lines per pass: 15 here plus one in the branch delay slot.
	 * The branch is not nullifying, so the delay-slot fic always runs.
	 */
1:
	.rept		15
	fic,m		%r23(%sr4, %r26)
	.endr
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)
1215
/*
 * flush_kernel_icache_range_asm
 *
 * Issue fic, through space register %sr4, on every icache_stride-sized
 * line from %r26 (rounded down to a stride boundary) for as long as
 * the line address is below %r25.
 *
 *   In:       %r26 = first address, %r25 = limit address,
 *             %r2  = return address
 *   Scratch:  %r1, %r21, %r22, %r23; %r26 is advanced by fic,m
 *
 * NOTE(review): the 88:..89: span is handed to ALTERNATIVE with
 * ALT_COND_NO_ICACHE / INSN_NOP -- presumably it is replaced by nops
 * on machines without an instruction cache; confirm against the
 * ALTERNATIVE macro definition.  The trailing sync lies outside that
 * span.
 */
ENTRY_CFI(flush_kernel_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* align start down */

	/* %r21 = stride << 4, the bytes covered by one pass of loop 1 */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* end of first group */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */

	/*
	 * Unrolled loop, 16 lines per pass.  The add is also the delay
	 * slot of the branch above and is nullified when it is taken.
	 */
1:	add		%r22, %r21, %r22		/* end of next group */
	.rept		15
	fic,m		%r23(%sr4, %r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)		/* 16th line, delay slot */

	/* Tail: the delay-slot fic only executes while %r25 > %r26 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_range_asm)
1256
1257	__INIT
1258
1259	/* align should cover use of rfi in disable_sr_hashing_asm and
1260	 * srdis_done.
1261	 */
1262	.align	256
/*
 * disable_sr_hashing_asm - clear the space-register hashing enable bits.
 *
 *   In:       %r26 = CPU family selector; it is compared against
 *                    SRHASH_PCXST, SRHASH_PCXL and SRHASH_PA20, and any
 *                    other value skips straight to srdis_done.
 *             %r2  = return address
 *   Clobbers: %r1, plus %r28 on the three matching paths.
 *
 * The routine drops to real mode with an rfi, does a read-modify-write
 * of the family-specific diagnose register (the mfdiag/mtdiag
 * instructions are emitted as raw .word opcodes), then returns to
 * virtual mode with a second rfi that installs KERNEL_PSW.
 *
 * The PSW I-bit is cleared with its previous value discarded (rsm
 * targets %r0), so the interrupt state on return is whatever
 * KERNEL_PSW specifies -- NOTE(review): confirm callers expect that.
 */
1263ENTRY_CFI(disable_sr_hashing_asm)
1264	/*
1265	 * Switch to real mode
1266	 */
1267	/* pcxt_ssm_bug */
1268	rsm		PSW_SM_I, %r0		/* clear PSW I-bit, old value discarded */
1269	load32		PA(1f), %r1		/* continuation address; PA() presumably gives the physical address */
1270	nop					/* five padding nops tied to the pcxt_ssm_bug note; rationale not stated here */
1271	nop
1272	nop
1273	nop
1274	nop
1275
1276	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1277	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1278	mtctl		%r0, %cr17		/* Clear IIASQ head */
1279	mtctl		%r1, %cr18		/* IIAOQ head */
1280	ldo		4(%r1), %r1		/* address of the following instruction */
1281	mtctl		%r1, %cr18		/* IIAOQ tail */
1282	load32		REAL_MODE_PSW, %r1
1283	mtctl		%r1, %ipsw		/* PSW that rfi will install */
1284	rfi					/* continue at 1: below */
1285	nop
1286
	/* Dispatch on the selector in %r26; no match means nothing to do */
12871:      cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
1288	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
1289	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
1290	b,n		srdis_done
1291
1292srdis_pcxs:
1293
1294	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */
1295
1296	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
1297	.word		0x141c1a00		/* must issue twice */
1298	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
1299	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
1300	.word		0x141c1600		/* mtdiag %r28, %dr0 */
1301	.word		0x141c1600		/* must issue twice */
1302	b,n		srdis_done
1303
1304srdis_pcxl:
1305
1306	/* Disable Space Register Hashing for PCXL */
1307
1308	.word		0x141c0600		/* mfdiag %dr0, %r28 */
1309	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
1310	.word		0x141c0240		/* mtdiag %r28, %dr0 */
1311	b,n		srdis_done
1312
1313srdis_pa20:
1314
1315	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */
1316
1317	.word		0x144008bc		/* mfdiag %dr2, %r28 */
1318	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
1319	.word		0x145c1840		/* mtdiag %r28, %dr2 */
	/* falls through into srdis_done */
1320
1321
1322srdis_done:
1323	/* Switch back to virtual mode */
1324	rsm		PSW_SM_I, %r0		/* clear PSW I-bit, old value discarded */
1325	load32 	   	2f, %r1			/* continuation address, taken without PA() */
1326	nop					/* same five-nop padding as on the way into real mode */
1327	nop
1328	nop
1329	nop
1330	nop
1331
1332	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1333	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1334	mtctl		%r0, %cr17		/* Clear IIASQ head */
1335	mtctl		%r1, %cr18		/* IIAOQ head */
1336	ldo		4(%r1), %r1		/* address of the following instruction */
1337	mtctl		%r1, %cr18		/* IIAOQ tail */
1338	load32		KERNEL_PSW, %r1
1339	mtctl		%r1, %ipsw		/* PSW that rfi will install */
1340	rfi					/* continue at 2: below */
1341	nop
1342
13432:      bv		%r0(%r2)		/* return to caller */
1344	nop
1345ENDPROC_CFI(disable_sr_hashing_asm)
1346
1347	.end
1348