xref: /openbmc/linux/arch/parisc/kernel/pacache.S (revision 6a87e0f0)
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 *  PARISC TLB and cache flushing support
4 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
5 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
6 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
7 */
8
9/*
10 * NOTE: fdc,fic, and pdc instructions that use base register modification
11 *       should only use index and base registers that are not shadowed,
12 *       so that the fast path emulation in the non access miss handler
13 *       can be used.
14 */
15
16#ifdef CONFIG_64BIT
17	.level	2.0w
18#else
19	.level	2.0
20#endif
21
22#include <asm/psw.h>
23#include <asm/assembly.h>
24#include <asm/cache.h>
25#include <asm/ldcw.h>
26#include <asm/alternative.h>
27#include <linux/linkage.h>
28#include <linux/init.h>
29#include <linux/pgtable.h>
30
31	.section .text.hot
32	.align	16
33
/*
 * flush_tlb_all_local - purge the whole instruction and data TLB of the
 * executing CPU.
 *
 * The ITLB_* and DTLB_* loop parameters are read from cache_info and walked
 * with pitlbe/pdtlbe.  The purge itself runs in real mode (REAL_MODE_PSW is
 * loaded through rfi); the I-bit state found on entry is kept in %r19 and
 * OR-ed back into KERNEL_PSW on the way out.
 *
 * Clobbers: %r1, %r19-%r22, %r28, %r29, %r31, %arg0-%arg3, %sr1
 * Returns through %r2.
 */
ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1		/* physical address of the real-mode code at 1: */
	/* NOTE(review): the nops presumably pad for the pcxt_ssm_bug named above - confirm */
	nop
	nop
	nop
	nop
	nop

	/* Build an interruption return frame that resumes at 1: in real mode */
	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl           %r1, %ipsw
	rfi
	nop

	/* From here until the second rfi every address used is physical */
1:      load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

	/* 88: .. fitdone is the range named by the ALTERNATIVE() at fitdone */
88:	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	/*
	 * Three nested loops: outer over space ids (%r22 iterations, space
	 * in %r20 stepped by %r21), middle over offsets (%arg2 iterations,
	 * address in %r28 stepped by %arg1), inner repeating the purge at
	 * the same address (count derived from %arg3).
	 */
	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)		/* delay slot: purge, address unchanged */
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	/* The addib sits in the delay slot of the unconditional movb,tr */
	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20		/* increment space */

fitdone:
	/* Registers the ITLB purge code above (88b..fitdone) for INSN_NOP patching */
	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)

	/* Flush Data Tlb */

	/* Same loop structure as the instruction TLB purge, using pdtlbe */
	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)		/* delay slot: purge, address unchanged */
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	/* The addib sits in the delay slot of the unconditional movb,tr */
	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22,fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1			/* virtual address of the return stub at 2: */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

	/* Back in virtual mode: return to the caller */
2:      bv		%r0(%r2)
	nop

	/*
	 * When running in qemu, drop whole flush_tlb_all_local function and
	 * replace by one pdtlbe instruction, for which QEMU will drop all
	 * local TLB entries.
	 */
3:	pdtlbe		%r0(%sr1,%r0)
	bv,n		%r0(%r2)
	/* The two instructions at 3: are the replacement code for the function entry */
	ALTERNATIVE_CODE(flush_tlb_all_local, 2, ALT_COND_RUN_ON_QEMU, 3b)
ENDPROC_CFI(flush_tlb_all_local)
187
188	.import cache_info,data
189
/*
 * flush_instruction_cache_local - flush the instruction cache by walking
 * the ICACHE_BASE/STRIDE/COUNT/LOOP parameters stored in cache_info with
 * fice.
 *
 * The I-bit is cleared for the duration of the walk and the previous
 * system mask (kept in %r22) is restored before returning.
 *
 * Clobbers: %r1, %r22, %r31, %arg0-%arg3, %sr1
 * Returns through %r2.
 */
ENTRY_CFI(flush_instruction_cache_local)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1		/* flush through space 0 */
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice            %r0(%sr1, %arg0)	/* delay slot: flush, address unchanged */
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	/* The addib sits in the delay slot of the unconditional movb,tr */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	/* Count of 15 or less (unsigned): go straight to the one-at-a-time loop */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

	/* 16 flushes per pass: 15 here plus one in the addib delay slot */
fioneloop1:
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
	/* Registers the whole flush sequence (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)
248
249
250	.import cache_info, data
/*
 * flush_data_cache_local - flush the data cache by walking the
 * DCACHE_BASE/STRIDE/COUNT/LOOP parameters stored in cache_info with fdce.
 *
 * The I-bit is cleared for the duration of the walk and the previous
 * system mask (kept in %r22) is restored before returning.
 *
 * Clobbers: %r1, %r22, %r31, %arg0-%arg3, %sr1
 * Returns through %r2.
 */
ENTRY_CFI(flush_data_cache_local)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1		/* flush through space 0 */
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)	/* delay slot: flush, address unchanged */
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	/* The addib sits in the delay slot of the unconditional movb,tr */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	/* Count of 15 or less (unsigned): go straight to the one-at-a-time loop */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

	/* 16 flushes per pass: 15 here plus one in the addib delay slot */
fdoneloop1:
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	sync
	mtsm		%r22			/* restore I-bit */
	/* Registers the whole flush sequence (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)
309
310/* Clear page using kernel mapping.  */
311
/*
 * clear_page_asm - zero one page through the kernel mapping.
 *
 * In:       %r26 = address of the page to clear
 * Clobbers: %r1 (pass counter), %r26 (stepped forward while clearing)
 * Returns through %r2.
 */
ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Each pass zeroes 128 bytes with sixteen doubleword stores.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	.irp	disp, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
	std		%r0, \disp(%r26)
	.endr

	/*
	 * Backward addib with ,n: the ldo in the delay slot runs only when
	 * the branch is taken.  Note reverse branch hint for addib is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * std cannot be used on a 32 bit kernel until (if) the full 64-bit
	 * register values are saved on interrupt, so each pass zeroes
	 * 64 bytes with sixteen word stores instead.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	.irp	disp, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
	stw		%r0, \disp(%r26)
	.endr

	/* Delay-slot ldo runs only when the backward branch is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)
372
373/* Copy page using kernel mapping.  */
374
/*
 * copy_page_asm - copy one page through the kernel mapping.
 *
 * In:       %r26 = destination address
 *           %r25 = source address
 * Clobbers: %r1 (pass counter), %r19-%r22 (data), %r25, %r26
 * Returns through %r2.
 */
ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	/* Each pass copies 128 bytes */
	ldi		(PAGE_SIZE / 128), %r1

	/* Loads run one register pair ahead of the stores: %r19/%r20 and
	 * %r21/%r22 alternate as the pair in flight. */
1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25		/* all loads of this pass are done: advance source */
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	/* Backward branch with ,n: the ldo in the delay slot advances the
	 * destination only when another pass follows. */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19		/* preload the first word of the first pass */
	ldi		(PAGE_SIZE / 64), %r1	/* each pass copies 64 bytes */

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25		/* all loads of this pass are done: advance source */
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26		/* advance destination */
	/* Backward branch with ,n: the delay-slot ldw preloads word 0 of the
	 * next pass and is nullified when the loop exits. */
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)
483
484/*
485 * NOTE: Code in clear_user_page has a hard coded dependency on the
486 *       maximum alias boundary being 4 Mb. We've been assured by the
487 *       parisc chip designers that there will not ever be a parisc
488 *       chip with a larger alias boundary (Never say never :-) ).
489 *
490 *       Yah, what about the PA8800 and PA8900 processors?
491 *
492 *       Subtle: the dtlb miss handlers support the temp alias region by
493 *       "knowing" that if a dtlb miss happens within the temp alias
494 *       region it must have occurred while in clear_user_page. Since
495 *       this routine makes use of processor local translations, we
496 *       don't want to insert them into the kernel page table. Instead,
497 *       we load up some general registers (they need to be registers
498 *       which aren't shadowed) with the physical page numbers (preshifted
499 *       for tlb insertion) needed to insert the translations. When we
500 *       miss on the translation, the dtlb miss handler inserts the
501 *       translation into the tlb using these values:
502 *
503 *          %r26 physical address of "to" translation
504 *          %r23 physical address of "from" translation
505 */
506
507	/*
508	 * copy_user_page_asm() performs a page copy using mappings
509	 * equivalent to the user page mappings.  It can be used to
510	 * implement copy_user_page() but unfortunately both the `from'
511	 * and `to' pages need to be flushed through mappings equivalent
512	 * to the user mappings after the copy because the kernel accesses
513	 * the `from' page through the kmap kernel mapping and the `to'
514	 * page needs to be flushed since code can be copied.  As a
515	 * result, this implementation is less efficient than the simpler
516	 * copy using the kernel mapping.  It only needs the `from' page
	 * to be flushed via the user mapping.  The kunmap routines handle
518	 * the flushes needed for the kernel mapping.
519	 *
520	 * I'm still keeping this around because it may be possible to
521	 * use it if more information is passed into copy_user_page().
522	 * Have to do some measurements to see if it is worthwhile to
523	 * lobby for such a change.
524	 *
525	 */
526
/*
 * copy_user_page_asm - copy one page through two temporary alias mappings
 * inside TMPALIAS_MAP_START.
 *
 * In:       %r26 = kernel virtual address of the `to' page
 *           %r25 = kernel virtual address of the `from' page
 *           %r24 = virtual address used to select the alias
 * On the way through, %r26 and %r23 are rewritten to hold the `to' and
 * `from' addresses with __PAGE_OFFSET subtracted; per the note above these
 * are the values the dtlb miss handler expects in those registers.
 *
 * Clobbers: %r1, %r19-%r23, %r26, %r28, %r29
 * Returns through %r2.
 */
ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r24, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */
	copy		%r28, %r29
	depi_safe	1, 31-TMPALIAS_SIZE_BITS,1, %r29	/* Form aliased virtual address 'from' */

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	/* Each ALTERNATIVE names exactly one of the pdtlb instructions above */
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19		/* preload the first doubleword of the first pass */
	ldi		(PAGE_SIZE / 128), %r1	/* each pass copies 128 bytes */

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29		/* advance source alias */
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28		/* advance destination alias */

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1	/* each pass copies 64 bytes */

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28		/* advance destination alias */

	/* No ,n here: the delay-slot ldo advances the source on every pass */
	addib,COND(>)		-1, %r1,1b
	ldo		64(%r29), %r29
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)
663
/*
 * clear_user_page_asm - zero one page through a temporary alias mapping
 * inside TMPALIAS_MAP_START.
 *
 * In:       %r26 = kernel virtual address of the page (converted in place
 *                  by tophys_r1)
 *           %r25 = virtual address used to select the alias
 * Clobbers: %r1, %r26, %r28
 * Returns through %r2.
 */
ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	/* %r28 = page-aligned alias address built from %r25 */
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Drop any stale translation for the alias address */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* Each pass zeroes 128 bytes with sixteen doubleword stores */
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:
	.irp	disp, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
	std		%r0, \disp(%r28)
	.endr
	/* The ldo in the delay slot steps to the next 128-byte chunk */
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28

#else	/* ! CONFIG_64BIT */
	/* Each pass zeroes 64 bytes with sixteen word stores */
	ldi		(PAGE_SIZE / 64), %r1

1:
	.irp	disp, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
	stw		%r0, \disp(%r28)
	.endr
	/* The ldo in the delay slot steps to the next 64-byte chunk */
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)
731
/*
 * flush_dcache_page_asm - flush one page from the data cache (fdc) through
 * a temporary alias mapping inside TMPALIAS_MAP_START.
 *
 * In:       %r25 = virtual address used to select the alias
 *           NOTE(review): %r26 is not touched here; presumably it carries
 *           the physical address the dtlb miss handler inserts for the
 *           alias - confirm against the caller.
 * Clobbers: %r1, %r25, %r28, %r31
 * Returns through %r2.
 */
ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31	/* %r31 = dcache_stride */

	/* %r25 = alias address + PAGE_SIZE - stride (last line of the page) */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* 16 flushes per pass: 15 here plus one in the cmpb delay slot */
1:	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		%r31(%r28)

	/* Registers the flush loop (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)
780
/*
 * purge_dcache_page_asm - purge one page from the data cache (pdc) through
 * a temporary alias mapping inside TMPALIAS_MAP_START.
 *
 * In:       %r25 = virtual address used to select the alias
 *           NOTE(review): %r26 is not touched here; presumably it carries
 *           the physical address the dtlb miss handler inserts for the
 *           alias - confirm against the caller.
 * Clobbers: %r1, %r25, %r28, %r31
 * Returns through %r2.
 */
ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31	/* %r31 = dcache_stride */

	/* %r25 = alias address + PAGE_SIZE - stride (last line of the page) */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* 16 purges per pass: 15 here plus one in the cmpb delay slot */
1:      pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		%r31(%r28)

	/* Registers the purge loop (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)
829
/*
 * flush_icache_page_asm - flush one page from the instruction cache (fic)
 * through a temporary alias mapping inside TMPALIAS_MAP_START.
 *
 * In:       %r25 = virtual address used to select the alias
 *           NOTE(review): %r26 is not touched here; presumably it carries
 *           the physical address the tlb miss handler inserts for the
 *           alias - confirm against the caller.
 * Clobbers: %r1, %r25, %r28, %r31
 * Returns through %r2.
 */
ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l         %r0(%sr4,%r28)
	/* Only the pitlb (1b..1b+4) is named for INSN_NOP patching */
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb           %r0(%sr4,%r28)
	/* Each ALTERNATIVE names exactly one of the two purges above */
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31	/* %r31 = icache_stride */

	/* %r25 = alias address + PAGE_SIZE - stride (last line of the page) */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
	/* 16 flushes per pass: 15 here plus one in the cmpb delay slot */
1:      fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

	/* Registers the flush loop (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)
888
/*
 * flush_kernel_dcache_page_asm - flush one page from the data cache (fdc)
 * using the address passed in directly.
 *
 * In:       %r26 = address inside the page (offset bits are cleared here)
 * Clobbers: %r1, %r23, %r25, %r26
 * Returns through %r2.
 */
ENTRY_CFI(flush_kernel_dcache_page_asm)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = dcache_stride */
	depi_safe	0, 31,PAGE_SHIFT, %r26	/* Clear any offset bits */

	/* %r25 = page address + PAGE_SIZE - stride (last line of the page) */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/* 16 flushes per pass: 15 here plus one in the cmpb delay slot */
1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

	/* Registers the flush loop (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)
925
/*
 * purge_kernel_dcache_page_asm - purge one page from the data cache (pdc)
 * using the address passed in directly.
 *
 * In:       %r26 = address inside the page (offset bits are cleared here)
 * Clobbers: %r1, %r23, %r25, %r26
 * Returns through %r2.
 */
ENTRY_CFI(purge_kernel_dcache_page_asm)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = dcache_stride */
	depi_safe	0, 31,PAGE_SHIFT, %r26	/* Clear any offset bits */

	/* %r25 = page address + PAGE_SIZE - stride (last line of the page) */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/* 16 purges per pass: 15 here plus one in the cmpb delay slot */
1:      pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

	/* Registers the purge loop (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)
962
/*
 * flush_user_dcache_range_asm - flush a range of data cache lines (fdc)
 * addressed through %sr3.
 *
 * In:       %r26 = start address (rounded down to a multiple of
 *                  dcache_stride here)
 *           %r25 = end address; lines are flushed while %r26 is below it
 *                  (unsigned compare)
 * Clobbers: %r1, %r21-%r23, %r26
 * Returns through %r2.
 */
ENTRY_CFI(flush_user_dcache_range_asm)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = dcache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* clear the bits set in stride - 1 */

	/* %r21 = stride * 16: the span covered by one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* %r22 = end of the first 16-line pass */
	/* Less than a full pass to do: skip to the single-line loop at 2: */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
	/* 16 flushes per pass: 15 here plus one in the cmpb delay slot */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

	/* Remainder: backward branch with ,n, so the delay-slot flush runs
	 * only while %r25 is still above %r26 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

	/* Registers the flush loops (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)
1003
/*
 * flush_kernel_dcache_range_asm - flush a range of data cache lines (fdc)
 * using the addresses passed in directly.
 *
 * In:       %r26 = start address (rounded down to a multiple of
 *                  dcache_stride here)
 *           %r25 = end address; lines are flushed while %r26 is below it
 *                  (unsigned compare)
 * Clobbers: %r1, %r21-%r23, %r26
 * Returns through %r2.
 */
ENTRY_CFI(flush_kernel_dcache_range_asm)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = dcache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* clear the bits set in stride - 1 */

	/* %r21 = stride * 16: the span covered by one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* %r22 = end of the first 16-line pass */
	/* Less than a full pass to do: skip to the single-line loop at 2: */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
	/* 16 flushes per pass: 15 here plus one in the cmpb delay slot */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

	/* Remainder: backward branch with ,n, so the delay-slot flush runs
	 * only while %r25 is still above %r26 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	/* Here the sync sits before 89:, i.e. inside the range named below */
	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)
1044
/*
 * purge_kernel_dcache_range_asm - purge a range of data cache lines (pdc)
 * using the addresses passed in directly.
 *
 * In:       %r26 = start address (rounded down to a multiple of
 *                  dcache_stride here)
 *           %r25 = end address; lines are purged while %r26 is below it
 *                  (unsigned compare)
 * Clobbers: %r1, %r21-%r23, %r26
 * Returns through %r2.
 */
ENTRY_CFI(purge_kernel_dcache_range_asm)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = dcache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* clear the bits set in stride - 1 */

	/* %r21 = stride * 16: the span covered by one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* %r22 = end of the first 16-line pass */
	/* Less than a full pass to do: skip to the single-line loop at 2: */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
	/* 16 purges per pass: 15 here plus one in the cmpb delay slot */
1:	add		%r22, %r21, %r22
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

	/* Remainder: backward branch with ,n, so the delay-slot purge runs
	 * only while %r25 is still above %r26 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	/* Here the sync sits before 89:, i.e. inside the range named below */
	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)
1085
/*
 * flush_user_icache_range_asm - flush a range of instruction cache lines
 * (fic) addressed through %sr3.
 *
 * In:       %r26 = start address (rounded down to a multiple of
 *                  icache_stride here)
 *           %r25 = end address; lines are flushed while %r26 is below it
 *                  (unsigned compare)
 * Clobbers: %r1, %r21-%r23, %r26
 * Returns through %r2.
 */
ENTRY_CFI(flush_user_icache_range_asm)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = icache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* clear the bits set in stride - 1 */

	/* %r21 = stride * 16: the span covered by one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22		/* %r22 = end of the first 16-line pass */
	/* Less than a full pass to do: skip to the single-line loop at 2: */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
	/* 16 flushes per pass: 15 here plus one in the cmpb delay slot */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)

	/* Remainder: backward branch with ,n, so the delay-slot flush runs
	 * only while %r25 is still above %r26 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

	/* Registers the flush loops (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)
1126
/*
 * flush_kernel_icache_page - flush one page from the instruction cache
 * (fic) through %sr4, using the address passed in directly.
 *
 * In:       %r26 = page address; unlike the dcache page routines, no
 *                  offset bits are cleared here
 * Clobbers: %r1, %r23, %r25, %r26
 * Returns through %r2.
 */
ENTRY_CFI(flush_kernel_icache_page)
	/* 88: .. 89: is the range named by the ALTERNATIVE() at 89: */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = icache_stride */

	/* %r25 = %r26 + PAGE_SIZE - stride (last line to flush) */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25


	/* 16 flushes per pass: 15 here plus one in the cmpb delay slot */
1:      fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

	/* Registers the flush loop (88b..89b) for INSN_NOP patching */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)
1163
/*
 * flush_kernel_icache_range_asm
 *
 * Issues fic,m (instruction cache flush with base register modification)
 * through space register %sr4 for every icache_stride-sized step of an
 * address range.
 *
 * In:   %r26  start address, rounded down to a multiple of icache_stride
 *       %r25  end address, flushing stops once %r26 is no longer below it
 *       %r2   return address, used by the final bv
 * Uses: %r1, %r21, %r22, %r23. %r26 is left advanced past the range.
 *
 * NOTE(review): the (stride - 1) rounding mask only works if icache_stride
 * is a power of two - confirm where icache_stride is initialised.
 */
1164ENTRY_CFI(flush_kernel_icache_range_asm)
116588:	ldil		L%icache_stride, %r1
1166	ldw		R%icache_stride(%r1), %r23	/* %r23 = icache_stride */
1167	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
1168	ANDCM		%r26, %r21, %r26	/* presumably %r26 &= ~%r21, verify ANDCM in asm/assembly.h */
1169
	/* %r21 = icache_stride << 4, the span covered by one 16-line pass */
1170#ifdef CONFIG_64BIT
1171	depd,z		%r23, 59, 60, %r21
1172#else
1173	depw,z		%r23, 27, 28, %r21
1174#endif
1175	add		%r26, %r21, %r22	/* %r22 = end of the first full pass */
	/* Less than one full pass fits below %r25: skip to the tail loop */
1176	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
	/*
	 * Main loop, 16 lines per pass. %r22 is bumped first so the compare
	 * at the bottom asks whether another full pass still fits. The 16th
	 * fic,m sits in the branch delay slot and always executes.
	 */
11771:	add		%r22, %r21, %r22
1178	fic,m		%r23(%sr4, %r26)
1179	fic,m		%r23(%sr4, %r26)
1180	fic,m		%r23(%sr4, %r26)
1181	fic,m		%r23(%sr4, %r26)
1182	fic,m		%r23(%sr4, %r26)
1183	fic,m		%r23(%sr4, %r26)
1184	fic,m		%r23(%sr4, %r26)
1185	fic,m		%r23(%sr4, %r26)
1186	fic,m		%r23(%sr4, %r26)
1187	fic,m		%r23(%sr4, %r26)
1188	fic,m		%r23(%sr4, %r26)
1189	fic,m		%r23(%sr4, %r26)
1190	fic,m		%r23(%sr4, %r26)
1191	fic,m		%r23(%sr4, %r26)
1192	fic,m		%r23(%sr4, %r26)
1193	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
1194	fic,m		%r23(%sr4, %r26)
1195
	/*
	 * Tail loop, one line per pass while %r25 is above %r26. With ,n on
	 * a backward branch the delay-slot fic,m is nullified when the
	 * branch is not taken, so nothing is flushed at or beyond %r25.
	 */
11962:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
1197	fic,m		%r23(%sr4, %r26)
1198
	/*
	 * NOTE(review): presumably lets the alternatives patcher replace
	 * everything between 88 and 89 with nops when ALT_COND_NO_ICACHE
	 * holds - confirm in asm/alternative.h.
	 */
119989:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
1200	sync
1201	bv		%r0(%r2)
1202	nop
1203ENDPROC_CFI(flush_kernel_icache_range_asm)
1204
1205	.text
1206
1207	/* align should cover use of rfi in disable_sr_hashing_asm and
1208	 * srdis_done.
1209	 */
1210	.align	256
/*
 * disable_sr_hashing_asm
 *
 * Turns off space register hashing by rewriting a CPU diagnose register.
 * The routine uses rfi to continue at the physical address of label 1
 * with REAL_MODE_PSW, updates the diagnose register for the CPU family
 * selected by %r26, then uses rfi again to continue at label 2 with
 * KERNEL_PSW and returns.
 *
 * In:   %r26  one of SRHASH_PCXST, SRHASH_PCXL or SRHASH_PA20. Any other
 *             value skips the diagnose register update.
 *       %r2   return address, used by the final bv
 * Uses: %r1, %r28
 *
 * The diagnose instructions are emitted as raw .word values. The
 * mnemonics next to them come from the existing comments and have not
 * been re-derived from the encodings here.
 */
1211ENTRY_CFI(disable_sr_hashing_asm)
1212	/*
1213	 * Switch to real mode
1214	 */
1215	/* pcxt_ssm_bug */
1216	rsm		PSW_SM_I, %r0
1217	load32		PA(1f), %r1		/* physical address of label 1 */
	/* NOTE(review): the nop padding looks tied to the pcxt_ssm_bug note above - confirm */
1218	nop
1219	nop
1220	nop
1221	nop
1222	nop
1223
1224	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1225	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1226	mtctl		%r0, %cr17		/* Clear IIASQ head */
1227	mtctl		%r1, %cr18		/* IIAOQ head */
1228	ldo		4(%r1), %r1
1229	mtctl		%r1, %cr18		/* IIAOQ tail */
1230	load32		REAL_MODE_PSW, %r1
1231	mtctl		%r1, %ipsw
1232	rfi
1233	nop
1234
	/* Dispatch on %r26, an unrecognised value goes straight to srdis_done */
12351:      cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
1236	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
1237	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
1238	b,n		srdis_done
1239
1240srdis_pcxs:
1241
1242	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */
1243
1244	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
1245	.word		0x141c1a00		/* must issue twice */
1246	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
1247	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
1248	.word		0x141c1600		/* mtdiag %r28, %dr0 */
1249	.word		0x141c1600		/* must issue twice */
1250	b,n		srdis_done
1251
1252srdis_pcxl:
1253
1254	/* Disable Space Register Hashing for PCXL */
1255
1256	.word		0x141c0600		/* mfdiag %dr0, %r28 */
1257	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
1258	.word		0x141c0240		/* mtdiag %r28, %dr0 */
1259	b,n		srdis_done
1260
1261srdis_pa20:
1262
1263	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */
1264
1265	.word		0x144008bc		/* mfdiag %dr2, %r28 */
1266	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
1267	.word		0x145c1840		/* mtdiag %r28, %dr2 */
	/* no branch needed, falls through to srdis_done */
1268
1269
1270srdis_done:
1271	/* Switch back to virtual mode */
1272	rsm		PSW_SM_I, %r0		/* clear the PSW I-bit before the iia queue is rebuilt */
1273	load32 	   	2f, %r1
1274	nop
1275	nop
1276	nop
1277	nop
1278	nop
1279
1280	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1281	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1282	mtctl		%r0, %cr17		/* Clear IIASQ head */
1283	mtctl		%r1, %cr18		/* IIAOQ head */
1284	ldo		4(%r1), %r1
1285	mtctl		%r1, %cr18		/* IIAOQ tail */
1286	load32		KERNEL_PSW, %r1
1287	mtctl		%r1, %ipsw
1288	rfi
1289	nop
1290
	/* rfi lands here with KERNEL_PSW loaded, return to the caller */
12912:      bv		%r0(%r2)
1292	nop
1293ENDPROC_CFI(disable_sr_hashing_asm)
1294
1295	.end
1296