/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  PARISC TLB and cache flushing support
 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
 */

/*
 * NOTE: fdc, fic, and pdc instructions that use base register modification
 *       should only use index and base registers that are not shadowed,
 *       so that the fast path emulation in the non-access miss handler
 *       can be used.
 */

#ifdef CONFIG_64BIT
	.level	2.0w
#else
	.level	2.0
#endif

#include <asm/psw.h>
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#include <linux/init.h>

	.section .text.hot
	.align	16

ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there must be no intervening
	 * tlb operations, e.g. tlb misses, so the flush has to run
	 * in real mode with all interruptions disabled.
	 */

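	/*
	 * To reach real mode, clear the PSW Q-bit so the interruption
	 * instruction address queues may be written, point IIAOQ at the
	 * physical address of 1f below, and rfi with a PSW that has
	 * address translation disabled (REAL_MODE_PSW).  The nops after
	 * the rsm appear to be part of the pcxt_ssm_bug spacing
	 * workaround noted below.
	 */
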
	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:      load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

88:	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

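	/*
	 * Roughly, the three nested loops below behave like this
	 * illustrative C sketch (names taken from the cache_info
	 * fields loaded above; the exact preadjusted control flow
	 * differs slightly):
	 *
	 *	for (sid = 0; sid < sid_count; sid++) {
	 *		space = sid_base + sid * sid_stride;
	 *		addr = off_base;
	 *		for (off = 0; off < off_count; off++)
	 *			for (i = 0; i < loop; i++, addr += off_stride)
	 *				pitlbe(space, addr);
	 *	}
	 */
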
	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20		/* increment space */

fitdone:
	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)

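	/*
	 * The ALTERNATIVE above lets the boot-time patching code turn
	 * the whole instruction-tlb flush (label 88 up to fitdone) into
	 * nops on machines with a combined I/D tlb, where the data-tlb
	 * flush below already covers everything.
	 */
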
	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */

fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

2:      bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_tlb_all_local)
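
	/*
	 * Seen from C, these local flush routines look roughly like the
	 * following (illustrative prototypes only; the authoritative
	 * declarations live in the arch headers):
	 *
	 *	void flush_tlb_all_local(void *info);
	 *	void flush_instruction_cache_local(void);
	 *	void flush_data_cache_local(void *info);
	 *
	 * Each flushes only the local CPU, so SMP callers run them on
	 * every CPU, e.g. via on_each_cpu().
	 */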

	.import cache_info,data

ENTRY_CFI(flush_instruction_cache_local)
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice            %r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

fioneloop1:
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)


	.import cache_info, data
ENTRY_CFI(flush_data_cache_local)
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

fdoneloop1:
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	syncdma
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)

/* Clear page using kernel mapping.  */

ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Unroll the loop.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)
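
	/*
	 * Functionally the routine above is just an unrolled
	 *
	 *	memset(page, 0, PAGE_SIZE);
	 *
	 * The 64-bit path clears 16 x 8 = 128 bytes per iteration,
	 * hence the PAGE_SIZE / 128 trip count; the 32-bit path clears
	 * 16 x 4 = 64 bytes per iteration (PAGE_SIZE / 64 trips).
	 */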

/* Copy page using kernel mapping.  */

ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)
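
	/*
	 * Equivalent to memcpy(to, from, PAGE_SIZE) with %r26 = to and
	 * %r25 = from: each 64-bit iteration moves 128 bytes, with the
	 * loads scheduled ahead of the stores so a PA8x00 can issue two
	 * loads per cycle.
	 */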

/*
 * NOTE: Code in clear_user_page has a hard coded dependency on the
 *       maximum alias boundary being 4 MB. We've been assured by the
 *       parisc chip designers that there will not ever be a parisc
 *       chip with a larger alias boundary (Never say never :-) ).
 *
 *       Subtle: the dtlb miss handlers support the temp alias region by
 *       "knowing" that if a dtlb miss happens within the temp alias
 *       region it must have occurred while in clear_user_page. Since
 *       this routine makes use of processor local translations, we
 *       don't want to insert them into the kernel page table. Instead,
 *       we load up some general registers (they need to be registers
 *       which aren't shadowed) with the physical page numbers (preshifted
 *       for tlb insertion) needed to insert the translations. When we
 *       miss on the translation, the dtlb miss handler inserts the
 *       translation into the tlb using these values:
 *
 *          %r26 physical page (shifted for tlb insert) of "to" translation
 *          %r23 physical page (shifted for tlb insert) of "from" translation
 */

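/*
 * Illustrative sketch of how the temp alias virtual addresses are
 * formed in the routines below ("vaddr" is the user virtual address
 * argument; the 4 MB alias boundary is the one from the note above):
 *
 *	to   = TMPALIAS_MAP_START
 *	     | (vaddr & (0x400000 - 1) & ~(PAGE_SIZE - 1));
 *	from = to | 0x400000;	// the adjacent alias slot
 *
 * A dtlb miss on either address is then serviced straight from the
 * preloaded physical page numbers in %r26/%r23.
 */
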
        /* Drop prot bits and convert to page addr for iitlbt and idtlbt */
        #define PAGE_ADD_SHIFT  (PAGE_SHIFT-12)
        .macro          convert_phys_for_tlb_insert20  phys
        extrd,u         \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
#if _PAGE_SIZE_ENCODING_DEFAULT
        depdi           _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
#endif
	.endm
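
	/* For the default 4 kB pages (PAGE_ADD_SHIFT == 0), the extrd,u
	 * above amounts to this C sketch:
	 *
	 *	insert = (phys >> 7) & 0xffffffffUL;
	 *
	 * i.e. the physical page number shifted left by 5, leaving the
	 * low bits free for the page size encoding that the depdi then
	 * deposits.
	 */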

	/*
	 * copy_user_page_asm() performs a page copy using mappings
	 * equivalent to the user page mappings.  It can be used to
	 * implement copy_user_page() but unfortunately both the `from'
	 * and `to' pages need to be flushed through mappings equivalent
	 * to the user mappings after the copy because the kernel accesses
	 * the `from' page through the kmap kernel mapping and the `to'
	 * page needs to be flushed since code can be copied.  As a
	 * result, this implementation is less efficient than the simpler
	 * copy using the kernel mapping.  That copy only needs the `from'
	 * page to be flushed via the user mapping.  The kunmap routines
	 * handle the flushes needed for the kernel mapping.
	 *
	 * I'm still keeping this around because it may be possible to
	 * use it if more information is passed into copy_user_page().
	 * Have to do some measurements to see if it is worthwhile to
	 * lobby for such a change.
	 */

ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non-shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	convert_phys_for_tlb_insert20 %r23	/* convert phys addr to tlb insert format */
	depd		%r24,63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depdi		1, 41,1, %r29		/* Form aliased virtual address 'from' */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
	depw		%r24, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depwi		1, 9,1, %r29		/* Form aliased virtual address 'from' */
#endif

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r29), %r29
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)

ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)

ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

1:	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)
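
	/*
	 * The flush loop above computes its bound as alias + PAGE_SIZE
	 * - stride, so with 16 fdc,m per trip it is, schematically:
	 *
	 *	for (addr = alias; addr < alias + PAGE_SIZE; addr += stride)
	 *		fdc(addr);
	 *
	 * purge_dcache_page_asm below follows the same pattern with
	 * pdc,m.
	 */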

ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

1:      pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)

ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l         %r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb           %r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
1:      fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)

ENTRY_CFI(flush_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)

ENTRY_CFI(purge_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)

ENTRY_CFI(flush_user_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)
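
	/*
	 * Schematic equivalent of the range flush above (stride from
	 * dcache_stride, start/end in %r26/%r25; the same pattern
	 * repeats in the kernel dcache and icache range routines
	 * below):
	 *
	 *	start &= ~(stride - 1);
	 *	while (start + 16 * stride <= end)	// unrolled x16
	 *		for (i = 0; i < 16; i++, start += stride)
	 *			fdc(start);
	 *	while (start < end) {			// tail
	 *		fdc(start);
	 *		start += stride;
	 *	}
	 */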

ENTRY_CFI(flush_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)

ENTRY_CFI(purge_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)

ENTRY_CFI(flush_user_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)

ENTRY_CFI(flush_kernel_icache_page)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)

ENTRY_CFI(flush_kernel_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_range_asm)

	__INIT

	/* align should cover use of rfi in disable_sr_hashing_asm and
	 * srdis_done.
	 */
	.align	256
ENTRY_CFI(disable_sr_hashing_asm)
	/*
	 * Switch to real mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:      cmpib,=,n	SRHASH_PCXST, %r26, srdis_pcxs
	cmpib,=,n	SRHASH_PCXL, %r26, srdis_pcxl
	cmpib,=,n	SRHASH_PA20, %r26, srdis_pa20
	b,n		srdis_done

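	/* The mfdiag/mtdiag diagnose instructions used below are
	 * hand-encoded with .word, presumably because the assembler
	 * does not know them; per the adjacent comments, the PCXS/PCXT
	 * parts need each diagnose issued twice to take effect.
	 */
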
srdis_pcxs:

	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */

	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
	.word		0x141c1a00		/* must issue twice */
	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
	.word		0x141c1600		/* mtdiag %r28, %dr0 */
	.word		0x141c1600		/* must issue twice */
	b,n		srdis_done

srdis_pcxl:

	/* Disable Space Register Hashing for PCXL */

	.word		0x141c0600		/* mfdiag %dr0, %r28 */
	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
	.word		0x141c0240		/* mtdiag %r28, %dr0 */
	b,n		srdis_done

srdis_pa20:

	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */

	.word		0x144008bc		/* mfdiag %dr2, %r28 */
	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
	.word		0x145c1840		/* mtdiag %r28, %dr2 */

srdis_done:
	/* Switch back to virtual mode */
	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

2:      bv		%r0(%r2)
	nop
ENDPROC_CFI(disable_sr_hashing_asm)

	.end