/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/page.h>
#include <asm/ppc_asm.h>

/*
 * copypage_power7 - copy one page, 128 bytes per loop iteration.
 *
 * In:	r3 = destination address (store side)
 *	r4 = source address (load side)
 *	Both are expected to be page aligned (see the prefetch note below).
 * Out:	nothing meaningful; r3/r4 are advanced past the copied page.
 *
 * The loop count is PAGE_SIZE/128, so exactly PAGE_SIZE bytes are copied.
 *
 * With CONFIG_ALTIVEC the routine calls enter_vmx_copy first. If that
 * returns non-zero in r3 the copy is done with 16-byte vector loads/stores
 * and finishes by tail-calling exit_vmx_copy; if it returns zero the copy
 * falls back to the GPR loop at .Lnonvmx_copy. Without CONFIG_ALTIVEC only
 * the GPR loop is built.
 *
 * Clobbers: r0, r5-r12, ctr, cr0 (plus lr and vr0-vr7 on the VMX path, and
 * whatever enter_vmx_copy/exit_vmx_copy clobber). r14-r20 are used by the
 * GPR loop but saved and restored around it.
 */
_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7, units=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32	/* lis sign-extends; keep only the low 32 bits */

	/*
	 * The RA operand below is the literal 0 ("no base register"), not
	 * GPR0: the effective address is just the RB register. r4/r9 carry
	 * the stream start addresses, r7/r10 the stream control words and
	 * r8 the GO word.
	 */
.machine push
.machine "power4"
	dcbt	0,r4,0b01000
	dcbt	0,r7,0b01010
	dcbtst	0,r9,0b01000
	dcbtst	0,r10,0b01010
	eieio
	dcbt	0,r8,0b01010	/* GO */
.machine pop

#ifdef CONFIG_ALTIVEC
	/*
	 * Calling out clobbers the volatile registers, so stash dest/src
	 * and the return address in the caller's frame (offsets 48/56 and
	 * 16 from the incoming r1 - presumably the ELF ABI v1 parameter
	 * save area and LR save slot) before creating our own frame.
	 */
	mflr	r0
	std	r3,48(r1)
	std	r4,56(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	.enter_vmx_copy
	cmpwi	r3,0		/* cr0 is consumed by the beq below */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STACKFRAMESIZE+48(r1)
	ld	r4,STACKFRAMESIZE+56(r1)
	mtlr	r0

	/* None of the instructions since cmpwi modify cr0. */
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	/*
	 * enter_vmx_copy returned 0: use the GPR loop. Our stack frame is
	 * deliberately still allocated, as .Lnonvmx_copy expects.
	 */
	beq	.Lnonvmx_copy

	/* VMX path needs no stack, so drop the frame before the loop. */
	addi	r1,r1,STACKFRAMESIZE

	/* Byte offsets of the 2nd..8th 16-byte vectors in a 128-byte chunk. */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	/*
	 * Load 128 bytes into vr7..vr0, then store them. RA is written as
	 * the literal 0 for the first vector: GPR0 holds the loop count here
	 * and is not part of the address.
	 */
	.align	5
1:	lvx	vr7,0,r4
	lvx	vr6,r4,r6
	lvx	vr5,r4,r7
	lvx	vr4,r4,r8
	lvx	vr3,r4,r9
	lvx	vr2,r4,r10
	lvx	vr1,r4,r11
	lvx	vr0,r4,r12
	addi	r4,r4,128
	stvx	vr7,0,r3
	stvx	vr6,r3,r6
	stvx	vr5,r3,r7
	stvx	vr4,r3,r8
	stvx	vr3,r3,r9
	stvx	vr2,r3,r10
	stvx	vr1,r3,r11
	stvx	vr0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	/* lr was restored above, so exit_vmx_copy returns to our caller. */
	b	.exit_vmx_copy		/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

	/*
	 * GPR copy loop. Entered with a STACKFRAMESIZE frame allocated and
	 * ctr = PAGE_SIZE/128. Sixteen registers are needed per iteration,
	 * so r14-r20 are saved in the frame and restored afterwards.
	 */
.Lnonvmx_copy:
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

	/* Load 16 doublewords (128 bytes), then store them. */
1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* Restore the saved registers and pop the frame. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr