xref: /openbmc/linux/arch/powerpc/lib/string_64.S (revision 52fb57e7)
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
20
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * TOC entry holding the address of ppc64_caches.  __clear_user loads it
 * with "ld r5,PPC64_CACHES@toc(r2)" and then reads the
 * DCACHEL1LOGLINESIZE / DCACHEL1LINESIZE fields through that pointer.
 */
	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches
	.section	".text"
28
/**
 * __clear_user: - Zero a block of memory in user space, with less checking.
 * @to:   Destination address, in user space.
 * @n:    Number of bytes to zero.
 *
 * Zero a block of memory in user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */
40
/*
 * err1 - tag the instruction that follows (written on the same line after
 * "err1;") as an access that may fault.
 *
 * Drops local label 100 at the current location and emits an __ex_table
 * entry pairing that address with the fixup handler .Ldo_err1.  That
 * handler restarts from the address saved in r8, so r8 and r4 must
 * describe the region being cleared wherever err1 is used.
 */
	.macro err1
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,.Ldo_err1
	.previous
	.endm
48
/*
 * err2 - tag the instruction that follows (written on the same line after
 * "err2;") as an access that may fault.
 *
 * Drops local label 200 at the current location and emits an __ex_table
 * entry pairing that address with the fixup handler .Ldo_err2.  That
 * handler continues from the current r3 with r4 bytes outstanding, so
 * both must be up to date wherever err2 is used.
 */
	.macro err2
200:
	.section __ex_table,"a"
	.align 3
	.llong 200b,.Ldo_err2
	.previous
	.endm
56
/*
 * err3 - tag the instruction that follows (written on the same line after
 * "err3;") as an access that may fault.
 *
 * Drops local label 300 at the current location and emits an __ex_table
 * entry pairing that address with the fixup handler .Ldo_err3, which
 * simply returns r4 (the bytes still outstanding) to the caller.
 */
	.macro err3
300:
	.section __ex_table,"a"
	.align 3
	.llong 300b,.Ldo_err3
	.previous
	.endm
64
/*
 * Fault fixup paths for __clear_user, reached through the __ex_table
 * entries emitted by the err1/err2/err3 macros.  The three handlers fall
 * through into one another.
 *
 * Expected register state:
 *   r0 = 0 (value being stored)
 *   r8 = start of the region that was being cleared   (.Ldo_err1 only)
 *   r3 = address to resume clearing from              (.Ldo_err2)
 *   r4 = bytes still to clear, counted from r8 for .Ldo_err1 and from r3
 *        for .Ldo_err2.  It must be non-zero: it is loaded into CTR and a
 *        zero count would make bdnz wrap around.
 *
 * The region is retried one byte at a time so that the exact number of
 * bytes that could not be cleared ends up in r4.
 */
.Ldo_err1:
	mr	r3,r8		/* restart from the saved start of the region */

.Ldo_err2:
	mtctr	r4		/* retry the r4 outstanding bytes one by one */
1:
err3;	stb	r0,0(r3)	/* a fault here ends the retry via .Ldo_err3 */
	addi	r3,r3,1
	addi	r4,r4,-1
	bdnz	1b

.Ldo_err3:
	mr	r3,r4		/* return value: bytes that were not cleared */
	blr
79
/*
 * __clear_user(to, n)
 *
 * In:   r3 = to (destination), r4 = n (byte count, unsigned)
 *       r2 = TOC pointer (only read on the .Llong_clear path)
 * Out:  r3 = number of bytes that could not be cleared, 0 on success
 * Uses: r0 (held at zero), r5-r10, ctr, cr0, cr1, cr7
 *
 * Strategy:
 *   - fewer than 32 bytes: .Lshort_clear handles 16/8/4/2/1 byte pieces.
 *   - otherwise align the destination to 8 bytes, then
 *       32..512 bytes left: .Lmedium_clear, 32 bytes per iteration;
 *       more than 512     : .Llong_clear, align to a cache line and use
 *                           dcbz, then finish through medium/short.
 *
 * All size tests use unsigned (logical) compares because n is an unsigned
 * quantity; a signed compare would treat a count with the top bit set as
 * "fewer than 32 bytes" and report success after clearing at most 31.
 *
 * Fault handling: every store is tagged with err1 or err2.  err1 sites
 * need r8 = start of the current region and r4 = bytes outstanding from
 * r8; err2 sites need r3/r4 to be current.
 */
_GLOBAL_TOC(__clear_user)
	cmpldi	r4,32
	neg	r6,r3			/* low bits of -to = distance to next boundary */
	li	r0,0			/* r0 = the zero written by every store */
	blt	.Lshort_clear
	mr	r8,r3			/* err1 restart point for the alignment stores */
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -to (8,4,2,1 byte flags) */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to reach 8 byte alignment */

	/* Get the destination 8 byte aligned */
	bf	cr7*4+3,1f
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r4,r4,r6		/* account for the alignment bytes just cleared */

	/* cr0/cr1 are used here so that cr7 survives for .Llong_clear */
	cmpldi	r4,32
	cmpldi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear

.Lmedium_clear:
	srdi	r6,r4,5			/* number of 32 byte chunks; r4 >= 32 here */
	mtctr	r6

	/* Do 32 byte chunks */
4:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32
	bdnz	4b

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpldi	r4,16
	blt	6f
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/* Up to 15 bytes to go */
6:	mr	r8,r3			/* err1 restart point for the tail */
	clrldi	r4,r4,(64-4)		/* r4 = remaining count, 0..15 */
	mtocrf	0x01,r4			/* cr7 = 8,4,2,1 byte flags of the count */
	bf	cr7*4+0,7f
err1;	std	r0,0(r3)
	addi	r3,r3,8

7:	bf	cr7*4+1,8f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

8:	bf	cr7*4+2,9f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

9:	bf	cr7*4+3,10f
err1;	stb	r0,0(r3)

10:	li	r3,0			/* success: nothing left uncleared */
	blr

.Llong_clear:
	ld	r5,PPC64_CACHES@toc(r2)	/* r5 = &ppc64_caches */

	/*
	 * cr7 still holds the low bits of -to from function entry; its 8s
	 * bit says whether one more doubleword is needed to move from 8 to
	 * 16 byte alignment.
	 */
	bf	cr7*4+0,11f
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cacheline aligned */
11:	lwz	r7,DCACHEL1LOGLINESIZE(r5)	/* r7 = log2(line size) */
	lwz	r9,DCACHEL1LINESIZE(r5)		/* r9 = line size in bytes */

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.  The threshold applied here is
	 * four cache lines; anything smaller goes to the medium loop.
	 */
	sldi	r10,r9,2
	cmpld	r4,r10
	blt	.Lmedium_clear

	neg	r6,r3
	addi	r10,r9,-1		/* r10 = line size - 1 (offset mask) */
	and.	r5,r6,r10		/* r5 = bytes up to the next cache line */
	beq	13f			/* already cache line aligned */

	srdi	r6,r5,4			/* 16 byte chunks needed; non-zero here */
	mtctr	r6
	mr	r8,r3			/* err1 restart point; r4 is adjusted after the loop */
12:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	12b

	sub	r4,r4,r5

13:	srd	r6,r4,r7		/* number of whole cache lines to zero */
	mtctr	r6
	mr	r8,r3			/* err1 restart point; r4 is adjusted after the loop */
14:
	/* RA is written as r0 but encodes "no base register": the address is r3 */
err1;	dcbz	r0,r3
	add	r3,r3,r9
	bdnz	14b

	and	r4,r4,r10		/* bytes left over after the whole lines */

	cmpldi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear
203