xref: /openbmc/linux/arch/powerpc/lib/string_64.S (revision 01ab991f)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
8
9#include <asm/ppc_asm.h>
10#include <asm/linkage.h>
11#include <asm/asm-offsets.h>
12#include <asm/export.h>
13
/**
 * __arch_clear_user: - Zero a block of memory in user space, with less checking.
 * @to:   Destination address, in user space.
 * @n:    Number of bytes to zero.
 *
 * Zero a block of memory in user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */
25
26	.macro err1
27100:
28	EX_TABLE(100b,.Ldo_err1)
29	.endm
30
31	.macro err2
32200:
33	EX_TABLE(200b,.Ldo_err2)
34	.endm
35
36	.macro err3
37300:
38	EX_TABLE(300b,.Ldo_err3)
39	.endm
40
/*
 * Fault fixup handlers for __arch_clear_user.  The three entry points fall
 * through into one another, so their order must not change.
 *
 * On entry:
 *   r0 = 0 (the value being stored)
 *   r3 = current destination pointer
 *   r4 = number of bytes still to clear
 *   r8 = restart address (only meaningful for .Ldo_err1)
 *
 * Returns in r3 the number of bytes that could not be cleared.
 */

/*
 * Reached from err1 sites, where r3 may already have moved past bytes that
 * r4 still counts.  Rewind r3 to the address saved in r8 so that r3 and r4
 * agree again, then fall through.
 */
.Ldo_err1:
	mr	r3,r8

/*
 * Reached from err2 sites (r3 and r4 already agree) or by fall-through.
 * Retry the remaining r4 bytes one byte at a time, so that the count left
 * in r4 when a store faults is exact to the byte.
 */
.Ldo_err2:
	mtctr	r4
1:
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	addi	r4,r4,-1
	bdnz	1b

/*
 * Reached when a byte store above faults, or by fall-through once the loop
 * has run to completion (r4 is then 0).  Return the remaining count.
 */
.Ldo_err3:
	mr	r3,r4
	blr
55
/*
 * __arch_clear_user(to, n)
 *
 * In:   r3 = to, destination address
 *       r4 = n, number of bytes to zero
 * Out:  r3 = 0 on success; on a faulting store the fixup handlers
 *       (.Ldo_err1/.Ldo_err2/.Ldo_err3) return the number of bytes that
 *       could not be cleared instead.
 *
 * Register roles inside the function:
 *   r0      constant zero, the value stored
 *   r3      running destination pointer
 *   r4      bytes remaining
 *   r5      address of ppc64_caches, later bytes up to the next cache block
 *   r6      scratch (alignment amount / loop counts)
 *   r7      value loaded from DCACHEL1LOGBLOCKSIZE (used as a shift count)
 *   r8      restart address consumed by .Ldo_err1; set to r3 before every
 *           run of err1-tagged stores that advance r3 without updating r4
 *   r9      value loaded from DCACHEL1BLOCKSIZE (cache block size in bytes)
 *   r10     4 * r9 for the size check, then r9 - 1 as an offset mask
 *   cr7     low four bits of a length/alignment value (via mtocrf 0x01);
 *           cr7*4+0..3 test the 8, 4, 2 and 1 bits respectively
 *   ctr     loop counter
 *
 * NOTE(review): _GLOBAL_TOC and LOAD_REG_ADDR are defined outside this file;
 * presumably the TOC variant of the entry macro is used because
 * LOAD_REG_ADDR(r5, ppc64_caches) needs a valid TOC pointer - confirm.
 */
_GLOBAL_TOC(__arch_clear_user)
	cmpdi	r4,32
	neg	r6,r3			/* low bits of -to = bytes up to alignment */
	li	r0,0
	blt	.Lshort_clear		/* fewer than 32 bytes: no alignment work */
	mr	r8,r3			/* err1 restart point for the stores below */
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -to */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to reach 8 byte alignment */

	/* Get the destination 8 byte aligned */
	bf	cr7*4+3,1f
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

	/* Account for the alignment bytes only now; until here r8/r4 describe the start */
3:	sub	r4,r4,r6

	cmpdi	r4,32
	cmpdi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear	/* more than 512 bytes: try the dcbz path */

	/* Entered with r4 >= 32; clears r4 / 32 chunks and leaves r4 < 32 */
.Lmedium_clear:
	srdi	r6,r4,5
	mtctr	r6

	/* Do 32 byte chunks */
4:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32
	bdnz	4b

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpdi	r4,16
	blt	6f
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/* Up to 15 bytes to go */
6:	mr	r8,r3			/* err1 restart point for the tail stores */
	clrldi	r4,r4,(64-4)		/* r4 &= 15 */
	mtocrf	0x01,r4			/* cr7 = remaining length, one bit per store size */
	bf	cr7*4+0,7f
err1;	std	r0,0(r3)
	addi	r3,r3,8

7:	bf	cr7*4+1,8f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

8:	bf	cr7*4+2,9f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

9:	bf	cr7*4+3,10f
err1;	stb	r0,0(r3)

	/* Everything was cleared: return 0 */
10:	li	r3,0
	blr

	/*
	 * Only reached from the "bgt cr1" above, so the destination is 8 byte
	 * aligned, r4 > 512, and cr7 still holds the low 4 bits of the
	 * original -to.
	 */
.Llong_clear:
	LOAD_REG_ADDR(r5, ppc64_caches)

	/* The 8 bit of -to set means one more doubleword reaches 16 byte alignment */
	bf	cr7*4+0,11f
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cache block aligned */
11:	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)
	lwz	r9,DCACHEL1BLOCKSIZE(r5)

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.
	 *
	 * The check below falls back to the medium loop unless at least
	 * 4 cache blocks' worth of bytes remain.
	 */
	sldi	r10,r9,2
	cmpd	r4,r10
	blt	.Lmedium_clear

	neg	r6,r3
	addi	r10,r9,-1		/* r10 = block size - 1, offset-in-block mask */
	and.	r5,r6,r10		/* r5 = bytes up to the next block boundary */
	beq	13f			/* already cache block aligned */

	srdi	r6,r5,4			/* number of 16 byte steps to the boundary */
	mtctr	r6
	mr	r8,r3			/* err1 restart point; r4 is updated after the loop */
12:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	12b

	sub	r4,r4,r5

13:	srd	r6,r4,r7		/* number of whole cache blocks remaining */
	mtctr	r6
	mr	r8,r3			/* err1 restart point; r4 is updated after the loop */
14:
err1;	dcbz	0,r3			/* zero one whole cache block */
	add	r3,r3,r9
	bdnz	14b

	and	r4,r4,r10		/* bytes left over after the whole blocks */

	cmpdi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear
EXPORT_SYMBOL(__arch_clear_user)
180