/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/linkage.h>
#include <asm/export.h>

SYM_FUNC_START(memmove)
/*
 * void *memmove(void *dest_in, const void *src_in, size_t n)
 * -mregparm=3 passes these in registers:
 * dest_in: %eax
 * src_in: %edx
 * n: %ecx
 * See also: arch/x86/entry/calling.h for a description of the calling convention.
 *
 * n can remain in %ecx, but for `rep movsl`, we'll need dest in %edi and src
 * in %esi.
 */
.set dest_in, %eax
.set dest, %edi
.set src_in, %edx
.set src, %esi
.set n, %ecx
.set tmp0, %edx
.set tmp0w, %dx
.set tmp1, %ebx
.set tmp1w, %bx
.set tmp2, %eax
.set tmp3b, %cl

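/*
 * Illustrative call site (a sketch, not part of this file): with
 * -mregparm=3 the compiler lowers
 *
 *	void *p = memmove(dst, src, len);
 *
 * to roughly
 *
 *	movl	dst, %eax
 *	movl	src, %edx
 *	movl	len, %ecx
 *	call	memmove
 *
 * and the result (== dst) comes back in %eax, so no arguments are passed
 * on the stack.
 */
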
/*
 * Save all callee-saved registers, because this function is going to clobber
 * all of them:
 */
	pushl	%ebp
	movl	%esp, %ebp	// set standard frame pointer

	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushl	%eax		// save 'dest_in' parameter [eax] as the return value

	movl	src_in, src
	movl	dest_in, dest

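	/*
	 * Rough shape of the dispatch below (an illustrative sketch, not
	 * exact code):
	 *
	 *	if (n < 16)
	 *		goto tail;		// .Lmove_16B and below
	 *	if (src < dest)
	 *		copy backwards;		// overlap-safe direction
	 *	else
	 *		copy forwards;
	 *
	 * Copies of at least 680 bytes with mutually aligned pointers use
	 * rep movsl; everything else runs a 16-byte unrolled register loop
	 * whose 0..15 byte remainder is finished by the tail code.
	 */
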
	/* Copies of 16 bytes or more are handled by the loops below. */
	cmpl	$0x10, n
	jb	.Lmove_16B

	/* Decide forward/backward copy mode */
	cmpl	dest, src
	jb	.Lbackwards_header
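	/*
	 * Backward copying is only needed when the regions overlap with
	 * src < dest: e.g. for src = buf, dest = buf + 4, n = 8, a forward
	 * copy would overwrite source bytes 4..7 before reading them,
	 * while copying from the highest address down reads every source
	 * byte before it can be clobbered.  With src >= dest the forward
	 * direction is always safe.
	 */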

	/*
	 * The movs instruction has a high startup latency, so small
	 * copies are done with general-purpose registers instead.
	 */
	cmpl	$680, n
	jb	.Ltoo_small_forwards
	/* rep movs only pays off when src and dest are mutually aligned. */
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$0xff, tmp0
	jz	.Lforward_movs
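	/*
	 * The test above takes the rep movsl path only when the low bytes
	 * of src and dest are equal, i.e. the pointers are mutually
	 * aligned modulo 256.  For example src ending in ...04 with dest
	 * ending in ...04 qualifies, while src ...04 with dest ...06 falls
	 * through to the register loop below.
	 */
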
.Ltoo_small_forwards:
	subl	$0x10, n

	/* We gobble 16 bytes forward in each loop. */
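	/*
	 * n was already reduced by 16 above, so when the subl at the top
	 * of the loop borrows (CF set, jae not taken) fewer than 16 bytes
	 * remain after the current iteration; the lea instructions do not
	 * modify flags, so jae still tests that subl.  The addl after the
	 * loop turns n back into the 0..15 byte remainder for the tail
	 * code.  Example: n = 40 becomes 24 before the loop, then 8 and -8
	 * inside it; two 16-byte chunks are copied and 8 bytes are left
	 * for the tail.
	 */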
.Lmove_16B_forwards_loop:
	subl	$0x10, n
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	2*4(src), tmp0
	movl	3*4(src), tmp1
	movl	tmp0, 2*4(dest)
	movl	tmp1, 3*4(dest)
	leal	0x10(src), src
	leal	0x10(dest), dest
	jae	.Lmove_16B_forwards_loop
	addl	$0x10, n
	jmp	.Lmove_16B

	/* Copy forwards with rep movsl. */
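	/*
	 * rep movsl copies n/4 dwords; the n%4 trailing bytes are covered
	 * by loading the last dword of the source into tmp0 beforehand and
	 * storing it afterwards to the last dword of the destination
	 * (whose address is kept in tmp1, since dest itself is advanced by
	 * rep movsl).  That final store may rewrite up to three
	 * already-copied bytes with the same values, which is harmless.
	 */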
.p2align 4
.Lforward_movs:
	movl	-4(src, n), tmp0
	leal	-4(dest, n), tmp1
	shrl	$2, n
	rep	movsl
	movl	tmp0, (tmp1)
	jmp	.Ldone

	/* Copy backwards with rep movsl. */
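	/*
	 * Mirror image of .Lforward_movs: with the direction flag set,
	 * rep movsl copies n/4 dwords from the tail downwards, so the n%4
	 * leading bytes are covered by saving the first source dword in
	 * tmp0 up front and storing it to the start of dest (kept in tmp1)
	 * afterwards.  cld restores the direction flag, which the kernel
	 * requires to be clear.
	 */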
.p2align 4
.Lbackwards_movs:
	movl	(src), tmp0
	movl	dest, tmp1
	leal	-4(src, n), src
	leal	-4(dest, n), dest
	shrl	$2, n
	std
	rep	movsl
	movl	tmp0, (tmp1)
	cld
	jmp	.Ldone

	/* Prepare for a backward copy. */
.p2align 4
.Lbackwards_header:
	cmpl	$680, n
	jb	.Ltoo_small_backwards
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$0xff, tmp0
	jz	.Lbackwards_movs

	/* Advance src and dest to the ends of the buffers. */
.Ltoo_small_backwards:
	addl	n, src
	addl	n, dest
	subl	$0x10, n

	/* We gobble 16 bytes backward in each loop. */
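	/*
	 * Same counter trick as the forward loop: n was pre-decremented by
	 * 16 at .Ltoo_small_backwards and src/dest were advanced to the
	 * buffer ends, so each iteration copies the 16 bytes just below
	 * the current pointers.
	 */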
.Lmove_16B_backwards_loop:
	subl	$0x10, n

	movl	-1*4(src), tmp0
	movl	-2*4(src), tmp1
	movl	tmp0, -1*4(dest)
	movl	tmp1, -2*4(dest)
	movl	-3*4(src), tmp0
	movl	-4*4(src), tmp1
	movl	tmp0, -3*4(dest)
	movl	tmp1, -4*4(dest)
	leal	-0x10(src), src
	leal	-0x10(dest), dest
	jae	.Lmove_16B_backwards_loop
	/* Rewind src and dest to the remaining head bytes. */
	addl	$0x10, n
	subl	n, src
	subl	n, dest
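	/*
	 * src and dest now point at the first of the 0..15 not-yet-copied
	 * head bytes; the shared tail code below finishes them.
	 */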

	/* Copy 8 to 15 bytes. */
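	/*
	 * All four loads below happen before any store, so the copy is
	 * safe for arbitrarily overlapping buffers.  A rough C sketch
	 * (ignoring alignment, illustrative only):
	 *
	 *	u32 a = *(u32 *)src;
	 *	u32 b = *(u32 *)(src + 4);
	 *	u32 c = *(u32 *)(src + n - 8);
	 *	u32 d = *(u32 *)(src + n - 4);
	 *	*(u32 *)dest = a;
	 *	*(u32 *)(dest + 4) = b;
	 *	*(u32 *)(dest + n - 8) = c;
	 *	*(u32 *)(dest + n - 4) = d;
	 *
	 * The head and tail pairs overlap in the middle when n < 16, which
	 * is harmless.  The same load-everything-then-store pattern is used
	 * for the 4..7 and 2..3 byte cases below.
	 */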
.p2align 4
.Lmove_16B:
	cmpl	$8, n
	jb	.Lmove_8B
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	-2*4(src, n), tmp2
	movl	-1*4(src, n), src

	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	tmp2, -2*4(dest, n)
	movl	src, -1*4(dest, n)
	jmp	.Ldone

	/* Copy 4 to 7 bytes. */
.p2align 4
.Lmove_8B:
	cmpl	$4, n
	jb	.Lmove_4B
	movl	0*4(src), tmp0
	movl	-1*4(src, n), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, -1*4(dest, n)
	jmp	.Ldone

	/* Copy 2 to 3 bytes. */
.p2align 4
.Lmove_4B:
	cmpl	$2, n
	jb	.Lmove_1B
	movw	0*2(src), tmp0w
	movw	-1*2(src, n), tmp1w
	movw	tmp0w, 0*2(dest)
	movw	tmp1w, -1*2(dest, n)
	jmp	.Ldone

	/* Copy a single byte, if any. */
.p2align 4
.Lmove_1B:
	cmpl	$1, n
	jb	.Ldone
	movb	(src), tmp3b
	movb	tmp3b, (dest)
.p2align 4
.Ldone:
	popl	dest_in	// restore 'dest_in' [eax] as the return value
	/* Restore all callee-saved registers: */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	RET
SYM_FUNC_END(memmove)
EXPORT_SYMBOL(memmove)