xref: /openbmc/linux/arch/x86/lib/copy_page_64.S (revision cff4fa84)
1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
2
3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5#include <asm/alternative-asm.h>
6
	/*
	 * copy_page_c — string-instruction page copy, used as the
	 * alternative body patched in on CPUs with X86_FEATURE_REP_GOOD
	 * (see the altinstructions entry at the bottom of this file).
	 *
	 * In:  %rdi = destination page, %rsi = source page (SysV AMD64)
	 * Copies 4096 bytes as 512 quadwords; clobbers %rcx, advances
	 * %rsi/%rdi by one page (rep movsq semantics), flags.
	 */
	ALIGN
copy_page_c:
	CFI_STARTPROC
	movl $4096/8,%ecx	/* 512 qwords = one 4 KiB page */
	rep movsq
	ret
	CFI_ENDPROC
ENDPROC(copy_page_c)
15
/* Don't use non-temporal (streaming) stores: ordinary cached stores
   are better when the target page ends up being used from cache. */
18
19/* Could vary the prefetch distance based on SMP/UP */
20
/*
 * void copy_page(void *to, void *from)
 *
 * Copy one 4096-byte page with an unrolled loop moving one 64-byte
 * cache line per iteration.
 *
 * In:      %rdi = to, %rsi = from (SysV AMD64)
 * Scratch: rax, rdx, rcx, r8-r11; rbx and r12 are callee-saved and
 *          are spilled to the stack for the duration of the copy.
 *          (r13 was formerly saved here as well but was never used,
 *          so its save/restore has been dropped.)
 *
 * The main loop prefetches 5 cache lines ahead of the loads, so the
 * final 5 iterations run in a second loop (.Loop2) without the
 * prefetch to avoid touching memory past the end of the source page.
 */
ENTRY(copy_page)
	CFI_STARTPROC
	/* Spill only the callee-saved registers we actually use. */
	subq	$2*8,%rsp
	CFI_ADJUST_CFA_OFFSET 2*8
	movq	%rbx,(%rsp)
	CFI_REL_OFFSET rbx, 0
	movq	%r12,1*8(%rsp)
	CFI_REL_OFFSET r12, 1*8

	/* 4096/64 = 64 cache lines; do all but the last 5 here. */
	movl	$(4096/64)-5,%ecx
	.p2align 4
.Loop64:
	dec     %rcx

	movq        (%rsi), %rax
	movq      8 (%rsi), %rbx
	movq     16 (%rsi), %rdx
	movq     24 (%rsi), %r8
	movq     32 (%rsi), %r9
	movq     40 (%rsi), %r10
	movq     48 (%rsi), %r11
	movq     56 (%rsi), %r12

	prefetcht0 5*64(%rsi)		/* stay 5 lines ahead of the loads */

	movq     %rax,    (%rdi)
	movq     %rbx,  8 (%rdi)
	movq     %rdx, 16 (%rdi)
	movq     %r8,  24 (%rdi)
	movq     %r9,  32 (%rdi)
	movq     %r10, 40 (%rdi)
	movq     %r11, 48 (%rdi)
	movq     %r12, 56 (%rdi)

	leaq    64 (%rsi), %rsi
	leaq    64 (%rdi), %rdi

	jnz     .Loop64

	/* Tail: the last 5 cache lines, no prefetch past page end. */
	movl	$5,%ecx
	.p2align 4
.Loop2:
	decl   %ecx

	movq        (%rsi), %rax
	movq      8 (%rsi), %rbx
	movq     16 (%rsi), %rdx
	movq     24 (%rsi), %r8
	movq     32 (%rsi), %r9
	movq     40 (%rsi), %r10
	movq     48 (%rsi), %r11
	movq     56 (%rsi), %r12

	movq     %rax,    (%rdi)
	movq     %rbx,  8 (%rdi)
	movq     %rdx, 16 (%rdi)
	movq     %r8,  24 (%rdi)
	movq     %r9,  32 (%rdi)
	movq     %r10, 40 (%rdi)
	movq     %r11, 48 (%rdi)
	movq     %r12, 56 (%rdi)

	leaq	64(%rdi),%rdi
	leaq	64(%rsi),%rsi

	jnz	.Loop2

	/* Restore callee-saved registers and tear down the frame. */
	movq	(%rsp),%rbx
	CFI_RESTORE rbx
	movq	1*8(%rsp),%r12
	CFI_RESTORE r12
	addq	$2*8,%rsp
	CFI_ADJUST_CFA_OFFSET -2*8
	ret
.Lcopy_page_end:	/* marks function length for the alternatives entry */
	CFI_ENDPROC
ENDPROC(copy_page)
102
	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

	/*
	 * Alternatives patching: on CPUs advertising
	 * X86_FEATURE_REP_GOOD, overwrite the start of copy_page at
	 * boot with a 2-byte short jump to copy_page_c, so the
	 * rep-movsq variant is used instead of the unrolled loop.
	 */
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb					/* jmp <disp8> */
	/* disp8 is relative to the end of the 2-byte jmp, which is
	   placed at copy_page — hence the (2f - 1b) correction. */
	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	/* entry: (orig insn, replacement, feature, orig len, repl len) */
	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
		.Lcopy_page_end-copy_page, 2b-1b
	.previous
117