xref: /openbmc/linux/arch/x86/lib/copy_page_64.S (revision cd4d09ec)
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler. Use this when possible. But, don't use streaming
 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
 * prefetch distance based on SMP/UP.
 */
	ALIGN
ENTRY(copy_page)
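	/*
	 * If the CPU advertises X86_FEATURE_REP_GOOD, the "jmp" below is
	 * patched out by the alternatives mechanism at boot, so execution
	 * falls through to the rep movsq fast path; otherwise jump to the
	 * unrolled register copy.
	 */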
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
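	/* One page is 4096 bytes = 512 qwords; copy it with a single rep movsq. */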
	movl	$4096/8, %ecx
	rep	movsq
	ret
ENDPROC(copy_page)

ENTRY(copy_page_regs)
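	/*
	 * Unrolled copy using general-purpose registers.  %rbx and %r12 are
	 * callee-saved, so make room on the stack and preserve them first.
	 */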
	subq	$2*8,	%rsp
	movq	%rbx,	(%rsp)
	movq	%r12,	1*8(%rsp)

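	/*
	 * 4096/64 = 64 blocks of 64 bytes per page; the main loop handles all
	 * but the last 5, which .Loop2 copies without prefetching so we never
	 * prefetch past the end of the source page.
	 */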
	movl	$(4096/64)-5,	%ecx
	.p2align 4
.Loop64:
	dec	%rcx
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

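	/* Prefetch 5 cache lines (5*64 = 320 bytes) ahead of the read pointer. */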
	prefetcht0 5*64(%rsi)

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

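	/* Copy the remaining 5 blocks of 64 bytes without prefetching. */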
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

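	/* Restore the callee-saved registers and release the stack frame. */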
	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
ENDPROC(copy_page_regs)