xref: /openbmc/linux/arch/x86/lib/copy_page_64.S (revision 14b476e0)
1b2441318SGreg Kroah-Hartman/* SPDX-License-Identifier: GPL-2.0 */
2185f3d38SThomas Gleixner/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
3185f3d38SThomas Gleixner
4185f3d38SThomas Gleixner#include <linux/linkage.h>
5cd4d09ecSBorislav Petkov#include <asm/cpufeatures.h>
65e21a3ecSJuergen Gross#include <asm/alternative.h>
7784d5699SAl Viro#include <asm/export.h>
8185f3d38SThomas Gleixner
9090a3f61SBorislav Petkov/*
10090a3f61SBorislav Petkov * Some CPUs run faster using the string copy instructions (sane microcode).
11090a3f61SBorislav Petkov * It is also a lot simpler. Use this when possible. But, don't use streaming
12090a3f61SBorislav Petkov * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
13090a3f61SBorislav Petkov * prefetch distance based on SMP/UP.
14090a3f61SBorislav Petkov */
15185f3d38SThomas Gleixner	ALIGN
/*
 * void copy_page(void *to, void *from)
 *
 * Copy one 4096-byte page.  Kernel/SysV ABI: %rdi = destination,
 * %rsi = source.  On CPUs advertising X86_FEATURE_REP_GOOD the
 * ALTERNATIVE below is patched to a NOP and the fast-string
 * "rep movsq" path runs; otherwise it jumps to the unrolled
 * copy_page_regs fallback further down.
 * Clobbers: %rcx, flags; %rsi/%rdi are advanced past the page.
 */
166dcc5627SJiri SlabySYM_FUNC_START(copy_page)
17090a3f61SBorislav Petkov	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
18185f3d38SThomas Gleixner	movl	$4096/8, %ecx	/* 512 quadwords = one 4K page */
19185f3d38SThomas Gleixner	rep	movsq
20*14b476e0SPeter Zijlstra	RET
216dcc5627SJiri SlabySYM_FUNC_END(copy_page)
22784d5699SAl ViroEXPORT_SYMBOL(copy_page)
23185f3d38SThomas Gleixner
/*
 * copy_page_regs - unrolled register-based page copy (non-REP_GOOD CPUs).
 *
 * In: %rdi = destination, %rsi = source (both advanced past the page).
 * Copies 4096 bytes as 64 iterations of 64 bytes each: the first loop
 * does 59 chunks with a prefetch 5 cache lines ahead, the second loop
 * does the final 5 chunks without prefetching, so the prefetch never
 * reaches beyond the end of the source page.
 * %rbx and %r12 are callee-saved, hence spilled to the stack.
 */
24ef1e0315SJiri SlabySYM_FUNC_START_LOCAL(copy_page_regs)
2542693290SJan Beulich	subq	$2*8,	%rsp	/* two spill slots: %rbx, %r12 */
26185f3d38SThomas Gleixner	movq	%rbx,	(%rsp)
27185f3d38SThomas Gleixner	movq	%r12,	1*8(%rsp)
28185f3d38SThomas Gleixner
29185f3d38SThomas Gleixner	movl	$(4096/64)-5,	%ecx	/* 59 chunks with prefetch */
30185f3d38SThomas Gleixner	.p2align 4
31185f3d38SThomas Gleixner.Loop64:
32185f3d38SThomas Gleixner	dec	%rcx
33269833bdSMa Ling	movq	0x8*0(%rsi), %rax	/* load one 64-byte chunk ... */
34269833bdSMa Ling	movq	0x8*1(%rsi), %rbx
35269833bdSMa Ling	movq	0x8*2(%rsi), %rdx
36269833bdSMa Ling	movq	0x8*3(%rsi), %r8
37269833bdSMa Ling	movq	0x8*4(%rsi), %r9
38269833bdSMa Ling	movq	0x8*5(%rsi), %r10
39269833bdSMa Ling	movq	0x8*6(%rsi), %r11
40269833bdSMa Ling	movq	0x8*7(%rsi), %r12
41185f3d38SThomas Gleixner
42185f3d38SThomas Gleixner	prefetcht0 5*64(%rsi)	/* prefetch 5 cache lines (320B) ahead */
43185f3d38SThomas Gleixner
44269833bdSMa Ling	movq	%rax, 0x8*0(%rdi)	/* ... then store it */
45269833bdSMa Ling	movq	%rbx, 0x8*1(%rdi)
46269833bdSMa Ling	movq	%rdx, 0x8*2(%rdi)
47269833bdSMa Ling	movq	%r8,  0x8*3(%rdi)
48269833bdSMa Ling	movq	%r9,  0x8*4(%rdi)
49269833bdSMa Ling	movq	%r10, 0x8*5(%rdi)
50269833bdSMa Ling	movq	%r11, 0x8*6(%rdi)
51269833bdSMa Ling	movq	%r12, 0x8*7(%rdi)
52185f3d38SThomas Gleixner
53185f3d38SThomas Gleixner	leaq	64 (%rsi), %rsi	/* advance without touching flags */
54185f3d38SThomas Gleixner	leaq	64 (%rdi), %rdi
55185f3d38SThomas Gleixner
56185f3d38SThomas Gleixner	jnz	.Loop64	/* ZF still from the dec above */
57185f3d38SThomas Gleixner
58185f3d38SThomas Gleixner	movl	$5, %ecx	/* final 5 chunks: no prefetch past page end */
59185f3d38SThomas Gleixner	.p2align 4
60185f3d38SThomas Gleixner.Loop2:
61185f3d38SThomas Gleixner	decl	%ecx
62185f3d38SThomas Gleixner
63269833bdSMa Ling	movq	0x8*0(%rsi), %rax
64269833bdSMa Ling	movq	0x8*1(%rsi), %rbx
65269833bdSMa Ling	movq	0x8*2(%rsi), %rdx
66269833bdSMa Ling	movq	0x8*3(%rsi), %r8
67269833bdSMa Ling	movq	0x8*4(%rsi), %r9
68269833bdSMa Ling	movq	0x8*5(%rsi), %r10
69269833bdSMa Ling	movq	0x8*6(%rsi), %r11
70269833bdSMa Ling	movq	0x8*7(%rsi), %r12
71185f3d38SThomas Gleixner
72269833bdSMa Ling	movq	%rax, 0x8*0(%rdi)
73269833bdSMa Ling	movq	%rbx, 0x8*1(%rdi)
74269833bdSMa Ling	movq	%rdx, 0x8*2(%rdi)
75269833bdSMa Ling	movq	%r8,  0x8*3(%rdi)
76269833bdSMa Ling	movq	%r9,  0x8*4(%rdi)
77269833bdSMa Ling	movq	%r10, 0x8*5(%rdi)
78269833bdSMa Ling	movq	%r11, 0x8*6(%rdi)
79269833bdSMa Ling	movq	%r12, 0x8*7(%rdi)
80185f3d38SThomas Gleixner
81185f3d38SThomas Gleixner	leaq	64(%rdi), %rdi
82185f3d38SThomas Gleixner	leaq	64(%rsi), %rsi
83185f3d38SThomas Gleixner	jnz	.Loop2
84185f3d38SThomas Gleixner
85185f3d38SThomas Gleixner	movq	(%rsp), %rbx	/* restore callee-saved regs */
86185f3d38SThomas Gleixner	movq	1*8(%rsp), %r12
8742693290SJan Beulich	addq	$2*8, %rsp
88*14b476e0SPeter Zijlstra	RET
89ef1e0315SJiri SlabySYM_FUNC_END(copy_page_regs)
90