/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

/*
 * ISO C memset - set a memory block to a byte value.
 *
 * rdi   destination
 * rsi   value (char)
 * rdx   count (bytes)
 *
 * rax   original destination
 *
 * The count is handled as a full 64-bit quantity, so blocks of 4GiB
 * and above are filled completely instead of being truncated to the
 * low 32 bits of the count.
 */

/*
 * memset_c - variant built on the string instructions (rep stosq/stosb).
 *
 * Scratch registers: rcx, rdx, rsi, r8, r9 (rdi is advanced by the
 * string instructions).
 */
	ALIGN
memset_c:
	CFI_STARTPROC
	movq %rdi,%r9		/* keep original dst for the return value */
	movl %edx,%r8d
	andl $7,%r8d		/* r8d = count % 8, bytes left after the qwords */
	movq %rdx,%rcx
	shrq $3,%rcx		/* rcx = count / 8, all 64 bits of the count */
	/* expand byte value */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	mulq %rsi		/* with rax, clobbers rdx */
	rep stosq		/* store rcx quadwords of the pattern */
	movl %r8d,%ecx
	rep stosb		/* store the remaining 0..7 bytes */
	movq %r9,%rax
	ret
	CFI_ENDPROC
ENDPROC(memset_c)

/*
 * memset / __memset - variant using an unrolled loop of plain stores.
 *
 * Register use:
 *   r10  original destination (returned in rax)
 *   r11  remaining byte count
 *   rax  fill byte replicated into all eight bytes
 *   rcx  loop counter
 *   r8, r9  scratch for the alignment fix-up
 */
ENTRY(memset)
ENTRY(__memset)
	CFI_STARTPROC
	movq %rdi,%r10
	movq %rdx,%r11

	/* expand byte value */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	mul    %rcx		/* with rax, clobbers rdx */

	/* align dst */
	movl  %edi,%r9d
	andl  $7,%r9d		/* r9d = dst % 8 */
	jnz  .Lbad_alignment
	CFI_REMEMBER_STATE
.Lafter_bad_alignment:

	movq %r11,%rcx
	shrq $6,%rcx		/* rcx = number of 64-byte chunks, 64-bit wide */
	jz	 .Lhandle_tail

	.p2align 4
.Lloop_64:
	decq   %rcx		/* sets ZF for the jnz below; mov/lea keep flags */
	movq  %rax,(%rdi)
	movq  %rax,8(%rdi)
	movq  %rax,16(%rdi)
	movq  %rax,24(%rdi)
	movq  %rax,32(%rdi)
	movq  %rax,40(%rdi)
	movq  %rax,48(%rdi)
	movq  %rax,56(%rdi)
	leaq  64(%rdi),%rdi
	jnz    .Lloop_64

	/* Handle tail in loops. The loops should be faster than hard
	   to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	movl	%r11d,%ecx
	andl    $63&(~7),%ecx	/* whole quadwords left in the last <64 bytes */
	jz 		.Lhandle_7
	shrl	$3,%ecx
	.p2align 4
.Lloop_8:
	decl   %ecx
	movq  %rax,(%rdi)
	leaq  8(%rdi),%rdi
	jnz    .Lloop_8

.Lhandle_7:
	movl	%r11d,%ecx
	andl	$7,%ecx		/* trailing 0..7 bytes */
	jz      .Lende
	.p2align 4
.Lloop_1:
	decl    %ecx
	movb 	%al,(%rdi)
	leaq	1(%rdi),%rdi
	jnz     .Lloop_1

.Lende:
	movq	%r10,%rax
	ret

	CFI_RESTORE_STATE
.Lbad_alignment:
	/*
	 * dst is not 8-byte aligned.  Counts of 7 or less go straight to
	 * the byte loop.  Otherwise one unaligned quadword store covers
	 * the head, then dst is advanced by 8 - (dst % 8) and the count
	 * reduced by the same amount so the main loops run aligned.
	 */
	cmpq $7,%r11
	jbe	.Lhandle_7
	movq %rax,(%rdi)	/* unaligned store */
	movq $8,%r8
	subq %r9,%r8		/* r8 = 8 - (dst % 8) */
	addq %r8,%rdi
	subq %r8,%r11
	jmp .Lafter_bad_alignment
.Lfinal:
	CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)

	/* Some CPUs run faster using the string instructions.
	   It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

	/*
	 * Replacement code: a two-byte short jump.  The rel8 displacement
	 * is computed as if the jump sits at memset, i.e. relative to the
	 * end of the jump instruction (memset + (2f - 1b)), so that it
	 * lands on memset_c.
	 */
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memset_c - memset) - (2f - 1b)	/* offset */
2:
	.previous
	/*
	 * Alternatives record tying memset to the replacement above under
	 * X86_FEATURE_REP_GOOD.  NOTE(review): field meanings below are
	 * presumed from the values emitted - confirm against the
	 * alternatives record layout in use.
	 */
	.section .altinstructions,"a"
	.align 8
	.quad memset			/* code to be patched */
	.quad 1b			/* replacement code */
	.byte X86_FEATURE_REP_GOOD	/* feature bit */
	.byte .Lfinal - memset		/* size of the memset body */
	.byte 2b - 1b			/* size of the replacement */
	.previous