/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 *	void *memmove(void *dest, const void *src, size_t count)
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest (returned unchanged, per the C contract)
 *
 * Clobbers: rcx, rsi, rdi, rdx, r8-r11, flags.
 *
 * Overall strategy:
 *  - counts < 32 bytes go straight to the common tail at label 1:,
 *    which copies with a few possibly-overlapping 16/8/4/2/1-byte
 *    load-then-store pairs;
 *  - otherwise, copy forward when that cannot destroy the source
 *    (src >= dest, or src + count <= dest), else copy backward;
 *  - each direction uses a 32-bytes-per-iteration register loop for
 *    moderate sizes, and "rep movsq" for large (>= 680 bytes) mutually
 *    aligned buffers.
 * On CPUs with ERMS, the entire forward path is patched at boot (via
 * the alternatives entries at the bottom) into a single "rep movsb".
 */
.weak memmove

ENTRY(memmove)
ENTRY(__memmove)
	CFI_STARTPROC

	/* memmove() returns dest, so set up %rax before anything else. */
	mov %rdi, %rax
	/* Counts below 32 bytes are handled entirely by the tail code. */
	cmp $0x20, %rdx
	jb	1f

	/*
	 * Decide forward/backward copy mode: forward is safe when
	 * src >= dest, or when the regions do not overlap at all
	 * (src + count <= dest); otherwise jump to 2: for backward.
	 */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

.Lmemmove_begin_forward:
	/*
	 * movsq has significant startup latency, so sizes below the
	 * (empirically chosen) 680-byte threshold are copied with
	 * general-purpose registers instead.
	 */
	cmp $680, %rdx
	jb	3f
	/*
	 * movsq only pays off when src and dest are mutually aligned:
	 * take the movsq path (4:) only when the low address bytes
	 * match (equal alignment modulo 256, hence modulo 8).
	 */
	cmpb %dil, %sil
	je 4f
3:
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop.  The loop continues
	 * while the "sub" below does not borrow (CF clear), i.e. while
	 * at least 32 bytes remained before the subtraction.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	/* Undo the final over-decrement: %rdx = remaining bytes (< 32). */
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle data forward by movsq.  %rdx need not be a multiple of
	 * 8: the last (possibly partial) source qword is stashed in %r11
	 * and its destination address in %r10 before the rep, then
	 * stored afterwards to cover the remainder bytes.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq (DF set, walking down from the
	 * top of both buffers).  The first source qword is saved up
	 * front in %r11 and stored last to cover the head remainder.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld					/* never leave DF set */
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Start to prepare for backward copy.  Same size-threshold and
	 * mutual-alignment checks as the forward path.
	 */
	.p2align 4
2:
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Calculate copy position to tail: point %rsi/%rdi one past the
	 * end of their buffers for the descending loop below.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop (same borrow-driven
	 * loop condition as the forward loop at 5:).
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Calculate copy position to head: move %rsi/%rdi back to the
	 * start of the remaining bytes so the common tail below works.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
1:
	/*
	 * Common tail: 0-31 bytes left at %rsi/%rdi.  Each size class
	 * loads the head and the tail first (possibly overlapping) and
	 * stores second, which is safe for overlapping regions.
	 */
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move data for 1 byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	retq
	CFI_ENDPROC

	/*
	 * ERMS (Enhanced REP MOVSB) replacement for the whole forward
	 * path: on CPUs advertising X86_FEATURE_ERMS, the alternatives
	 * machinery patches this over .Lmemmove_begin_forward at boot.
	 */
	.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
	/* Forward moving data. */
	movq %rdx, %rcx
	rep movsb
	retq
.Lmemmove_end_forward_efs:
	.previous

	.section .altinstructions,"a"
	altinstruction_entry .Lmemmove_begin_forward,		\
		.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,	\
		.Lmemmove_end_forward-.Lmemmove_begin_forward,	\
		.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
	.previous
ENDPROC(__memmove)
ENDPROC(memmove)