/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by Linaro
 * and re-licensed under GPLv2 for the Linux kernel. The original code can
 * be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * memcmp - compare two memory areas.
 *
 * When the two areas start at different offsets inside an 8-byte word, the
 * unaligned accesses are left to the hardware.
 *
 * Parameters:
 *	x0 - pointer to memory area 1 (only read)
 *	x1 - pointer to memory area 2 (only read)
 *	x2 - maximum number of bytes to compare
 * Returns:
 *	x0 - zero when no difference is found within x2 bytes; otherwise the
 *	     first differing byte of area 1 minus the matching byte of area 2,
 *	     both zero-extended, so the value is below or above zero.
 *
 * Registers written: x0-x4 and x6-x13, plus the condition flags.
 * The stack is not used.
 */

/* Arguments and return value. */
buf_a		.req	x0
buf_b		.req	x1
nbytes		.req	x2
cmpres		.req	x0

/* Working registers. */
chunk_a		.req	x3
chunk_a_w	.req	w3
chunk_b		.req	x4
chunk_b_w	.req	w4
xor_bits	.req	x6
exit_flag	.req	x7
scr1		.req	x8
scr2		.req	x9
scr3		.req	x10
cursor		.req	x11
ndwords		.req	x12
drop_mask	.req	x13

WEAK(memcmp)
	cbz	nbytes, .Lequal			/* Nothing to compare. */
	eor	scr1, buf_a, buf_b
	tst	scr1, #7			/* Same offset within a Dword? */
	b.ne	.Lskewed
	ands	scr1, buf_a, #7			/* scr1 = offset past alignment. */
	b.ne	.Lco_aligned_head
	/* ndwords = (nbytes - 1) / 8; nbytes != 0, so no underflow. */
	sub	ndwords, nbytes, #1
	lsr	ndwords, ndwords, #3

	/*
	 * Both pointers are 8-byte aligned here: compare one Dword per
	 * iteration.
	 */
.Ldword_loop:
	ldr	chunk_a, [buf_a], #8
	ldr	chunk_b, [buf_b], #8
.Ldword_check:
	eor	xor_bits, chunk_a, chunk_b	/* Non-zero on a mismatch. */
	subs	ndwords, ndwords, #1		/* Borrows on the last Dword. */
	/* exit_flag = xor_bits, or all ones once the counter has borrowed. */
	csinv	exit_flag, xor_bits, xzr, cs
	cbz	exit_flag, .Ldword_loop

	/* Counter still non-negative: the exit was caused by a mismatch. */
	tbz	ndwords, #63, .Lfound_diff

	/* Last Dword. If nbytes % 8 == 0 all eight bytes of it are valid. */
	ands	nbytes, nbytes, #7
	b.eq	.Lfound_diff
	/*
	 * Fewer than eight valid bytes remain. Clear the bytes beyond the
	 * limit in both Dwords, and set those bits in xor_bits so that the
	 * bit scan below stops at the end of the valid data.
	 */
	lsl	nbytes, nbytes, #3		/* Bytes -> bits. */
	mov	drop_mask, #-1
CPU_BE( lsr	drop_mask, drop_mask, nbytes )
CPU_LE( lsl	drop_mask, drop_mask, nbytes )
	bic	chunk_a, chunk_a, drop_mask
	bic	chunk_b, chunk_b, drop_mask
	orr	xor_bits, xor_bits, drop_mask
	b	.Lfound_diff

.Lco_aligned_head:
	/*
	 * The pointers share the same offset (scr1) but are not on an
	 * 8-byte boundary. Round both down, load the enclosing Dwords and
	 * neutralise the bytes that lie before the real start.
	 */
	bic	buf_a, buf_a, #7
	bic	buf_b, buf_b, #7
	ldr	chunk_a, [buf_a], #8
	ldr	chunk_b, [buf_b], #8
	/*
	 * Work out the Dword count without forming nbytes + scr1 first, as
	 * that sum could overflow: divide (nbytes - 1) by 8 and fold the
	 * offset into the remainder separately.
	 */
	sub	ndwords, nbytes, #1		/* nbytes != 0: no underflow. */
	and	scr3, ndwords, #7
	lsr	ndwords, ndwords, #3
	add	scr3, scr3, scr1
	add	ndwords, ndwords, scr3, lsr #3
	/* Only the low three bits of nbytes are consulted after this. */
	add	nbytes, nbytes, scr1

	lsl	scr1, scr1, #3			/* Offset in bytes -> bits. */
	neg	scr1, scr1			/* Shift count, taken modulo 64. */
	mov	scr2, #-1
	/* Build a mask that covers the bytes before the start address. */
	/* Big-endian: the early bytes are at the MSB end. */
CPU_BE( lsl	scr2, scr2, scr1 )
	/* Little-endian: the early bytes are at the LSB end. */
CPU_LE( lsr	scr2, scr2, scr1 )

	/* Force those bytes to all ones on both sides so they match. */
	orr	chunk_a, chunk_a, scr2
	orr	chunk_b, chunk_b, scr2
	b	.Ldword_check

	/* The two pointers have different offsets within a Dword. */
.Lskewed:
	cmp	nbytes, #8
	b.lo	.Lbyte_loop			/* Under 8 bytes: go bytewise. */

	/* scr1 = bytes from buf_a up to its next 8-byte boundary (1..8). */
	and	scr1, buf_a, #7
	neg	scr1, scr1
	add	scr1, scr1, #8
	/* scr2 = the same quantity for buf_b. */
	and	scr2, buf_b, #7
	neg	scr2, scr2
	add	scr2, scr2, #8
	subs	scr3, scr1, scr2		/* Signed gap between the two. */
	csel	cursor, scr1, scr2, hi		/* cursor = the larger one. */

	sub	nbytes, nbytes, cursor
	/* Compare the leading 'cursor' bytes one at a time. */
.Lhead_bytes:
	ldrb	chunk_a_w, [buf_a], #1
	ldrb	chunk_b_w, [buf_b], #1
	subs	cursor, cursor, #1
	/* If cursor hit zero, force NZCV = 0b0000 so the loop ends. */
	ccmp	chunk_a_w, chunk_b_w, #0, ne
	b.eq	.Lhead_bytes
	cbnz	cursor, .Lhead_mismatch		/* Stopped before the last byte. */
	cmp	chunk_a_w, chunk_b_w		/* Check the final head byte. */
	b.eq	.Lalign_buf_a
.Lhead_mismatch:
	sub	cmpres, chunk_a, chunk_b
	ret

.Lalign_buf_a:
	lsr	ndwords, nbytes, #3
	cbz	ndwords, .Ltail_bytes

	tst	buf_a, #7
	b.eq	.Lset_back_offset
	/*
	 * buf_a has overshot its alignment boundary. Step both pointers
	 * back by the (negative) gap in scr3 so that buf_a is aligned, and
	 * give those bytes back to the remaining count.
	 */
	add	buf_a, buf_a, scr3
	add	buf_b, buf_b, scr3
	sub	nbytes, nbytes, scr3
	lsr	ndwords, nbytes, #3
	cbz	ndwords, .Ltail_bytes
	/* Compare one Dword starting at the now aligned buf_a. */
	ldr	chunk_a, [buf_a], #8
	ldr	chunk_b, [buf_b], #8

	eor	xor_bits, chunk_a, chunk_b	/* Non-zero on a mismatch. */
	subs	ndwords, ndwords, #1
	/* exit_flag = xor_bits, or all ones when no whole Dword is left. */
	csinv	exit_flag, xor_bits, xzr, ne
	cbnz	exit_flag, .Lskewed_exit
	/* Distance of buf_b beyond its own alignment boundary. */
	and	scr3, scr3, #7

.Lset_back_offset:				/* buf_a is aligned from here on. */
	neg	cursor, scr3
.Lskewed_loop:
	/*
	 * Each round is split in two. First step back by scr3 bytes, which
	 * puts the buf_b access on an alignment boundary, and compare the
	 * eight bytes from there. Only if they all match is the Dword at
	 * the current position compared as well. The original author notes
	 * that this keeps every access inside the task's address space and
	 * so avoids out-of-range reads.
	 */
	ldr	chunk_a, [buf_a, cursor]
	ldr	chunk_b, [buf_b, cursor]
	eor	xor_bits, chunk_a, chunk_b	/* Non-zero on a mismatch. */
	cbnz	xor_bits, .Lfound_diff

	/* Second half: the Dword at the current position. */
	ldr	chunk_a, [buf_a], #8
	ldr	chunk_b, [buf_b], #8
	eor	xor_bits, chunk_a, chunk_b	/* Non-zero on a mismatch. */
	subs	ndwords, ndwords, #1
	/* Leave the loop on a mismatch or when ndwords reaches zero. */
	csinv	exit_flag, xor_bits, xzr, ne
	cbz	exit_flag, .Lskewed_loop
.Lskewed_exit:
	cbz	xor_bits, .Ltail_bytes		/* Equal so far: finish bytewise. */

	/* The most recent Dword comparison found a difference. */
.Lfound_diff:
	/*
	 * On little-endian the first byte in memory sits in the least
	 * significant bits, so byte-reverse all three values to bring it to
	 * the MSB end. CLZ can then count the leading equal bits.
	 */
CPU_LE( rev	xor_bits, xor_bits )
CPU_LE( rev	chunk_a, chunk_a )
CPU_LE( rev	chunk_b, chunk_b )

	/*
	 * The most significant set bit of xor_bits marks either the first
	 * differing bit or the end of the valid data. Shifting left by that
	 * many bits moves the decisive data into the top of each Dword.
	 */
	clz	cursor, xor_bits
	lsl	chunk_a, chunk_a, cursor
	lsl	chunk_b, chunk_b, cursor
	/*
	 * Zero-extend the top byte of each side (char is unsigned), then
	 * take the signed difference.
	 */
	lsr	chunk_a, chunk_a, #56
	sub	cmpres, chunk_a, chunk_b, lsr #56
	ret

.Ltail_bytes:
	/* nbytes % 8 == 0 means nothing is left: the areas are equal. */
	ands	nbytes, nbytes, #7
	b.eq	.Lequal

	/* Compare the remaining nbytes (non-zero) bytes one at a time. */
.Lbyte_loop:
	ldrb	chunk_a_w, [buf_a], #1
	ldrb	chunk_b_w, [buf_b], #1
	subs	nbytes, nbytes, #1

	/* If nbytes hit zero, force NZCV = 0b0000 so the loop ends. */
	ccmp	chunk_a_w, chunk_b_w, #0, ne
	b.eq	.Lbyte_loop
	sub	cmpres, chunk_a, chunk_b	/* Zero if the last pair matched. */
	ret
.Lequal:
	mov	cmpres, #0
	ret
ENDPIPROC(memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)