/*
 * Author: Anton Blanchard <anton@au.ibm.com>
 * Copyright 2015 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/ppc_asm.h>
#include <asm/export.h>

/*
 * Index registers for the long path; .Llong loads them with the constant
 * byte offsets 8, 16 and 24.  (r6 is also used as a scratch register by the
 * alignment check at function entry, before it takes on the off8 role.)
 */
#define off8	r6
#define off16	r7
#define off24	r8

/*
 * Data registers.  The byte loop uses rA/rB for the two bytes and rC for
 * their difference.  The long path uses them as four doubleword pairs:
 * rA/rB, rC/rD, rE/rF, rG/rH (first buffer / second buffer).  rD-rH map to
 * r27-r31, which the long path saves and restores around their use.
 */
#define rA	r9
#define rB	r10
#define rC	r11
#define rD	r27
#define rE	r28
#define rF	r29
#define rG	r30
#define rH	r31

/*
 * LD loads a doubleword so that the byte at the lowest address ends up as
 * the most significant byte of the register: a plain ldx on big endian, a
 * byte-reversed ldbrx on little endian.  An unsigned doubleword compare then
 * orders the two values the same way a byte-by-byte comparison would.
 */
#ifdef __LITTLE_ENDIAN__
#define LD	ldbrx
#else
#define LD	ldx
#endif

/*
 * memcmp: compare r5 bytes starting at r3 with r5 bytes starting at r4.
 *
 * In:   r3, r4 = addresses of the two buffers
 *       r5     = number of bytes to compare
 * Out:  r3 = 0 if every byte matches.
 *       If the byte loop finds the mismatch, r3 = (byte from the first
 *       buffer) - (byte from the second buffer).
 *       If the doubleword loop finds the mismatch, r3 = 1 when the first
 *       buffer's doubleword is the larger one (unsigned), otherwise -1.
 * Uses: r0, r6-r11, ctr, cr0, cr1, cr6, cr7.  The long path also uses
 *       r27-r31 but restores them before returning.
 */
_GLOBAL(memcmp)
	cmpdi	cr1,r5,0

	/*
	 * Use the short loop unless both strings are 8B aligned: the OR of
	 * the two addresses has a non-zero low three bits if either one is
	 * misaligned.
	 */
	or	r6,r3,r4
	andi.	r6,r6,7

	/* Use the short loop if length is less than 32B */
	cmpdi	cr6,r5,31

	/* Zero length: equal.  Misaligned: bytes.  Aligned and >31: long. */
	beq	cr1,.Lzero
	bne	.Lshort
	bgt	cr6,.Llong

	/*
	 * Byte-at-a-time loop, unrolled four times.  ctr counts the bytes
	 * still to be compared: the bdz after each of the first three bytes
	 * (and the bdnz at the bottom for the fourth) decrements it once per
	 * byte and leaves through .Lzero when nothing is left.  Also entered
	 * from .Ltail with r5 = 1..31 to finish what the long loop left over.
	 */
.Lshort:
	mtctr	r5

1:	lbz	rA,0(r3)
	lbz	rB,0(r4)
	subf.	rC,rB,rA		/* rC = rA - rB, cr0 set from result */
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,1(r3)
	lbz	rB,1(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,2(r3)
	lbz	rB,2(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,3(r3)
	lbz	rB,3(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero

	addi	r3,r3,4
	addi	r4,r4,4

	bdnz	1b

	/* All bytes compared equal (or the length was zero). */
.Lzero:
	li	r3,0
	blr

	/* Byte mismatch: return the difference computed by subf. */
.Lnon_zero:
	mr	r3,rC
	blr

	/*
	 * Long path: both pointers are 8B aligned and r5 > 31.  Compares 32
	 * bytes (four doubleword pairs) per iteration.  The loop is software
	 * pipelined: the loads for the next 32B block are issued while the
	 * compare results of the previous block are still being branched on,
	 * so the order of the loads, cmpld and bne instructions below matters.
	 * Each CR field carries the result for one pair:
	 * cr0 = A/B, cr1 = C/D, cr6 = E/F, cr7 = G/H.
	 */
.Llong:
	li	off8,8
	li	off16,16
	li	off24,24

	/*
	 * Save r27-r31 (rD-rH) below the stack pointer; r1 itself is not
	 * moved.  NOTE(review): this relies on the area just below r1 being
	 * left untouched while this routine runs (ABI protected zone) -
	 * confirm for the target configuration.
	 */
	std	r31,-8(r1)
	std	r30,-16(r1)
	std	r29,-24(r1)
	std	r28,-32(r1)
	std	r27,-40(r1)

	/*
	 * ctr = number of whole 32B blocks (at least one, since r5 > 31);
	 * r5 = leftover bytes (0..31) for .Ltail.  The cr0 result of andi. is
	 * not used; cr0 is rewritten by the cmpld below before it is tested.
	 */
	srdi	r0,r5,5
	mtctr	r0
	andi.	r5,r5,31

	/* Prologue: load the first 32B block and start comparing it. */
	LD	rA,0,r3
	LD	rB,0,r4

	LD	rC,off8,r3
	LD	rD,off8,r4

	LD	rE,off16,r3
	LD	rF,off16,r4

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB

	addi	r3,r3,32
	addi	r4,r4,32

	/* Only one block in total: finish its compares at .Lfirst32. */
	bdz	.Lfirst32

	/*
	 * Load the second block while comparing the first.  Each register
	 * pair is reloaded only after its cmpld for the previous block has
	 * been issued.  Only the A/B and C/D results of the first block are
	 * tested here; its E/F and G/H results stay in cr6/cr7 and are tested
	 * at the top of the main loop or in .Lsecond32.
	 */
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	/* Exactly two blocks: drain the pipeline at .Lsecond32. */
	bdz	.Lsecond32

	/* Align the head of the main loop to a 16-byte boundary. */
	.balign	16

	/*
	 * Steady state.  On entry to each iteration cr6/cr7 hold the E/F and
	 * G/H results from two blocks back, cr0 holds the A/B result of the
	 * previous block, and rC-rH still hold the previous block's data.
	 * Every bne tests a result produced before the load that precedes it.
	 */
1:	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdnz	1b

	/*
	 * Pipeline drain: no more loads.  Test the E/F and G/H results still
	 * pending from the block before last, then compare and test the
	 * remaining pairs of the last block (its A/B result is already in
	 * cr0).
	 */
.Lsecond32:
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	/*
	 * All whole 32B blocks matched.  Restore r27-r31, then either return
	 * 0 or let the byte loop compare the r5 leftover bytes; r3/r4 already
	 * point just past the last block compared.
	 */
.Ltail:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)

	cmpdi	r5,0
	beq	.Lzero
	b	.Lshort

	/*
	 * Only one 32B block: its A/B compare is already in cr0; compare the
	 * other three pairs and test all four results in address order.
	 */
.Lfirst32:
	cmpld	cr1,rC,rD
	cmpld	cr6,rE,rF
	cmpld	cr7,rG,rH

	bne	cr0,.LcmpAB
	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	b	.Ltail

	/*
	 * Mismatch exits, one per CR field.  Each is reached only when its
	 * pair compared not-equal, so "not greater" means "less": return 1 if
	 * the first buffer's doubleword is larger (unsigned), else -1.
	 */
.LcmpAB:
	li	r3,1
	bgt	cr0,.Lout
	li	r3,-1
	b	.Lout

.LcmpCD:
	li	r3,1
	bgt	cr1,.Lout
	li	r3,-1
	b	.Lout

.LcmpEF:
	li	r3,1
	bgt	cr6,.Lout
	li	r3,-1
	b	.Lout

.LcmpGH:
	li	r3,1
	bgt	cr7,.Lout
	li	r3,-1
	/* falls through to .Lout */

	/* Common long-path exit: restore r27-r31 and return r3. */
.Lout:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)
	blr
EXPORT_SYMBOL(memcmp)