/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (respectively one cycle / byte) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1
	brne	r2,0,.Lcharloop	; byte loop unless both strings are word-aligned
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; r4 = (r2 - 0x01010101) & ~r2 & 0x80808080
	brne	r4,0,.Lfound0	; str1's word may contain a NUL
	breq	r2,r3,.Lwordloop
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1		; return value if str1 > str2
	j_s.d	[blink]
	bset.lo	r0,r0,31	; make it negative if str1 < str2 (delay slot)

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.
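	   For example, if a 0x01 byte sits directly above a zero byte, the
	   borrow coming out of the zero byte turns the 0x01 into 0xff during
	   the subtraction, so its bit 7 is flagged even though the byte is
	   not NUL.  Such a false positive can only ever be a 0x01 byte, and
	   bit 0 of the original byte (1 for 0x01, 0 for a true NUL) tells
	   the two cases apart.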
	   */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend	; end of str1
	breq	r2,r3,.Lcharloop ; bytes equal, keep going
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; return byte difference (delay slot)
END_CFI(strcmp)
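
/* For reference, a rough C model of the word-at-a-time NUL detection used by
   .Lwordloop above.  This is only an illustrative sketch, not part of the
   build, and the name has_zero_byte is made up for this note:

	#include <stdint.h>

	// Non-zero if some byte of w looks like NUL: every 0x00 byte is
	// flagged, and a 0x01 byte may be falsely flagged when a borrow
	// propagates up from a less significant zero byte -- exactly the
	// case the big-endian .Lfound0 path above compensates for.
	static inline uint32_t has_zero_byte(uint32_t w)
	{
		return (w - 0x01010101u) & ~w & 0x80808080u;
	}
*/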