/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#define L(label) .L ## label

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

SYM_FUNC_START(__pi_strcmp)
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 64-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
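	/* The ccmp above forces "not equal" (NZCV = 0) when data1w is the
	   NUL terminator, so the branch below leaves the byte loop either
	   at the end of the string or at the first differing byte.  */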
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	sub	result, data1, data2
	ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)
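
/*
 * For reference, a rough C model of the word-at-a-time NUL check used in
 * the loops above (assuming the word is already in little-endian byte
 * order; has_nul_byte is an illustrative name, not an existing helper):
 *
 *	static inline int has_nul_byte(unsigned long x)
 *	{
 *		return ((x - REP8_01) & ~(x | REP8_7f)) != 0;
 *	}
 *
 * Per byte this is (X - 1) & ~X & 0x80: bit 7 survives the mask only for
 * a 0x00 byte (or for a 0x01 byte above a real NUL, since a borrow chain
 * can only start at a zero byte), so the result is non-zero iff the word
 * contains a NUL terminator.
 */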