/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#define L(label) .L ## label

/* Parameters and result. */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables. */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

SYM_FUNC_START(__pi_memcmp)
	subs	limit, limit, 8
	b.lo	L(less8)

	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	subs	limit, limit, 8
	b.gt	L(more16)

	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	bne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings. */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes. */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done. */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal. */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	bne	L(return)

	/* Compare last 1-16 bytes using unaligned access. */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	bne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1. */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes. Limit is [-8..-1]. */
L(less8):
	adds	limit, limit, 4
	b.lo	L(less4)
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	adds	limit, limit, 4
	beq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000. */
	b.eq	L(byte_loop)
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)
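
/*
 * A minimal C sketch of the L(return) sequence above, for reference only;
 * it is kept inside this comment so it is not assembled. The helper name
 * "ret_value" is hypothetical, and __builtin_bswap64 stands in for the rev
 * instruction. On little-endian, the first differing byte of the two words
 * is the *least* significant differing byte, so both words are byte-reversed
 * before the unsigned compare; equal, lower and higher then map to 0, -1
 * and 1, exactly as the cset/cneg pair computes.
 *
 *	#include <stdint.h>
 *
 *	static int ret_value(uint64_t data1, uint64_t data2)
 *	{
 *	#ifndef __AARCH64EB__
 *		data1 = __builtin_bswap64(data1);	// rev data1, data1
 *		data2 = __builtin_bswap64(data2);	// rev data2, data2
 *	#endif
 *		if (data1 == data2)			// cmp data1, data2
 *			return 0;			// cset result, ne
 *		return data1 < data2 ? -1 : 1;		// cneg result, result, lo
 *	}
 */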