/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
 *
 * xref: /openbmc/linux/arch/arm64/lib/memcmp.S
 * (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

/* L(name) expands to .Lname, i.e. an assembler-local label.  */
#define L(label) .L ## label

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  data1w/data2w are the 32-bit views of data1/data2.
   tmp2 is not referenced anywhere in this file.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

/*
 * int memcmp(const void *src1, const void *src2, size_t limit)
 *
 * In:   x0 = src1, x1 = src2, x2 = limit (number of bytes to compare).
 * Out:  w0 = 0 when all compared bytes are equal; otherwise negative if
 *       the first differing byte of src1 is lower (as an unsigned byte)
 *       than that of src2, positive if it is higher.  Mismatches found by
 *       the 4/8/16-byte compares return exactly -1 or 1; a mismatch found
 *       by the trailing byte loop returns the difference of the two bytes.
 * Uses: x0-x7 and the condition flags; no stack accesses, no calls.
 *
 * Strategy by length:
 *   0-7    L(less8): one 4-byte compare if at least 4 bytes, then a
 *          byte-at-a-time loop for what is left.
 *   8-16   the first 8 bytes, then the last 8 bytes (the two may overlap).
 *   17-32  the first 16 bytes, then the last 16 bytes (may overlap).
 *   33+    the first 16 bytes, L(loop16) in 16-byte steps, then the last
 *          16 bytes.  Above 128 bytes src1 is first rounded down to a
 *          16-byte boundary.
 *
 * Re-reading bytes that already compared equal is harmless, which is what
 * makes the overlapping tail loads valid.
 */
SYM_FUNC_START(__pi_memcmp)
	/* limit = n - 8; a borrow means fewer than 8 bytes in total.  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Bytes 0-7.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = n - 16; positive means more than 16 bytes in total.  */
	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8..16 bytes: limit is [-8..0] and src1/src2 are already 8 bytes in,
	   so [src, limit] addresses the final 8 bytes of each buffer.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Bytes 8-15.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  limit = n - 32 from here on.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  Both pointers
	   step back by the same (src1 & 15) bytes and limit grows by that
	   amount, so the loop re-reads bytes that already compared equal.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* Chain: if limit was > 16 compare the low words, else force "ne";
	   if still "eq" compare the high words, else force "ne".  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop ended on a mismatch or because at most 16 bytes remain;
	   check the pair just loaded to tell the two cases apart.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is <= 0 here,
	   so adding it moves both pointers back to 16 bytes before the end.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.
	   On little-endian the words are byte-reversed first so that the
	   lowest-addressed byte becomes the most significant one and the
	   unsigned compare orders the buffers by their first differing byte.
	   Words loaded through data1w/data2w are zero-extended, so the
	   64-bit compare is valid for them too.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Also entered from L(less4) with Z set, which yields 0.  */
L(ret_eq):
	cset	result, ne		/* 1 if different, 0 if equal.  */
	cneg	result, result, lo	/* -1 if data1 < data2 (unsigned).  */
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	/* limit = n - 4; no carry means fewer than 4 bytes in total.  */
	adds	limit, limit, 4
	b.lo	L(less4)
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	/* limit = bytes still to compare (0..3); zero means equal.  */
	adds	limit, limit, 4
	b.eq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* Compare the bytes while some remain; once limit hits zero the
	   flags are forced to "ne" so the loop exits.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	/* Difference of the last pair of bytes read (0 if they matched).  */
	sub	result, data1w, data2w
	ret
SYM_FUNC_END(__pi_memcmp)
/* memcmp is declared as a weak alias of __pi_memcmp.
   NOTE(review): EXPORT_SYMBOL_NOKASAN presumably exports memcmp except in
   KASAN builds - confirm against the macro's definition.  */
SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)
140