xref: /openbmc/linux/arch/alpha/lib/strlen.S (revision f3c78e94)
1b2441318SGreg Kroah-Hartman/* SPDX-License-Identifier: GPL-2.0 */
21da177e4SLinus Torvalds/*
31da177e4SLinus Torvalds * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds * Finds length of a 0-terminated string.  Optimized for the
61da177e4SLinus Torvalds * Alpha architecture:
71da177e4SLinus Torvalds *
81da177e4SLinus Torvalds *	- memory accessed as aligned quadwords only
91da177e4SLinus Torvalds *	- uses cmpbge to compare 8 bytes in parallel
101da177e4SLinus Torvalds *	- does binary search to find 0 byte in last
111da177e4SLinus Torvalds *	  quadword (HAKMEM needed 12 instructions to
121da177e4SLinus Torvalds *	  do this instead of the 9 instructions that
131da177e4SLinus Torvalds *	  binary search needs).
141da177e4SLinus Torvalds */
15*f3c78e94SMasahiro Yamada#include <linux/export.h>
161da177e4SLinus Torvalds	.set noreorder
171da177e4SLinus Torvalds	.set noat
181da177e4SLinus Torvalds
191da177e4SLinus Torvalds	.align 3
201da177e4SLinus Torvalds
211da177e4SLinus Torvalds	.globl	strlen
221da177e4SLinus Torvalds	.ent	strlen
231da177e4SLinus Torvalds
/*
 * strlen: count the bytes that precede the first 0 byte of a string.
 *
 *	In:	$16 = address of the string (any alignment)
 *	Out:	$0  = length in bytes
 *	Uses:	$1, $2, $3 as scratch; returns through $26
 *
 * $0 holds the 8-byte-aligned address of the quadword being examined.
 * Once a quadword containing a 0 byte is found, $0 is advanced to the
 * address of that byte and the start address is subtracted from it.
 */
241da177e4SLinus Torvaldsstrlen:
251da177e4SLinus Torvalds	ldq_u	$1, 0($16)	# load aligned quadword holding the first byte ($16 may be misaligned)
261da177e4SLinus Torvalds	lda	$2, -1($31)	# $2 = all ones
271da177e4SLinus Torvalds	insqh	$2, $16, $2	# $2 = 0xff in each byte below the string start, 0 elsewhere
281da177e4SLinus Torvalds	andnot	$16, 7, $0	# $0 = $16 rounded down to a multiple of 8
291da177e4SLinus Torvalds	or	$2, $1, $1	# force the bytes before the string to be non-zero
301da177e4SLinus Torvalds	cmpbge	$31, $1, $2	# $2  <- bitmask: bit i == 1 <==> i-th byte == 0
311da177e4SLinus Torvalds	bne	$2, found	# a 0 byte is already in the first quadword
321da177e4SLinus Torvalds
	/* Scan one aligned quadword per iteration until one contains a 0 byte. */
331da177e4SLinus Torvaldsloop:	ldq	$1, 8($0)	# load the next aligned quadword
341da177e4SLinus Torvalds	addq	$0, 8, $0	# addr += 8
351da177e4SLinus Torvalds	nop			# helps dual issue last two insns
361da177e4SLinus Torvalds	cmpbge	$31, $1, $2	# $2 <- zero-byte bitmask for this quadword
371da177e4SLinus Torvalds	beq	$2, loop	# no 0 byte yet, keep scanning
381da177e4SLinus Torvalds
	/*
	 * $2 is non-zero here.  Reduce it to its lowest set bit, then
	 * binary-search the position of that bit (0..7) and add it to $0.
	 */
391da177e4SLinus Torvaldsfound:	blbs	$2, done	# make aligned case fast (0 byte is byte 0, $0 already points at it)
401da177e4SLinus Torvalds	negq	$2, $3		# $3 = -$2
411da177e4SLinus Torvalds	and	$2, $3, $2	# $2 = lowest set bit only, i.e. the first 0 byte
421da177e4SLinus Torvalds
431da177e4SLinus Torvalds	and	$2, 0x0f, $1	# is the bit among bits 0-3?
441da177e4SLinus Torvalds	addq	$0, 4, $3	# candidate address: $0 + 4
451da177e4SLinus Torvalds	cmoveq	$1, $3, $0	# if not, the 0 byte is in bytes 4-7: $0 += 4
461da177e4SLinus Torvalds
471da177e4SLinus Torvalds	and	$2, 0x33, $1	# is the bit among bits 0, 1, 4, 5?
481da177e4SLinus Torvalds	addq	$0, 2, $3	# candidate address: $0 + 2
491da177e4SLinus Torvalds	cmoveq	$1, $3, $0	# if not: $0 += 2
501da177e4SLinus Torvalds
511da177e4SLinus Torvalds	and	$2, 0x55, $1	# is the bit at an even position?
521da177e4SLinus Torvalds	addq	$0, 1, $3	# candidate address: $0 + 1
531da177e4SLinus Torvalds	cmoveq	$1, $3, $0	# if not: $0 += 1
541da177e4SLinus Torvalds
551da177e4SLinus Torvaldsdone:	subq	$0, $16, $0	# length = address of the 0 byte - start address
561da177e4SLinus Torvalds	ret	$31, ($26)	# return to the address held in $26
571da177e4SLinus Torvalds
581da177e4SLinus Torvalds	.end	strlen
5900fc0e0dSAl Viro	EXPORT_SYMBOL(strlen)
60