xref: /openbmc/linux/arch/alpha/lib/ev67-strlen.S (revision f3c78e94)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/ev67-strlen.S
 * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com>
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 * Scheduling notation:
 *	E	- either cluster
 *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 */
#include <linux/export.h>
	.set noreorder
	.set noat

/*
 * size_t strlen(const char *s)
 *
 * In:       $16 = s, start of a 0-terminated string (any alignment)
 * Out:      $0  = number of bytes before the terminating 0
 * Clobbers: $1, $2, $3
 * Returns through $26; the stack is not used.
 *
 * Memory is read one aligned quadword at a time, so bytes in front of s
 * and behind the terminator may be loaded, but only from quadwords that
 * also hold part of the string or its terminator.
 *
 * The instruction order below is hand-scheduled in groups of four; the
 * E/U/L tags name the issue cluster of each instruction.  Keep the order
 * (including the nop) when editing.
 */
	.globl	strlen
	.ent	strlen
	.align 4
strlen:
	ldq_u	$1, 0($16)	# L : load first quadword ($16  may be misaligned)
	lda	$2, -1($31)	# E : $2 = all ones
	insqh	$2, $16, $2	# U : $2 = 0xff in the ($16 & 7) bytes below the start
	andnot	$16, 7, $0	# E : $0 = address of the quadword just loaded

	or	$2, $1, $1	# E : bytes in front of the string can no longer read as 0
	cmpbge	$31, $1, $2	# E : $2  <- bitmask: bit i == 1 <==> i-th byte == 0
	nop			# E :
	bne	$2, $found	# U : terminator is inside the first quadword

$loop:	ldq	$1, 8($0)	# L : next aligned quadword
	addq	$0, 8, $0	# E : addr += 8
	cmpbge	$31, $1, $2	# E : same zero-byte test as above
	beq	$2, $loop	# U : no zero byte yet, keep scanning

$found:
	cttz	$2, $3		# U0 : index of the first zero byte in this quadword
	addq	$0, $3, $0	# E : $0 = address of the terminator
	subq	$0, $16, $0	# E : length = terminator - s
	ret	$31, ($26)	# L0 :

	.end	strlen
	EXPORT_SYMBOL(strlen)