/* xref: /openbmc/linux/arch/alpha/lib/ev67-strlen.S (revision 00fc0e0d) */
/*
 * arch/alpha/lib/ev67-strlen.S
 * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com>
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 * Scheduling notation:
 *	E	- either cluster
 *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 */
#include <asm/export.h>
	.set noreorder
	.set noat

/*
 * strlen - length of a 0-terminated string
 *
 * In:       $16 = address of the string (any alignment)
 * Out:      $0  = number of bytes before the terminating 0 byte
 * Scratch:  $1, $2, $3
 * Return:   through the address held in $26
 *
 * Memory is read one aligned quadword at a time.  The first quadword
 * is the aligned one that contains the start of the string; its bytes
 * below the start address are forced non-zero so that they can never
 * be mistaken for the terminator.  Every later quadword is tested
 * unmodified.
 *
 * The instructions are written in groups of four, each tagged with its
 * scheduling class (notation is explained in the file header).
 * NOTE(review): the grouping presumably mirrors the 21264 fetch block;
 * confirm against the CWG before reordering or inserting instructions.
 */
	.globl	strlen
	.ent	strlen
	.align 4
strlen:
	ldq_u	$1, 0($16)	# L : $1 = aligned quadword holding the first byte ($16 may be misaligned)
	lda	$2, -1($31)	# E : $2 = all ones ($31 reads as zero)
	insqh	$2, $16, $2	# U : $2 = 0xff in every byte lane below the start of the string
	andnot	$16, 7, $0	# E : $0 = start address rounded down to a multiple of 8

	or	$2, $1, $1	# E : force the bytes before the string to be non-zero
	cmpbge	$31, $1, $2	# E : $2  <- bitmask: bit i == 1 <==> i-th byte == 0
	nop			# E : pads this group to four instructions
	bne	$2, $found	# U : terminator is already in the first quadword

	/* Scan the following aligned quadwords until one contains a 0 byte. */
$loop:	ldq	$1, 8($0)	# L : $1 = next aligned quadword
	addq	$0, 8, $0	# E : addr += 8
	cmpbge	$31, $1, $2	# E : $2 <- zero-byte bitmask for this quadword
	beq	$2, $loop	# U : no 0 byte here, keep scanning

	/* Here $0 = address of the quadword with the terminator, $2 = its bitmask. */
$found:
	cttz	$2, $3		# U0 : $3 = index of the lowest 0 byte in the quadword
	addq	$0, $3, $0	# E : $0 = address of the terminating 0 byte
	subq	$0, $16, $0	# E : $0 = terminator address - string start = length
	ret	$31, ($26)	# L0 : return to caller

	.end	strlen
	EXPORT_SYMBOL(strlen)