xref: /openbmc/linux/arch/alpha/lib/ev67-strncat.S (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/ev67-strncat.S
 * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com>
 *
 * Append no more than COUNT characters from the null-terminated string SRC
 * to the null-terminated string DST.  Always null-terminate the new DST.
 *
 * This differs slightly from the semantics in libc in that we never write
 * past count, whereas libc may write to count+1.  This follows the generic
 * implementation in lib/string.c and is, IMHO, more sensible.
 *
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 * Scheduling notation:
 *	E	- either cluster
 *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 * Try not to change the actual algorithm if possible for consistency.
 */

#include <linux/export.h>
251da177e4SLinus Torvalds	.text
261da177e4SLinus Torvalds
271da177e4SLinus Torvalds	.align 4
281da177e4SLinus Torvalds	.globl strncat
291da177e4SLinus Torvalds	.ent strncat
301da177e4SLinus Torvaldsstrncat:
311da177e4SLinus Torvalds	.frame $30, 0, $26
321da177e4SLinus Torvalds	.prologue 0
331da177e4SLinus Torvalds
341da177e4SLinus Torvalds	mov	$16, $0		# set up return value
351da177e4SLinus Torvalds	beq	$18, $zerocount	# U :
361da177e4SLinus Torvalds	/* Find the end of the string.  */
371da177e4SLinus Torvalds	ldq_u   $1, 0($16)	# L : load first quadword ($16 may be misaligned)
381da177e4SLinus Torvalds	lda     $2, -1($31)	# E :
391da177e4SLinus Torvalds
401da177e4SLinus Torvalds	insqh   $2, $0, $2	# U :
411da177e4SLinus Torvalds	andnot  $16, 7, $16	# E :
421da177e4SLinus Torvalds	nop			# E :
431da177e4SLinus Torvalds	or      $2, $1, $1	# E :
441da177e4SLinus Torvalds
451da177e4SLinus Torvalds	nop			# E :
461da177e4SLinus Torvalds	nop			# E :
471da177e4SLinus Torvalds	cmpbge  $31, $1, $2	# E : bits set iff byte == 0
481da177e4SLinus Torvalds	bne     $2, $found	# U :
491da177e4SLinus Torvalds
501da177e4SLinus Torvalds$loop:	ldq     $1, 8($16)	# L :
511da177e4SLinus Torvalds	addq    $16, 8, $16	# E :
521da177e4SLinus Torvalds	cmpbge  $31, $1, $2	# E :
531da177e4SLinus Torvalds	beq     $2, $loop	# U :
541da177e4SLinus Torvalds
551da177e4SLinus Torvalds$found:	cttz	$2, $3		# U0 :
561da177e4SLinus Torvalds	addq	$16, $3, $16	# E :
571da177e4SLinus Torvalds	nop			# E :
581da177e4SLinus Torvalds	bsr	$23, __stxncpy	# L0 :/* Now do the append.  */
591da177e4SLinus Torvalds
601da177e4SLinus Torvalds	/* Worry about the null termination.  */
611da177e4SLinus Torvalds
621da177e4SLinus Torvalds	zapnot	$1, $27, $2	# U : was last byte a null?
631da177e4SLinus Torvalds	cmplt	$27, $24, $5	# E : did we fill the buffer completely?
641da177e4SLinus Torvalds	bne	$2, 0f		# U :
651da177e4SLinus Torvalds	ret			# L0 :
661da177e4SLinus Torvalds
671da177e4SLinus Torvalds0:	or	$5, $18, $2	# E :
681da177e4SLinus Torvalds	nop
691da177e4SLinus Torvalds	bne	$2, 2f		# U :
701da177e4SLinus Torvalds	and	$24, 0x80, $3	# E : no zero next byte
711da177e4SLinus Torvalds
721da177e4SLinus Torvalds	nop			# E :
731da177e4SLinus Torvalds	bne	$3, 1f		# U :
741da177e4SLinus Torvalds	/* Here there are bytes left in the current word.  Clear one.  */
751da177e4SLinus Torvalds	addq	$24, $24, $24	# E : end-of-count bit <<= 1
761da177e4SLinus Torvalds	nop			# E :
771da177e4SLinus Torvalds
781da177e4SLinus Torvalds2:	zap	$1, $24, $1	# U :
791da177e4SLinus Torvalds	nop			# E :
801da177e4SLinus Torvalds	stq_u	$1, 0($16)	# L :
811da177e4SLinus Torvalds	ret			# L0 :
821da177e4SLinus Torvalds
831da177e4SLinus Torvalds1:	/* Here we must clear the first byte of the next DST word */
841da177e4SLinus Torvalds	stb	$31, 8($16)	# L :
851da177e4SLinus Torvalds	nop			# E :
861da177e4SLinus Torvalds	nop			# E :
871da177e4SLinus Torvalds	ret			# L0 :
881da177e4SLinus Torvalds
891da177e4SLinus Torvalds$zerocount:
901da177e4SLinus Torvalds	nop			# E :
911da177e4SLinus Torvalds	nop			# E :
921da177e4SLinus Torvalds	nop			# E :
931da177e4SLinus Torvalds	ret			# L0 :
941da177e4SLinus Torvalds
951da177e4SLinus Torvalds	.end strncat
9600fc0e0dSAl Viro	EXPORT_SYMBOL(strncat)
97