xref: /openbmc/linux/arch/powerpc/lib/strlen_32.S (revision 39326182)
19412b234SChristophe Leroy/* SPDX-License-Identifier: GPL-2.0 */
29412b234SChristophe Leroy/*
39412b234SChristophe Leroy * strlen() for PPC32
49412b234SChristophe Leroy *
59412b234SChristophe Leroy * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
69412b234SChristophe Leroy *
79412b234SChristophe Leroy * Inspired from glibc implementation
89412b234SChristophe Leroy */
9*39326182SMasahiro Yamada#include <linux/export.h>
109412b234SChristophe Leroy#include <asm/ppc_asm.h>
119412b234SChristophe Leroy#include <asm/cache.h>
129412b234SChristophe Leroy
139412b234SChristophe Leroy	.text
149412b234SChristophe Leroy
/*
 * Algorithm:
 *
 * 1) Given a word 'x', we can test to see if it contains any 0 bytes
 *    by subtracting 0x01010101, and seeing if any of the high bits of each
 *    byte changed from 0 to 1. This works because the least significant
 *    0 byte must have had no incoming carry (otherwise it's not the least
 *    significant), so it is 0x00 - 0x01 == 0xff. For all other
 *    byte values, either they have the high bit set initially, or when
 *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
 *    have their high bit set. The expression here is
 *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
 *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
 *    match, but possibly false 0x80 matches in the next more significant
 *    byte to a true match due to carries.  For little-endian this is
 *    of no consequence since the least significant match is the one
 *    we're interested in, but big-endian needs method 2 to find which
 *    byte matches.
 * 2) Given a word 'x', we can test to see _which_ byte was zero by
 *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
 *    This produces 0x80 in each byte that was zero, and 0x00 in all
 *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
 *    byte, and the '| x' part ensures that bytes with the high bit set
 *    produce 0x00. Subtracting 0x80808080 and then 1 is the same as adding
 *    0x7f7f7f7f, and that addition will carry into the high bit of each
 *    byte iff that byte had one of its low 7 bits set. We can then just see
 *    which was the most significant bit set and divide by 8 to find how
 *    many to add to the index.
 *    This is from the book 'The PowerPC Compiler Writer's Guide',
 *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
 */
459412b234SChristophe Leroy
/*
 * strlen - length of a NUL-terminated string, scanned one 32-bit word
 *          at a time.
 *
 * In:   r3 = pointer to the string
 * Out:  r3 = number of bytes preceding the first 0x00 byte
 * Uses: r0, r6-r10, CR0, XER[CA]
 *
 * Register roles:
 *   r0  = r3 & 3, the byte misalignment of the string start
 *   r6  = 0x80808080 (himagic)
 *   r7  = 0x01010101 (lomagic)
 *   r8  = scratch, then the zero-byte mask
 *   r9  = word currently being examined
 *   r10 = address of that word (advanced by lwzu)
 *
 * Every load is from a 4-byte aligned address, so up to 3 bytes in front
 * of the string and up to 3 bytes past the terminator may be read.
 * The byte index is obtained with cntlzw, i.e. the most significant byte
 * of a loaded word is taken to be the one at the lowest address
 * (big-endian byte order).
 */
_GLOBAL(strlen)
	andi.	r0, r3, 3	/* r0 = misalignment (0..3), CR0 set for bne below */
	lis	r7, 0x0101	/* r7 = 0x01010000 */
	addi	r10, r3, -4	/* pre-bias: lwzu adds 4 before each load */
	addic	r7, r7, 0x0101	/* r7 = 0x01010101 (lomagic) & clear XER[CA] */
	rotlwi	r6, r7, 31	/* r6 = 0x80808080 (himagic) */
	bne-	3f		/* string not word aligned: mask the leading bytes */
	.balign IFETCH_ALIGN_BYTES
	/* Scan loop: fetch the next word and apply the quick test (method 1) */
1:	lwzu	r9, 4(r10)	/* r10 += 4; r9 = word at r10 */
2:	subf	r8, r7, r9	/* r8 = x - 0x01010101 */
	and.	r8, r8, r6	/* keep only the high bit of each byte */
	beq+	1b		/* no high bit set: no zero byte, next word */
	andc.	r8, r8, r9	/* drop high bits that were already set in x */
	beq+	1b		/* only bytes >= 0x80 triggered: next word */

	/* A zero byte is present: locate it exactly (method 2) */
	andc	r8, r9, r6	/* r8 = x & 0x7f7f7f7f */
	orc	r9, r9, r6	/* r9 = x | 0x7f7f7f7f */
	subfe	r8, r6, r8	/* r8 = r8 + ~r6 + CA = r8 + 0x7f7f7f7f (CA is still 0 from addic) */
	nor	r8, r8, r9	/* r8 = 0x80 in each byte of x that was zero, 0x00 elsewhere */
	cntlzw	r8, r8		/* bit position of the first zero byte's marker */
	subf	r3, r3, r10	/* r3 = address of this word - start of string */
	srwi	r8, r8, 3	/* bits -> byte index within the word */
	add	r3, r3, r8	/* length = word offset + byte index */
	blr

	/* Misaligned string: make sure bytes before string are seen not 0 */
3:	xor	r10, r10, r0	/* clear the low 2 address bits: r10 = aligned start - 4 */
	orc	r8, r8, r8	/* r8 = 0xffffffff, whatever r8 held before */
	lwzu	r9, 4(r10)	/* r10 = aligned start; r9 = word containing the first byte */
	slwi	r0, r0, 3	/* misalignment in bits */
	srw	r8, r8, r0	/* zero the top bits that cover the bytes before the string */
	orc	r9, r9, r8	/* r9 |= ~r8: force those leading bytes to 0xff */
	b	2b		/* test this word, then continue with the aligned loop */
EXPORT_SYMBOL(strlen)
79