/* SPDX-License-Identifier: GPL-2.0 */
/*
 * strlen() for PPC32
 *
 * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
 *
 * Inspired from glibc implementation
 */
#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/cache.h>

	.text

/*
 * Algorithm:
 *
 * 1) Given a word 'x', we can test to see if it contains any 0 bytes
 *    by subtracting 0x01010101, and seeing if any of the high bits of each
 *    byte changed from 0 to 1. This works because the least significant
 *    0 byte must have had no incoming carry (otherwise it's not the least
 *    significant), so it is 0x00 - 0x01 == 0xff. For all other
 *    byte values, either they have the high bit set initially, or when
 *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
 *    have their high bit set. The expression here is
 *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
 *    there were no 0x00 bytes in the word. You get 0x80 in bytes that
 *    match, but possibly false 0x80 matches in the next more significant
 *    byte to a true match due to carries. For little-endian this is
 *    of no consequence since the least significant match is the one
 *    we're interested in, but big-endian needs method 2 to find which
 *    byte matches.
 * 2) Given a word 'x', we can test to see _which_ byte was zero by
 *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
 *    This produces 0x80 in each byte that was zero, and 0x00 in all
 *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
 *    byte, and the '| x' part ensures that bytes with the high bit set
 *    produce 0x00. The addition will carry into the high bit of each byte
 *    iff that byte had one of its low 7 bits set. We can then just see
 *    which was the most significant bit set and divide by 8 to find how
 *    many to add to the index.
 *    This is from the book 'The PowerPC Compiler Writer's Guide',
 *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
 */

/*
 * strlen: scan a NUL-terminated string one aligned 32-bit word at a time.
 *
 * In:       r3  = address of the string
 * Out:      r3  = number of bytes before the first 0 byte
 * Scratch:  r0  = (address & 3), later scaled to a bit count (misaligned path)
 *           r6  = 0x80808080 (himagic)
 *           r7  = 0x01010101 (lomagic)
 *           r8  = temporary / zero-byte mask
 *           r9  = word currently being examined
 *           r10 = address of the word currently being examined
 * Also modifies CR0 (record-form instructions) and XER[CA] (addic, subfe).
 * The stack is not used.
 */
_GLOBAL(strlen)
	andi.	r0, r3, 3		/* r0 = address & 3; CR0 tested by bne- below */
	lis	r7, 0x0101
	addi	r10, r3, -4		/* bias by -4: lwzu adds 4 before every load */
	addic	r7, r7, 0x0101	/* r7 = 0x01010101 (lomagic) & clear XER[CA] */
	rotlwi	r6, r7, 31	/* r6 = 0x80808080 (himagic) */
	bne-	3f			/* address not word aligned: mask leading bytes */
	.balign IFETCH_ALIGN_BYTES
	/* Main loop, method 1: skip words that contain no 0 byte */
1:	lwzu	r9, 4(r10)		/* r10 += 4; r9 = word at r10 */
2:	subf	r8, r7, r9		/* r8 = x - lomagic */
	and.	r8, r8, r6		/* keep only the high bit of each byte */
	beq+	1b			/* no high bit set: no 0 byte in this word */
	andc.	r8, r8, r9		/* drop bytes whose high bit was already set in x */
	beq+	1b			/* nothing left: no 0 byte in this word */
	/* A 0 byte is present. Method 2: build an exact mask of the 0 bytes */
	andc	r8, r9, r6		/* r8 = x & 0x7f7f7f7f */
	orc	r9, r9, r6		/* r9 = x | 0x7f7f7f7f */
	subfe	r8, r6, r8		/* r8 = ~r6 + r8 + CA; CA is 0 (see addic), so r8 += 0x7f7f7f7f */
	nor	r8, r8, r9		/* r8 = 0x80 in each byte of x that was 0, else 0x00 */
	cntlzw	r8, r8			/* bit position of the most significant flagged byte */
	subf	r3, r3, r10		/* r3 = word address - string address */
	srwi	r8, r8, 3		/* bits -> bytes: index of the 0 byte in the word */
	add	r3, r3, r8		/* length = word offset + byte index */
	blr

	/* Misaligned string: make sure bytes before string are seen not 0 */
3:	xor	r10, r10, r0		/* clear the low two address bits: r10 = (address & ~3) - 4 */
	orc	r8, r8, r8		/* r8 = 0xffffffff (r8 | ~r8) */
	lwzu	r9, 4(r10)		/* r10 = address & ~3; r9 = aligned word holding the string start */
	slwi	r0, r0, 3		/* r0 = 8 * (address & 3): bits to mask off */
	srw	r8, r8, r0		/* zeros in the (address & 3) most significant bytes, ones below */
	orc	r9, r9, r8		/* r9 |= ~r8: force those leading bytes to 0xff */
	b	2b			/* join the main loop with the patched word */
EXPORT_SYMBOL(strlen)