/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored
 * by Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * strlen - compute the length of a NUL-terminated string
 *
 * Parameters:
 *	x0 - pointer to the string
 * Returns:
 *	x0 - number of bytes that precede the terminating NUL
 *
 * x1-x12 are used as scratch registers and the condition flags are
 * modified. The stack is not used.
 */

/* Incoming pointer and outgoing length share x0. */
str_in		.req	x0
result		.req	x0

/* Working registers. */
cursor		.req	x1	/* read pointer, always 16-byte aligned */
word0		.req	x2	/* first Dword of the current pair */
word1		.req	x3	/* second Dword of the current pair */
word1_pad	.req	x4	/* word1 with pre-string bytes forced non-zero */
synd0		.req	x5	/* NUL syndrome for word0 */
synd1		.req	x6	/* NUL syndrome for word1 */
scr0		.req	x7
scr1		.req	x8
scr2		.req	x9
scr3		.req	x10
ones		.req	x11	/* holds REP8_01 */
bitpos		.req	x12

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

SYM_FUNC_START_WEAK_PI(strlen)
	mov	ones, #REP8_01
	bic	cursor, str_in, #15	/* Round down to a 16-byte boundary. */
	ands	scr0, str_in, #15	/* scr0 = offset inside that block. */
	b.ne	.Lunaligned_start
	/*
	 * A zero byte is found with (X - 1) & ~(X | 0x7f), which is the
	 * same as (X - 1) & (~X) & 0x80: the result is non-zero iff X
	 * contains a zero byte, and all bytes of the word are tested in
	 * parallel.
	 *
	 * Two Dwords are examined per iteration. The start-up cost is a
	 * little higher, but that is recovered quickly, particularly on
	 * cores that can issue many instructions per cycle, because the
	 * two syndrome computations are independent of each other.
	 */
.Lscan_pair:
	ldp	word0, word1, [cursor], #16
.Lcheck_pair:
	sub	scr0, word0, ones
	orr	scr1, word0, #REP8_7f
	sub	scr2, word1, ones
	orr	scr3, word1, #REP8_7f
	bic	synd0, scr0, scr1
	bics	synd1, scr2, scr3
	/*
	 * If synd1 is zero, test synd0 as well; otherwise force
	 * NZCV = 0000 ("ne") so that the branch below falls through.
	 */
	ccmp	synd0, #0, #0, eq
	b.eq	.Lscan_pair

	/*
	 * cursor has already moved past the pair, so this is the distance
	 * from the start of the string to the end of the pair. Step back
	 * to the start of whichever Dword holds the first NUL.
	 */
	sub	result, cursor, str_in
	cbz	synd0, .Lnul_in_word1
	/* Big-endian recomputes the syndrome below, from word1. */
CPU_BE(	mov	word1, word0 )
	sub	result, result, #8
	mov	synd1, synd0
.Lnul_in_word1:
	/*
	 * On big-endian, carry propagation (when the last byte of the
	 * string is 0x01) makes the syndrome computed in the loop
	 * unreliable for locating the NUL. Byte-swapping the data and
	 * computing the syndrome again gives the correct byte.
	 */
CPU_BE(	rev	word1, word1 )
CPU_BE(	sub	scr0, word1, ones )
CPU_BE(	orr	scr1, word1, #REP8_7f )
CPU_BE(	bic	synd1, scr0, scr1 )

	sub	result, result, #8
	rev	synd1, synd1
	clz	bitpos, synd1
	add	result, result, bitpos, lsr #3	/* Bit index -> byte index. */
	ret

.Lunaligned_start:
	/*
	 * The string starts scr0 (1..15) bytes into an aligned 16-byte
	 * block. Load the whole block and force the bytes that precede the
	 * string to be non-zero so they cannot be taken for the terminator.
	 *
	 * The flags set by this cmp are consumed by the csinv/csel at the
	 * end of the sequence; nothing in between writes the flags.
	 */
	cmp	scr0, #8
	neg	scr0, scr0
	ldp	word0, word1, [cursor], #16
	lsl	scr0, scr0, #3		/* Negated byte offset -> bits. */
	mov	scr1, #~0
	/*
	 * Build the mask covering the leading bytes. Only (scr0 & 63) is
	 * used as the shift amount. Early bytes sit at the MSB end on
	 * big-endian and at the LSB end on little-endian.
	 */
CPU_BE(	lsl	scr1, scr1, scr0 )
CPU_LE(	lsr	scr1, scr1, scr0 )

	/*
	 * Offset <= 8: the string begins in word0, so mask word0 and keep
	 * word1 as loaded.
	 * Offset > 8: word0 lies wholly before the string, so make it all
	 * ones and apply the mask to word1 instead.
	 */
	orr	word0, word0, scr1
	orr	word1_pad, word1, scr1
	csinv	word0, word0, xzr, le
	csel	word1, word1, word1_pad, le
	b	.Lcheck_pair
SYM_FUNC_END_PI(strlen)
EXPORT_SYMBOL_NOKASAN(strlen)