/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/cache.h>

/*
 * The memset implementation below is optimized to use prefetchw and prealloc
 * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6)
 * If you want to implement optimized memset for other possible L1 data cache
 * line lengths (32B and 128B) you should rewrite code carefully checking
 * we don't call any prefetchw/prealloc instruction for L1 cache lines which
 * don't belong to memset area.
 */

#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

/*
 * Empty stubs for non-64B cache lines. They must still declare the same
 * "reg, off" parameters as the real variants above: every call site passes
 * two arguments, and GAS rejects arguments handed to a parameterless
 * .macro, which would break the build whenever L1_CACHE_SHIFT != 6.
 */
.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif

/*
 * void *memset(void *s, int c, size_t n)
 * In:  r0 = s, r1 = c (only low byte used), r2 = n
 * Out: r0 = s, returned unchanged; r3 is the running store pointer so the
 *      return value is never clobbered.  Uses ARC zero-overhead loops
 *      (lp_count), so lp_count and r4/r5 are scratch.
 */
ENTRY_CFI(memset)
	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; don't clobber ret val

;;; if length < 8
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count, r2

	and.f	r4, r0, 0x03	; bytes needed to 4-byte align the destination
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	stb.ab	r1, [r3, 1]	; store fill byte, post-increment r3
	sub	r2, r2, 1
.Laligndestination:

;;; Destination is aligned
	and	r1, r1, 0xFF	; replicate the fill byte across all 4 bytes of r4
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5

	;; Split the length into 64-byte chunks (lp_count) plus a tail (r2)
	sub3	lp_count, r2, 8
	cmp	r2, 64
	bmsk.hi	r2, r2, 5
	mov.ls	lp_count, 0
	add3.hi	r2, r2, 8

;;; Convert len to Dwords, unfold x8
	lsr.f	lp_count, lp_count, 6

	lpnz	@.Lset64bytes
	;; LOOP START
	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching

#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]	; 8 x 8B double-word stores = 64B per iteration
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]	; 16 x 4B word stores = 64B per iteration
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset64bytes:

	lsr.f	lp_count, r2, 5	;Last remaining max 124 bytes
	lpnz	.Lset32bytes
	;; LOOP START
#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]	; 4 x 8B = 32B per iteration
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]	; 8 x 4B = 32B per iteration
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset32bytes:

	and.f	lp_count, r2, 0x1F	;Last remaining 31 bytes
.Lsmallchunk:
	lpnz	.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]	; byte-at-a-time tail
.Lcopy3bytes:

	j	[blink]

END_CFI(memset)

/*
 * void memzero(void *s, size_t n) — bzero-style wrapper.
 * In:  r0 = s, r1 = n
 */
ENTRY_CFI(memzero)
	; adjust bzero args to memset args
	mov	r2, r1
	b.d	memset		;tail call so need to tinker with blink
	mov	r1, 0
END_CFI(memzero)