1d2912cb1SThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-only */ 21f7e3dc0SClaudiu Zissulescu/* 31f7e3dc0SClaudiu Zissulescu * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) 41f7e3dc0SClaudiu Zissulescu */ 51f7e3dc0SClaudiu Zissulescu 61f7e3dc0SClaudiu Zissulescu#include <linux/linkage.h> 7e6a72b7dSEugeniy Paltsev#include <asm/cache.h> 81f7e3dc0SClaudiu Zissulescu 9e6a72b7dSEugeniy Paltsev/* 10e6a72b7dSEugeniy Paltsev * The memset implementation below is optimized to use prefetchw and prealloc 11e6a72b7dSEugeniy Paltsev * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) 12e6a72b7dSEugeniy Paltsev * If you want to implement optimized memset for other possible L1 data cache 13e6a72b7dSEugeniy Paltsev * line lengths (32B and 128B) you should rewrite code carefully checking 14e6a72b7dSEugeniy Paltsev * we don't call any prefetchw/prealloc instruction for L1 cache lines which 15e6a72b7dSEugeniy Paltsev * don't belongs to memset area. 16e6a72b7dSEugeniy Paltsev */ 17e6a72b7dSEugeniy Paltsev 18e6a72b7dSEugeniy Paltsev#if L1_CACHE_SHIFT == 6 19e6a72b7dSEugeniy Paltsev 20e6a72b7dSEugeniy Paltsev.macro PREALLOC_INSTR reg, off 21e6a72b7dSEugeniy Paltsev prealloc [\reg, \off] 22e6a72b7dSEugeniy Paltsev.endm 23e6a72b7dSEugeniy Paltsev 24e6a72b7dSEugeniy Paltsev.macro PREFETCHW_INSTR reg, off 25e6a72b7dSEugeniy Paltsev prefetchw [\reg, \off] 26e6a72b7dSEugeniy Paltsev.endm 27e6a72b7dSEugeniy Paltsev 28e6a72b7dSEugeniy Paltsev#else 29e6a72b7dSEugeniy Paltsev 3055c0c4c7SEugeniy Paltsev.macro PREALLOC_INSTR reg, off 31e6a72b7dSEugeniy Paltsev.endm 32e6a72b7dSEugeniy Paltsev 3355c0c4c7SEugeniy Paltsev.macro PREFETCHW_INSTR reg, off 34e6a72b7dSEugeniy Paltsev.endm 35e6a72b7dSEugeniy Paltsev 36e6a72b7dSEugeniy Paltsev#endif 371f7e3dc0SClaudiu Zissulescu 3886effd0dSVineet GuptaENTRY_CFI(memset) 391f7e3dc0SClaudiu Zissulescu mov.f 0, r2 401f7e3dc0SClaudiu Zissulescu;;; if size is zero 411f7e3dc0SClaudiu Zissulescu jz.d [blink] 421f7e3dc0SClaudiu Zissulescu mov r3, r0 ; don't clobber ret val 431f7e3dc0SClaudiu Zissulescu 44*1918693fSVineet Gupta PREFETCHW_INSTR r0, 0 ; Prefetch the first write location 45*1918693fSVineet Gupta 461f7e3dc0SClaudiu Zissulescu;;; if length < 8 471f7e3dc0SClaudiu Zissulescu brls.d.nt r2, 8, .Lsmallchunk 481f7e3dc0SClaudiu Zissulescu mov.f lp_count,r2 491f7e3dc0SClaudiu Zissulescu 501f7e3dc0SClaudiu Zissulescu and.f r4, r0, 0x03 511f7e3dc0SClaudiu Zissulescu rsub lp_count, r4, 4 521f7e3dc0SClaudiu Zissulescu lpnz @.Laligndestination 531f7e3dc0SClaudiu Zissulescu ;; LOOP BEGIN 541f7e3dc0SClaudiu Zissulescu stb.ab r1, [r3,1] 551f7e3dc0SClaudiu Zissulescu sub r2, r2, 1 561f7e3dc0SClaudiu Zissulescu.Laligndestination: 571f7e3dc0SClaudiu Zissulescu 581f7e3dc0SClaudiu Zissulescu;;; Destination is aligned 591f7e3dc0SClaudiu Zissulescu and r1, r1, 0xFF 601f7e3dc0SClaudiu Zissulescu asl r4, r1, 8 611f7e3dc0SClaudiu Zissulescu or r4, r4, r1 621f7e3dc0SClaudiu Zissulescu asl r5, r4, 16 631f7e3dc0SClaudiu Zissulescu or r5, r5, r4 641f7e3dc0SClaudiu Zissulescu mov r4, r5 651f7e3dc0SClaudiu Zissulescu 661f7e3dc0SClaudiu Zissulescu sub3 lp_count, r2, 8 671f7e3dc0SClaudiu Zissulescu cmp r2, 64 681f7e3dc0SClaudiu Zissulescu bmsk.hi r2, r2, 5 691f7e3dc0SClaudiu Zissulescu mov.ls lp_count, 0 701f7e3dc0SClaudiu Zissulescu add3.hi r2, r2, 8 711f7e3dc0SClaudiu Zissulescu 721f7e3dc0SClaudiu Zissulescu;;; Convert len to Dwords, unfold x8 731f7e3dc0SClaudiu Zissulescu lsr.f lp_count, lp_count, 6 74262137bcSVineet Gupta 751f7e3dc0SClaudiu Zissulescu lpnz @.Lset64bytes 761f7e3dc0SClaudiu Zissulescu ;; LOOP START 77e6a72b7dSEugeniy Paltsev PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching 78e6a72b7dSEugeniy Paltsev 79262137bcSVineet Gupta#ifdef CONFIG_ARC_HAS_LL64 801f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 811f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 821f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 831f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 841f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 851f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 861f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 871f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 88262137bcSVineet Gupta#else 89262137bcSVineet Gupta st.ab r4, [r3, 4] 90262137bcSVineet Gupta st.ab r4, [r3, 4] 91262137bcSVineet Gupta st.ab r4, [r3, 4] 92262137bcSVineet Gupta st.ab r4, [r3, 4] 93262137bcSVineet Gupta st.ab r4, [r3, 4] 94262137bcSVineet Gupta st.ab r4, [r3, 4] 95262137bcSVineet Gupta st.ab r4, [r3, 4] 96262137bcSVineet Gupta st.ab r4, [r3, 4] 97262137bcSVineet Gupta st.ab r4, [r3, 4] 98262137bcSVineet Gupta st.ab r4, [r3, 4] 99262137bcSVineet Gupta st.ab r4, [r3, 4] 100262137bcSVineet Gupta st.ab r4, [r3, 4] 101262137bcSVineet Gupta st.ab r4, [r3, 4] 102262137bcSVineet Gupta st.ab r4, [r3, 4] 103262137bcSVineet Gupta st.ab r4, [r3, 4] 104262137bcSVineet Gupta st.ab r4, [r3, 4] 105262137bcSVineet Gupta#endif 1061f7e3dc0SClaudiu Zissulescu.Lset64bytes: 1071f7e3dc0SClaudiu Zissulescu 1081f7e3dc0SClaudiu Zissulescu lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes 1091f7e3dc0SClaudiu Zissulescu lpnz .Lset32bytes 1101f7e3dc0SClaudiu Zissulescu ;; LOOP START 111262137bcSVineet Gupta#ifdef CONFIG_ARC_HAS_LL64 1121f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 1131f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 1141f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 1151f7e3dc0SClaudiu Zissulescu std.ab r4, [r3, 8] 116262137bcSVineet Gupta#else 117262137bcSVineet Gupta st.ab r4, [r3, 4] 118262137bcSVineet Gupta st.ab r4, [r3, 4] 119262137bcSVineet Gupta st.ab r4, [r3, 4] 120262137bcSVineet Gupta st.ab r4, [r3, 4] 121262137bcSVineet Gupta st.ab r4, [r3, 4] 122262137bcSVineet Gupta st.ab r4, [r3, 4] 123262137bcSVineet Gupta st.ab r4, [r3, 4] 124262137bcSVineet Gupta st.ab r4, [r3, 4] 125262137bcSVineet Gupta#endif 1261f7e3dc0SClaudiu Zissulescu.Lset32bytes: 1271f7e3dc0SClaudiu Zissulescu 1281f7e3dc0SClaudiu Zissulescu and.f lp_count, r2, 0x1F ;Last remaining 31 bytes 1291f7e3dc0SClaudiu Zissulescu.Lsmallchunk: 1301f7e3dc0SClaudiu Zissulescu lpnz .Lcopy3bytes 1311f7e3dc0SClaudiu Zissulescu ;; LOOP START 1321f7e3dc0SClaudiu Zissulescu stb.ab r1, [r3, 1] 1331f7e3dc0SClaudiu Zissulescu.Lcopy3bytes: 1341f7e3dc0SClaudiu Zissulescu 1351f7e3dc0SClaudiu Zissulescu j [blink] 1361f7e3dc0SClaudiu Zissulescu 13786effd0dSVineet GuptaEND_CFI(memset) 1381f7e3dc0SClaudiu Zissulescu 13986effd0dSVineet GuptaENTRY_CFI(memzero) 1401f7e3dc0SClaudiu Zissulescu ; adjust bzero args to memset args 1411f7e3dc0SClaudiu Zissulescu mov r2, r1 1421f7e3dc0SClaudiu Zissulescu b.d memset ;tail call so need to tinker with blink 1431f7e3dc0SClaudiu Zissulescu mov r1, 0 14486effd0dSVineet GuptaEND_CFI(memzero) 145