/*
 * "memset" implementation for SH4
 *
 * Copyright (C) 1999 Niibe Yutaka
 * Copyright (c) 2009 STMicroelectronics Limited
 * Author: Stuart Menefy <stuart.menefy:st.com>
 */

/*
 * void *memset(void *s, int c, size_t n);
 *
 * Registers, as used by the code below:
 *   r4  in:  s.  Immediately advanced to s + n; the buffer is then filled
 *            backwards with pre-decrement stores, so r4 is back at s when
 *            the routine returns.
 *   r5  in:  c.  Only the low byte is used; it is replicated into all
 *            four bytes once the long-word paths are taken.
 *   r6  in:  n.  Number of bytes still to be written.
 *   r0  out: s (copied from r4 in the delay slot of rts).
 *   r0-r3    scratch (masks, shift counts, loop counters).
 *
 * Strategy:
 *   n < 12      : byte stores only.
 *   n >= 12     : byte stores until the end pointer is 4-byte aligned,
 *                 then
 *     n < 64    : 8 bytes per iteration (two long stores), byte tail.
 *     n >= 64   : long stores until the end pointer is 32-byte aligned,
 *                 32 bytes per iteration with movca.l on the first long
 *                 of each block, then the 8-byte loop and the byte tail
 *                 for whatever is left.
 *
 * bt/s and bf/s are delayed branches: the instruction that follows them
 * (indented by one extra space) is executed whether or not the branch is
 * taken.
 *
 * NOTE(review): the length checks use cmp/gt, which is a signed compare,
 * so a length with bit 31 set is treated as "small" and is filled by the
 * byte loop.  The result is still correct, just slow.
 */

#include <linux/linkage.h>

ENTRY(memset)
	mov	#12,r0
	add	r6,r4		! r4 = s + n (one past the last byte)
	cmp/gt	r6,r0		! T = (12 > n)
	bt/s	40f		! if it is too small, set a byte at once
	 mov	r4,r0		! (delay slot) r0 = end pointer
	and	#3,r0		! r0 = misalignment of the end pointer
	cmp/eq	#0,r0
	bt/s	2f		! It is aligned
	 sub	r0,r6		! (delay slot) n -= misalignment (0 if aligned)
1:				! store r0 bytes to reach 4-byte alignment
	dt	r0
	bf/s	1b
	 mov.b	r5,@-r4		! (delay slot) runs on every pass, last included
2:				! make VVVV
	extu.b	r5,r5		! 000V
	swap.b	r5,r0		!   V0
	or	r0,r5		!   VV
	swap.w	r5,r0		! VV00
	or	r0,r5		! VVVV

	! Check if enough bytes need to be copied to be worth the big loop
	mov	#0x40, r0	! (MT)
	cmp/gt	r6,r0		! (MT) 64 > len => slow loop

	bt/s	22f
	 mov	r6,r0		! (delay slot) r0 = len, consumed at 22:

	! align the dst to the cache block size if necessary
	mov	r4, r3
	mov	#~(0x1f), r1

	and	r3, r1		! r1 = r4 rounded down to a 32-byte boundary
	cmp/eq	r3, r1

	bt/s	11f		! dst is already aligned
	 sub	r1, r3		! r3-r1 -> r3 (bytes above the boundary)
	shlr2	r3		! number of loops (long words to store)

10:	mov.l	r5,@-r4
	dt	r3
	bf/s	10b
	 add	#-4, r6		! (delay slot) account for the long just stored

11:	! dst is 32byte aligned
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r2		! number of loops (r6 >> 5)

	add	#-32, r4	! r4 = base of the first (highest) 32-byte block
	mov	r5, r0		! movca.l takes its source from r0
12:
	movca.l	r0,@r4		! first long of the block; presumably allocates
				! the cache line without reading memory (see the
				! SH-4 manual) - TODO confirm
	mov.l	r5,@(4, r4)
	mov.l	r5,@(8, r4)
	mov.l	r5,@(12,r4)
	mov.l	r5,@(16,r4)
	mov.l	r5,@(20,r4)
	add	#-0x20, r6	! 32 bytes fewer to go
	mov.l	r5,@(24,r4)
	dt	r2
	mov.l	r5,@(28,r4)
	bf/s	12b
	 add	#-32, r4	! (delay slot) step down to the next block

	add	#32, r4		! undo the extra step taken on the final pass
	mov	#8, r0
	cmp/ge	r0, r6		! T = (remaining >= 8)
	bf	40f		! fewer than 8 bytes left: byte loop only

	mov	r6,r0
22:
	shlr2	r0
	shlr	r0		! r0 = r6 >> 3
3:
	dt	r0
	mov.l	r5,@-r4		! set 8-byte at once
	bf/s	3b
	 mov.l	r5,@-r4		! (delay slot) second half of the 8 bytes
	!
	mov	#7,r0
	and	r0,r6		! bytes left over after the 8-byte loop

	! fill bytes (length may be zero)
40:	tst	r6,r6
	bt	5f		! nothing left to store
4:
	dt	r6
	bf/s	4b
	 mov.b	r5,@-r4		! (delay slot) runs on every pass, last included
5:
	rts
	 mov	r4,r0		! (delay slot) return s; r4 has walked back to it