/*
 * linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 * Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Copies len bytes from src to dst while accumulating a ones'-complement
 * style sum of the copied data into 'sum' (add-with-carry chain).
 *
 * Note that 'tst' and 'teq' preserve the carry flag.  The code relies on
 * this throughout: the carry produced by one 'adcs' is consumed by the
 * next one, with only flag-preserving tests (and 'sub'/'mov' without the
 * 's' suffix) in between.
 *
 * This file is a template.  FN_ENTRY, save_regs, load_regs, load1b,
 * load2b, load1l, load2l, load4l, put_byte_N, get_byte_N, push and pull
 * are used but not defined here; presumably they are supplied by the
 * file that includes this one (TODO confirm against the includers).
 *
 * Scratch registers used below: ip, r4-r8.  lr is overwritten by the
 * 'blne' to .dst_unaligned.
 */

src             .req    r0
dst             .req    r1
len             .req    r2
sum             .req    r3

                /*
                 * Zero-length request: return the incoming sum unchanged.
                 */
.zero:          mov     r0, sum
                load_regs ea

                /*
                 * Align an unaligned destination pointer. We know that
                 * we have >= 8 bytes here, so we don't need to check
                 * the length. Note that the source pointer hasn't been
                 * aligned yet.
                 *
                 * Entered with 'bl' (only when dst & 3 != 0) and returns
                 * through lr.  Copies 1, 2 or 3 bytes so that dst ends up
                 * word aligned, updating len, sum and the carry flag.
                 */
.dst_unaligned: tst     dst, #1
                beq     .dst_16bit              @ dst already 16-bit aligned

                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                moveq   pc, lr                  @ dst is now 32bit aligned

                /* dst is 16-bit but not 32-bit aligned: copy two bytes. */
.dst_16bit:     load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                mov     pc, lr                  @ dst is now 32bit aligned

                /*
                 * Handle 0 to 7 bytes, with any alignment of source and
                 * destination pointers. Note that when we get here, C = 0
                 * (we arrive via 'blo' after 'cmp len, #8').
                 */
.less8:         teq     len, #0                 @ check for zero count
                beq     .zero

                /* we must have at least one byte. */
                tst     dst, #1                 @ dst 16-bit aligned
                beq     .less8_aligned

                /* Align dst */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6                 @ any byte pairs left?
                beq     .less8_byteonly

                /* Copy byte pairs while len has bit 1 or bit 2 set. */
1:              load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.less8_aligned: tst     len, #6
                bne     1b
.less8_byteonly:
                tst     len, #1                 @ one trailing byte?
                beq     .done
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .done

                /*
                 * Function entry point.  The symbol itself comes from
                 * the FN_ENTRY macro.  ip holds the entry sp while
                 * save_regs runs, and fp is then set to (entry sp - 4).
                 * NOTE(review): .done reads the original dst from
                 * [sp, #0], so save_regs is expected to leave it at the
                 * top of the stack - confirm against the macro.
                 */
FN_ENTRY
                mov     ip, sp
                save_regs
                sub     fp, ip, #4

                cmp     len, #8                 @ Ensure that we have at least
                blo     .less8                  @ 8 bytes to copy.

                adds    sum, sum, #0            @ C = 0
                tst     dst, #3                 @ Test destination alignment
                blne    .dst_unaligned          @ align destination, return here

                /*
                 * Ok, the dst pointer is now 32bit aligned, and we know
                 * that we must have more than 4 bytes to copy. Note
                 * that C contains the carry from the dst alignment above.
                 */

                tst     src, #3                 @ Test source alignment
                bne     .src_not_aligned

                /* Routine for src & dst aligned */

                bics    ip, len, #15            @ ip = len rounded down to 16
                beq     2f                      @ no full 16-byte chunk

                /* Main loop: four words per iteration. */
1:              load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16
                teq     ip, #0                  @ teq keeps C for next adcs
                bne     1b

                /* Up to three remaining whole words (len bits 2 and 3). */
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r4, r5                  @ two words
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     4f

3:              load1l  r4                      @ one word
                str     r4, [dst], #4
                adcs    sum, sum, r4

                /* Trailing 0-3 bytes (len bits 0 and 1). */
4:              ands    len, len, #3
                beq     .done
                load1l  r4                      @ word holding the tail bytes
                tst     len, #2
                mov     r5, r4, get_byte_0
                beq     .exit                   @ only one byte: it is in r5
                adcs    sum, sum, r4, push #16  @ sum the first two bytes
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2      @ candidate third byte

                /*
                 * Common tail: if len is odd, r5 carries the last byte.
                 * Store it and add it to the sum; otherwise do nothing.
                 */
.exit:          tst     len, #1
                strneb  r5, [dst], #1
                andne   r5, r5, #255
                adcnes  sum, sum, r5, put_byte_0

                /*
                 * If the dst pointer was not 16-bit aligned, we
                 * need to rotate the checksum here to get around
                 * the inefficient byte manipulations in the
                 * architecture independent code.
                 */
.done:          adc     r0, sum, #0             @ fold in the final carry
                ldr     sum, [sp, #0]           @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
                load_regs ea

                /*
                 * Source is not word aligned (dst is).  src is rounded
                 * down to a word boundary and the data is read as whole
                 * words; r4 carries the not-yet-stored bytes of the
                 * previous word, merged with the next word using the
                 * pull/push shift pairs.  Dispatch on src & 3:
                 *   1 -> fall through, 2 -> .src2_aligned,
                 *   3 -> .src3_aligned.
                 */
.src_not_aligned:
                adc     sum, sum, #0            @ include C from dst alignment
                and     ip, src, #3
                bic     src, src, #3
                load1l  r5
                cmp     ip, #2
                beq     .src2_aligned
                bhi     .src3_aligned
                /* src & 3 == 1; the cmp above left C clear (1 < 2). */
                mov     r4, r5, pull #8         @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #24
                mov     r5, r5, pull #8
                orr     r5, r5, r6, push #24
                mov     r6, r6, pull #8
                orr     r6, r6, r7, push #24
                mov     r7, r7, pull #8
                orr     r7, r7, r8, push #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #8         @ leftover for next round
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #24
                mov     r5, r5, pull #8
                orr     r5, r5, r6, push #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #8
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, push #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #8
                /* Tail: r4 still holds the leftover bytes already loaded. */
4:              ands    len, len, #3
                beq     .done
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .exit
                adcs    sum, sum, r4, push #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .exit

                /*
                 * src & 3 == 2: same scheme with 16-bit shifts.
                 * The preceding cmp left C set, so clear it explicitly.
                 */
.src2_aligned:  mov     r4, r5, pull #16
                adds    sum, sum, #0            @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #16
                mov     r5, r5, pull #16
                orr     r5, r5, r6, push #16
                mov     r6, r6, pull #16
                orr     r6, r6, r7, push #16
                mov     r7, r7, pull #16
                orr     r7, r7, r8, push #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #16
                mov     r5, r5, pull #16
                orr     r5, r5, r6, push #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #16
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, push #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #16
                /*
                 * Tail: r4 supplies the first two bytes; if a third byte
                 * is needed it is fetched from memory with load1b.
                 */
4:              ands    len, len, #3
                beq     .done
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .exit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .done
                load1b  r5
                b       .exit

                /*
                 * src & 3 == 3: same scheme with 24/8-bit shifts.
                 * The preceding cmp left C set, so clear it explicitly.
                 */
.src3_aligned:  mov     r4, r5, pull #24
                adds    sum, sum, #0            @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #8
                mov     r5, r5, pull #24
                orr     r5, r5, r6, push #8
                mov     r6, r6, pull #24
                orr     r6, r6, r7, push #8
                mov     r7, r7, pull #24
                orr     r7, r7, r8, push #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #8
                mov     r5, r5, pull #24
                orr     r5, r5, r6, push #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #24
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, push #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #24
                /*
                 * Tail: r4 supplies only the first byte; for two or
                 * three bytes the next source word is loaded and its
                 * bytes 0 and 1 are used.
                 */
4:              ands    len, len, #3
                beq     .done
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .exit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, push #24
                mov     r5, r4, get_byte_1
                b       .exit