/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 *
 * Copies 'len' bytes from src to dst while accumulating them into 'sum'
 * with end-around carry: the carry flag is threaded from one 'adcs' to
 * the next, which is why only flag-preserving instructions (plain 'sub',
 * 'mov', 'tst', 'teq', stores) appear between the 'adcs' instructions.
 *
 * This file is a template.  It does not define the following itself and
 * relies on whatever includes it to provide them before this point:
 *   FN_ENTRY, save_regs, load_regs         - entry label, prologue/epilogue
 *   load1b, load2b, load1l, load2l, load4l - fetch bytes/words via 'src'
 *   put_byte_N, get_byte_N, push, pull     - shifter operands
 * NOTE(review): their exact definitions are not visible here; the
 * comments below assume the load macros post-increment 'src' and that
 * push/pull/get_byte/put_byte are endian-dependent shifts - confirm
 * against the including file.
 *
 * Register use in this file:
 *   ip      - scratch byte, or count of bytes left for the 16-byte loops
 *   r4 - r8 - data being copied; in the unaligned-source paths r4 holds
 *             the source bytes carried over from the previous word
 */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

		/* len == 0: nothing to copy, hand back the incoming sum. */
.Lzero:		mov	r0, sum
		load_regs	ea

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * Entered with 'blne' from the main entry, so both exits
		 * return through lr.  Copies one byte if dst is odd, then
		 * two more bytes if dst is still not a multiple of 4.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit

		load1b	ip
		sub	len, len, #1			@ no 'S': keep C for adcs
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		moveq	pc, lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 * (we arrive via 'blo' after 'cmp len, #8').
		 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6			@ any byte pairs left?
		beq	.Lless8_byteonly

		/* Copy byte pairs while bits 1-2 of len are non-zero. */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1			@ odd trailing byte?
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

		/*
		 * Main entry point.  The label itself comes from FN_ENTRY.
		 * NOTE(review): .Ldone reads the original dst from
		 * [sp, #0], so save_regs is presumed to leave r1 at the
		 * bottom of the saved-register area - confirm.
		 */
FN_ENTRY
		mov	ip, sp
		save_regs
		sub	fp, ip, #4

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	2f

		/* 16 bytes per iteration. */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0			@ teq, not cmp: keeps C
		bne	1b

		/* Remaining whole words: len bits 3 and 2 (8 and/or 4 bytes). */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Trailing 1-3 bytes.  A whole word is loaded from the
		 * (word aligned) source and its bytes are picked out.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit
		adcs	sum, sum, r4, push #16	@ sum the first two bytes
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2

		/*
		 * Common tail, shared with the unaligned-source paths:
		 * if len is odd, r5 holds the final byte in its low 8 bits;
		 * store it and add it to the checksum.
		 */
.Lexit:		tst	len, #1
		strneb	r5, [dst], #1
		andne	r5, r5, #255
		adcnes	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
.Ldone:		adc	r0, sum, #0		@ fold in the last carry
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs	ea

		/*
		 * Source is not word aligned (dst is).  Round src down to
		 * a word boundary, load the word containing the first
		 * source bytes into r5, and dispatch on the byte offset
		 * (ip = 1, 2 or 3).  In each path r4 carries the not yet
		 * stored bytes of the previous source word, which are
		 * merged with the next word using pull/push shifts.
		 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned

		/* Source offset 1: three bytes of each word carry over. */
		mov	r4, r5, pull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		mov	r7, r7, pull #8
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #8		@ carry over for next round
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #8
		/* Up to three trailing bytes are already held in r4. */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

		/* Source offset 2: two bytes of each word carry over. */
.Lsrc2_aligned:	mov	r4, r5, pull #16
		adds	sum, sum, #0		@ cmp above left C set; clear it
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		mov	r7, r7, pull #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #16
		/*
		 * r4 holds the two carried-over bytes.  A third trailing
		 * byte, if needed, has to be fetched from the source.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

		/* Source offset 3: one byte of each word carries over. */
.Lsrc3_aligned:	mov	r4, r5, pull #24
		adds	sum, sum, #0		@ cmp above left C set; clear it
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		mov	r7, r7, pull #24
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #24
		/*
		 * r4 holds the single carried-over byte.  If two or three
		 * bytes remain, the rest come from the next source word.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, push #24
		mov	r5, r4, get_byte_1
		b	.Lexit