/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Copies len bytes from src to dst and, on the way, folds every byte
 * copied into sum with add-with-carry.  The accumulated value is handed
 * back in r0.
 *
 * This file is a template.  FN_ENTRY, FN_EXIT, save_regs, load_regs and
 * the load1b / load2b / load1l / load2l / load4l accessors are used here
 * but not defined here; they have to exist before this code is
 * assembled (presumably supplied by the file that includes this one).
 * What the code below relies on:
 *   - save_regs leaves the incoming dst at [sp, #0]; .Ldone reads it
 *     back from there.
 *   - load_regs terminates each exit path, so it is expected to undo
 *     save_regs and return to the caller.
 *   - the loadN accessors are given destination registers only, so
 *     they are expected to fetch from src and advance it.
 *   - r4-r8 and ip are scratch.  Whether they are preserved for the
 *     caller is up to save_regs/load_regs (not visible here).
 * put_byte_N, get_byte_N, lspull and lspush are byte-lane and shift
 * helpers that are likewise defined elsewhere (presumably in
 * asm/assembler.h).
 *
 * Carry discipline: each checksum step is an adcs that consumes the
 * carry left by the previous one, so C is live across nearly all of
 * this code.  Note that 'tst' and 'teq' preserve the carry flag; the
 * loop counters are therefore stepped with a plain 'sub' followed by
 * 'teq' rather than with 'subs'.  The bics/ands instructions used to
 * split len only use small immediates and do not disturb C either.
 */

src             .req    r0
dst             .req    r1
len             .req    r2
sum             .req    r3

                /*
                 * Zero-length request: the result is the incoming sum,
                 * untouched.
                 */
.Lzero:         mov     r0, sum
                load_regs

                /*
                 * Subroutine (entered with bl, leaves through lr):
                 * bring an unaligned destination pointer up to a 32-bit
                 * boundary by copying 1, 2 or 3 bytes.  The caller has
                 * already checked that len >= 8, so the length is not
                 * tested here.  The source pointer is not aligned yet.
                 */
.Ldst_unaligned:
                tst     dst, #1                 @ odd address?
                beq     .Ldst_16bit             @ no: two bytes are missing

                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                reteq   lr                      @ dst is now 32bit aligned

.Ldst_16bit:    load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                ret     lr                      @ dst is now 32bit aligned

                /*
                 * Short path: 0 to 7 bytes, any alignment of source and
                 * destination.  Reached from the 'blo' after
                 * 'cmp len, #8', so C = 0 on arrival.
                 */
.Lless8:        teq     len, #0                 @ check for zero count
                beq     .Lzero

                /* At least one byte is present from here on. */
                tst     dst, #1                 @ dst 16-bit aligned
                beq     .Lless8_aligned

                /* Odd dst: copy a single byte first. */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6                 @ any byte pairs left?
                beq     .Lless8_byteonly

                /* Copy byte pairs while len still has bit 1 or 2 set. */
.Lless8_pair:   load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.Lless8_aligned:
                tst     len, #6
                bne     .Lless8_pair
.Lless8_byteonly:
                tst     len, #1                 @ one trailing byte?
                beq     .Ldone
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .Ldone

FN_ENTRY
                save_regs

                cmp     len, #8                 @ Ensure that we have at least
                blo     .Lless8                 @ 8 bytes to copy.

                adds    sum, sum, #0            @ C = 0
                tst     dst, #3                 @ Test destination alignment
                blne    .Ldst_unaligned         @ align destination, return here

                /*
                 * dst is 32-bit aligned from here on.  At most three
                 * bytes were consumed out of at least eight, so more
                 * than four bytes remain.  C holds the carry produced
                 * by the dst alignment above.
                 */

                tst     src, #3                 @ Test source alignment
                bne     .Lsrc_not_aligned

                /*
                 * src and dst both word aligned.
                 * ip counts the bytes to move in 16-byte chunks.
                 */

                bics    ip, len, #15            @ ip = len rounded down to 16
                beq     .Lal_words              @ fewer than 16 bytes

.Lal_loop16:    load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16             @ no flag update: C stays live
                teq     ip, #0
                bne     .Lal_loop16

                /* 0 to 3 whole words remain: bits 3 and 2 of len. */
.Lal_words:     ands    ip, len, #12
                beq     .Lal_tail
                tst     ip, #8
                beq     .Lal_word
                load2l  r4, r5
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     .Lal_tail

.Lal_word:      load1l  r4
                str     r4, [dst], #4
                adcs    sum, sum, r4

                /*
                 * 0 to 3 trailing bytes.  A whole word is fetched and
                 * then stored and summed one piece at a time.
                 */
.Lal_tail:      ands    len, len, #3
                beq     .Ldone
                load1l  r4
                tst     len, #2
                mov     r5, r4, get_byte_0      @ mov keeps the tst result
                beq     .Lexit                  @ exactly one byte
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2

                /*
                 * Shared by all four variants: r5 holds the candidate
                 * last byte.  It is stored and summed only when len is
                 * odd.
                 */
.Lexit:         tst     len, #1
                strneb  r5, [dst], #1
                andne   r5, r5, #255
                adcnes  sum, sum, r5, put_byte_0

                /*
                 * If the dst pointer was not 16-bit aligned, the
                 * checksum has to be rotated by one byte here to get
                 * around the inefficient byte manipulations in the
                 * architecture independent code.
                 */
.Ldone:         adc     r0, sum, #0             @ fold in the last carry
                ldr     sum, [sp, #0]           @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
                load_regs

                /*
                 * dst is word aligned, src is not.  src is rounded down
                 * to a word boundary and whole words are fetched.  r4
                 * always carries the not yet stored remainder of the
                 * previous word, and each output word is that remainder
                 * merged with the leading part of the next word.  The
                 * three variants differ only in the shift amounts.
                 */
.Lsrc_not_aligned:
                adc     sum, sum, #0            @ include C from dst alignment
                and     ip, src, #3             @ ip = src offset in word, 1..3
                bic     src, src, #3            @ word-align src downwards
                load1l  r5
                cmp     ip, #2
                beq     .Lsrc2_aligned
                bhi     .Lsrc3_aligned

                /* src offset 1: three bytes of each fetched word carry over. */
                mov     r4, r5, lspull #8       @ C = 0
                bics    ip, len, #15
                beq     .Ls1_words
.Ls1_loop16:    load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                mov     r6, r6, lspull #8
                orr     r6, r6, r7, lspush #24
                mov     r7, r7, lspull #8
                orr     r7, r7, r8, lspush #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #8       @ remainder for next round
                sub     ip, ip, #16
                teq     ip, #0
                bne     .Ls1_loop16
.Ls1_words:     ands    ip, len, #12
                beq     .Ls1_tail
                tst     ip, #8
                beq     .Ls1_word
                load2l  r5, r6
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #8
                tst     ip, #4
                beq     .Ls1_tail
.Ls1_word:      load1l  r5
                orr     r4, r4, r5, lspush #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #8
                /* Up to three tail bytes are already sitting in r4. */
.Ls1_tail:      ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .Lexit

                /* src offset 2: two bytes of each fetched word carry over. */
.Lsrc2_aligned: mov     r4, r5, lspull #16
                adds    sum, sum, #0            @ clear C set by the cmp
                bics    ip, len, #15
                beq     .Ls2_words
.Ls2_loop16:    load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                mov     r6, r6, lspull #16
                orr     r6, r6, r7, lspush #16
                mov     r7, r7, lspull #16
                orr     r7, r7, r8, lspush #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     .Ls2_loop16
.Ls2_words:     ands    ip, len, #12
                beq     .Ls2_tail
                tst     ip, #8
                beq     .Ls2_word
                load2l  r5, r6
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #16
                tst     ip, #4
                beq     .Ls2_tail
.Ls2_word:      load1l  r5
                orr     r4, r4, r5, lspush #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #16
                /*
                 * r4 holds only two pending bytes, so a third tail byte
                 * has to be fetched separately.
                 */
.Ls2_tail:      ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .Ldone
                load1b  r5
                b       .Lexit

                /* src offset 3: one byte of each fetched word carries over. */
.Lsrc3_aligned: mov     r4, r5, lspull #24
                adds    sum, sum, #0            @ clear C set by the cmp
                bics    ip, len, #15
                beq     .Ls3_words
.Ls3_loop16:    load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                mov     r6, r6, lspull #24
                orr     r6, r6, r7, lspush #8
                mov     r7, r7, lspull #24
                orr     r7, r7, r8, lspush #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     .Ls3_loop16
.Ls3_words:     ands    ip, len, #12
                beq     .Ls3_tail
                tst     ip, #8
                beq     .Ls3_word
                load2l  r5, r6
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #24
                tst     ip, #4
                beq     .Ls3_tail
.Ls3_word:      load1l  r5
                orr     r4, r4, r5, lspush #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #24
                /*
                 * r4 holds a single pending byte.  When two or three
                 * tail bytes are wanted, one more word is fetched to
                 * supply the rest.
                 */
.Ls3_tail:      ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, lspush #24
                mov     r5, r4, get_byte_1
                b       .Lexit
FN_EXIT