/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 * Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 *
 * This file is a template: FN_ENTRY/FN_EXIT, save_regs/load_regs and the
 * load1b/load2b/load1l/load2l/load4l data-fetch macros are expected to be
 * provided by the file that #includes this one (so the same body can serve
 * both the kernel-space and the user-space copy variants) — NOTE(review):
 * confirm against the including wrappers.  put_byte_N / get_byte_N and
 * lspull / lspush come from <asm/assembler.h> and abstract over byte order.
 *
 * The 16-bit one's-complement checksum is accumulated 32 bits at a time
 * with 'adcs' chains, so the carry flag is live across most of this code:
 * every path is careful to either preserve C (tst/teq) or to fold it in /
 * clear it explicitly (adc / adds #0).
 */

/* Symbolic names for the argument registers. */
src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

/* Zero-length input: the checksum is just the incoming partial sum. */
.Lzero:	mov	r0, sum
	load_regs

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * Called with C = 0; leaves the carry from the partial
		 * 'adcs' updates for the caller to fold in.
		 */
.Ldst_unaligned:
	tst	dst, #1			@ tst preserves C
	beq	.Ldst_16bit

	load1b	ip
	sub	len, len, #1
	adcs	sum, sum, ip, put_byte_1	@ update checksum
	strb	ip, [dst], #1
	tst	dst, #2
	reteq	lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip		@ copy 2 bytes to reach 32-bit alignment
	sub	len, len, #2
	adcs	sum, sum, r8, put_byte_0
	strb	r8, [dst], #1
	adcs	sum, sum, ip, put_byte_1
	strb	ip, [dst], #1
	ret	lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 */
.Lless8:	teq	len, #0			@ check for zero count
	beq	.Lzero

		/* we must have at least one byte. */
	tst	dst, #1			@ dst 16-bit aligned
	beq	.Lless8_aligned

		/* Align dst */
	load1b	ip
	sub	len, len, #1
	adcs	sum, sum, ip, put_byte_1	@ update checksum
	strb	ip, [dst], #1
	tst	len, #6			@ any 2-byte groups left? (len < 8 here)
	beq	.Lless8_byteonly

1:	load2b	r8, ip			@ copy the remaining halfwords...
	sub	len, len, #2
	adcs	sum, sum, r8, put_byte_0
	strb	r8, [dst], #1
	adcs	sum, sum, ip, put_byte_1
	strb	ip, [dst], #1
.Lless8_aligned:
	tst	len, #6
	bne	1b
.Lless8_byteonly:
	tst	len, #1			@ ...and then an odd trailing byte
	beq	.Ldone
	load1b	r8
	adcs	sum, sum, r8, put_byte_0	@ update checksum
	strb	r8, [dst], #1
	b	.Ldone

FN_ENTRY
	save_regs

	cmp	len, #8			@ Ensure that we have at least
	blo	.Lless8			@ 8 bytes to copy.

	adds	sum, sum, #0		@ C = 0
	tst	dst, #3			@ Test destination alignment
	blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

	tst	src, #3			@ Test source alignment
	bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

	bics	ip, len, #15		@ ip = len rounded down to 16; also C = 0
	beq	2f

1:	load4l	r4, r5, r6, r7		@ main loop: copy+sum 16 bytes/iteration
	stmia	dst!, {r4, r5, r6, r7}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	adcs	sum, sum, r6
	adcs	sum, sum, r7
	sub	ip, ip, #16
	teq	ip, #0			@ teq preserves the adcs carry
	bne	1b

2:	ands	ip, len, #12		@ 4..12 remaining whole words
	beq	4f
	tst	ip, #8
	beq	3f
	load2l	r4, r5
	stmia	dst!, {r4, r5}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	tst	ip, #4
	beq	4f

3:	load1l	r4			@ one final whole word
	str	r4, [dst], #4
	adcs	sum, sum, r4

4:	ands	len, len, #3		@ 0..3 trailing bytes
	beq	.Ldone
	load1l	r4			@ over-read one word; use only len bytes
	tst	len, #2
	mov	r5, r4, get_byte_0
	beq	.Lexit
	adcs	sum, sum, r4, lspush #16	@ sum the 2-byte half of the word
	strb	r5, [dst], #1
	mov	r5, r4, get_byte_1
	strb	r5, [dst], #1
	mov	r5, r4, get_byte_2
.Lexit:	tst	len, #1			@ odd final byte (already in r5)
	strbne	r5, [dst], #1
	andne	r5, r5, #255
	adcsne	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
.Ldone:	adc	r0, sum, #0		@ fold in the last carry
	ldr	sum, [sp, #0]		@ dst
	tst	sum, #1
	movne	r0, r0, ror #8		@ odd dst: byte-rotate the result
	load_regs

		/*
		 * src is not word aligned.  Word-align src and then build
		 * each output word from two successive source words using
		 * lspull/lspush shifts.  r4 always carries the leftover
		 * bytes of the previously loaded word.
		 */
.Lsrc_not_aligned:
	adc	sum, sum, #0		@ include C from dst alignment
	and	ip, src, #3		@ ip = misalignment (1, 2 or 3)
	bic	src, src, #3		@ round src down to a word boundary
	load1l	r5
	cmp	ip, #2
	beq	.Lsrc2_aligned
	bhi	.Lsrc3_aligned
	/* src is 1 byte past word alignment */
	mov	r4, r5, lspull #8	@ C = 0
	bics	ip, len, #15
	beq	2f
1:	load4l	r5, r6, r7, r8		@ 16 bytes/iteration, re-aligned by 1 byte
	orr	r4, r4, r5, lspush #24
	mov	r5, r5, lspull #8
	orr	r5, r5, r6, lspush #24
	mov	r6, r6, lspull #8
	orr	r6, r6, r7, lspush #24
	mov	r7, r7, lspull #8
	orr	r7, r7, r8, lspush #24
	stmia	dst!, {r4, r5, r6, r7}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	adcs	sum, sum, r6
	adcs	sum, sum, r7
	mov	r4, r8, lspull #8	@ keep leftover bytes for next round
	sub	ip, ip, #16
	teq	ip, #0
	bne	1b
2:	ands	ip, len, #12		@ remaining whole words
	beq	4f
	tst	ip, #8
	beq	3f
	load2l	r5, r6
	orr	r4, r4, r5, lspush #24
	mov	r5, r5, lspull #8
	orr	r5, r5, r6, lspush #24
	stmia	dst!, {r4, r5}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	mov	r4, r6, lspull #8
	tst	ip, #4
	beq	4f
3:	load1l	r5
	orr	r4, r4, r5, lspush #24
	str	r4, [dst], #4
	adcs	sum, sum, r4
	mov	r4, r5, lspull #8
4:	ands	len, len, #3		@ 0..3 trailing bytes, already in r4
	beq	.Ldone
	mov	r5, r4, get_byte_0
	tst	len, #2
	beq	.Lexit
	adcs	sum, sum, r4, lspush #16
	strb	r5, [dst], #1
	mov	r5, r4, get_byte_1
	strb	r5, [dst], #1
	mov	r5, r4, get_byte_2
	b	.Lexit

	/* src is 2 bytes past word alignment: same scheme, 16-bit shifts */
.Lsrc2_aligned:	mov	r4, r5, lspull #16
	adds	sum, sum, #0		@ C = 0
	bics	ip, len, #15
	beq	2f
1:	load4l	r5, r6, r7, r8
	orr	r4, r4, r5, lspush #16
	mov	r5, r5, lspull #16
	orr	r5, r5, r6, lspush #16
	mov	r6, r6, lspull #16
	orr	r6, r6, r7, lspush #16
	mov	r7, r7, lspull #16
	orr	r7, r7, r8, lspush #16
	stmia	dst!, {r4, r5, r6, r7}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	adcs	sum, sum, r6
	adcs	sum, sum, r7
	mov	r4, r8, lspull #16
	sub	ip, ip, #16
	teq	ip, #0
	bne	1b
2:	ands	ip, len, #12
	beq	4f
	tst	ip, #8
	beq	3f
	load2l	r5, r6
	orr	r4, r4, r5, lspush #16
	mov	r5, r5, lspull #16
	orr	r5, r5, r6, lspush #16
	stmia	dst!, {r4, r5}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	mov	r4, r6, lspull #16
	tst	ip, #4
	beq	4f
3:	load1l	r5
	orr	r4, r4, r5, lspush #16
	str	r4, [dst], #4
	adcs	sum, sum, r4
	mov	r4, r5, lspull #16
4:	ands	len, len, #3
	beq	.Ldone
	mov	r5, r4, get_byte_0	@ r4 holds exactly 2 valid bytes here
	tst	len, #2
	beq	.Lexit
	adcs	sum, sum, r4
	strb	r5, [dst], #1
	mov	r5, r4, get_byte_1
	strb	r5, [dst], #1
	tst	len, #1
	beq	.Ldone
	load1b	r5			@ r4 exhausted; fetch the last byte
	b	.Lexit

	/* src is 3 bytes past word alignment: same scheme, 24/8-bit shifts */
.Lsrc3_aligned:	mov	r4, r5, lspull #24
	adds	sum, sum, #0		@ C = 0
	bics	ip, len, #15
	beq	2f
1:	load4l	r5, r6, r7, r8
	orr	r4, r4, r5, lspush #8
	mov	r5, r5, lspull #24
	orr	r5, r5, r6, lspush #8
	mov	r6, r6, lspull #24
	orr	r6, r6, r7, lspush #8
	mov	r7, r7, lspull #24
	orr	r7, r7, r8, lspush #8
	stmia	dst!, {r4, r5, r6, r7}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	adcs	sum, sum, r6
	adcs	sum, sum, r7
	mov	r4, r8, lspull #24
	sub	ip, ip, #16
	teq	ip, #0
	bne	1b
2:	ands	ip, len, #12
	beq	4f
	tst	ip, #8
	beq	3f
	load2l	r5, r6
	orr	r4, r4, r5, lspush #8
	mov	r5, r5, lspull #24
	orr	r5, r5, r6, lspush #8
	stmia	dst!, {r4, r5}
	adcs	sum, sum, r4
	adcs	sum, sum, r5
	mov	r4, r6, lspull #24
	tst	ip, #4
	beq	4f
3:	load1l	r5
	orr	r4, r4, r5, lspush #8
	str	r4, [dst], #4
	adcs	sum, sum, r4
	mov	r4, r5, lspull #24
4:	ands	len, len, #3
	beq	.Ldone
	mov	r5, r4, get_byte_0	@ r4 holds exactly 1 valid byte here
	tst	len, #2
	beq	.Lexit
	strb	r5, [dst], #1
	adcs	sum, sum, r4
	load1l	r4			@ need the next word for bytes 2 and 3
	mov	r5, r4, get_byte_0
	strb	r5, [dst], #1
	adcs	sum, sum, r4, lspush #24
	mov	r5, r4, get_byte_1
	b	.Lexit
FN_EXIT