/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)
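
/*
 * For reference, a minimal C-level sketch of how this routine is normally
 * used, assuming the generic csum_partial()/csum_fold() interfaces (the
 * exact call site depends on the protocol code):
 *
 *	__wsum sum = csum_partial(buff, len, 0);
 *	__sum16 check = csum_fold(sum);
 *
 * The value returned in r3 above is the 32-bit partial sum; csum_fold()
 * then adds the two 16-bit halves with end-around carry and complements
 * the result.
 */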

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, src_error);	\
	EX_TABLE(8 ## n ## 1b, src_error);	\
	EX_TABLE(8 ## n ## 2b, src_error);	\
	EX_TABLE(8 ## n ## 3b, src_error);	\
	EX_TABLE(8 ## n ## 4b, dst_error);	\
	EX_TABLE(8 ## n ## 5b, dst_error);	\
	EX_TABLE(8 ## n ## 6b, dst_error);	\
	EX_TABLE(8 ## n ## 7b, dst_error);

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	stwu	r1,-16(r1)
	stw	r7,12(r1)
	stw	r8,8(r1)

	addic	r12,r6,0
	addi	r6,r4,-4
	neg	r0,r4
	addi	r4,r3,-4
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8
	rlwnm	r12,r12,r7,0,31		/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0		/* is destination address even ? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12
	addi	r1,r1,16
	beqlr+	cr7
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

/* read fault */
src_error:
	lwz	r7,12(r1)
	addi	r1,r1,16
	cmpwi	cr0,r7,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r7)
	blr
/* write fault */
dst_error:
	lwz	r8,8(r1)
	addi	r1,r1,16
	cmpwi	cr0,r8,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r8)
	blr

	EX_TABLE(70b, src_error);
	EX_TABLE(71b, dst_error);
	EX_TABLE(72b, src_error);
	EX_TABLE(73b, dst_error);
	EX_TABLE(54b, dst_error);

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * src_error (if in read part) or dst_error (if in write part)
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, src_error);
	EX_TABLE(31b, dst_error);
	EX_TABLE(40b, src_error);
	EX_TABLE(41b, dst_error);
	EX_TABLE(50b, src_error);
	EX_TABLE(51b, dst_error);

EXPORT_SYMBOL(csum_partial_copy_generic)

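/*
 * For reference, a minimal C-level sketch of the fault contract above,
 * assuming a wrapper along the lines of csum_and_copy_from_user() (the
 * exact wrapper and argument handling vary; handle_fault() is only a
 * placeholder for the caller's error path):
 *
 *	int err = 0;
 *	__wsum csum;
 *
 *	csum = csum_partial_copy_generic(src, dst, len, sum, &err, NULL);
 *	if (err)
 *		handle_fault();
 *
 * A faulting load from src reaches src_error and stores -EFAULT through
 * src_err; a faulting store to dst reaches dst_error and stores -EFAULT
 * through dst_err.  A NULL error pointer suppresses the corresponding
 * report.
 */
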
/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 */

_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)
	lwz	r9, 4(r3)
	addc	r0, r7, r8
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
	rotlwi	r3, r0, 16
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31
	blr
EXPORT_SYMBOL(csum_ipv6_magic)
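
/*
 * For reference, the final fold above (rotlwi/add/not/rlwinm) is roughly
 * equivalent to this C sketch, assuming rol32() from <linux/bitops.h>:
 *
 *	u32 t = sum + rol32(sum, 16);
 *	return (__sum16)(~t >> 16);
 *
 * Adding the rotated value folds the low 16 bits into the high 16 bits
 * with end-around carry; complementing and keeping the upper half yields
 * the 16-bit one's-complement checksum.
 */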