/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	mtctr	r6
22:	lwz	r0,4(r3)
	lwz	r6,8(r3)
	lwz	r7,12(r3)
	lwzu	r8,16(r3)
	adde	r5,r5,r0
	adde	r5,r5,r6
	adde	r5,r5,r7
	adde	r5,r5,r8
	bdnz	22b
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)
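
/*
 * For reference, a rough C model of the accumulation above (a sketch,
 * not part of the kernel API; "csum_partial_sketch" is a hypothetical
 * name, and it assumes a big-endian machine as this asm does).  The
 * exact 32-bit value may differ from what the adde/addze chain above
 * produces, but it folds to the same 16-bit checksum:
 *
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	static uint32_t csum_partial_sketch(const uint8_t *p, size_t len,
 *					    uint32_t sum)
 *	{
 *		uint64_t acc = sum;
 *
 *		for (; len >= 4; p += 4, len -= 4)	// whole words
 *			acc += ((uint32_t)p[0] << 24) | (p[1] << 16) |
 *			       (p[2] << 8) | p[3];
 *		if (len >= 2) {				// trailing halfword
 *			acc += (p[0] << 8) | p[1];
 *			p += 2;
 *			len -= 2;
 *		}
 *		if (len)				// trailing byte goes in
 *			acc += (uint32_t)p[0] << 8;	// the upper half
 *		while (acc >> 32)			// end-around carry
 *			acc = (acc & 0xffffffff) + (acc >> 32);
 *		return (uint32_t)acc;
 *	}
 */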

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, src_error);	\
	EX_TABLE(8 ## n ## 1b, src_error);	\
	EX_TABLE(8 ## n ## 2b, src_error);	\
	EX_TABLE(8 ## n ## 3b, src_error);	\
	EX_TABLE(8 ## n ## 4b, dst_error);	\
	EX_TABLE(8 ## n ## 5b, dst_error);	\
	EX_TABLE(8 ## n ## 6b, dst_error);	\
	EX_TABLE(8 ## n ## 7b, dst_error);

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	stwu	r1,-16(r1)
	stw	r7,12(r1)
	stw	r8,8(r1)

	addic	r12,r6,0
	addi	r6,r4,-4
	neg	r0,r4
	addi	r4,r3,-4
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12
	addi	r1,r1,16
	beqlr+	cr7
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr
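
/*
 * Fault handling for the copy above: every load and store that can
 * fault carries a local label (70/71, 72/73, 54, 30/31, 40/41, 50/51,
 * and the 8nX labels generated by CSUM_COPY_16_BYTES_WITHEX), and the
 * EX_TABLE entries further down route a fault at each labelled
 * instruction to src_error (loads) or dst_error (stores).  src_err
 * (r7) and dst_err (r8) were saved to the stack frame at entry because
 * the copy loop reuses those registers; the handlers below reload
 * them and, if non-NULL, store -EFAULT through them.  In rough C terms
 * (a sketch of the contract, not kernel code):
 *
 *	// fault while reading src:  if (src_err) *src_err = -EFAULT;
 *	// fault while writing dst:  if (dst_err) *dst_err = -EFAULT;
 */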

/* read fault */
src_error:
	lwz	r7,12(r1)
	addi	r1,r1,16
	cmpwi	cr0,r7,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r7)
	blr
/* write fault */
dst_error:
	lwz	r8,8(r1)
	addi	r1,r1,16
	cmpwi	cr0,r8,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r8)
	blr

	EX_TABLE(70b, src_error);
	EX_TABLE(71b, dst_error);
	EX_TABLE(72b, src_error);
	EX_TABLE(73b, dst_error);
	EX_TABLE(54b, dst_error);

/*
 * This code handles faults in the cacheline loop and branches to either
 * src_error (if in the read part) or dst_error (if in the write part)
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, src_error);
	EX_TABLE(31b, dst_error);
	EX_TABLE(40b, src_error);
	EX_TABLE(41b, dst_error);
	EX_TABLE(50b, src_error);
	EX_TABLE(51b, dst_error);

EXPORT_SYMBOL(csum_partial_copy_generic)
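
/*
 * Usage note: both routines return a 32-bit partial sum, not the final
 * Internet checksum.  Callers fold it to 16 bits and complement it, as
 * csum_fold() does; a minimal C sketch of that standard fold
 * ("fold_sketch" is a hypothetical name):
 *
 *	static uint16_t fold_sketch(uint32_t sum)
 *	{
 *		sum = (sum & 0xffff) + (sum >> 16);	// fold high halfword
 *		sum += sum >> 16;			// add carry back in
 *		return (uint16_t)~sum;			// one's complement
 *	}
 */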