/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * __csum_partial(buff, len, sum)
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	mtctr	r6
22:	lwz	r0,4(r3)
	lwz	r6,8(r3)
	lwz	r7,12(r3)
	lwzu	r8,16(r3)
	adde	r5,r5,r0
	adde	r5,r5,r6
	adde	r5,r5,r7
	adde	r5,r5,r8
	bdnz	22b
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)
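
/*
 * For reference, a minimal C sketch of the computation above.  This is
 * illustration only, not built: csum_partial_ref and acc are made-up
 * names, a 4-byte-aligned buffer is assumed (the assembly also handles
 * a 2-byte-aligned start), and big-endian byte order is assumed, which
 * is why a trailing odd byte becomes the high byte of a 16-bit word,
 * matching the "slwi r0,r0,8" above:
 *
 *	static u32 csum_partial_ref(const u8 *buff, int len, u32 sum)
 *	{
 *		u64 acc = sum;
 *
 *		while (len >= 4) {		// word loop (lwzu/adde)
 *			acc += *(const u32 *)buff;
 *			buff += 4;
 *			len -= 4;
 *		}
 *		if (len & 2) {			// trailing halfword (lhz)
 *			acc += *(const u16 *)buff;
 *			buff += 2;
 *		}
 *		if (len & 1)			// trailing byte (lbz/slwi)
 *			acc += (u32)*buff << 8;
 *
 *		// Fold the carries back in (end-around carry), as the
 *		// adde/addze sequence does via XER[CA].
 *		while (acc >> 32)
 *			acc = (u32)acc + (acc >> 32);
 *		return (u32)acc;
 *	}
 */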

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
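
/*
 * Ignoring faults, a minimal sketch of the semantics (illustrative
 * only; csum_copy_ref is a made-up name, not a kernel symbol):
 *
 *	static u32 csum_copy_ref(const void *src, void *dst,
 *				 int len, u32 sum)
 *	{
 *		memcpy(dst, src, len);
 *		return __csum_partial(dst, len, sum);
 *	}
 *
 * The point of the assembly version is that a faulting access does
 * not oops: the exception tables below redirect a fault on a load to
 * src_error and a fault on a store to dst_error.
 */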

#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	.section __ex_table,"a";		\
	.align	2;				\
	.long	8 ## n ## 0b,src_error;		\
	.long	8 ## n ## 1b,src_error;		\
	.long	8 ## n ## 2b,src_error;		\
	.long	8 ## n ## 3b,src_error;		\
	.long	8 ## n ## 4b,dst_error;		\
	.long	8 ## n ## 5b,dst_error;		\
	.long	8 ## n ## 6b,dst_error;		\
	.long	8 ## n ## 7b,dst_error;		\
	.text

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	stwu	r1,-16(r1)
	stw	r7,12(r1)
	stw	r8,8(r1)

	addic	r12,r6,0
	addi	r6,r4,-4
	neg	r0,r4
	addi	r4,r3,-4
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12
	addi	r1,r1,16
	beqlr+	cr7
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

/* read fault */
src_error:
	lwz	r7,12(r1)
	addi	r1,r1,16
	cmpwi	cr0,r7,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r7)
	blr
/* write fault */
dst_error:
	lwz	r8,8(r1)
	addi	r1,r1,16
	cmpwi	cr0,r8,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r8)
	blr

	.section __ex_table,"a"
	.align	2
	.long	70b,src_error
	.long	71b,dst_error
	.long	72b,src_error
	.long	73b,dst_error
	.long	54b,dst_error
	.text

/*
 * This stuff handles faults in the cacheline loop and branches to either
 * src_error (if in the read part) or dst_error (if in the write part).
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	.section __ex_table,"a"
	.align	2
	.long	30b,src_error
	.long	31b,dst_error
	.long	40b,src_error
	.long	41b,dst_error
	.long	50b,src_error
	.long	51b,dst_error
EXPORT_SYMBOL(csum_partial_copy_generic)
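
/*
 * How the __ex_table entries above work, in outline: each entry pairs
 * the address of a possibly-faulting load/store with a fixup address.
 * A rough sketch of the generic mechanism (not the exact powerpc
 * fault path):
 *
 *	struct exception_table_entry { unsigned long insn, fixup; };
 *
 *	// In the fault handler, for a kernel-mode fault at regs->nip:
 *	const struct exception_table_entry *e;
 *
 *	e = search_exception_tables(regs->nip);
 *	if (e)
 *		regs->nip = e->fixup;	// resume at src_error/dst_error
 *
 * That is why every load and store above that may touch user memory
 * carries a numeric label with a matching ".long label,handler" entry.
 */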