/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
 * len is in words and is always >= 5.
 *
 * In practice len == 5, but this is not guaranteed.  So this code does not
 * attempt to use doubleword instructions.
 *
 * Register use:
 *	r0 = running sum (each 32-bit word is zero-extended by lwz/lwzu and
 *	     added into the full 64-bit register)
 *	r3 = load pointer, advanced by the update-form loads
 *	r4 = remaining word count, then scratch
 *	r5 = scratch
 * Returns in r3 the complemented sum folded down to 16 bits.
 */
_GLOBAL(ip_fast_csum)
	lwz	r0,0(r3)		/* first word */
	lwzu	r5,4(r3)		/* second word; r3 now points at it */
	addic.	r4,r4,-2		/* words left after the first two; sets CR0 */
	addc	r0,r0,r5		/* start the carry chain (CA is rewritten here) */
	mtctr	r4
	blelr-				/* nothing left to loop over; not expected since len >= 5 */
1:	lwzu	r4,4(r3)
	adde	r0,r0,r4
	bdnz	1b
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32-bit halves together */
	add	r0,r0,r4		/* upper word = hi + lo + carry out of (lo + hi) */
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31		/* fold two halves together */
	add	r3,r0,r3		/* 16-bit folded sum ends up in bits 16..31 of the low word */
	not	r3,r3			/* 1's complement */
	srwi	r3,r3,16		/* move the result down to the low 16 bits */
	blr

/*
 * Compute checksum of TCP or UDP pseudo-header:
 *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
 * No real gain trying to do this specially for 64 bit, but
 * the 32 bit addition may spill into the upper bits of
 * the doubleword so we still must fold it down from 64.
 *
 * Returns in r3 the complemented sum folded down to 16 bits.
 */
_GLOBAL(csum_tcpudp_magic)
	rlwimi	r5,r6,16,0,15		/* put proto in upper half of len */
	addc	r0,r3,r4		/* add 4 32-bit words together */
	adde	r0,r0,r5
	adde	r0,r0,r7
	rldicl	r4,r0,32,0		/* fold 64 bit value */
	add	r0,r4,r0
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31		/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3			/* 1's complement */
	srwi	r3,r3,16		/* result in the low 16 bits */
	blr

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * This code assumes at least halfword alignment, though the length
 * can be any number of bytes.  The sum is accumulated in r5.
 *
 * csum_partial(r3=buff, r4=len, r5=sum)
 *
 * Register use:
 *	r3 = buff - 8 (all loads use displacement 8), advanced as data is consumed
 *	r4 = bytes still to be summed
 *	r5 = running 64-bit sum; the CA bit carries between the add steps.
 *	     None of the instructions between the adds (addi/subi, srdi.,
 *	     andi., cmpwi, loads, branches) modify CA.
 *	r6 = doubleword count / loaded data
 * Returns in r3 the sum folded down to 32 bits (not complemented).
 */
_GLOBAL(csum_partial)
	subi	r3,r3,8			/* we'll offset by 8 for the loads */
	srdi.	r6,r4,3			/* divide by 8 for doubleword count */
	addic	r5,r5,0			/* clear carry */
	beq	3f			/* if we're doing < 8 bytes */
	andi.	r0,r3,2			/* aligned on a word boundary already? */
	beq+	1f
	lhz	r6,8(r3)		/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r4,r4,2
	addc	r5,r5,r6
	srdi.	r6,r4,3			/* recompute number of doublewords */
	beq	3f			/* any left? */
1:	mtctr	r6
2:	ldu	r6,8(r3)		/* main sum loop */
	adde	r5,r5,r6
	bdnz	2b
	andi.	r4,r4,7			/* compute bytes left to sum after doublewords */
3:	cmpwi	0,r4,4			/* is at least a full word left? */
	blt	4f
	lwz	r6,8(r3)		/* sum this word */
	addi	r3,r3,4
	subi	r4,r4,4
	adde	r5,r5,r6
4:	cmpwi	0,r4,2			/* is at least a halfword left? */
	blt+	5f
	lhz	r6,8(r3)		/* sum this halfword */
	addi	r3,r3,2
	subi	r4,r4,2
	adde	r5,r5,r6
5:	cmpwi	0,r4,1			/* is at least a byte left? */
	bne+	6f
	lbz	r6,8(r3)		/* sum this byte */
	slwi	r6,r6,8			/* this byte is assumed to be the upper byte of a halfword */
	adde	r5,r5,r6
6:	addze	r5,r5			/* add in final carry */
	rldicl	r4,r5,32,0		/* fold two 32-bit halves together */
	add	r3,r4,r5
	srdi	r3,r3,32
	blr

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * This code needs to be reworked to take advantage of 64 bit sum+copy.
 * However, due to tokenring halfword alignment problems this will be very
 * tricky.   For now we'll leave it until we instrument it somehow.
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
 *
 * Register use:
 *	r0 = running 64-bit sum (CA carries between the add steps)
 *	r3 = src - 4, r4 = dst - 4 (all accesses use displacement 4)
 *	r5 = bytes still to be copied
 *	r6 = word count / data in flight
 *
 * The labels 81-84 (loads from src) and 91-94 (stores to dst) mark the
 * instructions that have entries in the __ex_table section at the end
 * of this file; they must keep their numbers and relative order.
 */
_GLOBAL(csum_partial_copy_generic)
	addic	r0,r6,0			/* r0 = sum, and clear carry */
	subi	r3,r3,4
	subi	r4,r4,4
	srwi.	r6,r5,2			/* # words to do */
	beq	3f			/* if we're doing < 4 bytes */
	andi.	r9,r4,2			/* Align dst to longword boundary */
	beq+	1f
81:	lhz	r6,4(r3)		/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r5,r5,2
91:	sth	r6,4(r4)
	addi	r4,r4,2
	addc	r0,r0,r6
	srwi.	r6,r5,2			/* # words to do */
	beq	3f
1:	mtctr	r6
82:	lwzu	r6,4(r3)		/* the bdnz has zero overhead, so it should */
92:	stwu	r6,4(r4)		/* be unnecessary to unroll this loop */
	adde	r0,r0,r6
	bdnz	82b
	andi.	r5,r5,3			/* bytes left after the word loop */
3:	cmpwi	0,r5,2			/* is at least a halfword left? */
	blt+	4f
83:	lhz	r6,4(r3)
	addi	r3,r3,2
	subi	r5,r5,2
93:	sth	r6,4(r4)
	addi	r4,r4,2
	adde	r0,r0,r6
4:	cmpwi	0,r5,1			/* is a single byte left? */
	bne+	5f
84:	lbz	r6,4(r3)
94:	stb	r6,4(r4)
	slwi	r6,r6,8			/* Upper byte of word */
	adde	r0,r0,r6
5:	addze	r3,r0			/* add in final carry (unlikely with 64-bit regs) */
	rldicl	r4,r3,32,0		/* fold 64 bit value */
	add	r3,r4,r3
	srdi	r3,r3,32
	blr

/* These shouldn't go in the fixup section, since that would
   cause the ex_table addresses to get out of order. */

/*
 * Fixup code for csum_partial_copy_generic, reached through the
 * __ex_table entries below.  On entry the registers are as they were
 * at the faulting instruction (r4 = dst cursor - 4, r5 = bytes left,
 * ctr = words left when inside the word loop, r0 = sum so far).
 *
 * The src_error_N paths store zeroes over the part of dst that was
 * not yet written, then fall through to src_error.
 *
 * NOTE(review): unlike the normal exit above, src_error and dst_error
 * return r0 + carry without folding the doubleword down to 32 bits --
 * confirm that callers do not use the returned sum after an error.
 */

	.globl src_error_1
src_error_1:				/* fault at 81: the alignment halfword load */
	li	r6,0
	subi	r5,r5,2			/* account for the halfword (81: faulted before its subi) */
95:	sth	r6,4(r4)		/* zero the halfword that was not copied */
	addi	r4,r4,2
	srwi.	r6,r5,2			/* # words left to zero */
	beq	3f			/* none: go zero the trailing bytes */
	mtctr	r6
	.globl src_error_2
src_error_2:				/* fault at 82: the word-loop load; ctr = words left */
	li	r6,0
96:	stwu	r6,4(r4)		/* zero the remaining words */
	bdnz	96b
3:	andi.	r5,r5,3			/* trailing bytes left? */
	beq	src_error
	.globl src_error_3
src_error_3:				/* fault at 83: or 84: the tail halfword/byte loads */
	li	r6,0
	mtctr	r5			/* r5 = bytes left to zero */
	addi	r4,r4,3			/* r4 was cursor - 4; make it cursor - 1 for stbu */
97:	stbu	r6,1(r4)		/* zero the remaining bytes one at a time */
	bdnz	97b
	.globl src_error
src_error:
	cmpdi	0,r7,0			/* src_err pointer supplied? */
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r7)		/* *src_err = -EFAULT */
1:	addze	r3,r0			/* return sum so far plus pending carry */
	blr

	.globl dst_error
dst_error:				/* fault on any store to dst, including the zeroing stores */
	cmpdi	0,r8,0			/* dst_err pointer supplied? */
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r8)		/* *dst_err = -EFAULT */
1:	addze	r3,r0			/* return sum so far plus pending carry */
	blr

/*
 * Exception table: pairs of (faulting instruction, fixup handler).
 * Entries are listed in the same order as the labels appear in the code.
 */
.section __ex_table,"a"
	.align  3
	.llong	81b,src_error_1
	.llong	91b,dst_error
	.llong	82b,src_error_2
	.llong	92b,dst_error
	.llong	83b,src_error_3
	.llong	93b,dst_error
	.llong	84b,src_error_3
	.llong	94b,dst_error
	.llong	95b,dst_error
	.llong	96b,dst_error
	.llong	97b,dst_error