/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 * SuperH version: Copyright (C) 1999 Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */

.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r5, r1		! Save len.
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt
	bra	6f
	add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0	! Checksum the leading 16-bit word ...
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! ... and fold the carry back in.
2:
	mov	#-5, r0
	shld	r0, r5		! r5 = len / 32; the loop handles 32 bytes each.
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	clrt
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! Save the carry across the loop test ...
	dt	r5
	bf/s	3b
	cmp/eq	#1, r0		! ... and restore it into T in the delay slot.
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! Bytes left in whole longwords.
	tst	r0, r0
	bt/s	6f
	mov	r0, r5
	shlr2	r5		! Convert byte count to longword count.
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5		! r5 = len % 4.
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0	! Two or three bytes left: add a 16-bit word.
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0	! One byte left.
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! Big-endian: the odd byte is the high lane.
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! Fold the final carry.
9:
	rts
	mov	r6, r0
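
/*
 * A minimal C sketch of the value computed above (a hypothetical
 * reference, not part of the kernel sources): 16-bit words are
 * accumulated in native byte order and carries are folded back in,
 * so the result agrees with the assembler version once the caller
 * folds the sum to 16 bits.  Assumes buf is at least 2-byte aligned,
 * as the comment above argues it is.
 *
 *	static unsigned int csum_partial_ref(const unsigned char *buf,
 *					     int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;	// wide accumulator absorbs carries
 *
 *		while (len > 1) {
 *			acc += *(const unsigned short *)buf;
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing byte
 *		#ifdef __LITTLE_ENDIAN__
 *			acc += *buf;
 *		#else
 *			acc += (unsigned int)*buf << 8;	// high lane
 *		#endif
 *		}
 *		while (acc >> 32)		// end-around carry
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */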

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *					  int sum, int *src_err_ptr,
 *					  int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
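
/*
 * For every SRC()/DST() access, the macros above emit a two-word entry
 * into __ex_table: the address of the tagged (possibly faulting)
 * instruction and the address of the fixup code to resume at, 6001 for
 * source faults and 6002 for destination faults.  The kernel's fault
 * handler searches this table by the faulting PC.  As a sketch (the
 * authoritative layout lives in the SH uaccess headers):
 *
 *	struct exception_table_entry {
 *		unsigned long insn;	// 9999b: the tagged access
 *		unsigned long fixup;	// 6001f or 6002f: recovery code
 *	};
 */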

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt
	bra	4f
	add	#2,r6		! r6 was < 2.  Deal with it.

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6		! r6 = number of byte pairs.
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1		)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0		! Combine the pair in native 16-bit order.

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0		! One odd byte left over?
	bt	7f
	bra	5f
	clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6		! r6 = len / 32; the loop handles 32 bytes each.
	tst	r6,r6
	bt/s	2f
	clrt
	.align	2
1:
SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! Bytes left in whole longwords.
	cmp/pl	r6
	bf/s	4f
	clrt
	shlr2	r6		! Convert byte count to longword count.
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! r6 = len % 4.
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	clrt
	shll16	r0
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! Big-endian: the odd byte is the high lane.
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! Fold the final carry.
7:
5000:

! Exception handler:
.section .fixup, "ax"

6001:
	mov.l	@(8,r15),r0	! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst
	mov.l	@r15,r6		! len
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	nop
	.align	2
8000:	.long	5000b

6002:
	mov.l	@(12,r15),r0	! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! Pop the saved dst and len.
	rts
	mov	r7,r0
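
/*
 * Error semantics of the fixup code above, as C-like pseudocode (a
 * sketch, not part of this file): a faulting load reports the error
 * and zeroes the whole destination, a faulting store only reports the
 * error; either way the sum accumulated so far is returned in r0.
 *
 *	if (fault on a SRC access) {
 *		*src_err_ptr = -EFAULT;
 *		memset(dst, 0, len);	// computing the rest is too much work
 *	}
 *	if (fault on a DST access)
 *		*dst_err_ptr = -EFAULT;
 */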