/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */

.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte
	 * aligned.  Fortunately, it is easy to convert 2-byte alignment
	 * to 4-byte alignment for the unrolled loop.
	 */
	mov	r5, r1
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
	mov	#-5, r0
	shld	r0, r5
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	 clrt
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
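	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	rts
	 mov	r6, r0

/*
 * For reference only: a rough C sketch of the accumulation performed
 * above (illustrative, not part of the kernel build; wide_sum is a
 * made-up name).  Each addc feeds the ALU carry back into the running
 * sum, which is what makes a plain 32-bit accumulator a valid
 * ones'-complement partial sum:
 *
 *	unsigned int wide_sum(const unsigned int *p, int n,
 *			      unsigned int sum)
 *	{
 *		while (n--) {
 *			unsigned int w = *p++;
 *			sum += w;
 *			if (sum < w)	// carry out of bit 31...
 *				sum++;	// ...wrapped back in (end-around)
 *		}
 *		return sum;
 *	}
 *
 * The caller is expected to fold the 32-bit result down to 16 bits
 * (e.g. with csum_fold()) before using it as an Internet checksum.
 */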
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *					  int sum, int *src_err_ptr,
 *					  int *dst_err_ptr)
 */

/*
 * Copy from src while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST tag each memory access with its type, so that
 * a custom exception handler can be installed for each kind of access:
 * SRC faults are fixed up at 6001, DST faults at 6002.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
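/*
 * How the fixup works (a sketch, assuming the usual kernel exception
 * table machinery): SRC( mov.l @r4+,r0 ) expands to
 *
 *	9999: mov.l @r4+,r0
 *	.section __ex_table, "a"
 *	.long 9999b, 6001f
 *	.previous
 *
 * The load itself stays in .text, while a (faulting insn, fixup)
 * address pair is emitted into __ex_table.  If the load faults, the
 * fault handler looks the faulting PC up in that table and resumes
 * execution at the recorded fixup address (6001 for source accesses,
 * 6002 for destination accesses) instead of oopsing.
 */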
!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so a simple byte-by-byte copy will do.
	mov	r6,r2
	shlr	r6
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f
	bra	5f
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case.
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	 clrt
	shll16	r0
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	mov.l	@(8,r15),r0		! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5		! dst
	mov.l	@r15,r6			! len
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	 nop
	.align	2
8000:	.long	5000b

6002:
	mov.l	@(12,r15),r0		! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15
	rts
	 mov	r7,r0
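
/*
 * Caller-side sketch (hypothetical usage; src/dst/len are placeholder
 * names, not taken from this file):
 *
 *	int src_err = 0, dst_err = 0;
 *	unsigned int sum;
 *
 *	sum = csum_partial_copy_generic(src, dst, len, 0,
 *					&src_err, &dst_err);
 *	if (src_err)	// source faulted: dst was zero-filled and
 *		...	// src_err now holds -EFAULT
 *	if (dst_err)	// destination faulted: dst_err holds -EFAULT
 *		...
 *
 * On a fault, the fixup code above stores -EFAULT through the
 * corresponding error pointer and jumps back to 5000:, so the function
 * still returns a sum through the normal epilogue rather than
 * unwinding.
 */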