/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 *
 * Register usage in this routine:
 *	r4	buf (advanced with post-increment loads)
 *	r5	len (bytes still to be processed)
 *	r6	sum (32-bit accumulator; carries are folded back in with addc)
 *	r7	copy of the original buf, used at the end to test for an odd start
 *	r0-r3	scratch
 * The result is returned in r0.
 *
 * The T bit is used as the carry between consecutive addc instructions, so
 * the order of the instructions between them matters: only instructions that
 * leave T alone may be placed between two addc that are meant to chain.
 *
 * When buf starts on an odd address the accumulator is rotated by 8 bits
 * after the first byte has been added, the rest is summed on aligned data,
 * and the accumulator is rotated by 8 bits again before returning.
 */

.text
ENTRY(csum_partial)
	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5
	add	#-1, r5
	bt	10f		! len == 0: nothing is summed, so return the
				! incoming sum as is.  Going through 9f would
				! rotate it although no byte was consumed.
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! big endian: the first byte of a 16-bit pair
				! is the high byte (same rule as at label 7)
#endif
	addc	r0, r6		! t=0 from previous tst
	mov	#0, r0
	addc	r0, r6		! fold the carry of the byte add back in; the
				! tst below would otherwise discard it
	mov	r6, r0		! rotate r6 left by 8 bits ...
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6		! ... r6 = (r6 << 8) | (r6 >> 24)
	mov	r4, r0
	tst	#2, r0
	bt	2f		! buf is now 4 byte aligned
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! carry out of the 16-bit add
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = r5 >> 5 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if it is 0, go to 4f
	 clrt
	.align	2
3:
	! Unrolled loop: eight longwords (32 bytes) per iteration.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save the carry, dt overwrites T
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0		! restore the saved carry into T
	! here, we know r1==0
	addc	r1, r6			! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! bytes left in whole longwords (0..28)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = number of longwords left
	mov	#0, r2
5:
	addc	r2, r6		! adds the longword loaded one iteration ago
	mov.l	@r4+, r2
	movt	r0		! save the carry, dt overwrites T
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0		! restore the saved carry into T
	addc	r2, r6		! last longword
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it is 0 go to 9f
	mov	#2, r1
	cmp/hs  r1, r5
	bf	7f		! exactly one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left
	 clrt
	shll16	r0		! three bytes left: add the halfword in the
	addc	r0, r6		! upper half, then fall through for the byte
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0		! rotate r6 left by 8 bits
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
					int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * Each macro places the local label 9999 on the wrapped instruction and
 * emits an __ex_table entry pairing that address with a fixup label:
 * 6001 for SRC (load) accesses, 6002 for DST (store) accesses.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
! The entry code pushes DST and LEN, so while the routine runs:
!	@r15		original LEN
!	@(4,r15)	original DST
!	@(8,r15)	SRC_ERR_PTR
!	@(12,r15)	DST_ERR_PTR
! r2 holds a copy of the remaining length while r6 is used as a loop counter.
! The result is returned in r0 (copied from r7).
!
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Combine the two bytes into one 16-bit value in memory byte order.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save the carry, dt overwrites T
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0		! restore the saved carry into T
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f		! even length: done
	bra	5f		! odd length: one trailing byte left
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5 = number of 32-byte chunks
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
	! Unrolled loop: copy and sum eight longwords (32 bytes) per iteration.
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save the carry, dt overwrites T
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0		! restore the saved carry into T
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! bytes left in whole longwords (0..28)
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = number of longwords left
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0		! save the carry, dt overwrites T
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0		! restore the saved carry into T
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! 3 bytes or less remaining
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! exactly one byte left
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left
	 clrt
	shll16	r0		! three bytes left: add the halfword in the
	addc	r0,r7		! upper half, then fall through for the byte
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:

! Exception handler:
! Both handlers store -EFAULT through the matching error pointer and then
! jump back to label 5000, the common exit of the routine.
.section .fixup, "ax"

6001:
	mov.l	@(8,r15),r0			! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5		! dst
	mov.l	@r15,r6			! len
	mov	#0,r7			! zero byte to store; also the sum returned
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	 nop
	.align	2
8000:	.long	5000b

6002:
	mov.l	@(12,r15),r0			! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long 	5000b

.previous
	add	#8,r15			! drop the saved dst and len
	rts
	 mov	r7,r0