/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
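/*
 * For reference, a rough C model of what the routines below compute
 * (editor's sketch, illustrative only: ref_csum_partial is a made-up
 * name, and the alignment/tail handling of the real code is omitted;
 * buff is assumed 4-byte aligned and len a multiple of 4):
 *
 *	unsigned int ref_csum_partial(const unsigned char *buff, int len,
 *				      unsigned int sum)
 *	{
 *		while (len > 0) {
 *			unsigned int w = *(const unsigned int *)buff;
 *
 *			sum += w;
 *			if (sum < w)	// carry out of bit 31?
 *				sum++;	// fold it back in, like adcl
 *			buff += 4;
 *			len -= 4;
 *		}
 *		return sum;	// still 32 bits; callers fold it to 16
 *	}
 */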
.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
ENTRY(csum_partial)
	CFI_STARTPROC
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if 2-byte aligned.

	# buf is odd
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi	# clears CF before the adcl chain
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testl $1, 12(%esp)	# started on an odd address?
	jz 8f
	roll $8, %eax		# then byte-swap the sum back
8:
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)

#else

/* Version for PentiumII/PPro */
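/*
 * A note on the computed jump below (editor's sketch; byte counts
 * checked against the encodings, but illustrative nonetheless): each
 * "adcl off(%esi), %eax" in the unrolled block at 40: assembles to
 * 3 bytes.  With %ebx = -((len & 0x7c) >> 2), i.e. minus the number of
 * dwords left over after the 128-byte blocks,
 *
 *	lea 45f(%ebx,%ebx,2), %ebx	# 45f + 3*%ebx = 45f - 3*dwords
 *
 * points exactly that many instructions before label 45, so the
 * "jmp *%ebx" executes only the adcls that are needed.  %esi is
 * pre-advanced by (len & 0x7c) to match the negative offsets, and
 * "testl %esi, %esi" clears CF so the first adcl behaves like addl.
 */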
ENTRY(csum_partial)
	CFI_STARTPROC
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clears CF
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned, so this should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testl $1, 12(%esp)	# started on an odd address?
	jz 90f
	roll $8, %eax		# then byte-swap the sum back
90:
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)

#endif

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f;		\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f;		\
	.previous
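/*
 * For example, SRC(movl (%esi), %ebx) expands to roughly:
 *
 *	9999:	movl (%esi), %ebx
 *		.section __ex_table, "a"
 *		.long 9999b, 6001f
 *		.previous
 *
 * i.e. the address of the load is recorded in the __ex_table section
 * together with a fixup address.  If the load faults, the page fault
 * handler finds that pair and resumes execution at label 6001 (the
 * source-error fixup) instead of oopsing; DST records 6002 (the
 * destination-error fixup) in the same way.
 */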
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP	12

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	subl $4,%esp
	CFI_ADJUST_CFA_OFFSET 4
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi	# clears CF
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	popl %edi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE edi
	popl %ecx			# equivalent to addl $4,%esp
	CFI_ADJUST_CFA_OFFSET -4
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

/* ROUND1 starts the carry chain with addl; ROUND continues it with adcl. */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ARGBASE 12
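/*
 * Computed-jump arithmetic for the copy loop below (editor's note,
 * analogous to the plain csum_partial case): each ROUND/ROUND1 expands
 * to 8 bytes of code (3-byte load + 2-byte addl/adcl + 3-byte store),
 * i.e. 2 code bytes per data byte.  With %ebx = -(len & 0x3c), minus
 * the leftover bytes after the 64-byte blocks rounded down to dwords,
 *
 *	lea 3f(%ebx,%ebx), %ebx		# 3f + 2*%ebx = 3f - 8*dwords
 *
 * lands exactly dwords-many ROUNDs before label 3.  %esi and %edi are
 * pre-advanced to match the negative offsets, and "testl %esi, %esi"
 * clears CF before the jump.
 */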
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	pushl %edi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET edi, 0
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea  -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi	# clears CF
	jmp *%ebx
1:	addl $64,%esi
	addl $64,%edi
	# the two movb loads touch the source cache lines ahead of the
	# copy; %bl is immediately overwritten, so they act as a prefetch
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	popl %edi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE edi
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	ret
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
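/*
 * Error contract of both csum_partial_copy_generic implementations, as
 * implemented by the 6001/6002 fixups above (editor's summary): on a
 * faulting source read, *src_err_ptr is set to -EFAULT and the whole
 * destination is zeroed (the xorl also leaves %eax at 0, so 0 is
 * returned as the sum); on a faulting destination write, *dst_err_ptr
 * is set to -EFAULT and the checksum accumulated so far is returned.
 * Both paths return through the normal register-restore epilogue.
 */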