/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buff, int len,
 *			     unsigned int sum)
 *
 * All three arguments are read from the stack; after the two register
 * pushes they sit at 12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum.
 * The bytes of buff are added into sum with end-around carry and the
 * 32-bit accumulator is returned in %eax.  %esi and %ebx are saved and
 * restored; %ecx and %edx are clobbered.
 *
 * If buff starts on an odd address, the leading byte is added and the
 * accumulator is rotated by 8 bits so that the remaining data can be
 * summed from an even address; the accumulator is rotated by another
 * 8 bits just before returning.
 */

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
ENTRY(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	# buf is odd
	dec %ecx
	jl 8f			# len was <= 0: return sum untouched
	movzbl (%esi), %ebx
	/*
	 * Add the leading byte and fold the carry back in right away.
	 * The following roll overwrites CF, so a carry left pending here
	 * would be lost (this matches the PPro variant below).
	 */
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# keep the remaining length for the tail code
	shrl $5, %ecx		# ecx = number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clears CF ahead of the adcl chain
	/*
	 * 32 bytes per iteration.  The carry is chained through the adcl
	 * instructions and across iterations (lea and dec leave CF alone)
	 * and is folded in once after the loop.
	 */
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f			# exactly one byte left
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f			# flags still from the cmpl: exactly two bytes
	shll $16,%ecx		# three bytes: word goes high, last byte low
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)	# was buff odd?
	jz 8f
	roll $8, %eax		# second half of the rotation done on entry
8:
	popl %ebx
	popl %esi
	ret
ENDPROC(csum_partial)

#else

/* Version for PentiumII/PPro */

ENTRY(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx		# keep len for the 1-3 byte tail
	movl %ecx, %ebx
	andl $0x7c, %ebx	# bytes in the leading partial 128-byte block
	shrl $7, %ecx		# number of full 128-byte blocks
	addl %ebx,%esi		# table below addresses backwards from esi
	shrl $2, %ebx		# ... as a dword count
	negl %ebx
	/*
	 * Enter the adcl table 3 bytes before 45f for every dword of the
	 * partial block; the scale of 3 presumes each table entry
	 * assembles to 3 bytes, so the table must not be edited casually.
	 */
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clears CF before entering the adcl chain
	JMP_NOSPEC %ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f			# len was <= 0: return sum untouched
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b			# more than two bytes left
	je 32f			# exactly two bytes left
	addl $2, %ecx
	jz 80f			# nothing left
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# lea leaves CF alone for the adcl below
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testb $1, 12(%esp)	# was buf odd?
	jz 90f
	roll $8, %eax		# second half of the rotation done at 25:
90:
	popl %ebx
	popl %esi
	ret
ENDPROC(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * Every instruction wrapped in SRC() gets an exception-table entry that
 * resumes at label 6001 (source fault); every instruction wrapped in DST()
 * resumes at label 6002 (destination fault).
 *
 * Stack arguments, relative to ARGBASE after the prologue:
 *	ARGBASE+4  src		ARGBASE+16 sum
 *	ARGBASE+8  dst		ARGBASE+20 src_err_ptr
 *	ARGBASE+12 len		ARGBASE+24 dst_err_ptr
 * The checksum accumulator is returned in %eax.
 *
 * On a source fault, -EFAULT is stored through src_err_ptr, len bytes at
 * dst are zeroed and %eax is 0 on return.  On a destination fault, -EFAULT
 * is stored through dst_err_ptr and %eax holds whatever had been
 * accumulated when the fault hit.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(y...)			\
	9999: y;			\
	_ASM_EXTABLE_UA(9999b, 6001f)

#define DST(y...)			\
	9999: y;			\
	_ASM_EXTABLE_UA(9999b, 6002f)

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * ARGBASE: distance from %esp to the return address after the prologue
 *	    (one 4-byte scratch slot plus three pushed registers).
 * FP:	    offset of that scratch slot; it holds the remaining length
 *	    while %edx is busy as a data register in the unrolled loop.
 */
#define ARGBASE 16
#define FP		12

ENTRY(csum_partial_copy_generic)
	subl  $4,%esp			# reserve the FP scratch slot
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)		# save remaining length for the tail
	shrl $5, %ecx			# ecx = number of 32-byte blocks
	jz 2f
	testl %esi, %esi		# clears CF ahead of the adcl chain
	/*
	 * 32 bytes per iteration, two dwords in flight (%ebx/%edx).  The
	 * carry is chained through the adcl instructions and across
	 * iterations, then folded in once after the loop.
	 */
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx		# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx			# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f				# exactly one byte left
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f				# flags still from the cmpl: two bytes
	shll $16,%ecx			# three bytes: word high, last byte low
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:					# common exit; fixups jump back here

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax			# fill byte for stosb and return value
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	ret
ENDPROC(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

/*
 * ROUND1/ROUND copy one dword at offset x and add it to %eax; ROUND1 starts
 * a carry chain with addl, ROUND continues it with adcl.  The jump
 * computation in the function body depends on the assembled size of these
 * sequences, so they must not be changed independently of it.
 */
#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

/* Three pushed registers sit between %esp and the return address. */
#define ARGBASE 12

ENTRY(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx			# overwritten by the lea below before any use
	shrl $6, %ecx			# number of full 64-byte blocks
	andl $0x3c, %ebx		# bytes in the leading partial block
	negl %ebx
	subl %ebx, %esi			# ebx is negative: advances src ...
	subl %ebx, %edi			# ... and dst past the partial block
	lea  -1(%esi),%edx
	andl $-32,%edx			# 32-byte aligned address for the movb loads
	/*
	 * Enter the ROUND sequence 2 bytes of code before 3f for every byte
	 * of the partial block, so only its trailing ROUNDs execute.
	 */
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi		# clears CF before entering the adcl chain
	JMP_NOSPEC %ebx
1:	addl $64,%esi
	addl $64,%edi
	/*
	 * The two byte loads only touch the source; %bl is overwritten by
	 * ROUND1 before it is read.  NOTE(review): presumably this warms
	 * the cache ahead of the copy - confirm.
	 */
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax			# fold the carry of this block
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx			# 0..3 trailing bytes
	jz 7f
	cmpl $2, %edx
	jb 5f				# exactly one byte left
SRC(	movw (%esi), %dx         )
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f				# flags still from the cmpl: two bytes
	shll $16,%edx			# three bytes: word high, last byte low
5:
SRC(	movb (%esi), %dl         )
DST(	movb %dl, (%edi)         )
6:	addl %edx, %eax
	adcl $0, %eax
7:					# common exit; fixups jump back here
.section .fixup, "ax"
6001:	movl	ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax			# fill byte for stosb and return value
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp  7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	ret
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)
