/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 *		INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * Reading notes for this file:
 *
 *  - "!" starts a line comment.
 *  - An instruction indented by one extra space is the delay slot of the
 *    branch directly above it (bt/s, bf/s, bra, rts); it is executed
 *    whether or not that branch is taken.
 *  - addc uses the T bit as carry-in and carry-out.  The loops count down
 *    with dt, which overwrites T, so the pending carry is parked in r0 with
 *    movt and put back into T by "cmp/eq #1, r0" in the loop branch's delay
 *    slot.
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 *
 * Computes a partial checksum, e.g. for TCP/UDP fragments: the data at buf
 * is added into sum using 32-bit adds whose carry is fed back into the
 * result.  The 32-bit accumulator is returned; it is not folded to 16 bits
 * here.
 *
 * Register use, as read from the code below:
 *   r4		buf, advanced by the post-increment loads
 *   r5		len on entry; then reused as loop counter / tail size
 *   r6		running sum, copied to r0 on return
 *   r1		byte count still to be summed once the optional 2-byte
 *		alignment step is done (reused as the constant 2 in the tail)
 *   r0,r2,r3	scratch
 *
 * NOTE(review): only bit 1 of buf is tested.  An odd buf is not realigned
 * by this routine, so the word/longword loads run at whatever alignment the
 * caller supplied - confirm that callers (or the trap handler) cover this.
 */

.text
ENTRY(csum_partial)
	  /*
	   * Experiments with Ethernet and SLIP connections show that buf
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
	mov	r5, r1		! r1 = len, kept for the tail stages.
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		! T = (r5 >= 0)
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! Fold the carry back in.
2:
	mov	#-5, r0
	shld	r0, r5		! r5 = number of whole 32-byte chunks.
	tst	r5, r5
	bt/s	4f		! No whole chunk: go to 4f.
	 clrt
	.align	2
3:
	/* One iteration sums 8 longwords (32 bytes); loads and adds interleave. */
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! Park the carry: dt overwrites T.
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0		! Restore the carry into T.
	! here, we know r5==0
	addc	r5, r6			! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! Bytes left in whole longwords (0..28).
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5		! r5 = number of longwords left.
	mov	#0, r2
5:
	/* Each pass adds the longword loaded by the previous pass (0 at first). */
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5		! r5 = trailing bytes (0..3).
	tst	r5, r5
	bt	9f		! Nothing left: go to 9f.
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f		! Exactly one byte left.
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! Exactly two bytes left.
	 clrt
	shll16	r0		! Three left: halfword goes in the upper 16 bits,
	addc	r0, r6		! then fall through for the last byte.
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! Fold the final carry.
9:
	rts
	 mov	r6, r0

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
 */

/*
 * Copy from src while checksumming, otherwise like csum_partial with initial
 * sum being ~0U
 *
 * Every load from src and store to dst is wrapped in EXC().  The exception
 * table pairs each wrapped instruction with fixup label 6001, which returns
 * 0 instead of the checksum.
 * NOTE(review): presumably the ~0U seed exists so that a successful result
 * cannot be mistaken for that 0 - confirm against the callers.
 *
 * Register use, as read from the code below:
 *   r4		src, advanced by the post-increment loads
 *   r5		dst, advanced by explicit adds
 *   r6		len on entry; then reused as loop counter / tail size
 *   r7		running sum, seeded with -1, copied to r0 on return
 *   r2		byte count still to be handled by the later stages
 *   r0,r1	scratch
 */

/*
 * EXC(insn): emit insn under local label 9999 and add an __ex_table entry
 * (address of insn, address of 6001).  Variadic because the wrapped
 * instruction itself contains a comma.
 */
#define EXC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
!
ENTRY(csum_partial_copy_generic)
	mov	#-1,r7		! Initial sum is ~0U.
	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! r2 = len, kept for the odd-byte check.
	shlr	r6		! r6 = number of byte pairs.
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
	/* Copy two bytes, then add them as one 16-bit value in memory order. */
EXC(	mov.b	@r4+,r1 	)
EXC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
EXC(	mov.b	r1,@r5		)
EXC(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! Park the carry: dt overwrites T.
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0		! Restore the carry into T.
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f		! Even length: done.
	bra	5f		! Odd length: one byte left for the tail code.
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2		! r2 = bytes left, kept for the tail stages.
	mov	#-5,r0
	shld	r0,r6		! r6 = number of whole 32-byte chunks.
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
	/* One iteration copies and sums 8 longwords (32 bytes). */
EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
EXC(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0 	)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(8,r5)	)
EXC(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0 	)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(16,r5)	)
EXC(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0 	)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(24,r5)	)
EXC(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! Bytes left in whole longwords (0..28).
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = number of longwords left.
3:
EXC(	mov.l	@r4+,r0	)
	addc	r0,r7
EXC(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! r6 = trailing bytes (0..3).
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! Exactly one byte left.
EXC(	mov.w	@r4+,r0	)
EXC(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! Exactly two bytes left.
	 clrt
	shll16	r0		! Three left: halfword goes in the upper 16 bits,
	addc	r0,r7		! then fall through for the last byte.
5:
EXC(	mov.b	@r4+,r0	)
EXC(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:

# Exception handler:
.section .fixup, "ax"

6001:
	/* Target of every __ex_table entry emitted above: return 0. */
	rts
	 mov	#0,r0
.previous
	/* Normal exit, reached by falling through from label 7. */
	rts
	 mov	r7,r0