// SPDX-License-Identifier: GPL-2.0
/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */

#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>
#include <asm/word-at-a-time.h>

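/*
 * Fold the 64-bit running sum to a 32-bit partial checksum.  Adding the
 * 32-bit-rotated copy computes (high half + low half) in both halves of
 * the word; the carry out of the low half supplies the ones-complement
 * end-around carry for the high half, so the top 32 bits of the result
 * are the correctly folded sum.
 */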
static inline __wsum csum_finalize_sum(u64 temp64)
{
	return (__force __wsum)((temp64 + ror64(temp64, 32)) >> 32);
}

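/*
 * Add a 40-byte block (five 64-bit words, the size of an IPv6 header)
 * into the running sum, chaining the additions through the CPU carry
 * flag and folding the final carry in with the trailing "adcq $0".
 */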
static inline unsigned long update_csum_40b(unsigned long sum, const unsigned long m[5])
{
	asm("addq %1,%0\n\t"
	     "adcq %2,%0\n\t"
	     "adcq %3,%0\n\t"
	     "adcq %4,%0\n\t"
	     "adcq %5,%0\n\t"
	     "adcq $0,%0"
		:"+r" (sum)
		:"m" (m[0]), "m" (m[1]), "m" (m[2]),
		 "m" (m[3]), "m" (m[4]));
	return sum;
}

/*
 * Do a checksum on an arbitrary memory area.
 * Returns a 32-bit checksum.
 *
 * This isn't as time critical as it used to be because many NICs
 * do hardware checksumming these days.
 *
 * Still, with CHECKSUM_COMPLETE this is called to compute
 * checksums on IPv6 headers (40 bytes) and other small parts.
 * It's best to have buff aligned on a 64-bit boundary.
 * (An illustrative usage sketch follows the exported function below.)
 */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
	u64 temp64 = (__force u64)sum;

	/* Do two 40-byte chunks in parallel to get better ILP */
	if (likely(len >= 80)) {
		u64 temp64_2 = 0;
		do {
			temp64 = update_csum_40b(temp64, buff);
			temp64_2 = update_csum_40b(temp64_2, buff + 40);
			buff += 80;
			len -= 80;
		} while (len >= 80);

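		/* Combine the two accumulators, folding in the final carry. */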
		asm("addq %1,%0\n\t"
		    "adcq $0,%0"
		    :"+r" (temp64): "r" (temp64_2));
	}

	/*
	 * len == 40 is the hot case due to IPv6 headers, so return
	 * early for that exact case without checking the tail bytes.
	 */
	if (len >= 40) {
		temp64 = update_csum_40b(temp64, buff);
		len -= 40;
		if (!len)
			return csum_finalize_sum(temp64);
		buff += 40;
	}

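	/* Mop up the remaining bytes (at most 39) in 32-, 16- and 8-byte steps. */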
	if (len & 32) {
		asm("addq 0*8(%[src]),%[res]\n\t"
		    "adcq 1*8(%[src]),%[res]\n\t"
		    "adcq 2*8(%[src]),%[res]\n\t"
		    "adcq 3*8(%[src]),%[res]\n\t"
		    "adcq $0,%[res]"
		    : [res] "+r"(temp64)
		    : [src] "r"(buff), "m"(*(const char(*)[32])buff));
		buff += 32;
	}
	if (len & 16) {
		asm("addq 0*8(%[src]),%[res]\n\t"
		    "adcq 1*8(%[src]),%[res]\n\t"
		    "adcq $0,%[res]"
		    : [res] "+r"(temp64)
		    : [src] "r"(buff), "m"(*(const char(*)[16])buff));
		buff += 16;
	}
	if (len & 8) {
		asm("addq 0*8(%[src]),%[res]\n\t"
		    "adcq $0,%[res]"
		    : [res] "+r"(temp64)
		    : [src] "r"(buff), "m"(*(const char(*)[8])buff));
		buff += 8;
	}
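	/*
	 * 1-7 trailing bytes: load_unaligned_zeropad() safely reads a full
	 * 8-byte word even when it extends past the buffer, and the shift
	 * pair clears the bytes beyond len before the tail is added in.
	 */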
	if (len & 7) {
		unsigned int shift = (-len << 3) & 63;
		unsigned long trail;

		trail = (load_unaligned_zeropad(buff) << shift) >> shift;

		asm("addq %[trail],%[res]\n\t"
		    "adcq $0,%[res]"
		    : [res] "+r"(temp64)
		    : [trail] "r"(trail));
	}
	return csum_finalize_sum(temp64);
}
EXPORT_SYMBOL(csum_partial);
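
/*
 * Illustrative sketch only (nothing in this file uses it): the running
 * __wsum returned by csum_partial() can be fed back in as the starting
 * sum, which is how split buffers are commonly checksummed before
 * csum_fold() collapses the 32-bit result to 16 bits.  The helper name
 * below is hypothetical, and this naive chaining is only valid when the
 * first chunk's length is even.
 */
static inline __sum16 example_csum_two_chunks(const void *a, int alen,
					      const void *b, int blen)
{
	__wsum sum = csum_partial(a, alen, 0);

	sum = csum_partial(b, blen, sum);
	return csum_fold(sum);
}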

/*
 * This routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c.
 */
__sum16 ip_compute_csum(const void *buff, int len)
{
	return csum_fold(csum_partial(buff, len, 0));
}
EXPORT_SYMBOL(ip_compute_csum);
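
/*
 * Typical calling pattern (illustrative only; icmph and len are
 * hypothetical names): zero the message's checksum field, compute over
 * the whole message, and store the result back, so that a receiver
 * summing the same bytes ends up with 0xffff:
 *
 *	icmph->checksum = 0;
 *	icmph->checksum = ip_compute_csum(icmph, len);
 */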