// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 Arm Ltd.

#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <net/checksum.h>

/*
 * One's-complement (end-around carry) 64-bit accumulation: add @data into
 * @sum and, if the addition wrapped, fold the carry-out back into bit 0.
 */
static u64 accumulate(u64 sum, u64 data)
{
	u64 total = sum + data;

	/* (total < data) is 1 exactly when the unsigned add overflowed. */
	return total + (total < data);
}
1769e3a6aaSBibo Mao
1869e3a6aaSBibo Mao /*
1969e3a6aaSBibo Mao * We over-read the buffer and this makes KASAN unhappy. Instead, disable
2069e3a6aaSBibo Mao * instrumentation and call kasan explicitly.
2169e3a6aaSBibo Mao */
/*
 * Compute the 16-bit one's-complement checksum of @len bytes at @buff,
 * returned in the low 16 bits (callers fold/negate as needed).
 *
 * Reads are done as aligned 64/128-bit loads, which means the head and tail
 * of the buffer may be over-read; the excess bytes are masked to zero before
 * accumulation, so the result is unaffected. Because of the deliberate
 * over-read, KASAN instrumentation is disabled and the legitimate range is
 * checked explicitly up front.
 */
unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
{
	unsigned int offset, shift, sum;
	const u64 *ptr;
	u64 data, sum64 = 0;

	/* An empty (or bogus negative) length checksums to zero. */
	if (unlikely(len <= 0))
		return 0;

	/* Misalignment of @buff within its containing dword. */
	offset = (unsigned long)buff & 7;
	/*
	 * This is to all intents and purposes safe, since rounding down cannot
	 * result in a different page or cache line being accessed, and @buff
	 * should absolutely not be pointing to anything read-sensitive. We do,
	 * however, have to be careful not to piss off KASAN, which means using
	 * unchecked reads to accommodate the head and tail, for which we'll
	 * compensate with an explicit check up-front.
	 */
	kasan_check_read(buff, len);
	ptr = (u64 *)(buff - offset);
	/* Bias @len so it counts bytes remaining *after* the first load. */
	len = len + offset - 8;

	/*
	 * Head: zero out any excess leading bytes. Shifting back by the same
	 * amount should be at least as fast as any other way of handling the
	 * odd/even alignment, and means we can ignore it until the very end.
	 */
	shift = offset * 8;
	data = *ptr++;
	data = (data >> shift) << shift;

	/*
	 * Body: straightforward aligned loads from here on (the paired loads
	 * underlying the quadword type still only need dword alignment). The
	 * main loop strictly excludes the tail, so the second loop will always
	 * run at least once.
	 */
	while (unlikely(len > 64)) {
		__uint128_t tmp1, tmp2, tmp3, tmp4;

		/* Unrolled: four 16-byte loads per iteration. */
		tmp1 = *(__uint128_t *)ptr;
		tmp2 = *(__uint128_t *)(ptr + 2);
		tmp3 = *(__uint128_t *)(ptr + 4);
		tmp4 = *(__uint128_t *)(ptr + 6);

		len -= 64;
		ptr += 8;

		/* This is the "don't dump the carry flag into a GPR" idiom */
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp2 += (tmp2 >> 64) | (tmp2 << 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp4 += (tmp4 >> 64) | (tmp4 << 64);
		/* Pairwise reduce the four partial sums down to one... */
		tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		/* ...then fold the running sum64 in the same way. */
		tmp1 = ((tmp1 >> 64) << 64) | sum64;
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		sum64 = tmp1 >> 64;
	}
	/* Remainder in 16-byte steps; @data always holds one pending dword. */
	while (len > 8) {
		__uint128_t tmp;

		sum64 = accumulate(sum64, data);
		tmp = *(__uint128_t *)ptr;

		len -= 16;
		ptr += 2;

		data = tmp >> 64;
		sum64 = accumulate(sum64, tmp);
	}
	/* At most one final dword load; afterwards @len is in [-7, 0]. */
	if (len > 0) {
		sum64 = accumulate(sum64, data);
		data = *ptr;
		len -= 8;
	}
	/*
	 * Tail: zero any over-read bytes similarly to the head, again
	 * preserving odd/even alignment.
	 */
	/* len <= 0 here, so -8 * len is the count of excess tail bits. */
	shift = len * -8;
	data = (data << shift) >> shift;
	sum64 = accumulate(sum64, data);

	/* Finally, folding */
	sum64 += (sum64 >> 32) | (sum64 << 32);
	sum = sum64 >> 32;
	sum += (sum >> 16) | (sum << 16);
	/*
	 * An odd starting offset means every byte landed one position off
	 * within its dword; byte-swapping the folded sum corrects for that.
	 */
	if (offset & 1)
		return (u16)swab32(sum);

	return sum >> 16;
}
11969e3a6aaSBibo Mao
csum_ipv6_magic(const struct in6_addr * saddr,const struct in6_addr * daddr,__u32 len,__u8 proto,__wsum csum)12069e3a6aaSBibo Mao __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
12169e3a6aaSBibo Mao const struct in6_addr *daddr,
12269e3a6aaSBibo Mao __u32 len, __u8 proto, __wsum csum)
12369e3a6aaSBibo Mao {
12469e3a6aaSBibo Mao __uint128_t src, dst;
12569e3a6aaSBibo Mao u64 sum = (__force u64)csum;
12669e3a6aaSBibo Mao
12769e3a6aaSBibo Mao src = *(const __uint128_t *)saddr->s6_addr;
12869e3a6aaSBibo Mao dst = *(const __uint128_t *)daddr->s6_addr;
12969e3a6aaSBibo Mao
13069e3a6aaSBibo Mao sum += (__force u32)htonl(len);
13169e3a6aaSBibo Mao sum += (u32)proto << 24;
13269e3a6aaSBibo Mao src += (src >> 64) | (src << 64);
13369e3a6aaSBibo Mao dst += (dst >> 64) | (dst << 64);
13469e3a6aaSBibo Mao
13569e3a6aaSBibo Mao sum = accumulate(sum, src >> 64);
13669e3a6aaSBibo Mao sum = accumulate(sum, dst >> 64);
13769e3a6aaSBibo Mao
13869e3a6aaSBibo Mao sum += ((sum >> 32) | (sum << 32));
13969e3a6aaSBibo Mao return csum_fold((__force __wsum)(sum >> 32));
14069e3a6aaSBibo Mao }
14169e3a6aaSBibo Mao EXPORT_SYMBOL(csum_ipv6_magic);
142