/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 *
 * Register usage:
 *   r3   buff on entry, immediately biased by -4 so that every access
 *        can use a 4(r3)-style displacement; 32-bit result on return
 *   r4   len (reduced by 2 when a leading halfword is consumed)
 *   r5   running sum; every addition is an adde, so the carry out of
 *        one add feeds the next and the final addze folds the last one
 *   r0, r6-r8, ctr   scratch
 *
 * Order of work: optional leading halfword, (words mod 4) single words,
 * blocks of four words, optional trailing halfword, optional trailing
 * byte.
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)	/* leftover (count mod 4) words, one at a time */
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	/*
	 * The block loop is staggered: the fourth word of each block (r8)
	 * is loaded with lwzu here and only added at the top of the next
	 * iteration, or at 23: once the counter has run out.
	 */
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8
3:	andi.	r0,r4,2		/* trailing halfword? */
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1		/* trailing byte? */
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
 */
/*
 * csum_partial_copy_generic(src, dst, len)
 *
 * Copies len bytes from src to dst while accumulating their checksum on
 * top of an initial value of 0xffffffff.  Every load, store and dcbz that
 * has an exception-table entry below resumes at "fault", which returns 0.
 *
 * Register usage:
 *   r3   src on entry; then scratch / dcbt offset; result on return
 *   r4   dst on entry; then src - 4
 *   r5   number of bytes still to process
 *   r6   dst - 4
 *   r11  constant 4: offset for dcbz so that r11 + r6 addresses dst
 *   r12  running sum (starts at -1); all additions are adde
 *   cr7  eq is set on entry and cleared only when the leading-alignment
 *        path finds an odd destination address; in that case the final
 *        sum is rotated by one byte before returning
 */

/*
 * Copy and checksum 16 bytes: four word loads from 4..16(r4) followed by
 * four word stores to 4..16(r6), adding each word into r12 with adde.
 * The last load and the last store use the update forms, so r4 and r6
 * both advance by 16.  Every memory access carries a numeric label
 * 8<n><k> (k = 0..7) that CSUM_COPY_16_BYTES_EXCODE(n) refers to.
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

/*
 * Exception-table entries for the eight accesses emitted by
 * CSUM_COPY_16_BYTES_WITHEX(n); all of them point at "fault".
 */
#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, fault);	\
	EX_TABLE(8 ## n ## 1b, fault);	\
	EX_TABLE(8 ## n ## 2b, fault);	\
	EX_TABLE(8 ## n ## 3b, fault);	\
	EX_TABLE(8 ## n ## 4b, fault);	\
	EX_TABLE(8 ## n ## 5b, fault);	\
	EX_TABLE(8 ## n ## 6b, fault);	\
	EX_TABLE(8 ## n ## 7b, fault);

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	li	r12,-1
	addic	r0,r0,0		/* clear carry */
	addi	r6,r4,-4	/* r6 = dst - 4 */
	neg	r0,r4		/* uses dst: must precede the reload of r4 */
	addi	r4,r3,-4	/* r4 = src - 4 */
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq	/* default: no final byte rotation */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8	/* r7 = 8 if destination address is odd, else 0 */
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even ? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0		/* r3 collects the leading bytes, first byte highest */
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5	/* len -= bytes needed to reach the cache line */
	srwi.	r0,r0,2		/* # whole words up to the cache line */
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES	/* r5 = bytes left after them */
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

	/*
	 * First pass copies (r0 - r7) lines while prefetching r7 lines
	 * ahead; r0 is then set to r7 and, if non-zero, a second pass
	 * (entered via "bne 114b" with r3 = 4, r7 = 0) copies those
	 * remaining lines.
	 */
114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2		/* # remaining whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2		/* trailing halfword? */
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1		/* trailing byte? */
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12		/* add in final carry */
	beqlr+	cr7
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

/* Common landing point for every faulting access: return 0. */
fault:
	li	r3,0
	blr

	EX_TABLE(70b, fault);
	EX_TABLE(71b, fault);
	EX_TABLE(72b, fault);
	EX_TABLE(73b, fault);
	EX_TABLE(54b, fault);

/*
 * Exception-table entries for the accesses in the cacheline loop;
 * loads and stores alike are routed to the single "fault" handler.
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, fault);
	EX_TABLE(31b, fault);
	EX_TABLE(40b, fault);
	EX_TABLE(41b, fault);
	EX_TABLE(50b, fault);
	EX_TABLE(51b, fault);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 *
 * Adds, with carry propagation, the four words at r3 (saddr), the four
 * words at r4 (daddr), r5 + r6 (len + proto) and r7 (sum).  The 32-bit
 * total is folded to 16 bits and its complement is returned in the low
 * 16 bits of r3.
 */

_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)
	lwz	r9, 4(r3)
	addc	r0, r7, r8	/* start the carry chain with sum + saddr[0] */
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0		/* add in final carry */
	rotlwi	r3, r0, 16	/* swap halves ... */
	add	r3, r0, r3	/* ... so the upper half becomes hi + lo */
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31	/* move that upper half to bits 16..31 */
	blr
EXPORT_SYMBOL(csum_ipv6_magic)