1 /* 2 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 * 8 * Joern Rennecke <joern.rennecke@embecosm.com>: Jan 2012 9 * -Insn Scheduling improvements to csum core routines. 10 * = csum_fold( ) largely derived from ARM version. 11 * = ip_fast_cum( ) to have module scheduling 12 * -gcc 4.4.x broke networking. Alias analysis needed to be primed. 13 * worked around by adding memory clobber to ip_fast_csum( ) 14 * 15 * vineetg: May 2010 16 * -Rewrote ip_fast_cscum( ) and csum_fold( ) with fast inline asm 17 */ 18 19 #ifndef _ASM_ARC_CHECKSUM_H 20 #define _ASM_ARC_CHECKSUM_H 21 22 /* 23 * Fold a partial checksum 24 * 25 * The 2 swords comprising the 32bit sum are added, any carry to 16th bit 26 * added back and final sword result inverted. 27 */ 28 static inline __sum16 csum_fold(__wsum s) 29 { 30 unsigned r = s << 16 | s >> 16; /* ror */ 31 s = ~s; 32 s -= r; 33 return s >> 16; 34 } 35 36 /* 37 * This is a version of ip_compute_csum() optimized for IP headers, 38 * which always checksum on 4 octet boundaries. 39 */ 40 static inline __sum16 41 ip_fast_csum(const void *iph, unsigned int ihl) 42 { 43 const void *ptr = iph; 44 unsigned int tmp, tmp2, sum; 45 46 __asm__( 47 " ld.ab %0, [%3, 4] \n" 48 " ld.ab %2, [%3, 4] \n" 49 " sub %1, %4, 2 \n" 50 " lsr.f lp_count, %1, 1 \n" 51 " bcc 0f \n" 52 " add.f %0, %0, %2 \n" 53 " ld.ab %2, [%3, 4] \n" 54 "0: lp 1f \n" 55 " ld.ab %1, [%3, 4] \n" 56 " adc.f %0, %0, %2 \n" 57 " ld.ab %2, [%3, 4] \n" 58 " adc.f %0, %0, %1 \n" 59 "1: adc.f %0, %0, %2 \n" 60 " add.cs %0,%0,1 \n" 61 : "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r" (ptr) 62 : "r"(ihl) 63 : "cc", "lp_count", "memory"); 64 65 return csum_fold(sum); 66 } 67 68 /* 69 * TCP pseudo Header is 12 bytes: 70 * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2] 71 */ 72 static inline __wsum 73 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, 74 __u8 proto, __wsum sum) 75 { 76 __asm__ __volatile__( 77 " add.f %0, %0, %1 \n" 78 " adc.f %0, %0, %2 \n" 79 " adc.f %0, %0, %3 \n" 80 " adc.f %0, %0, %4 \n" 81 " adc %0, %0, 0 \n" 82 : "+&r"(sum) 83 : "r"(saddr), "r"(daddr), 84 #ifdef CONFIG_CPU_BIG_ENDIAN 85 "r"(len), 86 #else 87 "r"(len << 8), 88 #endif 89 "r"(htons(proto)) 90 : "cc"); 91 92 return sum; 93 } 94 95 #define csum_fold csum_fold 96 #define ip_fast_csum ip_fast_csum 97 #define csum_tcpudp_nofold csum_tcpudp_nofold 98 99 #include <asm-generic/checksum.h> 100 101 #endif /* _ASM_ARC_CHECKSUM_H */ 102