1 /* 2 * arch/x86_64/lib/csum-partial.c 3 * 4 * This file contains network checksum routines that are better done 5 * in an architecture-specific manner due to speed. 6 */ 7 8 #include <linux/compiler.h> 9 #include <linux/module.h> 10 #include <asm/checksum.h> 11 12 static inline unsigned short from32to16(unsigned a) 13 { 14 unsigned short b = a >> 16; 15 asm("addw %w2,%w0\n\t" 16 "adcw $0,%w0\n" 17 : "=r" (b) 18 : "0" (b), "r" (a)); 19 return b; 20 } 21 22 /* 23 * Do a 64-bit checksum on an arbitrary memory area. 24 * Returns a 32bit checksum. 25 * 26 * This isn't as time critical as it used to be because many NICs 27 * do hardware checksumming these days. 28 * 29 * Things tried and found to not make it faster: 30 * Manual Prefetching 31 * Unrolling to an 128 bytes inner loop. 32 * Using interleaving with more registers to break the carry chains. 33 */ 34 static unsigned do_csum(const unsigned char *buff, unsigned len) 35 { 36 unsigned odd, count; 37 unsigned long result = 0; 38 39 if (unlikely(len == 0)) 40 return result; 41 odd = 1 & (unsigned long) buff; 42 if (unlikely(odd)) { 43 result = *buff << 8; 44 len--; 45 buff++; 46 } 47 count = len >> 1; /* nr of 16-bit words.. */ 48 if (count) { 49 if (2 & (unsigned long) buff) { 50 result += *(unsigned short *)buff; 51 count--; 52 len -= 2; 53 buff += 2; 54 } 55 count >>= 1; /* nr of 32-bit words.. */ 56 if (count) { 57 unsigned long zero; 58 unsigned count64; 59 if (4 & (unsigned long) buff) { 60 result += *(unsigned int *) buff; 61 count--; 62 len -= 4; 63 buff += 4; 64 } 65 count >>= 1; /* nr of 64-bit words.. */ 66 67 /* main loop using 64byte blocks */ 68 zero = 0; 69 count64 = count >> 3; 70 while (count64) { 71 asm("addq 0*8(%[src]),%[res]\n\t" 72 "adcq 1*8(%[src]),%[res]\n\t" 73 "adcq 2*8(%[src]),%[res]\n\t" 74 "adcq 3*8(%[src]),%[res]\n\t" 75 "adcq 4*8(%[src]),%[res]\n\t" 76 "adcq 5*8(%[src]),%[res]\n\t" 77 "adcq 6*8(%[src]),%[res]\n\t" 78 "adcq 7*8(%[src]),%[res]\n\t" 79 "adcq %[zero],%[res]" 80 : [res] "=r" (result) 81 : [src] "r" (buff), [zero] "r" (zero), 82 "[res]" (result)); 83 buff += 64; 84 count64--; 85 } 86 87 /* last upto 7 8byte blocks */ 88 count %= 8; 89 while (count) { 90 asm("addq %1,%0\n\t" 91 "adcq %2,%0\n" 92 : "=r" (result) 93 : "m" (*(unsigned long *)buff), 94 "r" (zero), "0" (result)); 95 --count; 96 buff += 8; 97 } 98 result = add32_with_carry(result>>32, 99 result&0xffffffff); 100 101 if (len & 4) { 102 result += *(unsigned int *) buff; 103 buff += 4; 104 } 105 } 106 if (len & 2) { 107 result += *(unsigned short *) buff; 108 buff += 2; 109 } 110 } 111 if (len & 1) 112 result += *buff; 113 result = add32_with_carry(result>>32, result & 0xffffffff); 114 if (unlikely(odd)) { 115 result = from32to16(result); 116 result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); 117 } 118 return result; 119 } 120 121 /* 122 * computes the checksum of a memory block at buff, length len, 123 * and adds in "sum" (32-bit) 124 * 125 * returns a 32-bit number suitable for feeding into itself 126 * or csum_tcpudp_magic 127 * 128 * this function must be called with even lengths, except 129 * for the last fragment, which may be odd 130 * 131 * it's best to have buff aligned on a 64-bit boundary 132 */ 133 __wsum csum_partial(const void *buff, int len, __wsum sum) 134 { 135 return (__force __wsum)add32_with_carry(do_csum(buff, len), 136 (__force u32)sum); 137 } 138 139 /* 140 * this routine is used for miscellaneous IP-like checksums, mainly 141 * in icmp.c 142 */ 143 __sum16 ip_compute_csum(const void *buff, int len) 144 { 145 return csum_fold(csum_partial(buff,len,0)); 146 } 147 EXPORT_SYMBOL(ip_compute_csum); 148 149