/*
 * Optimized version of the ip_fast_csum() function
 * Used for calculating IP header checksum
 *
 * Return: 16bit checksum, complemented
 *
 * Inputs:
 *	in0: address of buffer to checksum (char *)
 *	in1: length of the buffer, counted in 32-bit words (int); the code
 *	     compares it against 5 to detect the 20-byte case and shifts it
 *	     left by 2 before handing it to do_csum
 *
 * Copyright (C) 2002, 2006 Intel Corp.
 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

/*
 * Since we know that most likely this function is called with buf aligned
 * on 4-byte boundary and 20 bytes in length, we can execute rather quickly
 * versus calling generic version of do_csum, which has lots of overhead in
 * handling various alignments and sizes.  However, due to lack of constraints
 * put on the function input argument, cases with alignment not on 4-byte or
 * size not equal to 20 bytes will be handled by the generic do_csum function.
 */

/*
 * Symbolic names for the registers that carry the incoming arguments
 * (r32 onward) and the return value (r8).
 */
#define in0	r32
#define in1	r33
#define in2	r34
#define in3	r35
#define in4	r36
#define ret0	r8

GLOBAL_ENTRY(ip_fast_csum)
	.prologue
	.body
	// p6 = take the generic path, p7 = take the fast path.
	cmp.ne	p6,p7=5,in1	// size other than 20 byte?
	and	r14=3,in0	// is it aligned on 4-byte?
	add	r15=4,in0	// second source pointer
	;;
	// A non-zero r14 (misaligned buffer) also sets p6 and clears p7.
	cmp.ne.or.andcm p6,p7=r14,r0
	;;
	// The first two loads are issued in the same instruction group as the
	// branch; they are predicated so they only execute on the fast path.
(p7)	ld4	r20=[in0],8	// word at offset 0, in0 advances to 8
(p7)	ld4	r21=[r15],8	// word at offset 4, r15 advances to 12
(p6)	br.spnt	.generic
	;;
	ld4	r22=[in0],8	// word at offset 8, in0 advances to 16
	ld4	r23=[r15],8	// word at offset 12
	;;
	ld4	r24=[in0]	// word at offset 16
	add	r20=r20,r21
	add	r22=r22,r23
	;;
	add	r20=r20,r22
	;;
	add	r20=r20,r24	// r20 = 64-bit sum of the five 32-bit words
	;;
	// Fold the sum to 16 bits: add the bits above bit 15 back into the
	// low 16 bits, three times over.
	shr.u	ret0=r20,16	// now need to add the carry
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// add carry again
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	mov	r9=0xffff
	;;
	andcm	ret0=r9,r20	// ret0 = 0xffff & ~r20: complemented 16-bit sum
	.restore sp		// reset frame state
	br.ret.sptk.many b0
	;;

	// Slow path: the buffer is not 4-byte aligned or in1 is not 5.
	// Allocate a frame (2 inputs, 2 locals, 2 outputs) and call do_csum.
	// r34 and r35 serve as the two locals here, holding the saved return
	// pointer and ar.pfs; they are not used as in2/in3 in this function.
.generic:
	.prologue
	.save ar.pfs, r35
	alloc	r35=ar.pfs,2,2,2,0
	.save rp, r34
	mov	r34=b0
	.body
	dep.z	out1=in1,2,30	// out1 = in1 << 2 (low 30 bits of in1 placed at bit 2)
	mov	out0=in0	// out0 = buffer address
	;;
	br.call.sptk.many b0=do_csum
	;;
	// Complement every bit of the value returned by do_csum.
	// NOTE(review): unlike the fast path there is no 0xffff mask here, so
	// bits above 15 are set on return; presumably callers only look at
	// the low 16 bits -- confirm against the C prototype.
	andcm	ret0=-1,ret0
	mov	ar.pfs=r35
	mov	b0=r34
	br.ret.sptk.many b0
END(ip_fast_csum)
EXPORT_SYMBOL(ip_fast_csum)

/*
 * csum_ipv6_magic
 *
 * Sums four 32-bit words read through in0, four 32-bit words read through
 * in1, a byte-reversed combination of in2 and in3, and in4; then folds the
 * 64-bit total down to 16 bits and returns its complement.
 *
 * Inputs (meanings are inferred from the function name and the way each
 * register is used -- confirm against the C prototype):
 *	in0: pointer to 16 bytes, presumably the IPv6 source address
 *	in1: pointer to 16 bytes, presumably the IPv6 destination address
 *	in2: 32-bit value, presumably the payload length
 *	in3: only the low 16 bits are used, presumably the protocol number
 *	in4: 32-bit value, presumably the initial checksum
 *
 * Return: r8 = 16-bit sum, complemented
 *
 * in0 and in1 are advanced by the post-increment loads; in2 and in4 are
 * overwritten with their zero-extended values.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4
	ld4	r21=[in1],4
	zxt4	in2=in2		// keep only the low 32 bits of in2
	;;
	ld4	r22=[in0],4
	ld4	r23=[in1],4
	dep	r15=in3,in2,32,16	// r15 = in2 with low 16 bits of in3 at bits 32..47
	;;
	ld4	r24=[in0],4
	ld4	r25=[in1],4
	mux1	r15=r15,@rev	// reverse the order of the 8 bytes in r15
	add	r16=r20,r21
	add	r17=r22,r23
	zxt4	in4=in4		// keep only the low 32 bits of in4
	;;
	ld4	r26=[in0],4
	ld4	r27=[in1],4
	// After the byte reversal the two lowest bytes of r15 are the (zero)
	// former top bytes; shift them out to leave a 48-bit value.
	// Presumably this puts in2/in3 into network byte order -- confirm.
	shr.u	r15=r15,16
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19	// r8 = sum of all eight loaded words
	add	r15=r15,in4
	;;
	add	r8=r8,r15	// r8 = 64-bit grand total
	;;
	shr.u	r10=r8,32	// now fold sum into short
	zxt4	r11=r8
	;;
	add	r8=r10,r11	// upper 32 bits added into lower 32 bits
	;;
	shr.u	r10=r8,16	// yeah, keep it rolling
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// three times lucky
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8	// r8 = 0xffff & ~r8: complemented 16-bit sum
	br.ret.sptk.many b0
END(csum_ipv6_magic)
EXPORT_SYMBOL(csum_ipv6_magic)