/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
*                                                                          *
***************************************************************************/

.file "twofish-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/* return address at 0 */

#define in_blk	12	/* input byte array address parameter */
#define out_blk	8	/* output byte array address parameter */
#define ctx	4	/* Twofish context structure */

#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/* Structure of the crypto context struct */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 (word) */

/* define a few register aliases to allow macro substitution */

#define R0D %eax
#define R0B %al
#define R0H %ah

#define R1D %ebx
#define R1B %bl
#define R1H %bh

#define R2D %ecx
#define R2B %cl
#define R2H %ch

#define R3D %edx
#define R3B %dl
#define R3H %dh


/* performs input whitening */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/* performs output whitening */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

/*
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * operations on a and b are interleaved to increase performance
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
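	/* %edi holds the d value saved by the push at the top of this macro */\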
	xor	%edi,		d ## D;

/*
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * operations on a and b are interleaved to increase performance
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;

/*
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

SYM_FUNC_START(twofish_enc_blk)
	push	%ebp			/* save registers according to calling convention */
	push	%ebx
	push	%esi
	push	%edi

	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx

	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
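	/*
	 * Note the crossed offsets: at this point %eax/%ebx hold the words
	 * destined for the c/d output slots and %ecx/%edx those for the
	 * a/b slots, so the whitening above and the stores below swap them.
	 */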
	mov	out_blk+16(%esp),%edi;
	mov	%eax,	c_offset(%edi)
	mov	%ebx,	d_offset(%edi)
	mov	%ecx,	(%edi)
	mov	%edx,	b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_enc_blk)

SYM_FUNC_START(twofish_dec_blk)
	push	%ebp			/* save registers according to calling convention */
	push	%ebx
	push	%esi
	push	%edi

	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx

	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,	c_offset(%edi)
	mov	%ebx,	d_offset(%edi)
	mov	%ecx,	(%edi)
	mov	%edx,	b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_dec_blk)
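
/*
 * Calling convention summary (derived from the offset defines at the top of
 * this file): both entry points take (ctx, out_blk, in_blk) on the stack,
 * process a single 16-byte block, and return 1 in %eax.
 */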