/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
*                                                                         *
*   This program is distributed in the hope that it will be useful,       *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*   GNU General Public License for more details.                          *
*                                                                         *
*   You should have received a copy of the GNU General Public License     *
*   along with this program; if not, write to the                         *
*   Free Software Foundation, Inc.,                                       *
*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
***************************************************************************/

.file "twofish-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/*
 * Syntax: GNU as, AT&T operand order (op src, dst), 32-bit x86,
 * preprocessed by cpp (the round macros rely on ## token pasting).
 *
 * Both entry points take three 32-bit arguments on the stack.  The offsets
 * below are relative to %esp at function entry (return address at 0); the
 * function bodies add 16 to them because four registers are pushed first.
 */

#define in_blk    12  /* input byte array address parameter */
#define out_blk   8   /* output byte array address parameter */
#define ctx       4   /* Twofish context structure */

/* Byte offsets of the four 32-bit words a, b, c, d inside a 16-byte block */

#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/* Structure of the crypto context struct (byte offsets from its start) */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 ( word ) */

/*
 * Define a few register aliases to allow macro substitution: the round
 * macros receive R0..R3 and paste D (dword), B (low byte) or H (second
 * byte) onto the name to select the register view they need.
 */

#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3D    %edx
#define R3B    %dl
#define R3H    %dh


/*
 * performs input whitening:
 * src ^= whitening key word at byte offset `offset` (first four words of w)
 */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/*
 * performs output whitening:
 * src ^= whitening key word at byte offset 16+offset (last four words of w)
 */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

/*
 * One encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round is the byte offset of this round's two subkey words inside k
 * operations on a and b are interleaved to increase performance
 *
 * %ebp must hold the context address.  %esi and %edi are used as scratch
 * and d is saved across the table lookups with a push/pop pair.
 *
 * On exit a is no longer rotated, b has been rotated right by 31 in total
 * (i.e. rol $1), c holds the new value rotated left by 15 and d holds the
 * new value.  The callers swap the (a,b) and (c,d) register pairs on the
 * next invocation, so these rotations set up the next round's
 * preconditions.
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * Final encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round is the byte offset of this round's two subkey words inside k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * b is rotated by 16 twice (net unchanged) and c is rotated right by 1
 * instead of left by 15, so no register is left in a pre-rotated state.
 *
 * %ebp must hold the context address; %esi and %edi are scratch.
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * One decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round is the byte offset of this round's two subkey words inside k
 * operations on a and b are interleaved to increase performance
 *
 * %ebp must hold the context address.  %esi and %edi are used as scratch
 * and c is saved across the table lookups with a push/pop pair.
 *
 * On exit a has been rotated right by 31 in total (i.e. rol $1), b is no
 * longer rotated, c holds the new value and d holds the new value rotated
 * left by 15.  The callers swap the (a,b) and (c,d) register pairs on the
 * next invocation, so these rotations set up the next round's
 * preconditions.
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;

/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round is the byte offset of this round's two subkey words inside k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * a is rotated by 16 twice (net unchanged) and d is rotated right by 1
 * instead of left by 15, so no register is left in a pre-rotated state.
 *
 * %ebp must hold the context address; %esi and %edi are scratch.
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

/*
 * twofish_enc_blk - encrypt one 16-byte block
 *
 * Stack arguments (offsets from %esp at entry):
 *   4  ctx      address of the context (S-boxes, whitening keys, subkeys)
 *   8  out_blk  address the 16 output bytes are written to
 *   12 in_blk   address the 16 input bytes are read from
 *
 * Saves and restores %ebp, %ebx, %esi and %edi; %eax, %ecx and %edx are
 * overwritten.  Always leaves 1 in %eax on return.
 * NOTE(review): the C prototype is not visible in this file - confirm
 * whether any caller actually consumes the value in %eax.
 */
ENTRY(twofish_enc_blk)
	push	%ebp			/* save registers according to calling convention*/
	push	%ebx
	push	%esi
	push	%edi

	/* +16 below skips the four registers pushed above */
	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	/* load the block: eax = a, ebx = b, ecx = c, edx = d */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx

	/* whiten, then pre-rotate a and d as encrypt_round expects */
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx

	/* 16 rounds; the register pairs swap roles every round */
	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	/*
	 * eax/ebx are whitened and stored as output words c/d,
	 * ecx/edx as output words a/b.
	 */
	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	ret
ENDPROC(twofish_enc_blk)

/*
 * twofish_dec_blk - decrypt one 16-byte block
 *
 * Stack arguments (offsets from %esp at entry):
 *   4  ctx      address of the context (S-boxes, whitening keys, subkeys)
 *   8  out_blk  address the 16 output bytes are written to
 *   12 in_blk   address the 16 input bytes are read from
 *
 * Saves and restores %ebp, %ebx, %esi and %edi; %eax, %ecx and %edx are
 * overwritten.  Always leaves 1 in %eax on return.
 * NOTE(review): the C prototype is not visible in this file - confirm
 * whether any caller actually consumes the value in %eax.
 */
ENTRY(twofish_dec_blk)
	push	%ebp			/* save registers according to calling convention*/
	push	%ebx
	push	%esi
	push	%edi

	/* +16 below skips the four registers pushed above */
	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	/* load the block: eax = a, ebx = b, ecx = c, edx = d */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx

	/*
	 * decryption starts by removing the output whitening, then
	 * pre-rotates b and c as decrypt_round expects
	 */
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx

	/* 16 rounds with the subkeys in reverse order; pairs swap every round */
	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	/*
	 * eax/ebx are whitened and stored as output words c/d,
	 * ecx/edx as output words a/b.
	 */
	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	ret
ENDPROC(twofish_dec_blk)