/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
*                                                                         *
*   This program is distributed in the hope that it will be useful,       *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*   GNU General Public License for more details.                          *
*                                                                         *
*   You should have received a copy of the GNU General Public License     *
*   along with this program; if not, write to the                         *
*   Free Software Foundation, Inc.,                                       *
*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
***************************************************************************/

/* 32-bit x86, GNU as (AT&T syntax), run through the C preprocessor. */
.file "twofish-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/*
 * Stack offsets of the arguments as seen at function entry
 * (return address at 0).  Both entry points push four registers
 * before reading them, hence the "+ 16" at every use.
 */

#define in_blk    12  /* input byte array address parameter*/
#define out_blk   8  /* output byte array address parameter*/
#define ctx       4  /* Twofish context structure */

/*
 * Byte offsets of the four 32-bit words a, b, c, d inside a 16-byte
 * block; also used to select a word inside a group of four whitening keys.
 */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/*
 * Structure of the crypto context struct (byte offsets from its base).
 * NOTE(review): the values are hard-coded here; presumably they must
 * match the C-side context structure -- confirm when that struct changes.
 */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word): w+0 input, w+16 output */
#define k	4128	/* key 1-32 ( word ), two words per round */

/*
 * define a few register aliases to allow macro substitution
 *
 * The round macros receive R0..R3 and paste a suffix with ##:
 *   D = the 32-bit register, B = its low byte, H = its second byte.
 */

#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3D    %edx
#define R3B    %dl
#define R3H    %dh


/*
 * performs input whitening
 *
 * src     32-bit register holding one block word (updated in place)
 * context register holding the context base address
 * offset  byte offset of the whitening word to use (a_offset..d_offset)
 *
 * src ^= word at w + offset in the context, i.e. one of the first four
 * whitening keys.
 */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/*
 * performs output whitening
 *
 * src     32-bit register holding one block word (updated in place)
 * context register holding the context base address
 * offset  byte offset of the whitening word to use (a_offset..d_offset)
 *
 * src ^= word at w + 16 + offset in the context, i.e. one of the last
 * four whitening keys.
 */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

/*
 * One encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two subkey words inside the k array
 * operations on a and b are interleaved to increase performance
 *
 * %ebp must hold the context base address.  With xN = byte N of x
 * (byte 0 = least significant, of the un-rotated value):
 *   - d is saved on the stack, its register is reused as accumulator B
 *   - B    = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]
 *   - %esi = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]
 *   - %esi += B; B += %esi
 *   - c = rol(c ^ (%esi + k[round]), 15)
 *   - d = (B + k[round + 4]) ^ saved d
 *
 * On exit a is back to its un-rotated value (ror 16), b is left rotated
 * left by 1 (ror 16 + ror 15) and c carries the rol 15 above, so the
 * next round can be invoked with the register pairs swapped (c,d,a,b).
 *
 * Clobbers %esi, %edi and the flags; the stack is balanced (one push,
 * one pop).
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * Final encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two subkey words inside the k array
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 *
 * Same table lookups and additions as encrypt_round; %ebp must hold the
 * context base address.  The differences are in the rotations only:
 *   - b is rotated by 16 twice, so it leaves with its original value
 *   - c = ror(c ^ (%esi + k[round]), 1) instead of the rol 15
 * a leaves un-rotated (ror 16) and d = (B + k[round + 4]) ^ saved d,
 * so no register is left in a pre-rotated form for a following round.
 *
 * Clobbers %esi, %edi and the flags; the stack is balanced.
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * One decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two subkey words inside the k array
 * operations on a and b are interleaved to increase performance
 *
 * %ebp must hold the context base address.  With xN = byte N of x
 * (byte 0 = least significant, of the un-rotated value):
 *   - c is saved on the stack, its register is reused as accumulator A
 *   - A    = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]
 *   - %esi = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]
 *   - A += %esi; %esi += A
 *   - c = (A + k[round]) ^ saved c
 *   - d = rol(d ^ (%esi + k[round + 4]), 15)
 *
 * On exit a is left rotated left by 1 (ror 16 + ror 15), b is back to
 * its un-rotated value (ror 16) and d carries the rol 15 above, so the
 * next round can be invoked with the register pairs swapped (c,d,a,b).
 *
 * Clobbers %esi, %edi and the flags; the stack is balanced (one push,
 * one pop).
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;

/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two subkey words inside the k array
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 *
 * Same table lookups and additions as decrypt_round; %ebp must hold the
 * context base address.  The differences are in the rotations only:
 *   - a is rotated by 16 twice, so it leaves with its original value
 *   - d = ror(d ^ (%esi + k[round + 4]), 1) instead of the rol 15
 * b leaves un-rotated (ror 16) and c = (A + k[round]) ^ saved c,
 * so no register is left in a pre-rotated form for a following round.
 *
 * Clobbers %esi, %edi and the flags; the stack is balanced.
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

/*
 * twofish_enc_blk - encrypt one 16-byte block
 *
 * Arguments are read from the stack (offsets at entry, return address at 0):
 *   ctx     (4)   context: S-box tables, whitening keys, round keys
 *   out_blk (8)   address the 16 output bytes are written to
 *   in_blk  (12)  address the 16 input bytes are read from
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * clobbered.  %eax is 1 on return.
 */
ENTRY(twofish_enc_blk)
	push	%ebp			/* save registers according to calling convention*/
	push    %ebx
	push    %esi
	push    %edi

	/* four pushes above: the arguments now sit 16 bytes further up */
	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov     in_blk+16(%esp),%edi	/* input address in edi */

	/* load the block: a = %eax, b = %ebx, c = %ecx, d = %edx */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax		/* encrypt_round expects a rotated 16 */
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx		/* encrypt_round expects d already rol 1 */

	/* 16 rounds; the register pairs swap roles every round */
	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	/*
	 * %ecx/%edx are written to output words 0/1 and %eax/%ebx to
	 * words 2/3, each whitened with the key word of its output slot.
	 */
	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	ret
ENDPROC(twofish_enc_blk)

/*
 * twofish_dec_blk - decrypt one 16-byte block
 *
 * Arguments are read from the stack (offsets at entry, return address at 0):
 *   ctx     (4)   context: S-box tables, whitening keys, round keys
 *   out_blk (8)   address the 16 output bytes are written to
 *   in_blk  (12)  address the 16 input bytes are read from
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * clobbered.  %eax is 1 on return.
 */
ENTRY(twofish_dec_blk)
	push	%ebp			/* save registers according to calling convention*/
	push    %ebx
	push    %esi
	push    %edi


	/* four pushes above: the arguments now sit 16 bytes further up */
	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov     in_blk+16(%esp),%edi	/* input address in edi */

	/* load the block: a = %eax, b = %ebx, c = %ecx, d = %edx */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	/* decryption starts by undoing the output whitening */
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx		/* decrypt_round expects b rotated 16 */
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx		/* decrypt_round expects c already rol 1 */

	/* 16 rounds, subkeys in reverse order; register pairs swap roles */
	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	/*
	 * %ecx/%edx are written to output words 0/1 and %eax/%ebx to
	 * words 2/3, each xored with the input whitening key word of
	 * its output slot.
	 */
	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	ret
ENDPROC(twofish_dec_blk)