/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast AES implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * EAD(in, bpos): form a lookup address in rT0.
 *
 * One byte of "in" is rotated into bits 20-27 of rT0 (IBM bit numbering,
 * value mask 0x00000ff0). All other bits of rT0 are kept, so whatever the
 * caller placed in the upper 20 bits stays in effect and the inserted byte
 * acts as an index scaled by 16. bpos selects the byte of the 32-bit word:
 * 0 is the least significant byte, 3 the most significant one.
 */
#define	EAD(in, bpos) \
	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;

/*
 * DAD(in, bpos): form a lookup address in rT1.
 *
 * Same byte selection as EAD, but the byte is inserted unscaled into bits
 * 24-31 of rT1 (value mask 0x000000ff); the upper 24 bits of rT1 are kept.
 */
#define DAD(in, bpos) \
	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;

/*
 * LWH(out, off): load the word at off(rT0) with evlwwsplat. The instruction
 * writes the word to both halves of the 64-bit register; the code in this
 * file uses it to set the high word and follows up with LWL for the low one.
 */
#define LWH(out, off) \
	evlwwsplat	out,off(rT0);	/* load word high		*/

/*
 * LWL(out, off): load the word at off(rT0) with a plain lwz. The LWH/LWL
 * pairs in this file depend on the high word written by LWH still being
 * present afterwards.
 */
#define LWL(out, off) \
	lwz		out,off(rT0);	/* load word low		*/

/*
 * LBZ(out, tab, off): load the single byte at off(tab), zero extended.
 */
#define LBZ(out, tab, off) \
	lbz		out,off(tab);	/* load byte			*/

/*
 * Combined "compute address, then load" helpers. The variants without an
 * "in" argument (LBE, LBD) reuse the address that an earlier EAD/DAD left
 * in rT0/rT1.
 *
 * LAH(out, in, bpos, off): EAD, then splat-load the word at off(rT0).
 */
#define LAH(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word high	*/ \
	LWH(out, off)

/* LAL(out, in, bpos, off): EAD, then lwz the word at off(rT0). */
#define LAL(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word low	*/ \
	LWL(out, off)

/* LAE(out, in, bpos): EAD, then load the byte at offset 8 of that entry. */
#define LAE(out, in, bpos) \
	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
	LBZ(out, rT0, 8)

/* LBE(out): load the byte at 8(rT0) using the address already in rT0. */
#define LBE(out) \
	LBZ(out, rT0, 8)		/* load enc byte		*/

/* LAD(out, in, bpos): DAD, then load the byte at 0(rT1). */
#define LAD(out, in, bpos) \
	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
	LBZ(out, rT1, 0)

/* LBD(out): load the byte at 0(rT1) using the address already in rT1. */
#define LBD(out) \
	LBZ(out, rT1, 0)		/* load dec byte		*/

/*
 * ppc_encrypt_block: The central encryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * Structure of the code:
 *  - Entry issues three table lookups ahead of the loop; inside the loop
 *    the same three lookups for the following pass are issued at the end
 *    of each pass.
 *  - Every loop iteration runs the lookup/xor sequence twice, using the
 *    key material at 16(rKP)/24(rKP) and at 32(rKP)/40(rKP), then adds 32
 *    to rKP. CTR is the iteration count (bdnz).
 *  - After the loop one more word-table pass follows, and then a pass that
 *    loads single bytes (offset 8 of each rT0 entry) and packs them into
 *    rW0-rW3 while rD0-rD3 are loaded from 32(rKP)..44(rKP).
 *
 * The instruction order is hand scheduled; do not reorder.
 */
_GLOBAL(ppc_encrypt_block)
	/* lookups issued ahead of the first pass */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/* first pass of this iteration: key words at 16(rKP) and 24(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second pass of this iteration: key words at 32(rKP) and 40(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,32(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32
	bdnz		ppc_encrypt_block_loop
	/* last word-table pass; its tail already starts the byte lookups */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW5
	LBE(rW2)
	evxor		rW3,rW3,rW7
	EAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBE(rW6)
	evxor		rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBE(rW1)
	/*
	 * Byte pass: each rlwimi below inserts one looked-up byte into the
	 * next higher byte lane of rW0-rW3; rD0-rD3 receive the four words
	 * at 32(rKP)..44(rKP) for the caller's final xor.
	 */
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr

/*
 * ppc_decrypt_block: The central decryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rT1, rKP, CTR). Output is stored in
 * rD0-rD3 and rW0-rW3 and caller must execute a final xor on the output
 * registers. All working registers rD0-rD3 & rW0-rW7 are overwritten during
 * processing.
 *
 * rT1 is required in addition to rT0: the byte pass at the end addresses
 * its table through DAD/LAD/LBD, which only replace the low 8 bits of rT1.
 *
 * NOTE(review): "encryption key" above is the wording of the encrypt
 * routine; presumably the key schedule used for decryption is meant -
 * confirm against the callers.
 *
 * Structure of the code:
 *  - Entry issues three table lookups ahead of the loop; inside the loop
 *    the same three lookups for the following pass are issued at the end
 *    of each pass.
 *  - Every loop iteration runs the lookup/xor sequence twice, using the
 *    key material at 16(rKP)/24(rKP) and at 32(rKP)/40(rKP), then adds 32
 *    to rKP. CTR is the iteration count (bdnz).
 *  - After the loop one more word-table pass follows, and then a pass that
 *    loads single bytes via rT1 and packs them into rW0-rW3 while rD0-rD3
 *    are loaded from 32(rKP)..44(rKP).
 *
 * The instruction order is hand scheduled; do not reorder.
 */
_GLOBAL(ppc_decrypt_block)
	/* lookups issued ahead of the first pass */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/* first pass of this iteration: key words at 16(rKP) and 24(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second pass of this iteration: key words at 32(rKP) and 40(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,32(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32
	bdnz		ppc_decrypt_block_loop
	/* last word-table pass; its tail already starts the byte lookups */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	DAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LBD(rW0)
	evxor		rW3,rW3,rW1
	DAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBD(rW6)
	evxor		rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBD(rW3)
	/*
	 * Byte pass: each rlwimi below inserts one looked-up byte into the
	 * next higher byte lane of rW0-rW3; rD0-rD3 receive the four words
	 * at 32(rKP)..44(rKP) for the caller's final xor.
	 */
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr