/*
 * Fast AES implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * EAD: pick byte 'bpos' of 'in' (0 = least significant byte, 3 = most
 * significant byte) and insert it, scaled by 16, into bits 20-27 of rT0
 * (mask 0x00000ff0). All other bits of rT0 are preserved, so rT0 keeps
 * acting as a table base whose index field is replaced on every use.
 */
#define	EAD(in, bpos) \
	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;

/*
 * DAD: same byte selection as EAD, but the byte is inserted unscaled into
 * bits 24-31 of rT1 (mask 0x000000ff). All other bits of rT1 are preserved.
 */
#define DAD(in, bpos) \
	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;

/*
 * LWH/LWL: load a word from the entry currently addressed by rT0.
 * LWH uses evlwwsplat, LWL a plain lwz; the code below pairs LAH followed
 * by LAL on the same register to fill the high and the low word.
 */
#define LWH(out, off) \
	evlwwsplat	out,off(rT0);	/* load word high		*/

#define LWL(out, off) \
	lwz		out,off(rT0);	/* load word low		*/

/* LBZ: load a single byte from 'off' bytes behind the address in 'tab' */
#define LBZ(out, tab, off) \
	lbz		out,off(tab);	/* load byte			*/

/* LAH/LAL: EAD address calculation immediately followed by LWH/LWL */
#define LAH(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word high	*/ \
	LWH(out, off)

#define LAL(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word low	*/ \
	LWL(out, off)

/*
 * LAE/LBE: byte load from offset 8 of the entry addressed by rT0, with
 * (LAE) or without (LBE) a preceding EAD address calculation.
 */
#define LAE(out, in, bpos) \
	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
	LBZ(out, rT0, 8)

#define LBE(out) \
	LBZ(out, rT0, 8)		/* load enc byte		*/

/*
 * LAD/LBD: byte load from the address in rT1, with (LAD) or without (LBD)
 * a preceding DAD address calculation.
 */
#define LAD(out, in, bpos) \
	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
	LBZ(out, rT1, 0)

#define LBD(out) \
	LBZ(out, rT1, 0)		/* load dec byte		*/

/*
 * ppc_encrypt_block: The central encryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * Structure: three table loads are issued ahead of the loop. The loop body
 * holds two unrolled passes that read key material from 16/24(rKP) and
 * 32/40(rKP); rKP then advances by 32 bytes and bdnz repeats the body until
 * CTR reaches zero. After the loop one more table pass runs (key material
 * from 16/24(rKP)), followed by byte lookups that are assembled into
 * rW0-rW3 while rD0-rD3 are loaded from 32..44(rKP).
 *
 * The instruction order is hand scheduled; loads and xors are deliberately
 * interleaved and must not be reordered.
 *
 */
_GLOBAL(ppc_encrypt_block)
	/* loads issued ahead of the first loop pass */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/* first unrolled pass: key material from 16(rKP) and 24(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second unrolled pass: key material from 32(rKP) and 40(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,32(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32	/* 32 bytes of key consumed	*/
	bdnz		ppc_encrypt_block_loop
	/* last table pass after the loop */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	/* from here on single bytes are looked up (LBE/LAE) */
	EAD(rD1, 0)
	evxor		rW3,rW3,rW5
	LBE(rW2)
	evxor		rW3,rW3,rW7
	EAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBE(rW6)
	evxor		rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBE(rW1)
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	/* merge the looked-up bytes into rW0-rW3, one byte lane at a time */
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/* rD0-rD3 are reloaded from 32..44(rKP) once their bytes are consumed */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr

/*
 * ppc_decrypt_block: The central decryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * In addition to the registers listed above, the byte lookups after the loop
 * go through rT1 (DAD/LAD/LBD). DAD only replaces the low 8 bits of rT1, so
 * rT1 has to be set up by the caller as well.
 *
 * Structure mirrors ppc_encrypt_block: three loads ahead of the loop, two
 * unrolled passes per iteration (key material from 16/24(rKP) and
 * 32/40(rKP), rKP advanced by 32, repeated until CTR reaches zero), one more
 * table pass after the loop and finally byte lookups that are assembled into
 * rW0-rW3 while rD0-rD3 are loaded from 32..44(rKP).
 *
 * The instruction order is hand scheduled; loads and xors are deliberately
 * interleaved and must not be reordered.
 *
 */
_GLOBAL(ppc_decrypt_block)
	/* loads issued ahead of the first loop pass */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/* first unrolled pass: key material from 16(rKP) and 24(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second unrolled pass: key material from 32(rKP) and 40(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,32(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32	/* 32 bytes of key consumed	*/
	bdnz		ppc_decrypt_block_loop
	/* last table pass after the loop */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	/* from here on single bytes are looked up through rT1 (LBD/LAD) */
	DAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LBD(rW0)
	evxor		rW3,rW3,rW1
	DAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBD(rW6)
	evxor		rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBD(rW3)
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	/* merge the looked-up bytes into rW0-rW3, one byte lane at a time */
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/* rD0-rD3 are reloaded from 32..44(rKP) once their bytes are consumed */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr