/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast AES implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * EAD(in, bpos): compute the address of an encryption table entry.
 * Inserts byte <bpos> of <in> (0 = least significant byte, 3 = most
 * significant byte) into bits 20-27 of rT0, i.e. the byte value scaled
 * by 16. All other bits of rT0 are left untouched, so rT0 must already
 * hold the table base address.
 * NOTE(review): this presumably relies on the table being aligned so that
 * bits 20-27 of the base are free for the index - confirm against the
 * table definition.
 */
#define EAD(in, bpos) \
	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;

/*
 * DAD(in, bpos): compute the address of a decryption byte table entry.
 * Inserts byte <bpos> of <in> (same numbering as EAD) unscaled into
 * bits 24-31 of rT1. All other bits of rT1 are left untouched, so rT1
 * must already hold the table base address.
 */
#define DAD(in, bpos) \
	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;

/* LWH(out, off): load the word at off(rT0) with evlwwsplat. */
#define LWH(out, off) \
	evlwwsplat	out,off(rT0);	/* load word high		*/

/* LWL(out, off): load the word at off(rT0) with lwz. */
#define LWL(out, off) \
	lwz		out,off(rT0);	/* load word low		*/

/* LBZ(out, tab, off): load the byte at off(tab). */
#define LBZ(out, tab, off) \
	lbz		out,off(tab);	/* load byte			*/

/* LAH(out, in, bpos, off): EAD followed by LWH. */
#define LAH(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word high	*/ \
	LWH(out, off)

/* LAL(out, in, bpos, off): EAD followed by LWL. */
#define LAL(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word low	*/ \
	LWL(out, off)

/* LAE(out, in, bpos): EAD followed by a byte load from 8(rT0). */
#define LAE(out, in, bpos) \
	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
	LBZ(out, rT0, 8)

/* LBE(out): byte load from 8(rT0) using the address already in rT0. */
#define LBE(out) \
	LBZ(out, rT0, 8)		/* load enc byte		*/

/* LAD(out, in, bpos): DAD followed by a byte load from 0(rT1). */
#define LAD(out, in, bpos) \
	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
	LBZ(out, rT1, 0)

/* LBD(out): byte load from 0(rT1) using the address already in rT1. */
#define LBD(out) \
	LBZ(out, rT1, 0)		/* load dec byte		*/

/*
 * ppc_encrypt_block: The central encryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers:
 * on return rW0-rW3 hold the four words assembled from the final table byte
 * lookups and rD0-rD3 hold the four words loaded from 32(rKP)..44(rKP).
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * Each pass through ppc_encrypt_block_loop executes two table-lookup groups
 * (key material at 16/24(rKP) and at 32/40(rKP)), advances rKP by 32 and
 * decrements CTR via bdnz. The statement order interleaves address
 * calculation, loads and xors on purpose; do not reorder.
 */
_GLOBAL(ppc_encrypt_block)
	/* first lookups of the first group, issued ahead of the loop */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/* first group: key material from 16(rKP) and 24(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second group: key material from 32(rKP) and 40(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,32(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32
	bdnz		ppc_encrypt_block_loop
	/* last word-table group: key material from 16(rKP) and 24(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	/* from here on only single bytes are fetched (offset 8 of an entry) */
	EAD(rD1, 0)
	evxor		rW3,rW3,rW5
	LBE(rW2)
	evxor		rW3,rW3,rW7
	EAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBE(rW6)
	evxor		rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	/*
	 * NOTE(review): the byte loaded by this LBE(rW1) appears to be loaded
	 * again by LAE(rW1, rD0, 0) two statements below - looks redundant,
	 * kept unchanged.
	 */
	LBE(rW1)
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	/* merge the looked-up bytes into rW0-rW3, one byte lane at a time */
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/* rD0-rD3 are reloaded with the words at 32..44(rKP) for the caller */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr

/*
 * ppc_decrypt_block: The central decryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of the key into rD0-rD3 (NOTE(review): the original comment said
 * "encryption key" here; presumably the key schedule used for decryption is
 * meant - confirm against the caller). Pointer/counter registers must
 * have also been set up before (rT0, rT1, rKP, CTR): rT0 is used for the
 * word table lookups and rT1 for the byte lookups of the last step.
 * Output is stored in rD0-rD3 and rW0-rW3 and caller must execute a final
 * xor on the output registers: on return rW0-rW3 hold the four words
 * assembled from the final table byte lookups and rD0-rD3 hold the four
 * words loaded from 32(rKP)..44(rKP).
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * Each pass through ppc_decrypt_block_loop executes two table-lookup groups
 * (key material at 16/24(rKP) and at 32/40(rKP)), advances rKP by 32 and
 * decrements CTR via bdnz. The statement order interleaves address
 * calculation, loads and xors on purpose; do not reorder.
 */
_GLOBAL(ppc_decrypt_block)
	/* first lookups of the first group, issued ahead of the loop */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/* first group: key material from 16(rKP) and 24(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second group: key material from 32(rKP) and 40(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,32(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32
	bdnz		ppc_decrypt_block_loop
	/* last word-table group: key material from 16(rKP) and 24(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	/* from here on single bytes are fetched through rT1 (DAD/LBD/LAD) */
	DAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LBD(rW0)
	evxor		rW3,rW3,rW1
	DAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBD(rW6)
	evxor		rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBD(rW3)
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	/* merge the looked-up bytes into rW0-rW3, one byte lane at a time */
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	/* rD0-rD3 are reloaded with the words at 32..44(rKP) for the caller */
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr