/*
 * Fast MD5 implementation for PPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>

/*
 * Register map.
 *
 * rHP and rWP are the incoming pointer arguments in r3 and r4. The third
 * argument arrives in r5 and is moved to CTR before r5 is reused as rH3.
 * r0 and r5-r12 are used without being saved; r14-r26 are spilled to the
 * stack frame by INITIALIZE and reloaded by FINALIZE.
 */
#define rHP	r3	/* base of the four 32 bit state words */
#define rWP	r4	/* current read position in the input */

#define rH0	r0	/* working copies of the state words */
#define rH1	r6
#define rH2	r7
#define rH3	r5

#define rW00	r8	/* the sixteen message words of one block */
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

#define rT0	r25	/* scratch for the round functions */
#define rT1	r26

/* Open a frame of INT_FRAME_SIZE bytes and spill r14-r26 into it. */
#define INITIALIZE \
	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
	SAVE_8GPRS(14, r1);		/* r14 .. r21 */ \
	SAVE_4GPRS(22, r1);		/* r22 .. r25 */ \
	SAVE_GPR(26, r1)

/* Reload r14-r26 and release the frame again. */
#define FINALIZE \
	REST_8GPRS(14, r1);		/* r14 .. r21 */ \
	REST_4GPRS(22, r1);		/* r22 .. r25 */ \
	REST_GPR(26, r1); \
	addi	r1,r1,INT_FRAME_SIZE;

/*
 * Input access.
 *
 * Little endian builds read each word with a displacement load and step
 * rWP once per 64 byte block. Big endian builds need the byte reversing
 * lwbrx, which is an indexed load without a displacement field, so rWP is
 * stepped after every word instead and the off argument goes unused.
 */
#ifndef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwz	reg,off(rWP);		/* plain load at rWP + off */
#define INC_PTR				/* empty: pointer moves per block */
#define NEXT_BLOCK \
	addi	rWP,rWP,64;		/* step over the finished block */
#else
#define LOAD_DATA(reg, off) \
	lwbrx	reg,0,rWP;		/* byte reversed load at rWP */
#define INC_PTR \
	addi	rWP,rWP,4;		/* step over the word just read */
#define NEXT_BLOCK			/* empty: pointer already advanced */
#endif

/*
 * Round macros.
 *
 * Every macro performs two consecutive MD5 steps. Step 1 updates va using
 * (vb, vc, vd) and word wa, step 2 updates vd using (va, vb, vc) and word
 * wb. The two instruction streams are interleaved; the digit in front of
 * each comment names the step an instruction belongs to.
 *
 * Rotation counts are handed over as right rotates: rotrwi by n is a left
 * rotate by 32 - n (25 stands for the MD5 shift 7, and so on).
 *
 * Each constant comes as a high half for addis and a low half for addi.
 * addi sign extends its immediate, so where the low half is negative the
 * high half is one larger than the upper 16 bits of the constant
 * (0xd76b and -23432 together add 0xd76aa478).
 *
 * The additions are written back into the word registers themselves.
 * Rounds 16-63 therefore pass the difference to the constant that the same
 * register received before, not the MD5 constant itself. Example: rW01
 * gets 0xe8c7b756 in round 0-15 and 0x0d566e0c in round 16-31, which sums
 * to 0xf61e2562.
 */

/* Steps 0-15: f(x, y, z) = (x and y) or (z and not x); loads wa and wb. */
#define R_00_15(va, vb, vc, vd, wa, wb, rota, rotb, off, kah, kal, kbh, kbl) \
	LOAD_DATA(wa, off)		/*    wa = next input word	*/ \
	and	rT0,vb,vc;		/* 1: t0 = vb and vc		*/ \
	INC_PTR				/*    advance (BE only)		*/ \
	andc	rT1,vd,vb;		/* 1: t1 = vd and not vb	*/ \
	LOAD_DATA(wb, off+4)		/*    wb = next input word	*/ \
	or	rT0,rT0,rT1;		/* 1: t0 = f(vb, vc, vd)	*/ \
	addi	wa,wa,kal;		/* 1: wa += low half		*/ \
	add	va,va,rT0;		/* 1: va += f			*/ \
	addis	wa,wa,kah;		/* 1: wa += high half		*/ \
	addis	wb,wb,kbh;		/* 2: wb += high half		*/ \
	add	va,va,wa;		/* 1: va += word + constant	*/ \
	addi	wb,wb,kbl;		/* 2: wb += low half		*/ \
	rotrwi	va,va,rota;		/* 1: rotate the sum		*/ \
	add	vd,vd,wb;		/* 2: vd += word + constant	*/ \
	add	va,va,vb;		/* 1: va += vb, step 1 done	*/ \
	and	rT0,va,vb;		/* 2: t0 = va and vb		*/ \
	andc	rT1,vc,va;		/* 2: t1 = vc and not va	*/ \
	or	rT0,rT0,rT1;		/* 2: t0 = f(va, vb, vc)	*/ \
	add	vd,vd,rT0;		/* 2: vd += f			*/ \
	INC_PTR				/*    advance (BE only)		*/ \
	rotrwi	vd,vd,rotb;		/* 2: rotate the sum		*/ \
	add	vd,vd,va;		/* 2: vd += va, step 2 done	*/

/* Steps 16-31: g(x, y, z) = (y and not z) or (x and z). */
#define R_16_31(va, vb, vc, vd, wa, wb, rota, rotb, kah, kal, kbh, kbl) \
	andc	rT0,vc,vd;		/* 1: t0 = vc and not vd	*/ \
	and	rT1,vb,vd;		/* 1: t1 = vb and vd		*/ \
	addi	wa,wa,kal;		/* 1: wa += low half		*/ \
	or	rT0,rT0,rT1;		/* 1: t0 = g(vb, vc, vd)	*/ \
	addis	wa,wa,kah;		/* 1: wa += high half		*/ \
	add	va,va,rT0;		/* 1: va += g			*/ \
	addi	wb,wb,kbl;		/* 2: wb += low half		*/ \
	add	va,va,wa;		/* 1: va += word + constant	*/ \
	addis	wb,wb,kbh;		/* 2: wb += high half		*/ \
	andc	rT0,vb,vc;		/* 2: t0 = vb and not vc	*/ \
	rotrwi	va,va,rota;		/* 1: rotate the sum		*/ \
	add	va,va,vb;		/* 1: va += vb, step 1 done	*/ \
	add	vd,vd,wb;		/* 2: vd += word + constant	*/ \
	and	rT1,va,vc;		/* 2: t1 = va and vc		*/ \
	or	rT0,rT0,rT1;		/* 2: t0 = g(va, vb, vc)	*/ \
	add	vd,vd,rT0;		/* 2: vd += g			*/ \
	rotrwi	vd,vd,rotb;		/* 2: rotate the sum		*/ \
	add	vd,vd,va;		/* 2: vd += va, step 2 done	*/

/*
 * Steps 32-47: h(x, y, z) = x xor y xor z. rT0 keeps vb xor vc from step 1
 * so that step 2 only needs one more xor with the new va.
 */
#define R_32_47(va, vb, vc, vd, wa, wb, rota, rotb, kah, kal, kbh, kbl) \
	xor	rT0,vb,vc;		/* 1: t0 = vb xor vc (kept)	*/ \
	addi	wa,wa,kal;		/* 1: wa += low half		*/ \
	xor	rT1,rT0,vd;		/* 1: t1 = h(vb, vc, vd)	*/ \
	addis	wa,wa,kah;		/* 1: wa += high half		*/ \
	add	va,va,rT1;		/* 1: va += h			*/ \
	addi	wb,wb,kbl;		/* 2: wb += low half		*/ \
	add	va,va,wa;		/* 1: va += word + constant	*/ \
	addis	wb,wb,kbh;		/* 2: wb += high half		*/ \
	rotrwi	va,va,rota;		/* 1: rotate the sum		*/ \
	add	vd,vd,wb;		/* 2: vd += word + constant	*/ \
	add	va,va,vb;		/* 1: va += vb, step 1 done	*/ \
	xor	rT1,rT0,va;		/* 2: t1 = h(va, vb, vc)	*/ \
	add	vd,vd,rT1;		/* 2: vd += h			*/ \
	rotrwi	vd,vd,rotb;		/* 2: rotate the sum		*/ \
	add	vd,vd,va;		/* 2: vd += va, step 2 done	*/

/* Steps 48-63: i(x, y, z) = y xor (x or not z). */
#define R_48_63(va, vb, vc, vd, wa, wb, rota, rotb, kah, kal, kbh, kbl) \
	addi	wa,wa,kal;		/* 1: wa += low half		*/ \
	orc	rT0,vb,vd;		/* 1: t0 = vb or not vd		*/ \
	addis	wa,wa,kah;		/* 1: wa += high half		*/ \
	xor	rT0,rT0,vc;		/* 1: t0 = i(vb, vc, vd)	*/ \
	add	va,va,wa;		/* 1: va += word + constant	*/ \
	addi	wb,wb,kbl;		/* 2: wb += low half		*/ \
	add	va,va,rT0;		/* 1: va += i			*/ \
	addis	wb,wb,kbh;		/* 2: wb += high half		*/ \
	rotrwi	va,va,rota;		/* 1: rotate the sum		*/ \
	add	va,va,vb;		/* 1: va += vb, step 1 done	*/ \
	orc	rT0,va,vc;		/* 2: t0 = va or not vc		*/ \
	add	vd,vd,wb;		/* 2: vd += word + constant	*/ \
	xor	rT0,rT0,vb;		/* 2: t0 = i(va, vb, vc)	*/ \
	add	vd,vd,rT0;		/* 2: vd += i			*/ \
	rotrwi	vd,vd,rotb;		/* 2: rotate the sum		*/ \
	add	vd,vd,va;		/* 2: vd += va, step 2 done	*/

/*
 * ppc_md5_transform - run the MD5 compression over consecutive 64 byte blocks
 *
 * In:	r3 = address of four 32 bit state words, read on entry and
 *	     rewritten after every block
 *	r4 = address of the input data
 *	r5 = number of blocks, copied to CTR
 *
 * NOTE(review): presumably declared to C callers as
 * void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks) - confirm
 * against the glue code.
 * NOTE(review): the loop is closed by bdnz, which decrements CTR before
 * testing it, and nothing here checks r5 for zero - callers are presumably
 * expected to pass at least one block.
 */
_GLOBAL(ppc_md5_transform)
	INITIALIZE

	mtctr	r5			/* consume count before r5 becomes rH3 */
	lwz	rH0,0(rHP)
	lwz	rH1,4(rHP)
	lwz	rH2,8(rHP)
	lwz	rH3,12(rHP)

ppc_md5_main:
	/* steps 0-15: words are loaded in order as they are consumed */
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45, -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	/* steps 16-31 */
	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee, -8608, 0xf258, -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	/* steps 32-47 */
	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218, -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
		0x4701, -27017, 0xc7bd, -19859)

	/*
	 * steps 48-63: rW00, rW14, rW12 and rW10 have had their last use as
	 * message words by the time each lwz below runs, so they are reused
	 * to fetch the state words that get added back after the last step.
	 */
	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988, -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf, -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz	rW00,0(rHP)		/* state word 0 as stored in memory */
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz	rW14,4(rHP)		/* state word 1 as stored in memory */
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz	rW12,8(rHP)		/* state word 2 as stored in memory */
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz	rW10,12(rHP)		/* state word 3 as stored in memory */
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	/* fold the block result into the stored state and write it back */
	add	rH0,rH0,rW00
	stw	rH0,0(rHP)
	add	rH1,rH1,rW14
	stw	rH1,4(rHP)
	add	rH2,rH2,rW12
	stw	rH2,8(rHP)
	add	rH3,rH3,rW10
	stw	rH3,12(rHP)
	NEXT_BLOCK

	bdnz	ppc_md5_main		/* next block while CTR is not zero */

	FINALIZE
	blr