/*
 * Fast MD5 implementation for PPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>

#define rHP	r3
#define rWP	r4

#define rH0	r0
#define rH1	r6
#define rH2	r7
#define rH3	r5

#define rW00	r8
#define rW01	r9
#define rW02	r10
#define rW03	r11
#define rW04	r12
#define rW05	r14
#define rW06	r15
#define rW07	r16
#define rW08	r17
#define rW09	r18
#define rW10	r19
#define rW11	r20
#define rW12	r21
#define rW13	r22
#define rW14	r23
#define rW15	r24

#define rT0	r25
#define rT1	r26

#define INITIALIZE \
	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
	SAVE_8GPRS(14, r1);		/* push registers onto stack	*/ \
	SAVE_4GPRS(22, r1);						   \
	SAVE_GPR(26, r1)

#define FINALIZE \
	REST_8GPRS(14, r1);		/* pop registers from stack	*/ \
	REST_4GPRS(22, r1);						   \
	REST_GPR(26, r1);						   \
	addi	r1,r1,INT_FRAME_SIZE;

#ifdef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* load data			*/
#define INC_PTR \
	addi		rWP,rWP,4;	/* increment per word		*/
#define NEXT_BLOCK			/* nothing to do		*/
#else
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data			*/
#define INC_PTR				/* nothing to do		*/
#define NEXT_BLOCK \
	addi	rWP,rWP,64;		/* increment per block		*/
#endif

#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
	LOAD_DATA(w0, off)		/* W				*/ \
	and	rT0,b,c;		/* 1: f = b and c		*/ \
	INC_PTR				/* ptr++			*/ \
	andc	rT1,d,b;		/* 1: f' = ~b and d		*/ \
	LOAD_DATA(w1, off+4)		/* W				*/ \
	or	rT0,rT0,rT1;		/* 1: f = f or f'		*/ \
	addi	w0,w0,k0l;		/* 1: wk = w + k		*/ \
	add	a,a,rT0;		/* 1: a = a + f			*/ \
	addis	w0,w0,k0h;		/* 1: wk = w + k'		*/ \
	addis	w1,w1,k1h;		/* 2: wk = w + k		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addi	w1,w1,k1l;		/* 2: wk = w + k'		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	and	rT0,a,b;		/* 2: f = b and c		*/ \
	andc	rT1,c,a;		/* 2: f' = ~b and d		*/ \
	or	rT0,rT0,rT1;		/* 2: f = f or f'		*/ \
	add	d,d,rT0;		/* 2: a = a + f			*/ \
	INC_PTR				/* ptr++			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	andc	rT0,c,d;		/* 1: f = c and ~d		*/ \
	and	rT1,b,d;		/* 1: f' = b and d		*/ \
	addi	w0,w0,k0l;		/* 1: wk = w + k		*/ \
	or	rT0,rT0,rT1;		/* 1: f = f or f'		*/ \
	addis	w0,w0,k0h;		/* 1: wk = w + k'		*/ \
	add	a,a,rT0;		/* 1: a = a + f			*/ \
	addi	w1,w1,k1l;		/* 2: wk = w + k		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addis	w1,w1,k1h;		/* 2: wk = w + k'		*/ \
	andc	rT0,b,c;		/* 2: f = c and ~d		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	and	rT1,a,c;		/* 2: f' = b and d		*/ \
	or	rT0,rT0,rT1;		/* 2: f = f or f'		*/ \
	add	d,d,rT0;		/* 2: a = a + f			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	xor	rT0,b,c;		/* 1: f' = b xor c		*/ \
	addi	w0,w0,k0l;		/* 1: wk = w + k		*/ \
	xor	rT1,rT0,d;		/* 1: f = f xor f'		*/ \
	addis	w0,w0,k0h;		/* 1: wk = w + k'		*/ \
	add	a,a,rT1;		/* 1: a = a + f			*/ \
	addi	w1,w1,k1l;		/* 2: wk = w + k		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addis	w1,w1,k1h;		/* 2: wk = w + k'		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	xor	rT1,rT0,a;		/* 2: f = b xor f'		*/ \
	add	d,d,rT1;		/* 2: a = a + f			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
	addi	w0,w0,k0l;		/* 1: w = w + k			*/ \
	orc	rT0,b,d;		/* 1: f = b or ~d		*/ \
	addis	w0,w0,k0h;		/* 1: w = w + k'		*/ \
	xor	rT0,rT0,c;		/* 1: f = f xor c		*/ \
	add	a,a,w0;			/* 1: a = a + wk		*/ \
	addi	w1,w1,k1l;		/* 2: w = w + k			*/ \
	add	a,a,rT0;		/* 1: a = a + f			*/ \
	addis	w1,w1,k1h;		/* 2: w = w + k'		*/ \
	rotrwi	a,a,p;			/* 1: a = a rotl x		*/ \
	add	a,a,b;			/* 1: a = a + b			*/ \
	orc	rT0,a,c;		/* 2: f = b or ~d		*/ \
	add	d,d,w1;			/* 2: a = a + wk		*/ \
	xor	rT0,rT0,b;		/* 2: f = f xor c		*/ \
	add	d,d,rT0;		/* 2: a = a + f			*/ \
	rotrwi	d,d,q;			/* 2: a = a rotl x		*/ \
	add	d,d,a;			/* 2: a = a + b			*/

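/*
 * ppc_md5_transform(): inner MD5 block transform.
 *
 * Calling convention, as inferred from the register usage below
 * (names are this file's own defines):
 *	rHP (r3) - pointer to the four 32-bit MD5 state words
 *	rWP (r4) - pointer to the input data, consumed in 64-byte blocks
 *	r5       - number of 64-byte blocks to process (loaded into CTR)
 *
 * A matching C prototype would look roughly like
 *	void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
 * but the authoritative declaration lives in the C glue code, not here.
 */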
_GLOBAL(ppc_md5_transform)
	INITIALIZE

	mtctr	r5			/* hash this many 64 byte blocks */
	lwz	rH0,0(rHP)		/* load current hash state	*/
	lwz	rH1,4(rHP)
	lwz	rH2,8(rHP)
	lwz	rH3,12(rHP)

ppc_md5_main:
	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
		0xd76b, -23432, 0xe8c8, -18602)
	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
		0x2420, 0x70db, 0xc1be, -12562)
	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
		0xf57c, 0x0faf, 0x4788, -14806)
	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
		0xa830, 0x4613, 0xfd47, -27391)
	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
		0x6981, -26408, 0x8b45, -2129)
	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
		0xffff, 0x5bb1, 0x895d, -10306)
	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
		0x6b90, 0x1122, 0xfd98, 0x7193)
	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
		0xa679, 0x438e, 0x49b4, 0x0821)

	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
		0x9d02, -32109, 0x124c, 0x2332)
	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
		0x8ea7, 0x4a33, 0x0245, -18270)
	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
		0x8eee, -8608, 0xf258, -5095)
	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
		0x969d, -10697, 0x1cbe, -15288)
	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
		0x3317, 0x3e99, 0xdbd9, 0x7c15)
	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
		0xac4b, 0x7772, 0xd8cf, 0x331d)
	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
		0x6a28, 0x6dd8, 0x219a, 0x3b68)

	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
		0x29cb, 0x28e5, 0x4218, -7788)
	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
		0x473f, 0x06d1, 0x3aae, 0x3036)
	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
		0xaea1, -15134, 0x640b, -11295)
	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
		0x8f4c, 0x4887, 0xbc7c, -22499)
	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
		0x7eb8, -27199, 0x00ea, 0x6050)
	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
		0xe01a, 0x22fe, 0x4447, 0x69c5)
	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
		0x4701, -27017, 0xc7bd, -19859)

	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
		0x0988, -1462, 0x4c70, -19401)
	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
		0xadaf, -5221, 0xfc99, 0x66f7)
	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
		0x7e80, -16418, 0xba1e, -25587)
	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
		0x4130, 0x380d, 0xe0c5, 0x738d)
	lwz	rW00,0(rHP)		/* reload old state for final add */
	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
		0xe837, -30770, 0xde8a, 0x69e8)
	lwz	rW14,4(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
		0x9e79, 0x260f, 0x256d, -27941)
	lwz	rW12,8(rHP)
	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
		0xab75, -20775, 0x4f9e, -28397)
	lwz	rW10,12(rHP)
	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
		0x662b, 0x7c56, 0x11b2, 0x0358)

	add	rH0,rH0,rW00		/* add old state to the result	*/
	stw	rH0,0(rHP)		/* and write it back		*/
	add	rH1,rH1,rW14
	stw	rH1,4(rHP)
	add	rH2,rH2,rW12
	stw	rH2,8(rHP)
	add	rH3,rH3,rW10
	stw	rH3,12(rHP)
	NEXT_BLOCK

	bdnz	ppc_md5_main

	FINALIZE
	blr