xref: /openbmc/qemu/target/arm/tcg/crypto_helper.c (revision f6b0de53)
1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "crypto/sm4.h"
19 #include "vec_internal.h"
20 
/*
 * One 128-bit value, viewable as sixteen bytes, four 32-bit words or two
 * 64-bit halves.  The helpers in this file fill and read it back through
 * the 64-bit view and operate on it through the byte/word views.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];   /* byte view, indexed through CR_ST_BYTE() */
    uint32_t   words[4];    /* 32-bit view, indexed through CR_ST_WORD() */
    uint64_t   l[2];        /* 64-bit view used to copy operands in and out */
};
26 
/*
 * Accessors for the byte and 32-bit word views of a CRYPTO_STATE.
 *
 * The state is filled through its two 64-bit halves, so on a big-endian
 * host the index is mirrored within each half.  That way element 0 always
 * names the least significant byte/word of l[0], exactly as it does on a
 * little-endian host where the index is used unchanged.
 */
#if HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
34 
/*
 * Tail clearing for helpers whose callers have not been converted to
 * full gvec and therefore only write the low 16 bytes of the vector
 * register: the operation size encoded in @desc must be exactly 16.
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);

    assert(oprsz == 16);
    clear_tail(vd, oprsz, simd_maxsz(desc));
}
47 
48 static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
49                            uint64_t *rm, bool decrypt)
50 {
51     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
52     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
53     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
54     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
55     int i;
56 
57     /* xor state vector with round key */
58     rk.l[0] ^= st.l[0];
59     rk.l[1] ^= st.l[1];
60 
61     /* combine ShiftRows operation and sbox substitution */
62     for (i = 0; i < 16; i++) {
63         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
64     }
65 
66     rd[0] = st.l[0];
67     rd[1] = st.l[1];
68 }
69 
70 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
71 {
72     intptr_t i, opr_sz = simd_oprsz(desc);
73     bool decrypt = simd_data(desc);
74 
75     for (i = 0; i < opr_sz; i += 16) {
76         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
77     }
78     clear_tail(vd, opr_sz, simd_maxsz(desc));
79 }
80 
/*
 * Apply the (inverse) MixColumns transformation to one 128-bit block.
 *
 * @rd:      destination, written only after @rm has been copied
 * @rm:      source block
 * @decrypt: false selects the MixColumns table, true the inverse table
 *
 * Each table entry packs the four GF(2^8) products of the index byte x
 * into one word, most significant byte first: (3x, x, x, 2x) for the
 * forward table and (11x, 13x, 9x, 14x) for the inverse table.
 */
static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
{
    static uint32_t const mc[][256] = { {
        /* MixColumns lookup table, indexed by the input byte */
        0x00000000, 0x03010102, 0x06020204, 0x05030306,
        0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
        0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
        0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
        0x30101020, 0x33111122, 0x36121224, 0x35131326,
        0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
        0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
        0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
        0x60202040, 0x63212142, 0x66222244, 0x65232346,
        0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
        0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
        0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
        0x50303060, 0x53313162, 0x56323264, 0x55333366,
        0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
        0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
        0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
        0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
        0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
        0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
        0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
        0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
        0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
        0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
        0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
        0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
        0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
        0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
        0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
        0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
        0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
        0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
        0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
        0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
        0x97848413, 0x94858511, 0x91868617, 0x92878715,
        0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
        0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
        0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
        0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
        0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
        0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
        0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
        0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
        0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
        0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
        0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
        0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
        0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
        0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
        0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
        0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
        0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
        0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
        0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
        0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
        0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
        0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
        0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
        0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
        0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
        0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
        0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
        0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
        0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
        0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
    }, {
        /* Inverse MixColumns lookup table, indexed by the input byte */
        0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
        0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
        0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
        0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
        0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
        0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
        0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
        0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
        0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
        0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
        0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
        0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
        0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
        0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
        0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
        0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
        0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
        0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
        0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
        0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
        0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
        0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
        0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
        0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
        0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
        0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
        0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
        0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
        0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
        0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
        0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
        0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
        0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
        0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
        0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
        0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
        0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
        0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
        0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
        0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
        0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
        0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
        0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
        0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
        0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
        0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
        0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
        0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
        0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
        0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
        0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
        0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
        0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
        0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
        0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
        0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
        0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
        0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
        0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
        0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
        0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
        0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
        0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
        0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
    } };

    union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
    int i;

    /*
     * One iteration per 32-bit column.  The new column is the XOR of the
     * table words for its four bytes, each rotated left by 8 bits per
     * position within the column.  The right-hand side reads all four
     * bytes before the word that holds them is overwritten.
     */
    for (i = 0; i < 16; i += 4) {
        CR_ST_WORD(st, i >> 2) =
            mc[decrypt][CR_ST_BYTE(st, i)] ^
            rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
            rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
            rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
    }

    rd[0] = st.l[0];
    rd[1] = st.l[1];
}
231 
232 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
233 {
234     intptr_t i, opr_sz = simd_oprsz(desc);
235     bool decrypt = simd_data(desc);
236 
237     for (i = 0; i < opr_sz; i += 16) {
238         do_crypto_aesmc(vd + i, vm + i, decrypt);
239     }
240     clear_tail(vd, opr_sz, simd_maxsz(desc));
241 }
242 
243 /*
244  * SHA-1 logical functions
245  */
246 
/* Bitwise "choose": each result bit comes from y where x is 1, else from z. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t from_y = x & y;
    const uint32_t from_z = ~x & z;

    return from_y | from_z;
}
251 
/* Bitwise parity: the XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
256 
/* Bitwise majority: a result bit is set when at least two inputs have it set. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | (x & z) | (y & z);
}
261 
262 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
263 {
264     uint64_t *d = vd, *n = vn, *m = vm;
265     uint64_t d0, d1;
266 
267     d0 = d[1] ^ d[0] ^ m[0];
268     d1 = n[0] ^ d[1] ^ m[1];
269     d[0] = d0;
270     d[1] = d1;
271 
272     clear_tail_16(vd, desc);
273 }
274 
275 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
276                                     uint64_t *rm, uint32_t desc,
277                                     uint32_t (*fn)(union CRYPTO_STATE *d))
278 {
279     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
280     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
281     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
282     int i;
283 
284     for (i = 0; i < 4; i++) {
285         uint32_t t = fn(&d);
286 
287         t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
288              + CR_ST_WORD(m, i);
289 
290         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
291         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
292         CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
293         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
294         CR_ST_WORD(d, 0) = t;
295     }
296     rd[0] = d.l[0];
297     rd[1] = d.l[1];
298 
299     clear_tail_16(rd, desc);
300 }
301 
302 static uint32_t do_sha1c(union CRYPTO_STATE *d)
303 {
304     return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
305 }
306 
/* Run the shared SHA-1 round loop with do_sha1c() as the round function. */
void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
}
311 
312 static uint32_t do_sha1p(union CRYPTO_STATE *d)
313 {
314     return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
315 }
316 
/* Run the shared SHA-1 round loop with do_sha1p() as the round function. */
void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
}
321 
322 static uint32_t do_sha1m(union CRYPTO_STATE *d)
323 {
324     return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
325 }
326 
/* Run the shared SHA-1 round loop with do_sha1m() as the round function. */
void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
}
331 
332 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
333 {
334     uint64_t *rd = vd;
335     uint64_t *rm = vm;
336     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
337 
338     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
339     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
340 
341     rd[0] = m.l[0];
342     rd[1] = m.l[1];
343 
344     clear_tail_16(vd, desc);
345 }
346 
347 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
348 {
349     uint64_t *rd = vd;
350     uint64_t *rm = vm;
351     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
352     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
353 
354     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
355     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
356     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
357     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
358 
359     rd[0] = d.l[0];
360     rd[1] = d.l[1];
361 
362     clear_tail_16(vd, desc);
363 }
364 
365 /*
366  * The SHA-256 logical functions, according to
367  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
368  */
369 
/* XOR of x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    uint32_t r = ror32(x, 2);

    r ^= ror32(x, 13);
    r ^= ror32(x, 22);
    return r;
}
374 
/* XOR of x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    uint32_t r = ror32(x, 6);

    r ^= ror32(x, 11);
    r ^= ror32(x, 25);
    return r;
}
379 
/* XOR of x rotated right by 7 and 18 bits and x shifted right by 3 bits. */
static uint32_t s0(uint32_t x)
{
    uint32_t r = ror32(x, 7);

    r ^= ror32(x, 18);
    r ^= x >> 3;
    return r;
}
384 
/* XOR of x rotated right by 17 and 19 bits and x shifted right by 10 bits. */
static uint32_t s1(uint32_t x)
{
    uint32_t r = ror32(x, 17);

    r ^= ror32(x, 19);
    r ^= x >> 10;
    return r;
}
389 
390 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
391 {
392     uint64_t *rd = vd;
393     uint64_t *rn = vn;
394     uint64_t *rm = vm;
395     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
396     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
397     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
398     int i;
399 
400     for (i = 0; i < 4; i++) {
401         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
402                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
403                      + CR_ST_WORD(m, i);
404 
405         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
406         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
407         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
408         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
409 
410         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
411              + S0(CR_ST_WORD(d, 0));
412 
413         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
414         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
415         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
416         CR_ST_WORD(d, 0) = t;
417     }
418 
419     rd[0] = d.l[0];
420     rd[1] = d.l[1];
421 
422     clear_tail_16(vd, desc);
423 }
424 
425 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
426 {
427     uint64_t *rd = vd;
428     uint64_t *rn = vn;
429     uint64_t *rm = vm;
430     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
431     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
432     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
433     int i;
434 
435     for (i = 0; i < 4; i++) {
436         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
437                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
438                      + CR_ST_WORD(m, i);
439 
440         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
441         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
442         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
443         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
444     }
445 
446     rd[0] = d.l[0];
447     rd[1] = d.l[1];
448 
449     clear_tail_16(vd, desc);
450 }
451 
452 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
453 {
454     uint64_t *rd = vd;
455     uint64_t *rm = vm;
456     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
457     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
458 
459     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
460     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
461     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
462     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
463 
464     rd[0] = d.l[0];
465     rd[1] = d.l[1];
466 
467     clear_tail_16(vd, desc);
468 }
469 
470 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
471 {
472     uint64_t *rd = vd;
473     uint64_t *rn = vn;
474     uint64_t *rm = vm;
475     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
476     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
477     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
478 
479     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
480     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
481     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
482     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
483 
484     rd[0] = d.l[0];
485     rd[1] = d.l[1];
486 
487     clear_tail_16(vd, desc);
488 }
489 
490 /*
491  * The SHA-512 logical functions (same as above but using 64-bit operands)
492  */
493 
/* 64-bit "choose": each result bit comes from y where x is 1, else from z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t from_y = x & y;
    const uint64_t from_z = ~x & z;

    return from_y | from_z;
}
498 
/* 64-bit majority: a result bit is set when at least two inputs have it set. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & y) | (x & z) | (y & z);
}
503 
/* XOR of x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t r = ror64(x, 28);

    r ^= ror64(x, 34);
    r ^= ror64(x, 39);
    return r;
}
508 
/* XOR of x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t r = ror64(x, 14);

    r ^= ror64(x, 18);
    r ^= ror64(x, 41);
    return r;
}
513 
/* XOR of x rotated right by 1 and 8 bits and x shifted right by 7 bits. */
static uint64_t s0_512(uint64_t x)
{
    uint64_t r = ror64(x, 1);

    r ^= ror64(x, 8);
    r ^= x >> 7;
    return r;
}
518 
/* XOR of x rotated right by 19 and 61 bits and x shifted right by 6 bits. */
static uint64_t s1_512(uint64_t x)
{
    uint64_t r = ror64(x, 19);

    r ^= ror64(x, 61);
    r ^= x >> 6;
    return r;
}
523 
524 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
525 {
526     uint64_t *rd = vd;
527     uint64_t *rn = vn;
528     uint64_t *rm = vm;
529     uint64_t d0 = rd[0];
530     uint64_t d1 = rd[1];
531 
532     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
533     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
534 
535     rd[0] = d0;
536     rd[1] = d1;
537 
538     clear_tail_16(vd, desc);
539 }
540 
541 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
542 {
543     uint64_t *rd = vd;
544     uint64_t *rn = vn;
545     uint64_t *rm = vm;
546     uint64_t d0 = rd[0];
547     uint64_t d1 = rd[1];
548 
549     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
550     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
551 
552     rd[0] = d0;
553     rd[1] = d1;
554 
555     clear_tail_16(vd, desc);
556 }
557 
558 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
559 {
560     uint64_t *rd = vd;
561     uint64_t *rn = vn;
562     uint64_t d0 = rd[0];
563     uint64_t d1 = rd[1];
564 
565     d0 += s0_512(rd[1]);
566     d1 += s0_512(rn[0]);
567 
568     rd[0] = d0;
569     rd[1] = d1;
570 
571     clear_tail_16(vd, desc);
572 }
573 
574 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
575 {
576     uint64_t *rd = vd;
577     uint64_t *rn = vn;
578     uint64_t *rm = vm;
579 
580     rd[0] += s1_512(rn[0]) + rm[0];
581     rd[1] += s1_512(rn[1]) + rm[1];
582 
583     clear_tail_16(vd, desc);
584 }
585 
586 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
587 {
588     uint64_t *rd = vd;
589     uint64_t *rn = vn;
590     uint64_t *rm = vm;
591     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
592     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
593     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
594     uint32_t t;
595 
596     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
597     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
598 
599     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
600     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
601 
602     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
603     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
604 
605     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
606     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
607 
608     rd[0] = d.l[0];
609     rd[1] = d.l[1];
610 
611     clear_tail_16(vd, desc);
612 }
613 
614 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
615 {
616     uint64_t *rd = vd;
617     uint64_t *rn = vn;
618     uint64_t *rm = vm;
619     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
620     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
621     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
622     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
623 
624     CR_ST_WORD(d, 0) ^= t;
625     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
626     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
627     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
628                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
629 
630     rd[0] = d.l[0];
631     rd[1] = d.l[1];
632 
633     clear_tail_16(vd, desc);
634 }
635 
636 static inline void QEMU_ALWAYS_INLINE
637 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
638              uint32_t desc, uint32_t opcode)
639 {
640     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
641     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
642     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
643     uint32_t imm2 = simd_data(desc);
644     uint32_t t;
645 
646     assert(imm2 < 4);
647 
648     if (opcode == 0 || opcode == 2) {
649         /* SM3TT1A, SM3TT2A */
650         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
651     } else if (opcode == 1) {
652         /* SM3TT1B */
653         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
654     } else if (opcode == 3) {
655         /* SM3TT2B */
656         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
657     } else {
658         qemu_build_not_reached();
659     }
660 
661     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
662 
663     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
664 
665     if (opcode < 2) {
666         /* SM3TT1A, SM3TT1B */
667         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
668 
669         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
670     } else {
671         /* SM3TT2A, SM3TT2B */
672         t += CR_ST_WORD(n, 3);
673         t ^= rol32(t, 9) ^ rol32(t, 17);
674 
675         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
676     }
677 
678     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
679     CR_ST_WORD(d, 3) = t;
680 
681     rd[0] = d.l[0];
682     rd[1] = d.l[1];
683 
684     clear_tail_16(rd, desc);
685 }
686 
/*
 * Define one out-of-line helper per SM3TT variant.  OPCODE is handed to
 * crypto_sm3tt() as a literal constant: 0 = SM3TT1A, 1 = SM3TT1B,
 * 2 = SM3TT2A, 3 = SM3TT2B.
 */
#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

#undef DO_SM3TT
697 
698 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
699 {
700     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
701     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
702     uint32_t t, i;
703 
704     for (i = 0; i < 4; i++) {
705         t = CR_ST_WORD(d, (i + 1) % 4) ^
706             CR_ST_WORD(d, (i + 2) % 4) ^
707             CR_ST_WORD(d, (i + 3) % 4) ^
708             CR_ST_WORD(n, i);
709 
710         t = sm4_sbox[t & 0xff] |
711             sm4_sbox[(t >> 8) & 0xff] << 8 |
712             sm4_sbox[(t >> 16) & 0xff] << 16 |
713             sm4_sbox[(t >> 24) & 0xff] << 24;
714 
715         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
716                             rol32(t, 24);
717     }
718 
719     rd[0] = d.l[0];
720     rd[1] = d.l[1];
721 }
722 
723 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
724 {
725     intptr_t i, opr_sz = simd_oprsz(desc);
726 
727     for (i = 0; i < opr_sz; i += 16) {
728         do_crypto_sm4e(vd + i, vn + i, vm + i);
729     }
730     clear_tail(vd, opr_sz, simd_maxsz(desc));
731 }
732 
733 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
734 {
735     union CRYPTO_STATE d;
736     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
737     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
738     uint32_t t, i;
739 
740     d = n;
741     for (i = 0; i < 4; i++) {
742         t = CR_ST_WORD(d, (i + 1) % 4) ^
743             CR_ST_WORD(d, (i + 2) % 4) ^
744             CR_ST_WORD(d, (i + 3) % 4) ^
745             CR_ST_WORD(m, i);
746 
747         t = sm4_sbox[t & 0xff] |
748             sm4_sbox[(t >> 8) & 0xff] << 8 |
749             sm4_sbox[(t >> 16) & 0xff] << 16 |
750             sm4_sbox[(t >> 24) & 0xff] << 24;
751 
752         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
753     }
754 
755     rd[0] = d.l[0];
756     rd[1] = d.l[1];
757 }
758 
759 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
760 {
761     intptr_t i, opr_sz = simd_oprsz(desc);
762 
763     for (i = 0; i < opr_sz; i += 16) {
764         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
765     }
766     clear_tail(vd, opr_sz, simd_maxsz(desc));
767 }
768 
769 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
770 {
771     intptr_t i, opr_sz = simd_oprsz(desc);
772     uint64_t *d = vd, *n = vn, *m = vm;
773 
774     for (i = 0; i < opr_sz / 8; ++i) {
775         d[i] = n[i] ^ rol64(m[i], 1);
776     }
777     clear_tail(vd, opr_sz, simd_maxsz(desc));
778 }
779