xref: /openbmc/qemu/target/arm/tcg/crypto_helper.c (revision 57357322e4bd35c42816c769e36f39af11fc3ddc)
1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "crypto/sm4.h"
19 #include "vec_internal.h"
20 
/*
 * 128-bit scratch state used by the helpers in this file.  The same
 * 16 bytes can be addressed as two 64-bit halves, four 32-bit words
 * or sixteen individual bytes.
 */
union CRYPTO_STATE {
    uint64_t   l[2];
    uint32_t   words[4];
    uint8_t    bytes[16];
};
26 
/*
 * Element accessors for union CRYPTO_STATE.
 *
 * On little-endian hosts the index is used as-is.  On big-endian hosts
 * the index is mirrored inside each 64-bit half: byte i maps to 7 - i
 * (low half) or 23 - i (high half), and the two words of each half are
 * swapped.
 */
#if !HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#endif
34 
/*
 * Tail handling for helpers that have not been converted to full gvec
 * and therefore only ever write the low 16 bytes of the vector
 * register.  The operation size encoded in @desc must be exactly 16;
 * the range up to the maximum size is handed to clear_tail().
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);
    const int maxsz = simd_maxsz(desc);

    /* Anything other than a 128-bit operation is a caller bug. */
    assert(oprsz == 16);
    clear_tail(vd, oprsz, maxsz);
}
47 
48 static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
49                            uint64_t *rm, bool decrypt)
50 {
51     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
52     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
53     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
54     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
55     int i;
56 
57     /* xor state vector with round key */
58     rk.l[0] ^= st.l[0];
59     rk.l[1] ^= st.l[1];
60 
61     /* combine ShiftRows operation and sbox substitution */
62     for (i = 0; i < 16; i++) {
63         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
64     }
65 
66     rd[0] = st.l[0];
67     rd[1] = st.l[1];
68 }
69 
70 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
71 {
72     intptr_t i, opr_sz = simd_oprsz(desc);
73     bool decrypt = simd_data(desc);
74 
75     for (i = 0; i < opr_sz; i += 16) {
76         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
77     }
78     clear_tail(vd, opr_sz, simd_maxsz(desc));
79 }
80 
81 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
82 {
83     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
84     const uint32_t *mc = decrypt ? AES_imc_rot : AES_mc_rot;
85     int i;
86 
87     for (i = 0; i < 16; i += 4) {
88         CR_ST_WORD(st, i >> 2) =
89             mc[CR_ST_BYTE(st, i)] ^
90             rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^
91             rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^
92             rol32(mc[CR_ST_BYTE(st, i + 3)], 24);
93     }
94 
95     rd[0] = st.l[0];
96     rd[1] = st.l[1];
97 }
98 
99 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
100 {
101     intptr_t i, opr_sz = simd_oprsz(desc);
102     bool decrypt = simd_data(desc);
103 
104     for (i = 0; i < opr_sz; i += 16) {
105         do_crypto_aesmc(vd + i, vm + i, decrypt);
106     }
107     clear_tail(vd, opr_sz, simd_maxsz(desc));
108 }
109 
110 /*
111  * SHA-1 logical functions
112  */
113 
/* Bitwise choose: each result bit is taken from y where x is 1, else from z. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t from_y = x & y;
    const uint32_t from_z = ~x & z;

    return from_y | from_z;
}
118 
/* Bitwise parity: XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
123 
/* Bitwise majority: a result bit is set when at least two inputs have it set. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t xy = x & y;
    const uint32_t xz = x & z;
    const uint32_t yz = y & z;

    return xy | xz | yz;
}
128 
129 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
130 {
131     uint64_t *d = vd, *n = vn, *m = vm;
132     uint64_t d0, d1;
133 
134     d0 = d[1] ^ d[0] ^ m[0];
135     d1 = n[0] ^ d[1] ^ m[1];
136     d[0] = d0;
137     d[1] = d1;
138 
139     clear_tail_16(vd, desc);
140 }
141 
142 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
143                                     uint64_t *rm, uint32_t desc,
144                                     uint32_t (*fn)(union CRYPTO_STATE *d))
145 {
146     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
147     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
148     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
149     int i;
150 
151     for (i = 0; i < 4; i++) {
152         uint32_t t = fn(&d);
153 
154         t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
155              + CR_ST_WORD(m, i);
156 
157         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
158         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
159         CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
160         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
161         CR_ST_WORD(d, 0) = t;
162     }
163     rd[0] = d.l[0];
164     rd[1] = d.l[1];
165 
166     clear_tail_16(rd, desc);
167 }
168 
169 static uint32_t do_sha1c(union CRYPTO_STATE *d)
170 {
171     return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
172 }
173 
174 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
175 {
176     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
177 }
178 
179 static uint32_t do_sha1p(union CRYPTO_STATE *d)
180 {
181     return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
182 }
183 
184 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
185 {
186     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
187 }
188 
189 static uint32_t do_sha1m(union CRYPTO_STATE *d)
190 {
191     return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
192 }
193 
194 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
195 {
196     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
197 }
198 
199 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
200 {
201     uint64_t *rd = vd;
202     uint64_t *rm = vm;
203     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
204 
205     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
206     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
207 
208     rd[0] = m.l[0];
209     rd[1] = m.l[1];
210 
211     clear_tail_16(vd, desc);
212 }
213 
214 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
215 {
216     uint64_t *rd = vd;
217     uint64_t *rm = vm;
218     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
219     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
220 
221     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
222     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
223     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
224     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
225 
226     rd[0] = d.l[0];
227     rd[1] = d.l[1];
228 
229     clear_tail_16(vd, desc);
230 }
231 
232 /*
233  * The SHA-256 logical functions, according to
234  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
235  */
236 
/* SHA-256 "big sigma 0": XOR of right-rotations by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
241 
/* SHA-256 "big sigma 1": XOR of right-rotations by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
246 
/* SHA-256 "small sigma 0": rotations by 7 and 18 XORed with a shift by 3. */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
251 
/* SHA-256 "small sigma 1": rotations by 17 and 19 XORed with a shift by 10. */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
256 
257 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
258 {
259     uint64_t *rd = vd;
260     uint64_t *rn = vn;
261     uint64_t *rm = vm;
262     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
263     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
264     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
265     int i;
266 
267     for (i = 0; i < 4; i++) {
268         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
269                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
270                      + CR_ST_WORD(m, i);
271 
272         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
273         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
274         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
275         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
276 
277         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
278              + S0(CR_ST_WORD(d, 0));
279 
280         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
281         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
282         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
283         CR_ST_WORD(d, 0) = t;
284     }
285 
286     rd[0] = d.l[0];
287     rd[1] = d.l[1];
288 
289     clear_tail_16(vd, desc);
290 }
291 
292 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
293 {
294     uint64_t *rd = vd;
295     uint64_t *rn = vn;
296     uint64_t *rm = vm;
297     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
298     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
299     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
300     int i;
301 
302     for (i = 0; i < 4; i++) {
303         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
304                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
305                      + CR_ST_WORD(m, i);
306 
307         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
308         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
309         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
310         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
311     }
312 
313     rd[0] = d.l[0];
314     rd[1] = d.l[1];
315 
316     clear_tail_16(vd, desc);
317 }
318 
319 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
320 {
321     uint64_t *rd = vd;
322     uint64_t *rm = vm;
323     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
324     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
325 
326     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
327     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
328     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
329     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
330 
331     rd[0] = d.l[0];
332     rd[1] = d.l[1];
333 
334     clear_tail_16(vd, desc);
335 }
336 
337 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
338 {
339     uint64_t *rd = vd;
340     uint64_t *rn = vn;
341     uint64_t *rm = vm;
342     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
343     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
344     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
345 
346     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
347     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
348     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
349     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
350 
351     rd[0] = d.l[0];
352     rd[1] = d.l[1];
353 
354     clear_tail_16(vd, desc);
355 }
356 
357 /*
358  * The SHA-512 logical functions (same as above but using 64-bit operands)
359  */
360 
/* 64-bit bitwise choose: each result bit comes from y where x is 1, else from z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t from_y = x & y;
    const uint64_t from_z = ~x & z;

    return from_y | from_z;
}
365 
/* 64-bit bitwise majority: a bit is set when at least two inputs have it set. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t xy = x & y;
    const uint64_t xz = x & z;
    const uint64_t yz = y & z;

    return xy | xz | yz;
}
370 
/* SHA-512 "big sigma 0": XOR of right-rotations by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
375 
/* SHA-512 "big sigma 1": XOR of right-rotations by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
380 
/* SHA-512 "small sigma 0": rotations by 1 and 8 XORed with a shift by 7. */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
385 
/* SHA-512 "small sigma 1": rotations by 19 and 61 XORed with a shift by 6. */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
390 
391 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
392 {
393     uint64_t *rd = vd;
394     uint64_t *rn = vn;
395     uint64_t *rm = vm;
396     uint64_t d0 = rd[0];
397     uint64_t d1 = rd[1];
398 
399     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
400     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
401 
402     rd[0] = d0;
403     rd[1] = d1;
404 
405     clear_tail_16(vd, desc);
406 }
407 
408 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
409 {
410     uint64_t *rd = vd;
411     uint64_t *rn = vn;
412     uint64_t *rm = vm;
413     uint64_t d0 = rd[0];
414     uint64_t d1 = rd[1];
415 
416     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
417     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
418 
419     rd[0] = d0;
420     rd[1] = d1;
421 
422     clear_tail_16(vd, desc);
423 }
424 
425 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
426 {
427     uint64_t *rd = vd;
428     uint64_t *rn = vn;
429     uint64_t d0 = rd[0];
430     uint64_t d1 = rd[1];
431 
432     d0 += s0_512(rd[1]);
433     d1 += s0_512(rn[0]);
434 
435     rd[0] = d0;
436     rd[1] = d1;
437 
438     clear_tail_16(vd, desc);
439 }
440 
441 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
442 {
443     uint64_t *rd = vd;
444     uint64_t *rn = vn;
445     uint64_t *rm = vm;
446 
447     rd[0] += s1_512(rn[0]) + rm[0];
448     rd[1] += s1_512(rn[1]) + rm[1];
449 
450     clear_tail_16(vd, desc);
451 }
452 
453 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
454 {
455     uint64_t *rd = vd;
456     uint64_t *rn = vn;
457     uint64_t *rm = vm;
458     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
459     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
460     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
461     uint32_t t;
462 
463     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
464     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
465 
466     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
467     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
468 
469     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
470     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
471 
472     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
473     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
474 
475     rd[0] = d.l[0];
476     rd[1] = d.l[1];
477 
478     clear_tail_16(vd, desc);
479 }
480 
481 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
482 {
483     uint64_t *rd = vd;
484     uint64_t *rn = vn;
485     uint64_t *rm = vm;
486     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
487     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
488     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
489     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
490 
491     CR_ST_WORD(d, 0) ^= t;
492     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
493     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
494     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
495                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
496 
497     rd[0] = d.l[0];
498     rd[1] = d.l[1];
499 
500     clear_tail_16(vd, desc);
501 }
502 
503 static inline void QEMU_ALWAYS_INLINE
504 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
505              uint32_t desc, uint32_t opcode)
506 {
507     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
508     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
509     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
510     uint32_t imm2 = simd_data(desc);
511     uint32_t t;
512 
513     assert(imm2 < 4);
514 
515     if (opcode == 0 || opcode == 2) {
516         /* SM3TT1A, SM3TT2A */
517         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
518     } else if (opcode == 1) {
519         /* SM3TT1B */
520         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
521     } else if (opcode == 3) {
522         /* SM3TT2B */
523         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
524     } else {
525         qemu_build_not_reached();
526     }
527 
528     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
529 
530     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
531 
532     if (opcode < 2) {
533         /* SM3TT1A, SM3TT1B */
534         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
535 
536         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
537     } else {
538         /* SM3TT2A, SM3TT2B */
539         t += CR_ST_WORD(n, 3);
540         t ^= rol32(t, 9) ^ rol32(t, 17);
541 
542         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
543     }
544 
545     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
546     CR_ST_WORD(d, 3) = t;
547 
548     rd[0] = d.l[0];
549     rd[1] = d.l[1];
550 
551     clear_tail_16(rd, desc);
552 }
553 
554 #define DO_SM3TT(NAME, OPCODE) \
555     void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
556     { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
557 
558 DO_SM3TT(crypto_sm3tt1a, 0)
559 DO_SM3TT(crypto_sm3tt1b, 1)
560 DO_SM3TT(crypto_sm3tt2a, 2)
561 DO_SM3TT(crypto_sm3tt2b, 3)
562 
563 #undef DO_SM3TT
564 
565 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
566 {
567     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
568     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
569     uint32_t t, i;
570 
571     for (i = 0; i < 4; i++) {
572         t = CR_ST_WORD(d, (i + 1) % 4) ^
573             CR_ST_WORD(d, (i + 2) % 4) ^
574             CR_ST_WORD(d, (i + 3) % 4) ^
575             CR_ST_WORD(n, i);
576 
577         t = sm4_sbox[t & 0xff] |
578             sm4_sbox[(t >> 8) & 0xff] << 8 |
579             sm4_sbox[(t >> 16) & 0xff] << 16 |
580             sm4_sbox[(t >> 24) & 0xff] << 24;
581 
582         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
583                             rol32(t, 24);
584     }
585 
586     rd[0] = d.l[0];
587     rd[1] = d.l[1];
588 }
589 
590 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
591 {
592     intptr_t i, opr_sz = simd_oprsz(desc);
593 
594     for (i = 0; i < opr_sz; i += 16) {
595         do_crypto_sm4e(vd + i, vn + i, vm + i);
596     }
597     clear_tail(vd, opr_sz, simd_maxsz(desc));
598 }
599 
600 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
601 {
602     union CRYPTO_STATE d;
603     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
604     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
605     uint32_t t, i;
606 
607     d = n;
608     for (i = 0; i < 4; i++) {
609         t = CR_ST_WORD(d, (i + 1) % 4) ^
610             CR_ST_WORD(d, (i + 2) % 4) ^
611             CR_ST_WORD(d, (i + 3) % 4) ^
612             CR_ST_WORD(m, i);
613 
614         t = sm4_sbox[t & 0xff] |
615             sm4_sbox[(t >> 8) & 0xff] << 8 |
616             sm4_sbox[(t >> 16) & 0xff] << 16 |
617             sm4_sbox[(t >> 24) & 0xff] << 24;
618 
619         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
620     }
621 
622     rd[0] = d.l[0];
623     rd[1] = d.l[1];
624 }
625 
626 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
627 {
628     intptr_t i, opr_sz = simd_oprsz(desc);
629 
630     for (i = 0; i < opr_sz; i += 16) {
631         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
632     }
633     clear_tail(vd, opr_sz, simd_maxsz(desc));
634 }
635 
636 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
637 {
638     intptr_t i, opr_sz = simd_oprsz(desc);
639     uint64_t *d = vd, *n = vn, *m = vm;
640 
641     for (i = 0; i < opr_sz / 8; ++i) {
642         d[i] = n[i] ^ rol64(m[i], 1);
643     }
644     clear_tail(vd, opr_sz, simd_maxsz(desc));
645 }
646