/*
 * crypto_helper.c - emulate v8 Crypto Extensions instructions
 *
 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */

#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes-round.h"
#include "crypto/sm4.h"
#include "vec_internal.h"

union CRYPTO_STATE {
    uint8_t bytes[16];
    uint32_t words[4];
    uint64_t l[2];
};

#if HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i) ((state).bytes[i])
#define CR_ST_WORD(state, i) ((state).words[i])
#endif

/*
 * The caller has not been converted to full gvec, and so only
 * modifies the low 16 bytes of the vector register.
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    int opr_sz = simd_oprsz(desc);
    int max_sz = simd_maxsz(desc);

    assert(opr_sz == 16);
    clear_tail(vd, opr_sz, max_sz);
}

static const AESState aes_zero = { };

void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vn + i);
        AESState *rk = (AESState *)(vm + i);
        AESState t;

        /*
         * Our uint64_t are in the wrong order for big-endian.
         * The Arm AddRoundKey comes first, while the API AddRoundKey
         * comes last: perform the xor here, and provide zero to API.
         */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1] ^ rk->d[1];
            t.d[1] = st->d[0] ^ rk->d[0];
            aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            t.v = st->v ^ rk->v;
            aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vn + i);
        AESState *rk = (AESState *)(vm + i);
        AESState t;

        /* Our uint64_t are in the wrong order for big-endian. */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1] ^ rk->d[1];
            t.d[1] = st->d[0] ^ rk->d[0];
            aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            t.v = st->v ^ rk->v;
            aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vm + i);
        AESState t;

        /* Our uint64_t are in the wrong order for big-endian. */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1];
            t.d[1] = st->d[0];
            aesenc_MC(&t, &t, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            aesenc_MC(ad, st, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        AESState *ad = (AESState *)(vd + i);
        AESState *st = (AESState *)(vm + i);
        AESState t;

        /* Our uint64_t are in the wrong order for big-endian. */
        if (HOST_BIG_ENDIAN) {
            t.d[0] = st->d[1];
            t.d[1] = st->d[0];
            aesdec_IMC(&t, &t, false);
            ad->d[0] = t.d[1];
            ad->d[1] = t.d[0];
        } else {
            aesdec_IMC(ad, st, false);
        }
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}
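/*
 * Illustrative note (a sketch, not part of the helpers above): the
 * pre-xor-and-zero-key shape works because Arm AESE/AESD apply
 * AddRoundKey *before* SubBytes/ShiftRows, while the generic
 * aesenc_SB_SR_AK()/aesdec_ISB_ISR_AK() routines apply their key
 * argument *after* the byte transforms.  Assuming that ordering:
 *
 *     AESE(state, key) == SubBytes(ShiftRows(state ^ key))
 *                      == aesenc_SB_SR_AK(state ^ key, zero_key)
 *
 * which is exactly the xor-then-call-with-aes_zero pattern used in
 * crypto_aese() and crypto_aesd().
 */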
/*
 * SHA-1 logical functions
 */

static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & (y ^ z)) ^ z;
}

static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    return x ^ y ^ z;
}

static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | ((x | y) & z);
}

void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *d = vd, *n = vn, *m = vm;
    uint64_t d0, d1;

    d0 = d[1] ^ d[0] ^ m[0];
    d1 = n[0] ^ d[1] ^ m[1];
    d[0] = d0;
    d[1] = d1;

    clear_tail_16(vd, desc);
}

static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
                                    uint64_t *rm, uint32_t desc,
                                    uint32_t (*fn)(union CRYPTO_STATE *d))
{
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = fn(&d);

        t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
             + CR_ST_WORD(m, i);

        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = t;
    }
    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(rd, desc);
}

static uint32_t do_sha1c(union CRYPTO_STATE *d)
{
    return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
}

static uint32_t do_sha1p(union CRYPTO_STATE *d)
{
    return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
}

static uint32_t do_sha1m(union CRYPTO_STATE *d)
{
    return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
}

void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
{
    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
}

void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;

    rd[0] = m.l[0];
    rd[1] = m.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}
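/*
 * Illustrative mapping (sketch only): in crypto_sha1_3reg() the words of
 * 'd' hold the SHA-1 working variables with index 0 = a, 1 = b, 2 = c,
 * 3 = d, word 0 of 'n' holds e, and 'm' supplies the four per-round
 * additions (in practice the guest passes W plus the round constant K,
 * pre-added).  With the usual FIPS 180-4 names, each iteration is
 * roughly:
 *
 *     T = rol32(a, 5) + f(b, c, d) + e + (W + K);
 *     e = d;  d = c;  c = rol32(b, 30);  b = a;  a = T;
 *
 * where f() is cho/par/maj for SHA1C/SHA1P/SHA1M respectively (note
 * that ror32(b, 2) above is the same as rol32(b, 30)).
 */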
/*
 * The SHA-256 logical functions, according to
 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 */

static uint32_t S0(uint32_t x)
{
    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
}

static uint32_t S1(uint32_t x)
{
    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
}

static uint32_t s0(uint32_t x)
{
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

static uint32_t s1(uint32_t x)
{
    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}

void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
                     + CR_ST_WORD(m, i);

        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;

        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
             + S0(CR_ST_WORD(d, 0));

        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = t;
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
                     + CR_ST_WORD(m, i);

        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };

    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}
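/*
 * Illustrative note (sketch only): S0/S1/s0/s1 above are the FIPS 180-4
 * functions usually written as Sigma0, Sigma1, sigma0 and sigma1.  The
 * schedule helpers split the standard recurrence
 *
 *     W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
 *
 * between two instructions: SHA256SU0 accumulates the s0() and W[t-16]
 * terms, and SHA256SU1 adds the s1() and W[t-7] terms, which matches the
 * '+=' split between crypto_sha256su0() and crypto_sha256su1().
 */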
/*
 * The SHA-512 logical functions (same as above but using 64-bit operands)
 */

static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & (y ^ z)) ^ z;
}

static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & y) | ((x | y) & z);
}

static uint64_t S0_512(uint64_t x)
{
    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
}

static uint64_t S1_512(uint64_t x)
{
    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
}

static uint64_t s0_512(uint64_t x)
{
    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
}

static uint64_t s1_512(uint64_t x)
{
    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
}

void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t d0 = rd[0];
    uint64_t d1 = rd[1];

    d0 += s0_512(rd[1]);
    d1 += s0_512(rn[0]);

    rd[0] = d0;
    rd[1] = d1;

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;

    rd[0] += s1_512(rn[0]) + rm[0];
    rd[1] += s1_512(rn[1]) + rm[1];

    clear_tail_16(vd, desc);
}
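/*
 * Illustrative cross-check (sketch only): the 64-bit quad functions above
 * use the SHA-512 rotation amounts from FIPS 180-4, e.g.
 *
 *     Sigma0(x) = ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39)  == S0_512(x)
 *     sigma1(x) = ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6)      == s1_512(x)
 *
 * and crypto_sha512su0()/crypto_sha512su1() split the 64-bit message
 * schedule recurrence the same way as their SHA-256 counterparts above.
 */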
void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t;

    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);

    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
{
    uint64_t *rd = vd;
    uint64_t *rn = vn;
    uint64_t *rm = vm;
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);

    CR_ST_WORD(d, 0) ^= t;
    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(vd, desc);
}

static inline void QEMU_ALWAYS_INLINE
crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
             uint32_t desc, uint32_t opcode)
{
    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t imm2 = simd_data(desc);
    uint32_t t;

    assert(imm2 < 4);

    if (opcode == 0 || opcode == 2) {
        /* SM3TT1A, SM3TT2A */
        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 1) {
        /* SM3TT1B */
        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else if (opcode == 3) {
        /* SM3TT2B */
        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
    } else {
        qemu_build_not_reached();
    }

    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);

    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);

    if (opcode < 2) {
        /* SM3TT1A, SM3TT1B */
        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
    } else {
        /* SM3TT2A, SM3TT2B */
        t += CR_ST_WORD(n, 3);
        t ^= rol32(t, 9) ^ rol32(t, 17);

        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
    }

    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
    CR_ST_WORD(d, 3) = t;

    rd[0] = d.l[0];
    rd[1] = d.l[1];

    clear_tail_16(rd, desc);
}

#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

#undef DO_SM3TT
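/*
 * Illustrative note (sketch only): crypto_sm3tt() folds the four SM3TT
 * instructions into one routine, with opcode 0..3 standing for SM3TT1A,
 * SM3TT1B, SM3TT2A and SM3TT2B, and imm2 (the lane select from the
 * instruction) arriving through the gvec descriptor's data field.  A
 * translator would typically build such a descriptor with something like
 *
 *     desc = simd_desc(oprsz, maxsz, imm2);   // so simd_data(desc) == imm2
 *
 * (a sketch; the real call site lives in the translate code, not here).
 */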
static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
    uint32_t t, i;

    for (i = 0; i < 4; i++) {
        t = CR_ST_WORD(d, (i + 1) % 4) ^
            CR_ST_WORD(d, (i + 2) % 4) ^
            CR_ST_WORD(d, (i + 3) % 4) ^
            CR_ST_WORD(n, i);

        t = sm4_sbox[t & 0xff] |
            sm4_sbox[(t >> 8) & 0xff] << 8 |
            sm4_sbox[(t >> 16) & 0xff] << 16 |
            sm4_sbox[(t >> 24) & 0xff] << 24;

        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
                            rol32(t, 24);
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];
}

void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4e(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
{
    union CRYPTO_STATE d;
    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
    uint32_t t, i;

    d = n;
    for (i = 0; i < 4; i++) {
        t = CR_ST_WORD(d, (i + 1) % 4) ^
            CR_ST_WORD(d, (i + 2) % 4) ^
            CR_ST_WORD(d, (i + 3) % 4) ^
            CR_ST_WORD(m, i);

        t = sm4_sbox[t & 0xff] |
            sm4_sbox[(t >> 8) & 0xff] << 8 |
            sm4_sbox[(t >> 16) & 0xff] << 16 |
            sm4_sbox[(t >> 24) & 0xff] << 24;

        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
    }

    rd[0] = d.l[0];
    rd[1] = d.l[1];
}

void HELPER(crypto_sm4ekey)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);

    for (i = 0; i < opr_sz; i += 16) {
        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}

void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
{
    intptr_t i, opr_sz = simd_oprsz(desc);
    uint64_t *d = vd, *n = vn, *m = vm;

    for (i = 0; i < opr_sz / 8; ++i) {
        d[i] = n[i] ^ rol64(m[i], 1);
    }
    clear_tail(vd, opr_sz, simd_maxsz(desc));
}
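/*
 * Illustrative note (sketch only): crypto_rax1() implements the SHA-3
 * RAX1 operation, d[i] = n[i] ^ rol64(m[i], 1), across every 64-bit lane
 * of the operation size, which is why it finishes with the generic
 * clear_tail() rather than clear_tail_16().  Helpers of this shape are
 * normally reached through an out-of-line gvec expansion on the
 * translator side, roughly:
 *
 *     tcg_gen_gvec_3_ool(dofs, nofs, mofs, oprsz, maxsz, 0,
 *                        gen_helper_crypto_rax1);
 *
 * (a sketch only; the offset names here are placeholders, not the actual
 * translate-a64 code).
 */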