1 /* 2 * crypto_helper.c - emulate v8 Crypto Extensions instructions 3 * 4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 */ 11 12 #include "qemu/osdep.h" 13 14 #include "cpu.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 #include "crypto/aes.h" 18 #include "crypto/sm4.h" 19 #include "vec_internal.h" 20 21 union CRYPTO_STATE { 22 uint8_t bytes[16]; 23 uint32_t words[4]; 24 uint64_t l[2]; 25 }; 26 27 #if HOST_BIG_ENDIAN 28 #define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) 29 #define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) 30 #else 31 #define CR_ST_BYTE(state, i) ((state).bytes[i]) 32 #define CR_ST_WORD(state, i) ((state).words[i]) 33 #endif 34 35 /* 36 * The caller has not been converted to full gvec, and so only 37 * modifies the low 16 bytes of the vector register. 38 */ 39 static void clear_tail_16(void *vd, uint32_t desc) 40 { 41 int opr_sz = simd_oprsz(desc); 42 int max_sz = simd_maxsz(desc); 43 44 assert(opr_sz == 16); 45 clear_tail(vd, opr_sz, max_sz); 46 } 47 48 static void do_crypto_aese(uint64_t *rd, uint64_t *rn, 49 uint64_t *rm, bool decrypt) 50 { 51 static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; 52 static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; 53 union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 54 union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 55 int i; 56 57 /* xor state vector with round key */ 58 rk.l[0] ^= st.l[0]; 59 rk.l[1] ^= st.l[1]; 60 61 /* combine ShiftRows operation and sbox substitution */ 62 for (i = 0; i < 16; i++) { 63 CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])]; 64 } 65 66 rd[0] = st.l[0]; 67 rd[1] = st.l[1]; 68 } 69 70 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 71 { 72 intptr_t i, opr_sz = simd_oprsz(desc); 73 bool decrypt = simd_data(desc); 74 75 for (i = 0; i < opr_sz; i += 16) { 76 do_crypto_aese(vd + i, vn + i, vm + i, decrypt); 77 } 78 clear_tail(vd, opr_sz, simd_maxsz(desc)); 79 } 80 81 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) 82 { 83 union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 84 const uint32_t *mc = decrypt ? AES_imc_rot : AES_mc_rot; 85 int i; 86 87 for (i = 0; i < 16; i += 4) { 88 CR_ST_WORD(st, i >> 2) = 89 mc[CR_ST_BYTE(st, i)] ^ 90 rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^ 91 rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^ 92 rol32(mc[CR_ST_BYTE(st, i + 3)], 24); 93 } 94 95 rd[0] = st.l[0]; 96 rd[1] = st.l[1]; 97 } 98 99 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) 100 { 101 intptr_t i, opr_sz = simd_oprsz(desc); 102 bool decrypt = simd_data(desc); 103 104 for (i = 0; i < opr_sz; i += 16) { 105 do_crypto_aesmc(vd + i, vm + i, decrypt); 106 } 107 clear_tail(vd, opr_sz, simd_maxsz(desc)); 108 } 109 110 /* 111 * SHA-1 logical functions 112 */ 113 114 static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) 115 { 116 return (x & (y ^ z)) ^ z; 117 } 118 119 static uint32_t par(uint32_t x, uint32_t y, uint32_t z) 120 { 121 return x ^ y ^ z; 122 } 123 124 static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) 125 { 126 return (x & y) | ((x | y) & z); 127 } 128 129 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) 130 { 131 uint64_t *d = vd, *n = vn, *m = vm; 132 uint64_t d0, d1; 133 134 d0 = d[1] ^ d[0] ^ m[0]; 135 d1 = n[0] ^ d[1] ^ m[1]; 136 d[0] = d0; 137 d[1] = d1; 138 139 clear_tail_16(vd, desc); 140 } 141 142 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, 143 uint64_t *rm, uint32_t desc, 144 uint32_t (*fn)(union CRYPTO_STATE *d)) 145 { 146 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 147 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 148 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 149 int i; 150 151 for (i = 0; i < 4; i++) { 152 uint32_t t = fn(&d); 153 154 t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) 155 + CR_ST_WORD(m, i); 156 157 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); 158 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 159 CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); 160 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 161 CR_ST_WORD(d, 0) = t; 162 } 163 rd[0] = d.l[0]; 164 rd[1] = d.l[1]; 165 166 clear_tail_16(rd, desc); 167 } 168 169 static uint32_t do_sha1c(union CRYPTO_STATE *d) 170 { 171 return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 172 } 173 174 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) 175 { 176 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); 177 } 178 179 static uint32_t do_sha1p(union CRYPTO_STATE *d) 180 { 181 return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 182 } 183 184 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) 185 { 186 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); 187 } 188 189 static uint32_t do_sha1m(union CRYPTO_STATE *d) 190 { 191 return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); 192 } 193 194 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) 195 { 196 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); 197 } 198 199 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) 200 { 201 uint64_t *rd = vd; 202 uint64_t *rm = vm; 203 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 204 205 CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); 206 CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; 207 208 rd[0] = m.l[0]; 209 rd[1] = m.l[1]; 210 211 clear_tail_16(vd, desc); 212 } 213 214 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) 215 { 216 uint64_t *rd = vd; 217 uint64_t *rm = vm; 218 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 219 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 220 221 CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); 222 CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); 223 CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); 224 CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); 225 226 rd[0] = d.l[0]; 227 rd[1] = d.l[1]; 228 229 clear_tail_16(vd, desc); 230 } 231 232 /* 233 * The SHA-256 logical functions, according to 234 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf 235 */ 236 237 static uint32_t S0(uint32_t x) 238 { 239 return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); 240 } 241 242 static uint32_t S1(uint32_t x) 243 { 244 return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); 245 } 246 247 static uint32_t s0(uint32_t x) 248 { 249 return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); 250 } 251 252 static uint32_t s1(uint32_t x) 253 { 254 return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); 255 } 256 257 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) 258 { 259 uint64_t *rd = vd; 260 uint64_t *rn = vn; 261 uint64_t *rm = vm; 262 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 263 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 264 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 265 int i; 266 267 for (i = 0; i < 4; i++) { 268 uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) 269 + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) 270 + CR_ST_WORD(m, i); 271 272 CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); 273 CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); 274 CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); 275 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; 276 277 t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 278 + S0(CR_ST_WORD(d, 0)); 279 280 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 281 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 282 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 283 CR_ST_WORD(d, 0) = t; 284 } 285 286 rd[0] = d.l[0]; 287 rd[1] = d.l[1]; 288 289 clear_tail_16(vd, desc); 290 } 291 292 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) 293 { 294 uint64_t *rd = vd; 295 uint64_t *rn = vn; 296 uint64_t *rm = vm; 297 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 298 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 299 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 300 int i; 301 302 for (i = 0; i < 4; i++) { 303 uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) 304 + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) 305 + CR_ST_WORD(m, i); 306 307 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); 308 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); 309 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); 310 CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; 311 } 312 313 rd[0] = d.l[0]; 314 rd[1] = d.l[1]; 315 316 clear_tail_16(vd, desc); 317 } 318 319 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) 320 { 321 uint64_t *rd = vd; 322 uint64_t *rm = vm; 323 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 324 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 325 326 CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); 327 CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); 328 CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); 329 CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); 330 331 rd[0] = d.l[0]; 332 rd[1] = d.l[1]; 333 334 clear_tail_16(vd, desc); 335 } 336 337 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) 338 { 339 uint64_t *rd = vd; 340 uint64_t *rn = vn; 341 uint64_t *rm = vm; 342 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 343 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 344 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 345 346 CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); 347 CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); 348 CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); 349 CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); 350 351 rd[0] = d.l[0]; 352 rd[1] = d.l[1]; 353 354 clear_tail_16(vd, desc); 355 } 356 357 /* 358 * The SHA-512 logical functions (same as above but using 64-bit operands) 359 */ 360 361 static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) 362 { 363 return (x & (y ^ z)) ^ z; 364 } 365 366 static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) 367 { 368 return (x & y) | ((x | y) & z); 369 } 370 371 static uint64_t S0_512(uint64_t x) 372 { 373 return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); 374 } 375 376 static uint64_t S1_512(uint64_t x) 377 { 378 return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); 379 } 380 381 static uint64_t s0_512(uint64_t x) 382 { 383 return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); 384 } 385 386 static uint64_t s1_512(uint64_t x) 387 { 388 return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); 389 } 390 391 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) 392 { 393 uint64_t *rd = vd; 394 uint64_t *rn = vn; 395 uint64_t *rm = vm; 396 uint64_t d0 = rd[0]; 397 uint64_t d1 = rd[1]; 398 399 d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); 400 d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); 401 402 rd[0] = d0; 403 rd[1] = d1; 404 405 clear_tail_16(vd, desc); 406 } 407 408 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) 409 { 410 uint64_t *rd = vd; 411 uint64_t *rn = vn; 412 uint64_t *rm = vm; 413 uint64_t d0 = rd[0]; 414 uint64_t d1 = rd[1]; 415 416 d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); 417 d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); 418 419 rd[0] = d0; 420 rd[1] = d1; 421 422 clear_tail_16(vd, desc); 423 } 424 425 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) 426 { 427 uint64_t *rd = vd; 428 uint64_t *rn = vn; 429 uint64_t d0 = rd[0]; 430 uint64_t d1 = rd[1]; 431 432 d0 += s0_512(rd[1]); 433 d1 += s0_512(rn[0]); 434 435 rd[0] = d0; 436 rd[1] = d1; 437 438 clear_tail_16(vd, desc); 439 } 440 441 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) 442 { 443 uint64_t *rd = vd; 444 uint64_t *rn = vn; 445 uint64_t *rm = vm; 446 447 rd[0] += s1_512(rn[0]) + rm[0]; 448 rd[1] += s1_512(rn[1]) + rm[1]; 449 450 clear_tail_16(vd, desc); 451 } 452 453 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) 454 { 455 uint64_t *rd = vd; 456 uint64_t *rn = vn; 457 uint64_t *rm = vm; 458 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 459 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 460 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 461 uint32_t t; 462 463 t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); 464 CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); 465 466 t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); 467 CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); 468 469 t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); 470 CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); 471 472 t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); 473 CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); 474 475 rd[0] = d.l[0]; 476 rd[1] = d.l[1]; 477 478 clear_tail_16(vd, desc); 479 } 480 481 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) 482 { 483 uint64_t *rd = vd; 484 uint64_t *rn = vn; 485 uint64_t *rm = vm; 486 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 487 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 488 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 489 uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); 490 491 CR_ST_WORD(d, 0) ^= t; 492 CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); 493 CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); 494 CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ 495 ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); 496 497 rd[0] = d.l[0]; 498 rd[1] = d.l[1]; 499 500 clear_tail_16(vd, desc); 501 } 502 503 static inline void QEMU_ALWAYS_INLINE 504 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, 505 uint32_t desc, uint32_t opcode) 506 { 507 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 508 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 509 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 510 uint32_t imm2 = simd_data(desc); 511 uint32_t t; 512 513 assert(imm2 < 4); 514 515 if (opcode == 0 || opcode == 2) { 516 /* SM3TT1A, SM3TT2A */ 517 t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 518 } else if (opcode == 1) { 519 /* SM3TT1B */ 520 t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 521 } else if (opcode == 3) { 522 /* SM3TT2B */ 523 t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); 524 } else { 525 qemu_build_not_reached(); 526 } 527 528 t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); 529 530 CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); 531 532 if (opcode < 2) { 533 /* SM3TT1A, SM3TT1B */ 534 t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); 535 536 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); 537 } else { 538 /* SM3TT2A, SM3TT2B */ 539 t += CR_ST_WORD(n, 3); 540 t ^= rol32(t, 9) ^ rol32(t, 17); 541 542 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); 543 } 544 545 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); 546 CR_ST_WORD(d, 3) = t; 547 548 rd[0] = d.l[0]; 549 rd[1] = d.l[1]; 550 551 clear_tail_16(rd, desc); 552 } 553 554 #define DO_SM3TT(NAME, OPCODE) \ 555 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ 556 { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } 557 558 DO_SM3TT(crypto_sm3tt1a, 0) 559 DO_SM3TT(crypto_sm3tt1b, 1) 560 DO_SM3TT(crypto_sm3tt2a, 2) 561 DO_SM3TT(crypto_sm3tt2b, 3) 562 563 #undef DO_SM3TT 564 565 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 566 { 567 union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 568 union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 569 uint32_t t, i; 570 571 for (i = 0; i < 4; i++) { 572 t = CR_ST_WORD(d, (i + 1) % 4) ^ 573 CR_ST_WORD(d, (i + 2) % 4) ^ 574 CR_ST_WORD(d, (i + 3) % 4) ^ 575 CR_ST_WORD(n, i); 576 577 t = sm4_sbox[t & 0xff] | 578 sm4_sbox[(t >> 8) & 0xff] << 8 | 579 sm4_sbox[(t >> 16) & 0xff] << 16 | 580 sm4_sbox[(t >> 24) & 0xff] << 24; 581 582 CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ 583 rol32(t, 24); 584 } 585 586 rd[0] = d.l[0]; 587 rd[1] = d.l[1]; 588 } 589 590 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 591 { 592 intptr_t i, opr_sz = simd_oprsz(desc); 593 594 for (i = 0; i < opr_sz; i += 16) { 595 do_crypto_sm4e(vd + i, vn + i, vm + i); 596 } 597 clear_tail(vd, opr_sz, simd_maxsz(desc)); 598 } 599 600 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 601 { 602 union CRYPTO_STATE d; 603 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 604 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; 605 uint32_t t, i; 606 607 d = n; 608 for (i = 0; i < 4; i++) { 609 t = CR_ST_WORD(d, (i + 1) % 4) ^ 610 CR_ST_WORD(d, (i + 2) % 4) ^ 611 CR_ST_WORD(d, (i + 3) % 4) ^ 612 CR_ST_WORD(m, i); 613 614 t = sm4_sbox[t & 0xff] | 615 sm4_sbox[(t >> 8) & 0xff] << 8 | 616 sm4_sbox[(t >> 16) & 0xff] << 16 | 617 sm4_sbox[(t >> 24) & 0xff] << 24; 618 619 CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); 620 } 621 622 rd[0] = d.l[0]; 623 rd[1] = d.l[1]; 624 } 625 626 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 627 { 628 intptr_t i, opr_sz = simd_oprsz(desc); 629 630 for (i = 0; i < opr_sz; i += 16) { 631 do_crypto_sm4ekey(vd + i, vn + i, vm + i); 632 } 633 clear_tail(vd, opr_sz, simd_maxsz(desc)); 634 } 635 636 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) 637 { 638 intptr_t i, opr_sz = simd_oprsz(desc); 639 uint64_t *d = vd, *n = vn, *m = vm; 640 641 for (i = 0; i < opr_sz / 8; ++i) { 642 d[i] = n[i] ^ rol64(m[i], 1); 643 } 644 clear_tail(vd, opr_sz, simd_maxsz(desc)); 645 } 646