18275d1aaSTim Chen /* 28275d1aaSTim Chen * Cryptographic API. 38275d1aaSTim Chen * 48275d1aaSTim Chen * Glue code for the SHA256 Secure Hash Algorithm assembler 58275d1aaSTim Chen * implementation using supplemental SSE3 / AVX / AVX2 instructions. 68275d1aaSTim Chen * 78275d1aaSTim Chen * This file is based on sha256_generic.c 88275d1aaSTim Chen * 98275d1aaSTim Chen * Copyright (C) 2013 Intel Corporation. 108275d1aaSTim Chen * 118275d1aaSTim Chen * Author: 128275d1aaSTim Chen * Tim Chen <tim.c.chen@linux.intel.com> 138275d1aaSTim Chen * 148275d1aaSTim Chen * This program is free software; you can redistribute it and/or modify it 158275d1aaSTim Chen * under the terms of the GNU General Public License as published by the Free 168275d1aaSTim Chen * Software Foundation; either version 2 of the License, or (at your option) 178275d1aaSTim Chen * any later version. 188275d1aaSTim Chen * 198275d1aaSTim Chen * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 208275d1aaSTim Chen * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 218275d1aaSTim Chen * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 228275d1aaSTim Chen * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 238275d1aaSTim Chen * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 248275d1aaSTim Chen * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 258275d1aaSTim Chen * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 268275d1aaSTim Chen * SOFTWARE. 278275d1aaSTim Chen */ 288275d1aaSTim Chen 298275d1aaSTim Chen 308275d1aaSTim Chen #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 318275d1aaSTim Chen 328275d1aaSTim Chen #include <crypto/internal/hash.h> 338275d1aaSTim Chen #include <linux/init.h> 348275d1aaSTim Chen #include <linux/module.h> 358275d1aaSTim Chen #include <linux/mm.h> 368275d1aaSTim Chen #include <linux/cryptohash.h> 378275d1aaSTim Chen #include <linux/types.h> 388275d1aaSTim Chen #include <crypto/sha.h> 398275d1aaSTim Chen #include <asm/byteorder.h> 408275d1aaSTim Chen #include <asm/i387.h> 418275d1aaSTim Chen #include <asm/xcr.h> 428275d1aaSTim Chen #include <asm/xsave.h> 438275d1aaSTim Chen #include <linux/string.h> 448275d1aaSTim Chen 458275d1aaSTim Chen asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest, 468275d1aaSTim Chen u64 rounds); 478275d1aaSTim Chen #ifdef CONFIG_AS_AVX 488275d1aaSTim Chen asmlinkage void sha256_transform_avx(const char *data, u32 *digest, 498275d1aaSTim Chen u64 rounds); 508275d1aaSTim Chen #endif 518275d1aaSTim Chen #ifdef CONFIG_AS_AVX2 528275d1aaSTim Chen asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, 538275d1aaSTim Chen u64 rounds); 548275d1aaSTim Chen #endif 558275d1aaSTim Chen 568275d1aaSTim Chen static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); 578275d1aaSTim Chen 588275d1aaSTim Chen 598275d1aaSTim Chen static int sha256_ssse3_init(struct shash_desc *desc) 608275d1aaSTim Chen { 618275d1aaSTim Chen struct sha256_state *sctx = shash_desc_ctx(desc); 628275d1aaSTim Chen 638275d1aaSTim Chen sctx->state[0] = SHA256_H0; 648275d1aaSTim Chen sctx->state[1] = SHA256_H1; 658275d1aaSTim Chen sctx->state[2] = SHA256_H2; 668275d1aaSTim Chen sctx->state[3] = SHA256_H3; 678275d1aaSTim Chen sctx->state[4] = SHA256_H4; 688275d1aaSTim Chen sctx->state[5] = SHA256_H5; 698275d1aaSTim Chen sctx->state[6] = SHA256_H6; 708275d1aaSTim Chen sctx->state[7] = SHA256_H7; 718275d1aaSTim Chen sctx->count = 0; 728275d1aaSTim Chen 738275d1aaSTim Chen return 0; 748275d1aaSTim Chen } 758275d1aaSTim Chen 768275d1aaSTim Chen static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data, 778275d1aaSTim Chen unsigned int len, unsigned int partial) 788275d1aaSTim Chen { 798275d1aaSTim Chen struct sha256_state *sctx = shash_desc_ctx(desc); 808275d1aaSTim Chen unsigned int done = 0; 818275d1aaSTim Chen 828275d1aaSTim Chen sctx->count += len; 838275d1aaSTim Chen 848275d1aaSTim Chen if (partial) { 858275d1aaSTim Chen done = SHA256_BLOCK_SIZE - partial; 868275d1aaSTim Chen memcpy(sctx->buf + partial, data, done); 878275d1aaSTim Chen sha256_transform_asm(sctx->buf, sctx->state, 1); 888275d1aaSTim Chen } 898275d1aaSTim Chen 908275d1aaSTim Chen if (len - done >= SHA256_BLOCK_SIZE) { 918275d1aaSTim Chen const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; 928275d1aaSTim Chen 938275d1aaSTim Chen sha256_transform_asm(data + done, sctx->state, (u64) rounds); 948275d1aaSTim Chen 958275d1aaSTim Chen done += rounds * SHA256_BLOCK_SIZE; 968275d1aaSTim Chen } 978275d1aaSTim Chen 988275d1aaSTim Chen memcpy(sctx->buf, data + done, len - done); 998275d1aaSTim Chen 1008275d1aaSTim Chen return 0; 1018275d1aaSTim Chen } 1028275d1aaSTim Chen 1038275d1aaSTim Chen static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, 1048275d1aaSTim Chen unsigned int len) 1058275d1aaSTim Chen { 1068275d1aaSTim Chen struct sha256_state *sctx = shash_desc_ctx(desc); 1078275d1aaSTim Chen unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; 1088275d1aaSTim Chen int res; 1098275d1aaSTim Chen 1108275d1aaSTim Chen /* Handle the fast case right here */ 1118275d1aaSTim Chen if (partial + len < SHA256_BLOCK_SIZE) { 1128275d1aaSTim Chen sctx->count += len; 1138275d1aaSTim Chen memcpy(sctx->buf + partial, data, len); 1148275d1aaSTim Chen 1158275d1aaSTim Chen return 0; 1168275d1aaSTim Chen } 1178275d1aaSTim Chen 1188275d1aaSTim Chen if (!irq_fpu_usable()) { 1198275d1aaSTim Chen res = crypto_sha256_update(desc, data, len); 1208275d1aaSTim Chen } else { 1218275d1aaSTim Chen kernel_fpu_begin(); 1228275d1aaSTim Chen res = __sha256_ssse3_update(desc, data, len, partial); 1238275d1aaSTim Chen kernel_fpu_end(); 1248275d1aaSTim Chen } 1258275d1aaSTim Chen 1268275d1aaSTim Chen return res; 1278275d1aaSTim Chen } 1288275d1aaSTim Chen 1298275d1aaSTim Chen 1308275d1aaSTim Chen /* Add padding and return the message digest. */ 1318275d1aaSTim Chen static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) 1328275d1aaSTim Chen { 1338275d1aaSTim Chen struct sha256_state *sctx = shash_desc_ctx(desc); 1348275d1aaSTim Chen unsigned int i, index, padlen; 1358275d1aaSTim Chen __be32 *dst = (__be32 *)out; 1368275d1aaSTim Chen __be64 bits; 1378275d1aaSTim Chen static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; 1388275d1aaSTim Chen 1398275d1aaSTim Chen bits = cpu_to_be64(sctx->count << 3); 1408275d1aaSTim Chen 1418275d1aaSTim Chen /* Pad out to 56 mod 64 and append length */ 1428275d1aaSTim Chen index = sctx->count % SHA256_BLOCK_SIZE; 1438275d1aaSTim Chen padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); 1448275d1aaSTim Chen 1458275d1aaSTim Chen if (!irq_fpu_usable()) { 1468275d1aaSTim Chen crypto_sha256_update(desc, padding, padlen); 1478275d1aaSTim Chen crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits)); 1488275d1aaSTim Chen } else { 1498275d1aaSTim Chen kernel_fpu_begin(); 1508275d1aaSTim Chen /* We need to fill a whole block for __sha256_ssse3_update() */ 1518275d1aaSTim Chen if (padlen <= 56) { 1528275d1aaSTim Chen sctx->count += padlen; 1538275d1aaSTim Chen memcpy(sctx->buf + index, padding, padlen); 1548275d1aaSTim Chen } else { 1558275d1aaSTim Chen __sha256_ssse3_update(desc, padding, padlen, index); 1568275d1aaSTim Chen } 1578275d1aaSTim Chen __sha256_ssse3_update(desc, (const u8 *)&bits, 1588275d1aaSTim Chen sizeof(bits), 56); 1598275d1aaSTim Chen kernel_fpu_end(); 1608275d1aaSTim Chen } 1618275d1aaSTim Chen 1628275d1aaSTim Chen /* Store state in digest */ 1638275d1aaSTim Chen for (i = 0; i < 8; i++) 1648275d1aaSTim Chen dst[i] = cpu_to_be32(sctx->state[i]); 1658275d1aaSTim Chen 1668275d1aaSTim Chen /* Wipe context */ 1678275d1aaSTim Chen memset(sctx, 0, sizeof(*sctx)); 1688275d1aaSTim Chen 1698275d1aaSTim Chen return 0; 1708275d1aaSTim Chen } 1718275d1aaSTim Chen 1728275d1aaSTim Chen static int sha256_ssse3_export(struct shash_desc *desc, void *out) 1738275d1aaSTim Chen { 1748275d1aaSTim Chen struct sha256_state *sctx = shash_desc_ctx(desc); 1758275d1aaSTim Chen 1768275d1aaSTim Chen memcpy(out, sctx, sizeof(*sctx)); 1778275d1aaSTim Chen 1788275d1aaSTim Chen return 0; 1798275d1aaSTim Chen } 1808275d1aaSTim Chen 1818275d1aaSTim Chen static int sha256_ssse3_import(struct shash_desc *desc, const void *in) 1828275d1aaSTim Chen { 1838275d1aaSTim Chen struct sha256_state *sctx = shash_desc_ctx(desc); 1848275d1aaSTim Chen 1858275d1aaSTim Chen memcpy(sctx, in, sizeof(*sctx)); 1868275d1aaSTim Chen 1878275d1aaSTim Chen return 0; 1888275d1aaSTim Chen } 1898275d1aaSTim Chen 190*a710f761SJussi Kivilinna static int sha224_ssse3_init(struct shash_desc *desc) 191*a710f761SJussi Kivilinna { 192*a710f761SJussi Kivilinna struct sha256_state *sctx = shash_desc_ctx(desc); 193*a710f761SJussi Kivilinna 194*a710f761SJussi Kivilinna sctx->state[0] = SHA224_H0; 195*a710f761SJussi Kivilinna sctx->state[1] = SHA224_H1; 196*a710f761SJussi Kivilinna sctx->state[2] = SHA224_H2; 197*a710f761SJussi Kivilinna sctx->state[3] = SHA224_H3; 198*a710f761SJussi Kivilinna sctx->state[4] = SHA224_H4; 199*a710f761SJussi Kivilinna sctx->state[5] = SHA224_H5; 200*a710f761SJussi Kivilinna sctx->state[6] = SHA224_H6; 201*a710f761SJussi Kivilinna sctx->state[7] = SHA224_H7; 202*a710f761SJussi Kivilinna sctx->count = 0; 203*a710f761SJussi Kivilinna 204*a710f761SJussi Kivilinna return 0; 205*a710f761SJussi Kivilinna } 206*a710f761SJussi Kivilinna 207*a710f761SJussi Kivilinna static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash) 208*a710f761SJussi Kivilinna { 209*a710f761SJussi Kivilinna u8 D[SHA256_DIGEST_SIZE]; 210*a710f761SJussi Kivilinna 211*a710f761SJussi Kivilinna sha256_ssse3_final(desc, D); 212*a710f761SJussi Kivilinna 213*a710f761SJussi Kivilinna memcpy(hash, D, SHA224_DIGEST_SIZE); 214*a710f761SJussi Kivilinna memset(D, 0, SHA256_DIGEST_SIZE); 215*a710f761SJussi Kivilinna 216*a710f761SJussi Kivilinna return 0; 217*a710f761SJussi Kivilinna } 218*a710f761SJussi Kivilinna 219*a710f761SJussi Kivilinna static struct shash_alg algs[] = { { 2208275d1aaSTim Chen .digestsize = SHA256_DIGEST_SIZE, 2218275d1aaSTim Chen .init = sha256_ssse3_init, 2228275d1aaSTim Chen .update = sha256_ssse3_update, 2238275d1aaSTim Chen .final = sha256_ssse3_final, 2248275d1aaSTim Chen .export = sha256_ssse3_export, 2258275d1aaSTim Chen .import = sha256_ssse3_import, 2268275d1aaSTim Chen .descsize = sizeof(struct sha256_state), 2278275d1aaSTim Chen .statesize = sizeof(struct sha256_state), 2288275d1aaSTim Chen .base = { 2298275d1aaSTim Chen .cra_name = "sha256", 2308275d1aaSTim Chen .cra_driver_name = "sha256-ssse3", 2318275d1aaSTim Chen .cra_priority = 150, 2328275d1aaSTim Chen .cra_flags = CRYPTO_ALG_TYPE_SHASH, 2338275d1aaSTim Chen .cra_blocksize = SHA256_BLOCK_SIZE, 2348275d1aaSTim Chen .cra_module = THIS_MODULE, 2358275d1aaSTim Chen } 236*a710f761SJussi Kivilinna }, { 237*a710f761SJussi Kivilinna .digestsize = SHA224_DIGEST_SIZE, 238*a710f761SJussi Kivilinna .init = sha224_ssse3_init, 239*a710f761SJussi Kivilinna .update = sha256_ssse3_update, 240*a710f761SJussi Kivilinna .final = sha224_ssse3_final, 241*a710f761SJussi Kivilinna .export = sha256_ssse3_export, 242*a710f761SJussi Kivilinna .import = sha256_ssse3_import, 243*a710f761SJussi Kivilinna .descsize = sizeof(struct sha256_state), 244*a710f761SJussi Kivilinna .statesize = sizeof(struct sha256_state), 245*a710f761SJussi Kivilinna .base = { 246*a710f761SJussi Kivilinna .cra_name = "sha224", 247*a710f761SJussi Kivilinna .cra_driver_name = "sha224-ssse3", 248*a710f761SJussi Kivilinna .cra_priority = 150, 249*a710f761SJussi Kivilinna .cra_flags = CRYPTO_ALG_TYPE_SHASH, 250*a710f761SJussi Kivilinna .cra_blocksize = SHA224_BLOCK_SIZE, 251*a710f761SJussi Kivilinna .cra_module = THIS_MODULE, 252*a710f761SJussi Kivilinna } 253*a710f761SJussi Kivilinna } }; 2548275d1aaSTim Chen 2558275d1aaSTim Chen #ifdef CONFIG_AS_AVX 2568275d1aaSTim Chen static bool __init avx_usable(void) 2578275d1aaSTim Chen { 2588275d1aaSTim Chen u64 xcr0; 2598275d1aaSTim Chen 2608275d1aaSTim Chen if (!cpu_has_avx || !cpu_has_osxsave) 2618275d1aaSTim Chen return false; 2628275d1aaSTim Chen 2638275d1aaSTim Chen xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 2648275d1aaSTim Chen if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { 2658275d1aaSTim Chen pr_info("AVX detected but unusable.\n"); 2668275d1aaSTim Chen 2678275d1aaSTim Chen return false; 2688275d1aaSTim Chen } 2698275d1aaSTim Chen 2708275d1aaSTim Chen return true; 2718275d1aaSTim Chen } 2728275d1aaSTim Chen #endif 2738275d1aaSTim Chen 2748275d1aaSTim Chen static int __init sha256_ssse3_mod_init(void) 2758275d1aaSTim Chen { 276*a710f761SJussi Kivilinna /* test for SSSE3 first */ 2778275d1aaSTim Chen if (cpu_has_ssse3) 2788275d1aaSTim Chen sha256_transform_asm = sha256_transform_ssse3; 2798275d1aaSTim Chen 2808275d1aaSTim Chen #ifdef CONFIG_AS_AVX 2818275d1aaSTim Chen /* allow AVX to override SSSE3, it's a little faster */ 2828275d1aaSTim Chen if (avx_usable()) { 2838275d1aaSTim Chen #ifdef CONFIG_AS_AVX2 2848275d1aaSTim Chen if (boot_cpu_has(X86_FEATURE_AVX2)) 2858275d1aaSTim Chen sha256_transform_asm = sha256_transform_rorx; 2868275d1aaSTim Chen else 2878275d1aaSTim Chen #endif 2888275d1aaSTim Chen sha256_transform_asm = sha256_transform_avx; 2898275d1aaSTim Chen } 2908275d1aaSTim Chen #endif 2918275d1aaSTim Chen 2928275d1aaSTim Chen if (sha256_transform_asm) { 2938275d1aaSTim Chen #ifdef CONFIG_AS_AVX 2948275d1aaSTim Chen if (sha256_transform_asm == sha256_transform_avx) 2958275d1aaSTim Chen pr_info("Using AVX optimized SHA-256 implementation\n"); 2968275d1aaSTim Chen #ifdef CONFIG_AS_AVX2 2978275d1aaSTim Chen else if (sha256_transform_asm == sha256_transform_rorx) 2988275d1aaSTim Chen pr_info("Using AVX2 optimized SHA-256 implementation\n"); 2998275d1aaSTim Chen #endif 3008275d1aaSTim Chen else 3018275d1aaSTim Chen #endif 3028275d1aaSTim Chen pr_info("Using SSSE3 optimized SHA-256 implementation\n"); 303*a710f761SJussi Kivilinna return crypto_register_shashes(algs, ARRAY_SIZE(algs)); 3048275d1aaSTim Chen } 3058275d1aaSTim Chen pr_info("Neither AVX nor SSSE3 is available/usable.\n"); 3068275d1aaSTim Chen 3078275d1aaSTim Chen return -ENODEV; 3088275d1aaSTim Chen } 3098275d1aaSTim Chen 3108275d1aaSTim Chen static void __exit sha256_ssse3_mod_fini(void) 3118275d1aaSTim Chen { 312*a710f761SJussi Kivilinna crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); 3138275d1aaSTim Chen } 3148275d1aaSTim Chen 3158275d1aaSTim Chen module_init(sha256_ssse3_mod_init); 3168275d1aaSTim Chen module_exit(sha256_ssse3_mod_fini); 3178275d1aaSTim Chen 3188275d1aaSTim Chen MODULE_LICENSE("GPL"); 3198275d1aaSTim Chen MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); 3208275d1aaSTim Chen 3218275d1aaSTim Chen MODULE_ALIAS("sha256"); 322*a710f761SJussi Kivilinna MODULE_ALIAS("sha384"); 323