18275d1aaSTim Chen /* 28275d1aaSTim Chen * Cryptographic API. 38275d1aaSTim Chen * 48275d1aaSTim Chen * Glue code for the SHA256 Secure Hash Algorithm assembler 58275d1aaSTim Chen * implementation using supplemental SSE3 / AVX / AVX2 instructions. 68275d1aaSTim Chen * 78275d1aaSTim Chen * This file is based on sha256_generic.c 88275d1aaSTim Chen * 98275d1aaSTim Chen * Copyright (C) 2013 Intel Corporation. 108275d1aaSTim Chen * 118275d1aaSTim Chen * Author: 128275d1aaSTim Chen * Tim Chen <tim.c.chen@linux.intel.com> 138275d1aaSTim Chen * 148275d1aaSTim Chen * This program is free software; you can redistribute it and/or modify it 158275d1aaSTim Chen * under the terms of the GNU General Public License as published by the Free 168275d1aaSTim Chen * Software Foundation; either version 2 of the License, or (at your option) 178275d1aaSTim Chen * any later version. 188275d1aaSTim Chen * 198275d1aaSTim Chen * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 208275d1aaSTim Chen * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 218275d1aaSTim Chen * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 228275d1aaSTim Chen * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 238275d1aaSTim Chen * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 248275d1aaSTim Chen * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 258275d1aaSTim Chen * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 268275d1aaSTim Chen * SOFTWARE. 278275d1aaSTim Chen */ 288275d1aaSTim Chen 298275d1aaSTim Chen 308275d1aaSTim Chen #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 318275d1aaSTim Chen 328275d1aaSTim Chen #include <crypto/internal/hash.h> 338275d1aaSTim Chen #include <linux/init.h> 348275d1aaSTim Chen #include <linux/module.h> 358275d1aaSTim Chen #include <linux/mm.h> 368275d1aaSTim Chen #include <linux/cryptohash.h> 378275d1aaSTim Chen #include <linux/types.h> 388275d1aaSTim Chen #include <crypto/sha.h> 39*1631030aSArd Biesheuvel #include <crypto/sha256_base.h> 408275d1aaSTim Chen #include <asm/i387.h> 418275d1aaSTim Chen #include <asm/xcr.h> 428275d1aaSTim Chen #include <asm/xsave.h> 438275d1aaSTim Chen #include <linux/string.h> 448275d1aaSTim Chen 45*1631030aSArd Biesheuvel asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, 468275d1aaSTim Chen u64 rounds); 478275d1aaSTim Chen #ifdef CONFIG_AS_AVX 48*1631030aSArd Biesheuvel asmlinkage void sha256_transform_avx(u32 *digest, const char *data, 498275d1aaSTim Chen u64 rounds); 508275d1aaSTim Chen #endif 518275d1aaSTim Chen #ifdef CONFIG_AS_AVX2 52*1631030aSArd Biesheuvel asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, 538275d1aaSTim Chen u64 rounds); 548275d1aaSTim Chen #endif 558275d1aaSTim Chen 56*1631030aSArd Biesheuvel static void (*sha256_transform_asm)(u32 *, const char *, u64); 578275d1aaSTim Chen 588275d1aaSTim Chen static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, 598275d1aaSTim Chen unsigned int len) 608275d1aaSTim Chen { 618275d1aaSTim Chen struct sha256_state *sctx = shash_desc_ctx(desc); 628275d1aaSTim Chen 63*1631030aSArd Biesheuvel if (!irq_fpu_usable() || 64*1631030aSArd Biesheuvel (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) 65*1631030aSArd Biesheuvel return crypto_sha256_update(desc, data, len); 66*1631030aSArd Biesheuvel 67*1631030aSArd Biesheuvel /* make sure casting to sha256_block_fn() is safe */ 68*1631030aSArd Biesheuvel BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); 69*1631030aSArd Biesheuvel 70*1631030aSArd Biesheuvel kernel_fpu_begin(); 71*1631030aSArd Biesheuvel sha256_base_do_update(desc, data, len, 72*1631030aSArd Biesheuvel (sha256_block_fn *)sha256_transform_asm); 73*1631030aSArd Biesheuvel kernel_fpu_end(); 748275d1aaSTim Chen 758275d1aaSTim Chen return 0; 768275d1aaSTim Chen } 778275d1aaSTim Chen 78*1631030aSArd Biesheuvel static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, 79*1631030aSArd Biesheuvel unsigned int len, u8 *out) 80*1631030aSArd Biesheuvel { 81*1631030aSArd Biesheuvel if (!irq_fpu_usable()) 82*1631030aSArd Biesheuvel return crypto_sha256_finup(desc, data, len, out); 83*1631030aSArd Biesheuvel 848275d1aaSTim Chen kernel_fpu_begin(); 85*1631030aSArd Biesheuvel if (len) 86*1631030aSArd Biesheuvel sha256_base_do_update(desc, data, len, 87*1631030aSArd Biesheuvel (sha256_block_fn *)sha256_transform_asm); 88*1631030aSArd Biesheuvel sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm); 898275d1aaSTim Chen kernel_fpu_end(); 908275d1aaSTim Chen 91*1631030aSArd Biesheuvel return sha256_base_finish(desc, out); 928275d1aaSTim Chen } 938275d1aaSTim Chen 948275d1aaSTim Chen /* Add padding and return the message digest. */ 958275d1aaSTim Chen static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) 968275d1aaSTim Chen { 97*1631030aSArd Biesheuvel return sha256_ssse3_finup(desc, NULL, 0, out); 98a710f761SJussi Kivilinna } 99a710f761SJussi Kivilinna 100a710f761SJussi Kivilinna static struct shash_alg algs[] = { { 1018275d1aaSTim Chen .digestsize = SHA256_DIGEST_SIZE, 102*1631030aSArd Biesheuvel .init = sha256_base_init, 1038275d1aaSTim Chen .update = sha256_ssse3_update, 1048275d1aaSTim Chen .final = sha256_ssse3_final, 105*1631030aSArd Biesheuvel .finup = sha256_ssse3_finup, 1068275d1aaSTim Chen .descsize = sizeof(struct sha256_state), 1078275d1aaSTim Chen .base = { 1088275d1aaSTim Chen .cra_name = "sha256", 1098275d1aaSTim Chen .cra_driver_name = "sha256-ssse3", 1108275d1aaSTim Chen .cra_priority = 150, 1118275d1aaSTim Chen .cra_flags = CRYPTO_ALG_TYPE_SHASH, 1128275d1aaSTim Chen .cra_blocksize = SHA256_BLOCK_SIZE, 1138275d1aaSTim Chen .cra_module = THIS_MODULE, 1148275d1aaSTim Chen } 115a710f761SJussi Kivilinna }, { 116a710f761SJussi Kivilinna .digestsize = SHA224_DIGEST_SIZE, 117*1631030aSArd Biesheuvel .init = sha224_base_init, 118a710f761SJussi Kivilinna .update = sha256_ssse3_update, 119*1631030aSArd Biesheuvel .final = sha256_ssse3_final, 120*1631030aSArd Biesheuvel .finup = sha256_ssse3_finup, 121a710f761SJussi Kivilinna .descsize = sizeof(struct sha256_state), 122a710f761SJussi Kivilinna .base = { 123a710f761SJussi Kivilinna .cra_name = "sha224", 124a710f761SJussi Kivilinna .cra_driver_name = "sha224-ssse3", 125a710f761SJussi Kivilinna .cra_priority = 150, 126a710f761SJussi Kivilinna .cra_flags = CRYPTO_ALG_TYPE_SHASH, 127a710f761SJussi Kivilinna .cra_blocksize = SHA224_BLOCK_SIZE, 128a710f761SJussi Kivilinna .cra_module = THIS_MODULE, 129a710f761SJussi Kivilinna } 130a710f761SJussi Kivilinna } }; 1318275d1aaSTim Chen 1328275d1aaSTim Chen #ifdef CONFIG_AS_AVX 1338275d1aaSTim Chen static bool __init avx_usable(void) 1348275d1aaSTim Chen { 1358275d1aaSTim Chen u64 xcr0; 1368275d1aaSTim Chen 1378275d1aaSTim Chen if (!cpu_has_avx || !cpu_has_osxsave) 1388275d1aaSTim Chen return false; 1398275d1aaSTim Chen 1408275d1aaSTim Chen xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 1418275d1aaSTim Chen if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { 1428275d1aaSTim Chen pr_info("AVX detected but unusable.\n"); 1438275d1aaSTim Chen 1448275d1aaSTim Chen return false; 1458275d1aaSTim Chen } 1468275d1aaSTim Chen 1478275d1aaSTim Chen return true; 1488275d1aaSTim Chen } 1498275d1aaSTim Chen #endif 1508275d1aaSTim Chen 1518275d1aaSTim Chen static int __init sha256_ssse3_mod_init(void) 1528275d1aaSTim Chen { 153a710f761SJussi Kivilinna /* test for SSSE3 first */ 1548275d1aaSTim Chen if (cpu_has_ssse3) 1558275d1aaSTim Chen sha256_transform_asm = sha256_transform_ssse3; 1568275d1aaSTim Chen 1578275d1aaSTim Chen #ifdef CONFIG_AS_AVX 1588275d1aaSTim Chen /* allow AVX to override SSSE3, it's a little faster */ 1598275d1aaSTim Chen if (avx_usable()) { 1608275d1aaSTim Chen #ifdef CONFIG_AS_AVX2 16116c0c4e1SOliver Neukum if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) 1628275d1aaSTim Chen sha256_transform_asm = sha256_transform_rorx; 1638275d1aaSTim Chen else 1648275d1aaSTim Chen #endif 1658275d1aaSTim Chen sha256_transform_asm = sha256_transform_avx; 1668275d1aaSTim Chen } 1678275d1aaSTim Chen #endif 1688275d1aaSTim Chen 1698275d1aaSTim Chen if (sha256_transform_asm) { 1708275d1aaSTim Chen #ifdef CONFIG_AS_AVX 1718275d1aaSTim Chen if (sha256_transform_asm == sha256_transform_avx) 1728275d1aaSTim Chen pr_info("Using AVX optimized SHA-256 implementation\n"); 1738275d1aaSTim Chen #ifdef CONFIG_AS_AVX2 1748275d1aaSTim Chen else if (sha256_transform_asm == sha256_transform_rorx) 1758275d1aaSTim Chen pr_info("Using AVX2 optimized SHA-256 implementation\n"); 1768275d1aaSTim Chen #endif 1778275d1aaSTim Chen else 1788275d1aaSTim Chen #endif 1798275d1aaSTim Chen pr_info("Using SSSE3 optimized SHA-256 implementation\n"); 180a710f761SJussi Kivilinna return crypto_register_shashes(algs, ARRAY_SIZE(algs)); 1818275d1aaSTim Chen } 1828275d1aaSTim Chen pr_info("Neither AVX nor SSSE3 is available/usable.\n"); 1838275d1aaSTim Chen 1848275d1aaSTim Chen return -ENODEV; 1858275d1aaSTim Chen } 1868275d1aaSTim Chen 1878275d1aaSTim Chen static void __exit sha256_ssse3_mod_fini(void) 1888275d1aaSTim Chen { 189a710f761SJussi Kivilinna crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); 1908275d1aaSTim Chen } 1918275d1aaSTim Chen 1928275d1aaSTim Chen module_init(sha256_ssse3_mod_init); 1938275d1aaSTim Chen module_exit(sha256_ssse3_mod_fini); 1948275d1aaSTim Chen 1958275d1aaSTim Chen MODULE_LICENSE("GPL"); 1968275d1aaSTim Chen MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); 1978275d1aaSTim Chen 1985d26a105SKees Cook MODULE_ALIAS_CRYPTO("sha256"); 1995d26a105SKees Cook MODULE_ALIAS_CRYPTO("sha224"); 200