/*
 * Cryptographic API.
 *
 * Glue code for the SHA256 Secure Hash Algorithm assembler
 * implementation using supplemental SSE3 / AVX / AVX2 instructions.
 *
 * This file is based on sha256_generic.c
 *
 * Copyright (C) 2013 Intel Corporation.
 *
 * Author:
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>

asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
				       u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
				     u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
				      u64 rounds);
#endif

static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);


static int sha256_ssse3_init(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	sctx->state[0] = SHA256_H0;
	sctx->state[1] = SHA256_H1;
	sctx->state[2] = SHA256_H2;
	sctx->state[3] = SHA256_H3;
	sctx->state[4] = SHA256_H4;
	sctx->state[5] = SHA256_H5;
	sctx->state[6] = SHA256_H6;
	sctx->state[7] = SHA256_H7;
	sctx->count = 0;

	return 0;
}
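
/*
 * Hash the supplied data with the selected assembler transform.  Any
 * bytes buffered from a previous call (@partial) are completed into a
 * full block first, then all remaining whole 64-byte blocks are
 * processed, and the tail is stashed in sctx->buf for the next call.
 * Must be called between kernel_fpu_begin() and kernel_fpu_end().
 */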
static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len, unsigned int partial)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int done = 0;

	sctx->count += len;

	if (partial) {
		done = SHA256_BLOCK_SIZE - partial;
		memcpy(sctx->buf + partial, data, done);
		sha256_transform_asm(sctx->buf, sctx->state, 1);
	}

	if (len - done >= SHA256_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;

		sha256_transform_asm(data + done, sctx->state, (u64) rounds);

		done += rounds * SHA256_BLOCK_SIZE;
	}

	memcpy(sctx->buf, data + done, len - done);

	return 0;
}
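
/*
 * crypto_shash .update handler.  Updates that still fit into the
 * partial-block buffer are copied without touching the FPU; otherwise
 * the data is hashed with the vectorized transform inside a
 * kernel_fpu_begin()/kernel_fpu_end() section, falling back to the
 * generic crypto_sha256_update() when the FPU is not usable in the
 * current context.
 */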
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA256_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buf + partial, data, len);

		return 0;
	}

	if (!irq_fpu_usable()) {
		res = crypto_sha256_update(desc, data, len);
	} else {
		kernel_fpu_begin();
		res = __sha256_ssse3_update(desc, data, len, partial);
		kernel_fpu_end();
	}

	return res;
}


/* Add padding and return the message digest. */
static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA256_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);

	if (!irq_fpu_usable()) {
		crypto_sha256_update(desc, padding, padlen);
		crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
	} else {
		kernel_fpu_begin();
		/* We need to fill a whole block for __sha256_ssse3_update() */
		if (padlen <= 56) {
			sctx->count += padlen;
			memcpy(sctx->buf + index, padding, padlen);
		} else {
			__sha256_ssse3_update(desc, padding, padlen, index);
		}
		__sha256_ssse3_update(desc, (const u8 *)&bits,
				      sizeof(bits), 56);
		kernel_fpu_end();
	}

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

static int sha256_ssse3_export(struct shash_desc *desc, void *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));

	return 0;
}

static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));

	return 0;
}

static struct shash_alg alg = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	sha256_ssse3_init,
	.update		=	sha256_ssse3_update,
	.final		=	sha256_ssse3_final,
	.export		=	sha256_ssse3_export,
	.import		=	sha256_ssse3_import,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name	=	"sha256",
		.cra_driver_name =	"sha256-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA256_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};

#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave)
		return false;

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");

		return false;
	}

	return true;
}
#endif
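
/*
 * Select the fastest transform the CPU supports at load time: SSSE3 as
 * the baseline, overridden by AVX when the OS has enabled YMM state
 * saving, and by the AVX2 (RORX-based) variant on AVX2-capable
 * processors.  If none of them is usable the module refuses to load
 * and the generic sha256 implementation is used instead.
 */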
static int __init sha256_ssse3_mod_init(void)
{
	/* test for SSSE3 first */
	if (cpu_has_ssse3)
		sha256_transform_asm = sha256_transform_ssse3;

#ifdef CONFIG_AS_AVX
	/* allow AVX to override SSSE3, it's a little faster */
	if (avx_usable()) {
#ifdef CONFIG_AS_AVX2
		if (boot_cpu_has(X86_FEATURE_AVX2))
			sha256_transform_asm = sha256_transform_rorx;
		else
#endif
			sha256_transform_asm = sha256_transform_avx;
	}
#endif

	if (sha256_transform_asm) {
#ifdef CONFIG_AS_AVX
		if (sha256_transform_asm == sha256_transform_avx)
			pr_info("Using AVX optimized SHA-256 implementation\n");
#ifdef CONFIG_AS_AVX2
		else if (sha256_transform_asm == sha256_transform_rorx)
			pr_info("Using AVX2 optimized SHA-256 implementation\n");
#endif
		else
#endif
			pr_info("Using SSSE3 optimized SHA-256 implementation\n");
		return crypto_register_shash(&alg);
	}
	pr_info("Neither AVX nor SSSE3 is available/usable.\n");

	return -ENODEV;
}

static void __exit sha256_ssse3_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(sha256_ssse3_mod_init);
module_exit(sha256_ssse3_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");

MODULE_ALIAS("sha256");