/*
 * Cryptographic API.
 *
 * Glue code for the SHA256 Secure Hash Algorithm assembler
 * implementation using supplemental SSE3 / AVX / AVX2 instructions.
 *
 * This file is based on sha256_generic.c
 *
 * Copyright (C) 2013 Intel Corporation.
 *
 * Author:
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>

asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
				       u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
				     u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
				      u64 rounds);
#endif

static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);


static int sha256_ssse3_init(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	sctx->state[0] = SHA256_H0;
	sctx->state[1] = SHA256_H1;
	sctx->state[2] = SHA256_H2;
	sctx->state[3] = SHA256_H3;
	sctx->state[4] = SHA256_H4;
	sctx->state[5] = SHA256_H5;
	sctx->state[6] = SHA256_H6;
	sctx->state[7] = SHA256_H7;
	sctx->count = 0;

	return 0;
}

static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len, unsigned int partial)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int done = 0;

	sctx->count += len;

	if (partial) {
		done = SHA256_BLOCK_SIZE - partial;
		memcpy(sctx->buf + partial, data, done);
		sha256_transform_asm(sctx->buf, sctx->state, 1);
	}

	if (len - done >= SHA256_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;

		sha256_transform_asm(data + done, sctx->state, (u64) rounds);

		done += rounds * SHA256_BLOCK_SIZE;
	}

	memcpy(sctx->buf, data + done, len - done);

	return 0;
}

static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA256_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buf + partial, data, len);

		return 0;
	}

	if (!irq_fpu_usable()) {
		res = crypto_sha256_update(desc, data, len);
	} else {
		kernel_fpu_begin();
		res = __sha256_ssse3_update(desc, data, len, partial);
		kernel_fpu_end();
	}

	return res;
}


/* Add padding and return the message digest. */
static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA256_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);

	if (!irq_fpu_usable()) {
		crypto_sha256_update(desc, padding, padlen);
		crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
	} else {
		kernel_fpu_begin();
		/* We need to fill a whole block for __sha256_ssse3_update() */
		if (padlen <= 56) {
			sctx->count += padlen;
			memcpy(sctx->buf + index, padding, padlen);
		} else {
			__sha256_ssse3_update(desc, padding, padlen, index);
		}
		__sha256_ssse3_update(desc, (const u8 *)&bits,
				      sizeof(bits), 56);
		kernel_fpu_end();
	}

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

static int sha256_ssse3_export(struct shash_desc *desc, void *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));

	return 0;
}

static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));

	return 0;
}

static int sha224_ssse3_init(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	sctx->state[0] = SHA224_H0;
	sctx->state[1] = SHA224_H1;
	sctx->state[2] = SHA224_H2;
	sctx->state[3] = SHA224_H3;
	sctx->state[4] = SHA224_H4;
	sctx->state[5] = SHA224_H5;
	sctx->state[6] = SHA224_H6;
	sctx->state[7] = SHA224_H7;
	sctx->count = 0;

	return 0;
}

static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
{
	u8 D[SHA256_DIGEST_SIZE];

	sha256_ssse3_final(desc, D);

	memcpy(hash, D, SHA224_DIGEST_SIZE);
	memzero_explicit(D, SHA256_DIGEST_SIZE);

	return 0;
}

static struct shash_alg algs[] = { {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	sha256_ssse3_init,
	.update		=	sha256_ssse3_update,
	.final		=	sha256_ssse3_final,
	.export		=	sha256_ssse3_export,
	.import		=	sha256_ssse3_import,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name	=	"sha256",
		.cra_driver_name =	"sha256-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA256_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
}, {
	.digestsize	=	SHA224_DIGEST_SIZE,
	.init		=	sha224_ssse3_init,
	.update		=	sha256_ssse3_update,
	.final		=	sha224_ssse3_final,
	.export		=	sha256_ssse3_export,
	.import		=	sha256_ssse3_import,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name	=	"sha224",
		.cra_driver_name =	"sha224-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA224_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
} };

#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave)
		return false;

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");

		return false;
	}

	return true;
}
#endif

static int __init sha256_ssse3_mod_init(void)
{
	/* test for SSSE3 first */
	if (cpu_has_ssse3)
		sha256_transform_asm = sha256_transform_ssse3;

#ifdef CONFIG_AS_AVX
	/* allow AVX to override SSSE3, it's a little faster */
	if (avx_usable()) {
#ifdef CONFIG_AS_AVX2
		if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2))
			sha256_transform_asm = sha256_transform_rorx;
		else
#endif
			sha256_transform_asm = sha256_transform_avx;
	}
#endif

	if (sha256_transform_asm) {
#ifdef CONFIG_AS_AVX
		if (sha256_transform_asm == sha256_transform_avx)
			pr_info("Using AVX optimized SHA-256 implementation\n");
#ifdef CONFIG_AS_AVX2
		else if (sha256_transform_asm == sha256_transform_rorx)
			pr_info("Using AVX2 optimized SHA-256 implementation\n");
#endif
		else
#endif
			pr_info("Using SSSE3 optimized SHA-256 implementation\n");
		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
	}
	pr_info("Neither AVX nor SSSE3 is available/usable.\n");

	return -ENODEV;
}

static void __exit sha256_ssse3_mod_fini(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}

module_init(sha256_ssse3_mod_init);
module_exit(sha256_ssse3_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");

MODULE_ALIAS_CRYPTO("sha256");
MODULE_ALIAS_CRYPTO("sha224");
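
/*
 * Illustrative sketch (not part of the original driver): once this module
 * registers the "sha256"/"sha224" shash algorithms above, other kernel code
 * can reach the accelerated implementation through the generic shash API
 * roughly as below. The helper name sha256_ssse3_example() and its error
 * handling are hypothetical; the snippet is kept under #if 0 because this
 * file does not include <linux/slab.h>, which kmalloc()/kfree() would need.
 */
#if 0
static int sha256_ssse3_example(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	/* Ask the crypto API for the highest-priority "sha256" provider. */
	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* A shash_desc carries per-request state; its size depends on the tfm. */
	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;
	desc->flags = 0;	/* request flags field in kernels of this vintage */

	/* init + update + final in one call; out receives SHA256_DIGEST_SIZE bytes. */
	err = crypto_shash_digest(desc, data, len, out);

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}
#endif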