// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				      int nrounds);
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				       int nrounds);
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const u32 *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	/*
	 * Process four blocks at a time with the 4-way NEON routine, then
	 * whole single blocks.  state[12] is the 32-bit block counter.
	 */
	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
		chacha_4block_xor_neon(state, dst, src, nrounds);
		bytes -= CHACHA_BLOCK_SIZE * 4;
		src += CHACHA_BLOCK_SIZE * 4;
		dst += CHACHA_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA_BLOCK_SIZE) {
		chacha_block_xor_neon(state, dst, src, nrounds);
		bytes -= CHACHA_BLOCK_SIZE;
		src += CHACHA_BLOCK_SIZE;
		dst += CHACHA_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		/*
		 * Bounce a partial final block through a stack buffer so
		 * the NEON routine always sees a full block.
		 */
		memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, buf, buf, nrounds);
		memcpy(dst, buf, bytes);
	}
}

void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, stream, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);

void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	/*
	 * Inputs of at most one block take the scalar path, which avoids
	 * the overhead of kernel_neon_begin()/kernel_neon_end().
	 */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	kernel_neon_begin();
	chacha_doneon(state, dst, src, bytes, nrounds);
	kernel_neon_end();
}
EXPORT_SYMBOL(chacha_crypt_arch);
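
/*
 * Usage sketch (illustrative only, not part of this driver): a kernel
 * caller of the ChaCha library interface exported above might do the
 * following.  The key is eight 32-bit words (256 bits), the IV is 16
 * bytes (32-bit initial block counter followed by a 96-bit nonce), the
 * buffer names and sizes are hypothetical, and nrounds is 20 for
 * ChaCha20.
 *
 *	u32 state[16];
 *	u32 key[CHACHA_KEY_SIZE / sizeof(u32)];
 *	u8 iv[CHACHA_IV_SIZE];
 *	u8 dst[256], src[256];
 *
 *	chacha_init_arch(state, key, iv);
 *	chacha_crypt_arch(state, dst, src, sizeof(src), 20);
 */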

static int chacha_stream_xor(struct skcipher_request *req,
			     const struct chacha_ctx *ctx, const u8 *iv,
			     bool neon)
{
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	chacha_init_generic(state, ctx->key, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		/* Only the final chunk may contain a partial block. */
		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		if (!neon) {
			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
				     nbytes, state, ctx->nrounds);
			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
		} else {
			kernel_neon_begin();
			chacha_doneon(state, walk.dst.virt.addr,
				      walk.src.virt.addr, nbytes, ctx->nrounds);
			kernel_neon_end();
		}
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int do_chacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);

	return chacha_stream_xor(req, ctx, req->iv, neon);
}

static int chacha_arm(struct skcipher_request *req)
{
	return do_chacha(req, false);
}

static int chacha_neon(struct skcipher_request *req)
{
	return do_chacha(req, neon_usable());
}

static int do_xchacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	/* Derive the subkey from the key and the first 128 nonce bits. */
	chacha_init_generic(state, ctx->key, req->iv);

	if (!neon) {
		hchacha_block_arm(state, subctx.key, ctx->nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, subctx.key, ctx->nrounds);
		kernel_neon_end();
	}
	subctx.nrounds = ctx->nrounds;

	/* Build the real IV for the inner ChaCha invocation. */
	memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
	memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */
	return chacha_stream_xor(req, &subctx, real_iv, neon);
}

static int xchacha_arm(struct skcipher_request *req)
{
	return do_xchacha(req, false);
}

static int xchacha_neon(struct skcipher_request *req)
{
	return do_xchacha(req, neon_usable());
}

/* Scalar implementations, usable on any ARM CPU. */
static struct skcipher_alg arm_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_arm,
		.decrypt		= chacha_arm,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	},
};
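
/*
 * NEON implementations of the same three algorithms.  The higher
 * cra_priority (300 vs. 200) makes the crypto API prefer them over the
 * scalar versions whenever both are registered, and the walksize of four
 * blocks lets the skcipher walk code hand chacha_doneon() enough data
 * per call to use the 4-way NEON path.
 */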
static struct skcipher_alg neon_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_neon,
		.decrypt		= chacha_neon,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}
};

static int __init chacha_simd_mod_init(void)
{
	int err = 0;

	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
		err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		if (err)
			return err;
	}

	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		int i;

		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well
			 * with the NEON implementation but do incredibly
			 * well with the scalar one and use less power.
			 */
			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
				neon_algs[i].base.cra_priority = 0;
			break;
		default:
			static_branch_enable(&use_neon);
		}

		if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
			err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
			if (err)
				crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		}
	}
	return err;
}

static void __exit chacha_simd_mod_fini(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
	}
}

module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-arm");
#ifdef CONFIG_KERNEL_MODE_NEON
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha12-neon");
#endif
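
/*
 * Usage sketch (illustrative only): the algorithms registered above are
 * reached through the generic crypto API rather than being called
 * directly.  A synchronous caller might look roughly like this; error
 * handling and the request callback are omitted, and key, buf, iv, and
 * len are hypothetical.
 *
 *	struct crypto_skcipher *tfm;
 *	struct skcipher_request *req;
 *	struct scatterlist sg;
 *
 *	tfm = crypto_alloc_skcipher("xchacha20", 0, 0);
 *	crypto_skcipher_setkey(tfm, key, CHACHA_KEY_SIZE);
 *	req = skcipher_request_alloc(tfm, GFP_KERNEL);
 *	sg_init_one(&sg, buf, len);
 *	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
 *	crypto_skcipher_encrypt(req);
 */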