1a6b803b3SArd Biesheuvel // SPDX-License-Identifier: GPL-2.0 2a6b803b3SArd Biesheuvel /* 3a6b803b3SArd Biesheuvel * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM 4a6b803b3SArd Biesheuvel * 5a6b803b3SArd Biesheuvel * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> 6a6b803b3SArd Biesheuvel */ 7a6b803b3SArd Biesheuvel 8a6b803b3SArd Biesheuvel #include <asm/hwcap.h> 9a6b803b3SArd Biesheuvel #include <asm/neon.h> 10a6b803b3SArd Biesheuvel #include <asm/simd.h> 11a6b803b3SArd Biesheuvel #include <asm/unaligned.h> 12a6b803b3SArd Biesheuvel #include <crypto/algapi.h> 13a6b803b3SArd Biesheuvel #include <crypto/internal/hash.h> 14a6b803b3SArd Biesheuvel #include <crypto/internal/poly1305.h> 15a6b803b3SArd Biesheuvel #include <crypto/internal/simd.h> 16a6b803b3SArd Biesheuvel #include <linux/cpufeature.h> 17a6b803b3SArd Biesheuvel #include <linux/crypto.h> 18a6b803b3SArd Biesheuvel #include <linux/jump_label.h> 19a6b803b3SArd Biesheuvel #include <linux/module.h> 20a6b803b3SArd Biesheuvel 21a6b803b3SArd Biesheuvel void poly1305_init_arm(void *state, const u8 *key); 22a6b803b3SArd Biesheuvel void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); 23a6b803b3SArd Biesheuvel void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); 24a6b803b3SArd Biesheuvel 25a6b803b3SArd Biesheuvel void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) 26a6b803b3SArd Biesheuvel { 27a6b803b3SArd Biesheuvel } 28a6b803b3SArd Biesheuvel 29a6b803b3SArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 30a6b803b3SArd Biesheuvel 31a6b803b3SArd Biesheuvel void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) 32a6b803b3SArd Biesheuvel { 33a6b803b3SArd Biesheuvel poly1305_init_arm(&dctx->h, key); 34a6b803b3SArd Biesheuvel dctx->s[0] = get_unaligned_le32(key + 16); 35a6b803b3SArd Biesheuvel dctx->s[1] = get_unaligned_le32(key + 20); 36a6b803b3SArd Biesheuvel dctx->s[2] = get_unaligned_le32(key + 24); 37a6b803b3SArd Biesheuvel dctx->s[3] = get_unaligned_le32(key + 28); 38a6b803b3SArd Biesheuvel dctx->buflen = 0; 39a6b803b3SArd Biesheuvel } 40a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_init_arch); 41a6b803b3SArd Biesheuvel 42a6b803b3SArd Biesheuvel static int arm_poly1305_init(struct shash_desc *desc) 43a6b803b3SArd Biesheuvel { 44a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 45a6b803b3SArd Biesheuvel 46a6b803b3SArd Biesheuvel dctx->buflen = 0; 47a6b803b3SArd Biesheuvel dctx->rset = 0; 48a6b803b3SArd Biesheuvel dctx->sset = false; 49a6b803b3SArd Biesheuvel 50a6b803b3SArd Biesheuvel return 0; 51a6b803b3SArd Biesheuvel } 52a6b803b3SArd Biesheuvel 53a6b803b3SArd Biesheuvel static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, 54a6b803b3SArd Biesheuvel u32 len, u32 hibit, bool do_neon) 55a6b803b3SArd Biesheuvel { 56a6b803b3SArd Biesheuvel if (unlikely(!dctx->sset)) { 57a6b803b3SArd Biesheuvel if (!dctx->rset) { 58a6b803b3SArd Biesheuvel poly1305_init_arm(&dctx->h, src); 59a6b803b3SArd Biesheuvel src += POLY1305_BLOCK_SIZE; 60a6b803b3SArd Biesheuvel len -= POLY1305_BLOCK_SIZE; 61a6b803b3SArd Biesheuvel dctx->rset = 1; 62a6b803b3SArd Biesheuvel } 63a6b803b3SArd Biesheuvel if (len >= POLY1305_BLOCK_SIZE) { 64a6b803b3SArd Biesheuvel dctx->s[0] = get_unaligned_le32(src + 0); 65a6b803b3SArd Biesheuvel dctx->s[1] = get_unaligned_le32(src + 4); 66a6b803b3SArd Biesheuvel dctx->s[2] = get_unaligned_le32(src + 8); 67a6b803b3SArd Biesheuvel dctx->s[3] = get_unaligned_le32(src + 12); 68a6b803b3SArd Biesheuvel src += POLY1305_BLOCK_SIZE; 69a6b803b3SArd Biesheuvel len -= POLY1305_BLOCK_SIZE; 70a6b803b3SArd Biesheuvel dctx->sset = true; 71a6b803b3SArd Biesheuvel } 72a6b803b3SArd Biesheuvel if (len < POLY1305_BLOCK_SIZE) 73a6b803b3SArd Biesheuvel return; 74a6b803b3SArd Biesheuvel } 75a6b803b3SArd Biesheuvel 76a6b803b3SArd Biesheuvel len &= ~(POLY1305_BLOCK_SIZE - 1); 77a6b803b3SArd Biesheuvel 78a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && likely(do_neon)) 79a6b803b3SArd Biesheuvel poly1305_blocks_neon(&dctx->h, src, len, hibit); 80a6b803b3SArd Biesheuvel else 81a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, src, len, hibit); 82a6b803b3SArd Biesheuvel } 83a6b803b3SArd Biesheuvel 84a6b803b3SArd Biesheuvel static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, 85a6b803b3SArd Biesheuvel const u8 *src, u32 len, bool do_neon) 86a6b803b3SArd Biesheuvel { 87a6b803b3SArd Biesheuvel if (unlikely(dctx->buflen)) { 88a6b803b3SArd Biesheuvel u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); 89a6b803b3SArd Biesheuvel 90a6b803b3SArd Biesheuvel memcpy(dctx->buf + dctx->buflen, src, bytes); 91a6b803b3SArd Biesheuvel src += bytes; 92a6b803b3SArd Biesheuvel len -= bytes; 93a6b803b3SArd Biesheuvel dctx->buflen += bytes; 94a6b803b3SArd Biesheuvel 95a6b803b3SArd Biesheuvel if (dctx->buflen == POLY1305_BLOCK_SIZE) { 96a6b803b3SArd Biesheuvel arm_poly1305_blocks(dctx, dctx->buf, 97a6b803b3SArd Biesheuvel POLY1305_BLOCK_SIZE, 1, false); 98a6b803b3SArd Biesheuvel dctx->buflen = 0; 99a6b803b3SArd Biesheuvel } 100a6b803b3SArd Biesheuvel } 101a6b803b3SArd Biesheuvel 102a6b803b3SArd Biesheuvel if (likely(len >= POLY1305_BLOCK_SIZE)) { 103a6b803b3SArd Biesheuvel arm_poly1305_blocks(dctx, src, len, 1, do_neon); 104a6b803b3SArd Biesheuvel src += round_down(len, POLY1305_BLOCK_SIZE); 105a6b803b3SArd Biesheuvel len %= POLY1305_BLOCK_SIZE; 106a6b803b3SArd Biesheuvel } 107a6b803b3SArd Biesheuvel 108a6b803b3SArd Biesheuvel if (unlikely(len)) { 109a6b803b3SArd Biesheuvel dctx->buflen = len; 110a6b803b3SArd Biesheuvel memcpy(dctx->buf, src, len); 111a6b803b3SArd Biesheuvel } 112a6b803b3SArd Biesheuvel } 113a6b803b3SArd Biesheuvel 114a6b803b3SArd Biesheuvel static int arm_poly1305_update(struct shash_desc *desc, 115a6b803b3SArd Biesheuvel const u8 *src, unsigned int srclen) 116a6b803b3SArd Biesheuvel { 117a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 118a6b803b3SArd Biesheuvel 119a6b803b3SArd Biesheuvel arm_poly1305_do_update(dctx, src, srclen, false); 120a6b803b3SArd Biesheuvel return 0; 121a6b803b3SArd Biesheuvel } 122a6b803b3SArd Biesheuvel 123a6b803b3SArd Biesheuvel static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, 124a6b803b3SArd Biesheuvel const u8 *src, 125a6b803b3SArd Biesheuvel unsigned int srclen) 126a6b803b3SArd Biesheuvel { 127a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 128a6b803b3SArd Biesheuvel bool do_neon = crypto_simd_usable() && srclen > 128; 129a6b803b3SArd Biesheuvel 130a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon) 131a6b803b3SArd Biesheuvel kernel_neon_begin(); 132a6b803b3SArd Biesheuvel arm_poly1305_do_update(dctx, src, srclen, do_neon); 133a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon) 134a6b803b3SArd Biesheuvel kernel_neon_end(); 135a6b803b3SArd Biesheuvel return 0; 136a6b803b3SArd Biesheuvel } 137a6b803b3SArd Biesheuvel 138a6b803b3SArd Biesheuvel void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, 139a6b803b3SArd Biesheuvel unsigned int nbytes) 140a6b803b3SArd Biesheuvel { 141a6b803b3SArd Biesheuvel bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 142a6b803b3SArd Biesheuvel crypto_simd_usable(); 143a6b803b3SArd Biesheuvel 144a6b803b3SArd Biesheuvel if (unlikely(dctx->buflen)) { 145a6b803b3SArd Biesheuvel u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); 146a6b803b3SArd Biesheuvel 147a6b803b3SArd Biesheuvel memcpy(dctx->buf + dctx->buflen, src, bytes); 148a6b803b3SArd Biesheuvel src += bytes; 149a6b803b3SArd Biesheuvel nbytes -= bytes; 150a6b803b3SArd Biesheuvel dctx->buflen += bytes; 151a6b803b3SArd Biesheuvel 152a6b803b3SArd Biesheuvel if (dctx->buflen == POLY1305_BLOCK_SIZE) { 153a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, dctx->buf, 154a6b803b3SArd Biesheuvel POLY1305_BLOCK_SIZE, 1); 155a6b803b3SArd Biesheuvel dctx->buflen = 0; 156a6b803b3SArd Biesheuvel } 157a6b803b3SArd Biesheuvel } 158a6b803b3SArd Biesheuvel 159a6b803b3SArd Biesheuvel if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { 160a6b803b3SArd Biesheuvel unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); 161a6b803b3SArd Biesheuvel 162a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon) { 163a6b803b3SArd Biesheuvel kernel_neon_begin(); 164a6b803b3SArd Biesheuvel poly1305_blocks_neon(&dctx->h, src, len, 1); 165a6b803b3SArd Biesheuvel kernel_neon_end(); 166a6b803b3SArd Biesheuvel } else { 167a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, src, len, 1); 168a6b803b3SArd Biesheuvel } 169a6b803b3SArd Biesheuvel src += len; 170a6b803b3SArd Biesheuvel nbytes %= POLY1305_BLOCK_SIZE; 171a6b803b3SArd Biesheuvel } 172a6b803b3SArd Biesheuvel 173a6b803b3SArd Biesheuvel if (unlikely(nbytes)) { 174a6b803b3SArd Biesheuvel dctx->buflen = nbytes; 175a6b803b3SArd Biesheuvel memcpy(dctx->buf, src, nbytes); 176a6b803b3SArd Biesheuvel } 177a6b803b3SArd Biesheuvel } 178a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_update_arch); 179a6b803b3SArd Biesheuvel 180a6b803b3SArd Biesheuvel void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) 181a6b803b3SArd Biesheuvel { 182a6b803b3SArd Biesheuvel __le32 digest[4]; 183a6b803b3SArd Biesheuvel u64 f = 0; 184a6b803b3SArd Biesheuvel 185a6b803b3SArd Biesheuvel if (unlikely(dctx->buflen)) { 186a6b803b3SArd Biesheuvel dctx->buf[dctx->buflen++] = 1; 187a6b803b3SArd Biesheuvel memset(dctx->buf + dctx->buflen, 0, 188a6b803b3SArd Biesheuvel POLY1305_BLOCK_SIZE - dctx->buflen); 189a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); 190a6b803b3SArd Biesheuvel } 191a6b803b3SArd Biesheuvel 192a6b803b3SArd Biesheuvel poly1305_emit_arm(&dctx->h, digest, dctx->s); 193a6b803b3SArd Biesheuvel 194a6b803b3SArd Biesheuvel /* mac = (h + s) % (2^128) */ 195a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[0]); 196a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst); 197a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[1]); 198a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst + 4); 199a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[2]); 200a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst + 8); 201a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[3]); 202a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst + 12); 203a6b803b3SArd Biesheuvel 204a6b803b3SArd Biesheuvel *dctx = (struct poly1305_desc_ctx){}; 205a6b803b3SArd Biesheuvel } 206a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_final_arch); 207a6b803b3SArd Biesheuvel 208a6b803b3SArd Biesheuvel static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) 209a6b803b3SArd Biesheuvel { 210a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 211a6b803b3SArd Biesheuvel 212a6b803b3SArd Biesheuvel if (unlikely(!dctx->sset)) 213a6b803b3SArd Biesheuvel return -ENOKEY; 214a6b803b3SArd Biesheuvel 215a6b803b3SArd Biesheuvel poly1305_final_arch(dctx, dst); 216a6b803b3SArd Biesheuvel return 0; 217a6b803b3SArd Biesheuvel } 218a6b803b3SArd Biesheuvel 219a6b803b3SArd Biesheuvel static struct shash_alg arm_poly1305_algs[] = {{ 220a6b803b3SArd Biesheuvel .init = arm_poly1305_init, 221a6b803b3SArd Biesheuvel .update = arm_poly1305_update, 222a6b803b3SArd Biesheuvel .final = arm_poly1305_final, 223a6b803b3SArd Biesheuvel .digestsize = POLY1305_DIGEST_SIZE, 224a6b803b3SArd Biesheuvel .descsize = sizeof(struct poly1305_desc_ctx), 225a6b803b3SArd Biesheuvel 226a6b803b3SArd Biesheuvel .base.cra_name = "poly1305", 227a6b803b3SArd Biesheuvel .base.cra_driver_name = "poly1305-arm", 228a6b803b3SArd Biesheuvel .base.cra_priority = 150, 229a6b803b3SArd Biesheuvel .base.cra_blocksize = POLY1305_BLOCK_SIZE, 230a6b803b3SArd Biesheuvel .base.cra_module = THIS_MODULE, 231a6b803b3SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON 232a6b803b3SArd Biesheuvel }, { 233a6b803b3SArd Biesheuvel .init = arm_poly1305_init, 234a6b803b3SArd Biesheuvel .update = arm_poly1305_update_neon, 235a6b803b3SArd Biesheuvel .final = arm_poly1305_final, 236a6b803b3SArd Biesheuvel .digestsize = POLY1305_DIGEST_SIZE, 237a6b803b3SArd Biesheuvel .descsize = sizeof(struct poly1305_desc_ctx), 238a6b803b3SArd Biesheuvel 239a6b803b3SArd Biesheuvel .base.cra_name = "poly1305", 240a6b803b3SArd Biesheuvel .base.cra_driver_name = "poly1305-neon", 241a6b803b3SArd Biesheuvel .base.cra_priority = 200, 242a6b803b3SArd Biesheuvel .base.cra_blocksize = POLY1305_BLOCK_SIZE, 243a6b803b3SArd Biesheuvel .base.cra_module = THIS_MODULE, 244a6b803b3SArd Biesheuvel #endif 245a6b803b3SArd Biesheuvel }}; 246a6b803b3SArd Biesheuvel 247a6b803b3SArd Biesheuvel static int __init arm_poly1305_mod_init(void) 248a6b803b3SArd Biesheuvel { 249a6b803b3SArd Biesheuvel if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 250a6b803b3SArd Biesheuvel (elf_hwcap & HWCAP_NEON)) 251a6b803b3SArd Biesheuvel static_branch_enable(&have_neon); 252*8394bfecSJason A. Donenfeld else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) 253a6b803b3SArd Biesheuvel /* register only the first entry */ 254a6b803b3SArd Biesheuvel return crypto_register_shash(&arm_poly1305_algs[0]); 255a6b803b3SArd Biesheuvel 256*8394bfecSJason A. Donenfeld return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? 257*8394bfecSJason A. Donenfeld crypto_register_shashes(arm_poly1305_algs, 258*8394bfecSJason A. Donenfeld ARRAY_SIZE(arm_poly1305_algs)) : 0; 259a6b803b3SArd Biesheuvel } 260a6b803b3SArd Biesheuvel 261a6b803b3SArd Biesheuvel static void __exit arm_poly1305_mod_exit(void) 262a6b803b3SArd Biesheuvel { 263*8394bfecSJason A. Donenfeld if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) 264*8394bfecSJason A. Donenfeld return; 265a6b803b3SArd Biesheuvel if (!static_branch_likely(&have_neon)) { 266a6b803b3SArd Biesheuvel crypto_unregister_shash(&arm_poly1305_algs[0]); 267a6b803b3SArd Biesheuvel return; 268a6b803b3SArd Biesheuvel } 269a6b803b3SArd Biesheuvel crypto_unregister_shashes(arm_poly1305_algs, 270a6b803b3SArd Biesheuvel ARRAY_SIZE(arm_poly1305_algs)); 271a6b803b3SArd Biesheuvel } 272a6b803b3SArd Biesheuvel 273a6b803b3SArd Biesheuvel module_init(arm_poly1305_mod_init); 274a6b803b3SArd Biesheuvel module_exit(arm_poly1305_mod_exit); 275a6b803b3SArd Biesheuvel 276a6b803b3SArd Biesheuvel MODULE_LICENSE("GPL v2"); 277a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305"); 278a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-arm"); 279a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-neon"); 280