// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

/* routines provided by the OpenSSL/Cryptogams assembly implementation */
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
{
	/* r = clamp(key[0..15]), h = 0 */
	poly1305_init_arm64(&dctx->h, key);

	/* s = key[16..31] is the nonce added into the final tag */
	dctx->s[0] = get_unaligned_le32(key + 16);
	dctx->s[1] = get_unaligned_le32(key + 20);
	dctx->s[2] = get_unaligned_le32(key + 24);
	dctx->s[3] = get_unaligned_le32(key + 28);
	dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);

static int neon_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	dctx->buflen = 0;
	dctx->rset = 0;
	dctx->sset = false;

	return 0;
}

static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		/*
		 * The shash interface passes the 32-byte one-time key as the
		 * first two blocks of input, so absorb it before hashing.
		 */
		if (!dctx->rset) {
			poly1305_init_arch(dctx, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			dctx->s[0] = get_unaligned_le32(src + 0);
			dctx->s[1] = get_unaligned_le32(src + 4);
			dctx->s[2] = get_unaligned_le32(src + 8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* round down to a whole number of blocks */
	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks(&dctx->h, src, len, hibit);
}

static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			neon_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}

static int neon_poly1305_update(struct shash_desc *desc,
				const u8 *src, unsigned int srclen)
{
	/* only use NEON for larger inputs, to amortize the SIMD save/restore */
	bool do_neon = crypto_simd_usable() && srclen > 128;
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_begin();
	neon_poly1305_do_update(dctx, src, srclen, do_neon);
	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_end();
	return 0;
}

void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
			kernel_neon_begin();
			poly1305_blocks_neon(&dctx->h, src, len, 1);
			kernel_neon_end();
		} else {
			poly1305_blocks(&dctx->h, src, len, 1);
		}
		src += len;
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	__le32 digest[4];
	u64 f = 0;

	if (unlikely(dctx->buflen)) {
		/* pad the final partial block: a 1 byte, then zeroes (hibit = 0) */
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit(&dctx->h, digest, dctx->s);

	/* mac = (h + s) % (2^128) */
	f = (f >> 32) + le32_to_cpu(digest[0]);
	put_unaligned_le32(f, dst);
	f = (f >> 32) + le32_to_cpu(digest[1]);
	put_unaligned_le32(f, dst + 4);
	f = (f >> 32) + le32_to_cpu(digest[2]);
	put_unaligned_le32(f, dst + 8);
	f = (f >> 32) + le32_to_cpu(digest[3]);
	put_unaligned_le32(f, dst + 12);

	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);

static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}

static struct shash_alg neon_poly1305_alg = {
	.init			= neon_poly1305_init,
	.update			= neon_poly1305_update,
	.final			= neon_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
};

static int __init neon_poly1305_mod_init(void)
{
	if (!cpu_have_named_feature(ASIMD))
		return 0;

	static_branch_enable(&have_neon);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shash(&neon_poly1305_alg) : 0;
}

static void __exit neon_poly1305_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
		crypto_unregister_shash(&neon_poly1305_alg);
}

module_init(neon_poly1305_mod_init);
module_exit(neon_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-neon");