// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

/*
 * Core transform routines implemented in assembly (the Cryptogams ARM
 * code referenced in the header above). 'state' is the accumulator/key
 * state (&dctx->h); 'hibit' selects whether the implicit 2^128 padding
 * bit is set for each processed block (1 for full blocks, 0 for the
 * final padded partial block).
 */
void poly1305_init_arm(void *state, const u8 *key);
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);

/*
 * Weak no-op fallback so this file links even when the NEON assembly
 * object is not built in. It is never reached at runtime: every call
 * site is guarded by the 'have_neon' static key below, which is only
 * enabled when HWCAP_NEON is present (and hence the real implementation
 * was built).
 */
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
}

/* Patched at init time; selects the NEON bulk path when available. */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

/*
 * Library API: initialize the descriptor from a full 32-byte key.
 * The first half (r) is consumed by the assembly init routine; the
 * second half (s, the nonce added at finalization) is stashed in
 * dctx->s as host-order words.
 */
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
{
	poly1305_init_arm(&dctx->h, key);
	dctx->s[0] = get_unaligned_le32(key + 16);
	dctx->s[1] = get_unaligned_le32(key + 20);
	dctx->s[2] = get_unaligned_le32(key + 24);
	dctx->s[3] = get_unaligned_le32(key + 28);
	dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);

/*
 * shash .init: no key yet — the shash interface feeds the 32-byte key
 * as the first two "blocks" of input (rset/sset track how much of it
 * has been consumed; see arm_poly1305_blocks()).
 */
static int arm_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	dctx->buflen = 0;
	dctx->rset = 0;
	dctx->sset = false;

	return 0;
}

/*
 * Process whole blocks. For the shash path, lazily consume the key from
 * the head of the stream: first 16 bytes initialize r (rset), next 16
 * become s (sset). Only after both are set is data actually hashed.
 * 'do_neon' routes full blocks to the NEON implementation when the
 * caller has already entered a kernel_neon_begin() section.
 */
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			poly1305_init_arm(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			dctx->s[0] = get_unaligned_le32(src + 0);
			dctx->s[1] = get_unaligned_le32(src + 4);
			dctx->s[2] = get_unaligned_le32(src + 8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Round down to whole blocks; the caller buffers the remainder. */
	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
}

/*
 * Common update helper for the shash paths: drain any partially filled
 * block buffer first, then hash the bulk of the input, then stash the
 * trailing partial block. Note the buffered single block is always
 * processed with do_neon=false — a single block does not benefit from
 * NEON and this path may run outside a kernel_neon_begin() section.
 */
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				   const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}

/* shash .update for the scalar-only "poly1305-arm" algorithm. */
static int arm_poly1305_update(struct shash_desc *desc,
			       const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	arm_poly1305_do_update(dctx, src, srclen, false);
	return 0;
}

/*
 * shash .update for "poly1305-neon". NEON is only worthwhile for
 * larger inputs (> 128 bytes here) and only legal when SIMD is usable
 * in the current context; otherwise fall through to the scalar code.
 */
static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
						   const u8 *src,
						   unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
	bool do_neon = crypto_simd_usable() && srclen > 128;

	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_begin();
	arm_poly1305_do_update(dctx, src, srclen, do_neon);
	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_end();
	return 0;
}

/*
 * Library API update. Unlike the shash path, the key was set eagerly by
 * poly1305_init_arch(), so no rset/sset handling is needed and
 * poly1305_blocks_arm() can be called directly for the buffered block.
 */
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
		       crypto_simd_usable();

	if (unlikely(dctx->buflen)) {
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks_arm(&dctx->h, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
			kernel_neon_begin();
			poly1305_blocks_neon(&dctx->h, src, len, 1);
			kernel_neon_end();
		} else {
			poly1305_blocks_arm(&dctx->h, src, len, 1);
		}
		src += len;
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

/*
 * Library API finalization: pad and hash any remaining partial block
 * (with hibit=0 — the 0x01 padding byte is appended explicitly), emit
 * the accumulator, add the 128-bit nonce s with carry propagation, and
 * wipe the descriptor state.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	__le32 digest[4];
	u64 f = 0;

	if (unlikely(dctx->buflen)) {
		/* Pad: 0x01 then zeros; block processed without 2^128 bit. */
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, digest, dctx->s);

	/* mac = (h + s) % (2^128) — f carries between 32-bit limbs */
	f = (f >> 32) + le32_to_cpu(digest[0]);
	put_unaligned_le32(f, dst);
	f = (f >> 32) + le32_to_cpu(digest[1]);
	put_unaligned_le32(f, dst + 4);
	f = (f >> 32) + le32_to_cpu(digest[2]);
	put_unaligned_le32(f, dst + 8);
	f = (f >> 32) + le32_to_cpu(digest[3]);
	put_unaligned_le32(f, dst + 12);

	/* Clear key material and accumulator from the descriptor. */
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);

/*
 * shash .final: fails with -ENOKEY if fewer than 32 key bytes were fed
 * through update() (i.e. s was never set).
 */
static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}

/*
 * Two shash registrations: the scalar "poly1305-arm" (priority 150) and,
 * when kernel-mode NEON is configured, "poly1305-neon" (priority 200)
 * which wins algorithm selection on NEON-capable CPUs.
 */
static struct shash_alg arm_poly1305_algs[] = {{
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-arm",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#ifdef CONFIG_KERNEL_MODE_NEON
}, {
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update_neon,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#endif
}};

/*
 * If the CPU has NEON, enable the static key and register both shashes;
 * otherwise register only the scalar entry. Registration is skipped
 * entirely when the crypto hash layer is not reachable (library-only
 * builds).
 */
static int __init arm_poly1305_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    (elf_hwcap & HWCAP_NEON))
		static_branch_enable(&have_neon);
	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		/* register only the first entry */
		return crypto_register_shash(&arm_poly1305_algs[0]);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shashes(arm_poly1305_algs,
					ARRAY_SIZE(arm_poly1305_algs)) : 0;
}

/* Mirror of mod_init: unregister whichever set was registered. */
static void __exit arm_poly1305_mod_exit(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
		return;
	if (!static_branch_likely(&have_neon)) {
		crypto_unregister_shash(&arm_poly1305_algs[0]);
		return;
	}
	crypto_unregister_shashes(arm_poly1305_algs,
				  ARRAY_SIZE(arm_poly1305_algs));
}

module_init(arm_poly1305_mod_init);
module_exit(arm_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-arm");
MODULE_ALIAS_CRYPTO("poly1305-neon");