1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM 4 * 5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> 6 */ 7 8 #include <asm/hwcap.h> 9 #include <asm/neon.h> 10 #include <asm/simd.h> 11 #include <asm/unaligned.h> 12 #include <crypto/algapi.h> 13 #include <crypto/internal/hash.h> 14 #include <crypto/internal/poly1305.h> 15 #include <crypto/internal/simd.h> 16 #include <linux/cpufeature.h> 17 #include <linux/crypto.h> 18 #include <linux/jump_label.h> 19 #include <linux/module.h> 20 21 void poly1305_init_arm(void *state, const u8 *key); 22 void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); 23 void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); 24 25 void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) 26 { 27 } 28 29 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 30 31 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) 32 { 33 poly1305_init_arm(&dctx->h, key); 34 dctx->s[0] = get_unaligned_le32(key + 16); 35 dctx->s[1] = get_unaligned_le32(key + 20); 36 dctx->s[2] = get_unaligned_le32(key + 24); 37 dctx->s[3] = get_unaligned_le32(key + 28); 38 dctx->buflen = 0; 39 } 40 EXPORT_SYMBOL(poly1305_init_arch); 41 42 static int arm_poly1305_init(struct shash_desc *desc) 43 { 44 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 45 46 dctx->buflen = 0; 47 dctx->rset = 0; 48 dctx->sset = false; 49 50 return 0; 51 } 52 53 static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, 54 u32 len, u32 hibit, bool do_neon) 55 { 56 if (unlikely(!dctx->sset)) { 57 if (!dctx->rset) { 58 poly1305_init_arm(&dctx->h, src); 59 src += POLY1305_BLOCK_SIZE; 60 len -= POLY1305_BLOCK_SIZE; 61 dctx->rset = 1; 62 } 63 if (len >= POLY1305_BLOCK_SIZE) { 64 dctx->s[0] = get_unaligned_le32(src + 0); 65 dctx->s[1] = get_unaligned_le32(src + 4); 66 dctx->s[2] = get_unaligned_le32(src + 8); 67 dctx->s[3] = get_unaligned_le32(src + 12); 68 src += POLY1305_BLOCK_SIZE; 69 len -= POLY1305_BLOCK_SIZE; 70 dctx->sset = true; 71 } 72 if (len < POLY1305_BLOCK_SIZE) 73 return; 74 } 75 76 len &= ~(POLY1305_BLOCK_SIZE - 1); 77 78 if (static_branch_likely(&have_neon) && likely(do_neon)) 79 poly1305_blocks_neon(&dctx->h, src, len, hibit); 80 else 81 poly1305_blocks_arm(&dctx->h, src, len, hibit); 82 } 83 84 static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, 85 const u8 *src, u32 len, bool do_neon) 86 { 87 if (unlikely(dctx->buflen)) { 88 u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); 89 90 memcpy(dctx->buf + dctx->buflen, src, bytes); 91 src += bytes; 92 len -= bytes; 93 dctx->buflen += bytes; 94 95 if (dctx->buflen == POLY1305_BLOCK_SIZE) { 96 arm_poly1305_blocks(dctx, dctx->buf, 97 POLY1305_BLOCK_SIZE, 1, false); 98 dctx->buflen = 0; 99 } 100 } 101 102 if (likely(len >= POLY1305_BLOCK_SIZE)) { 103 arm_poly1305_blocks(dctx, src, len, 1, do_neon); 104 src += round_down(len, POLY1305_BLOCK_SIZE); 105 len %= POLY1305_BLOCK_SIZE; 106 } 107 108 if (unlikely(len)) { 109 dctx->buflen = len; 110 memcpy(dctx->buf, src, len); 111 } 112 } 113 114 static int arm_poly1305_update(struct shash_desc *desc, 115 const u8 *src, unsigned int srclen) 116 { 117 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 118 119 arm_poly1305_do_update(dctx, src, srclen, false); 120 return 0; 121 } 122 123 static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, 124 const u8 *src, 125 unsigned int srclen) 126 { 127 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 128 bool do_neon = crypto_simd_usable() && srclen > 128; 129 130 if (static_branch_likely(&have_neon) && do_neon) 131 kernel_neon_begin(); 132 arm_poly1305_do_update(dctx, src, srclen, do_neon); 133 if (static_branch_likely(&have_neon) && do_neon) 134 kernel_neon_end(); 135 return 0; 136 } 137 138 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, 139 unsigned int nbytes) 140 { 141 bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 142 crypto_simd_usable(); 143 144 if (unlikely(dctx->buflen)) { 145 u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); 146 147 memcpy(dctx->buf + dctx->buflen, src, bytes); 148 src += bytes; 149 nbytes -= bytes; 150 dctx->buflen += bytes; 151 152 if (dctx->buflen == POLY1305_BLOCK_SIZE) { 153 poly1305_blocks_arm(&dctx->h, dctx->buf, 154 POLY1305_BLOCK_SIZE, 1); 155 dctx->buflen = 0; 156 } 157 } 158 159 if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { 160 unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); 161 162 if (static_branch_likely(&have_neon) && do_neon) { 163 do { 164 unsigned int todo = min_t(unsigned int, len, SZ_4K); 165 166 kernel_neon_begin(); 167 poly1305_blocks_neon(&dctx->h, src, todo, 1); 168 kernel_neon_end(); 169 170 len -= todo; 171 src += todo; 172 } while (len); 173 } else { 174 poly1305_blocks_arm(&dctx->h, src, len, 1); 175 src += len; 176 } 177 nbytes %= POLY1305_BLOCK_SIZE; 178 } 179 180 if (unlikely(nbytes)) { 181 dctx->buflen = nbytes; 182 memcpy(dctx->buf, src, nbytes); 183 } 184 } 185 EXPORT_SYMBOL(poly1305_update_arch); 186 187 void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) 188 { 189 if (unlikely(dctx->buflen)) { 190 dctx->buf[dctx->buflen++] = 1; 191 memset(dctx->buf + dctx->buflen, 0, 192 POLY1305_BLOCK_SIZE - dctx->buflen); 193 poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); 194 } 195 196 poly1305_emit_arm(&dctx->h, dst, dctx->s); 197 *dctx = (struct poly1305_desc_ctx){}; 198 } 199 EXPORT_SYMBOL(poly1305_final_arch); 200 201 static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) 202 { 203 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 204 205 if (unlikely(!dctx->sset)) 206 return -ENOKEY; 207 208 poly1305_final_arch(dctx, dst); 209 return 0; 210 } 211 212 static struct shash_alg arm_poly1305_algs[] = {{ 213 .init = arm_poly1305_init, 214 .update = arm_poly1305_update, 215 .final = arm_poly1305_final, 216 .digestsize = POLY1305_DIGEST_SIZE, 217 .descsize = sizeof(struct poly1305_desc_ctx), 218 219 .base.cra_name = "poly1305", 220 .base.cra_driver_name = "poly1305-arm", 221 .base.cra_priority = 150, 222 .base.cra_blocksize = POLY1305_BLOCK_SIZE, 223 .base.cra_module = THIS_MODULE, 224 #ifdef CONFIG_KERNEL_MODE_NEON 225 }, { 226 .init = arm_poly1305_init, 227 .update = arm_poly1305_update_neon, 228 .final = arm_poly1305_final, 229 .digestsize = POLY1305_DIGEST_SIZE, 230 .descsize = sizeof(struct poly1305_desc_ctx), 231 232 .base.cra_name = "poly1305", 233 .base.cra_driver_name = "poly1305-neon", 234 .base.cra_priority = 200, 235 .base.cra_blocksize = POLY1305_BLOCK_SIZE, 236 .base.cra_module = THIS_MODULE, 237 #endif 238 }}; 239 240 static int __init arm_poly1305_mod_init(void) 241 { 242 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 243 (elf_hwcap & HWCAP_NEON)) 244 static_branch_enable(&have_neon); 245 else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) 246 /* register only the first entry */ 247 return crypto_register_shash(&arm_poly1305_algs[0]); 248 249 return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? 250 crypto_register_shashes(arm_poly1305_algs, 251 ARRAY_SIZE(arm_poly1305_algs)) : 0; 252 } 253 254 static void __exit arm_poly1305_mod_exit(void) 255 { 256 if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) 257 return; 258 if (!static_branch_likely(&have_neon)) { 259 crypto_unregister_shash(&arm_poly1305_algs[0]); 260 return; 261 } 262 crypto_unregister_shashes(arm_poly1305_algs, 263 ARRAY_SIZE(arm_poly1305_algs)); 264 } 265 266 module_init(arm_poly1305_mod_init); 267 module_exit(arm_poly1305_mod_exit); 268 269 MODULE_LICENSE("GPL v2"); 270 MODULE_ALIAS_CRYPTO("poly1305"); 271 MODULE_ALIAS_CRYPTO("poly1305-arm"); 272 MODULE_ALIAS_CRYPTO("poly1305-neon"); 273