// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

/*
 * Assembly entry points (presumably generated from the OpenSSL/Cryptogams
 * sources named in the header above -- TODO confirm against the .S file).
 * 'hibit' is the value of the high pad bit applied to each 16-byte block:
 * callers below pass 1 for full message blocks and 0 for the zero-padded
 * final partial block.
 */
void poly1305_init_arm(void *state, const u8 *key);
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);

/*
 * Weak no-op stub, overridden by the real NEON implementation when it is
 * built in.  Only ever called behind the have_neon static key below, so
 * the stub itself should never run in practice.
 */
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
}

/* Flipped on at module init when the CPU advertises NEON (HWCAP_NEON). */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

/*
 * Library interface: initialize the accumulator/'r' state from the first
 * 16 key bytes and stash the 's' half of the key (bytes 16..31) for the
 * final addition in poly1305_emit_arm().
 */
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
{
	poly1305_init_arm(&dctx->h, key);
	dctx->s[0] = get_unaligned_le32(key + 16);
	dctx->s[1] = get_unaligned_le32(key + 20);
	dctx->s[2] = get_unaligned_le32(key + 24);
	dctx->s[3] = get_unaligned_le32(key + 28);
	dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);

/*
 * shash init: no key is supplied up front for the one-shot shash variant;
 * the first 32 bytes of the data stream carry the key instead.  rset/sset
 * track how much of it has been consumed (see arm_poly1305_blocks()).
 */
static int arm_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	dctx->buflen = 0;
	dctx->rset = 0;
	dctx->sset = false;

	return 0;
}

/*
 * Process whole blocks for the shash interface.  While the key has not
 * been fully consumed (!sset), peel the 'r' block and then the 's' block
 * off the front of the stream before hashing anything.
 */
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			/* First 16 bytes of the stream are the 'r' key half. */
			poly1305_init_arm(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			/* Next 16 bytes are the 's' key half (nonce). */
			dctx->s[0] = get_unaligned_le32(src + 0);
			dctx->s[1] = get_unaligned_le32(src + 4);
			dctx->s[2] = get_unaligned_le32(src + 8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Only whole blocks here; the caller buffers any tail bytes. */
	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
}

/*
 * Common shash update path: top up and flush the partial-block buffer,
 * hash the bulk of the input, then buffer any trailing tail bytes.
 */
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				   const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/*
			 * A single buffered block is not worth a NEON
			 * round trip, hence do_neon == false here.
			 */
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		/* Stash the sub-block tail for the next update/final. */
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}

/* shash update for the scalar-only "poly1305-arm" algorithm. */
static int arm_poly1305_update(struct shash_desc *desc,
			       const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	arm_poly1305_do_update(dctx, src, srclen, false);
	return 0;
}

/*
 * shash update for the "poly1305-neon" algorithm.  NEON is only used when
 * SIMD is usable in this context and the input is large enough (> 128
 * bytes) to amortize the kernel_neon_begin()/end() overhead.
 */
static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
						   const u8 *src,
						   unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
	bool do_neon = crypto_simd_usable() && srclen > 128;

	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_begin();
	arm_poly1305_do_update(dctx, src, srclen, do_neon);
	if (static_branch_likely(&have_neon) && do_neon)
		kernel_neon_end();
	return 0;
}

/*
 * Library interface update.  Same buffering scheme as
 * arm_poly1305_do_update(), but the key is already set
 * (poly1305_init_arch()), so blocks go straight to the asm routines.
 */
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
		       crypto_simd_usable();

	if (unlikely(dctx->buflen)) {
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/* One block: scalar code, no NEON round trip. */
			poly1305_blocks_arm(&dctx->h, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
			kernel_neon_begin();
			poly1305_blocks_neon(&dctx->h, src, len, 1);
			kernel_neon_end();
		} else {
			poly1305_blocks_arm(&dctx->h, src, len, 1);
		}
		src += len;
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);

/*
 * Library interface final: pad any buffered tail with a 0x01 byte and
 * zeroes (hibit == 0 because the pad bit is already in the data), emit
 * the tag, and wipe the descriptor so no key/state material lingers.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, dst, dctx->s);
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);

/*
 * shash final: the stream must have carried the full 32-byte key before
 * any digest can be produced.
 */
static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}

/*
 * Two registrations for the same "poly1305" algorithm: the scalar driver
 * (priority 150) always, and the NEON driver (priority 200) only when
 * CONFIG_KERNEL_MODE_NEON compiles the second entry in.
 */
static struct shash_alg arm_poly1305_algs[] = {{
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-arm",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#ifdef CONFIG_KERNEL_MODE_NEON
}, {
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update_neon,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#endif
}};

/*
 * Enable the NEON fast path if the hardware supports it; otherwise
 * register only the scalar entry.  Registration happens at all only when
 * the crypto hash API is reachable (this file also builds as a pure
 * library provider).
 */
static int __init arm_poly1305_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    (elf_hwcap & HWCAP_NEON))
		static_branch_enable(&have_neon);
	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		/* register only the first entry */
		return crypto_register_shash(&arm_poly1305_algs[0]);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shashes(arm_poly1305_algs,
					ARRAY_SIZE(arm_poly1305_algs)) : 0;
}

/* Mirror of mod_init: unregister exactly what was registered. */
static void __exit arm_poly1305_mod_exit(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
		return;
	if (!static_branch_likely(&have_neon)) {
		crypto_unregister_shash(&arm_poly1305_algs[0]);
		return;
	}
	crypto_unregister_shashes(arm_poly1305_algs,
				  ARRAY_SIZE(arm_poly1305_algs));
}

module_init(arm_poly1305_mod_init);
module_exit(arm_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-arm");
MODULE_ALIAS_CRYPTO("poly1305-neon");