1*a6b803b3SArd Biesheuvel // SPDX-License-Identifier: GPL-2.0 2*a6b803b3SArd Biesheuvel /* 3*a6b803b3SArd Biesheuvel * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM 4*a6b803b3SArd Biesheuvel * 5*a6b803b3SArd Biesheuvel * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> 6*a6b803b3SArd Biesheuvel */ 7*a6b803b3SArd Biesheuvel 8*a6b803b3SArd Biesheuvel #include <asm/hwcap.h> 9*a6b803b3SArd Biesheuvel #include <asm/neon.h> 10*a6b803b3SArd Biesheuvel #include <asm/simd.h> 11*a6b803b3SArd Biesheuvel #include <asm/unaligned.h> 12*a6b803b3SArd Biesheuvel #include <crypto/algapi.h> 13*a6b803b3SArd Biesheuvel #include <crypto/internal/hash.h> 14*a6b803b3SArd Biesheuvel #include <crypto/internal/poly1305.h> 15*a6b803b3SArd Biesheuvel #include <crypto/internal/simd.h> 16*a6b803b3SArd Biesheuvel #include <linux/cpufeature.h> 17*a6b803b3SArd Biesheuvel #include <linux/crypto.h> 18*a6b803b3SArd Biesheuvel #include <linux/jump_label.h> 19*a6b803b3SArd Biesheuvel #include <linux/module.h> 20*a6b803b3SArd Biesheuvel 21*a6b803b3SArd Biesheuvel void poly1305_init_arm(void *state, const u8 *key); 22*a6b803b3SArd Biesheuvel void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); 23*a6b803b3SArd Biesheuvel void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); 24*a6b803b3SArd Biesheuvel 25*a6b803b3SArd Biesheuvel void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) 26*a6b803b3SArd Biesheuvel { 27*a6b803b3SArd Biesheuvel } 28*a6b803b3SArd Biesheuvel 29*a6b803b3SArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 30*a6b803b3SArd Biesheuvel 31*a6b803b3SArd Biesheuvel void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) 32*a6b803b3SArd Biesheuvel { 33*a6b803b3SArd Biesheuvel poly1305_init_arm(&dctx->h, key); 34*a6b803b3SArd Biesheuvel dctx->s[0] = get_unaligned_le32(key + 16); 35*a6b803b3SArd Biesheuvel dctx->s[1] = get_unaligned_le32(key + 20); 36*a6b803b3SArd Biesheuvel dctx->s[2] = get_unaligned_le32(key + 24); 37*a6b803b3SArd Biesheuvel dctx->s[3] = get_unaligned_le32(key + 28); 38*a6b803b3SArd Biesheuvel dctx->buflen = 0; 39*a6b803b3SArd Biesheuvel } 40*a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_init_arch); 41*a6b803b3SArd Biesheuvel 42*a6b803b3SArd Biesheuvel static int arm_poly1305_init(struct shash_desc *desc) 43*a6b803b3SArd Biesheuvel { 44*a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 45*a6b803b3SArd Biesheuvel 46*a6b803b3SArd Biesheuvel dctx->buflen = 0; 47*a6b803b3SArd Biesheuvel dctx->rset = 0; 48*a6b803b3SArd Biesheuvel dctx->sset = false; 49*a6b803b3SArd Biesheuvel 50*a6b803b3SArd Biesheuvel return 0; 51*a6b803b3SArd Biesheuvel } 52*a6b803b3SArd Biesheuvel 53*a6b803b3SArd Biesheuvel static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, 54*a6b803b3SArd Biesheuvel u32 len, u32 hibit, bool do_neon) 55*a6b803b3SArd Biesheuvel { 56*a6b803b3SArd Biesheuvel if (unlikely(!dctx->sset)) { 57*a6b803b3SArd Biesheuvel if (!dctx->rset) { 58*a6b803b3SArd Biesheuvel poly1305_init_arm(&dctx->h, src); 59*a6b803b3SArd Biesheuvel src += POLY1305_BLOCK_SIZE; 60*a6b803b3SArd Biesheuvel len -= POLY1305_BLOCK_SIZE; 61*a6b803b3SArd Biesheuvel dctx->rset = 1; 62*a6b803b3SArd Biesheuvel } 63*a6b803b3SArd Biesheuvel if (len >= POLY1305_BLOCK_SIZE) { 64*a6b803b3SArd Biesheuvel dctx->s[0] = get_unaligned_le32(src + 0); 65*a6b803b3SArd Biesheuvel dctx->s[1] = get_unaligned_le32(src + 4); 66*a6b803b3SArd Biesheuvel dctx->s[2] = get_unaligned_le32(src + 8); 67*a6b803b3SArd Biesheuvel dctx->s[3] = get_unaligned_le32(src + 12); 68*a6b803b3SArd Biesheuvel src += POLY1305_BLOCK_SIZE; 69*a6b803b3SArd Biesheuvel len -= POLY1305_BLOCK_SIZE; 70*a6b803b3SArd Biesheuvel dctx->sset = true; 71*a6b803b3SArd Biesheuvel } 72*a6b803b3SArd Biesheuvel if (len < POLY1305_BLOCK_SIZE) 73*a6b803b3SArd Biesheuvel return; 74*a6b803b3SArd Biesheuvel } 75*a6b803b3SArd Biesheuvel 76*a6b803b3SArd Biesheuvel len &= ~(POLY1305_BLOCK_SIZE - 1); 77*a6b803b3SArd Biesheuvel 78*a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && likely(do_neon)) 79*a6b803b3SArd Biesheuvel poly1305_blocks_neon(&dctx->h, src, len, hibit); 80*a6b803b3SArd Biesheuvel else 81*a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, src, len, hibit); 82*a6b803b3SArd Biesheuvel } 83*a6b803b3SArd Biesheuvel 84*a6b803b3SArd Biesheuvel static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, 85*a6b803b3SArd Biesheuvel const u8 *src, u32 len, bool do_neon) 86*a6b803b3SArd Biesheuvel { 87*a6b803b3SArd Biesheuvel if (unlikely(dctx->buflen)) { 88*a6b803b3SArd Biesheuvel u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); 89*a6b803b3SArd Biesheuvel 90*a6b803b3SArd Biesheuvel memcpy(dctx->buf + dctx->buflen, src, bytes); 91*a6b803b3SArd Biesheuvel src += bytes; 92*a6b803b3SArd Biesheuvel len -= bytes; 93*a6b803b3SArd Biesheuvel dctx->buflen += bytes; 94*a6b803b3SArd Biesheuvel 95*a6b803b3SArd Biesheuvel if (dctx->buflen == POLY1305_BLOCK_SIZE) { 96*a6b803b3SArd Biesheuvel arm_poly1305_blocks(dctx, dctx->buf, 97*a6b803b3SArd Biesheuvel POLY1305_BLOCK_SIZE, 1, false); 98*a6b803b3SArd Biesheuvel dctx->buflen = 0; 99*a6b803b3SArd Biesheuvel } 100*a6b803b3SArd Biesheuvel } 101*a6b803b3SArd Biesheuvel 102*a6b803b3SArd Biesheuvel if (likely(len >= POLY1305_BLOCK_SIZE)) { 103*a6b803b3SArd Biesheuvel arm_poly1305_blocks(dctx, src, len, 1, do_neon); 104*a6b803b3SArd Biesheuvel src += round_down(len, POLY1305_BLOCK_SIZE); 105*a6b803b3SArd Biesheuvel len %= POLY1305_BLOCK_SIZE; 106*a6b803b3SArd Biesheuvel } 107*a6b803b3SArd Biesheuvel 108*a6b803b3SArd Biesheuvel if (unlikely(len)) { 109*a6b803b3SArd Biesheuvel dctx->buflen = len; 110*a6b803b3SArd Biesheuvel memcpy(dctx->buf, src, len); 111*a6b803b3SArd Biesheuvel } 112*a6b803b3SArd Biesheuvel } 113*a6b803b3SArd Biesheuvel 114*a6b803b3SArd Biesheuvel static int arm_poly1305_update(struct shash_desc *desc, 115*a6b803b3SArd Biesheuvel const u8 *src, unsigned int srclen) 116*a6b803b3SArd Biesheuvel { 117*a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 118*a6b803b3SArd Biesheuvel 119*a6b803b3SArd Biesheuvel arm_poly1305_do_update(dctx, src, srclen, false); 120*a6b803b3SArd Biesheuvel return 0; 121*a6b803b3SArd Biesheuvel } 122*a6b803b3SArd Biesheuvel 123*a6b803b3SArd Biesheuvel static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, 124*a6b803b3SArd Biesheuvel const u8 *src, 125*a6b803b3SArd Biesheuvel unsigned int srclen) 126*a6b803b3SArd Biesheuvel { 127*a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 128*a6b803b3SArd Biesheuvel bool do_neon = crypto_simd_usable() && srclen > 128; 129*a6b803b3SArd Biesheuvel 130*a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon) 131*a6b803b3SArd Biesheuvel kernel_neon_begin(); 132*a6b803b3SArd Biesheuvel arm_poly1305_do_update(dctx, src, srclen, do_neon); 133*a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon) 134*a6b803b3SArd Biesheuvel kernel_neon_end(); 135*a6b803b3SArd Biesheuvel return 0; 136*a6b803b3SArd Biesheuvel } 137*a6b803b3SArd Biesheuvel 138*a6b803b3SArd Biesheuvel void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, 139*a6b803b3SArd Biesheuvel unsigned int nbytes) 140*a6b803b3SArd Biesheuvel { 141*a6b803b3SArd Biesheuvel bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 142*a6b803b3SArd Biesheuvel crypto_simd_usable(); 143*a6b803b3SArd Biesheuvel 144*a6b803b3SArd Biesheuvel if (unlikely(dctx->buflen)) { 145*a6b803b3SArd Biesheuvel u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); 146*a6b803b3SArd Biesheuvel 147*a6b803b3SArd Biesheuvel memcpy(dctx->buf + dctx->buflen, src, bytes); 148*a6b803b3SArd Biesheuvel src += bytes; 149*a6b803b3SArd Biesheuvel nbytes -= bytes; 150*a6b803b3SArd Biesheuvel dctx->buflen += bytes; 151*a6b803b3SArd Biesheuvel 152*a6b803b3SArd Biesheuvel if (dctx->buflen == POLY1305_BLOCK_SIZE) { 153*a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, dctx->buf, 154*a6b803b3SArd Biesheuvel POLY1305_BLOCK_SIZE, 1); 155*a6b803b3SArd Biesheuvel dctx->buflen = 0; 156*a6b803b3SArd Biesheuvel } 157*a6b803b3SArd Biesheuvel } 158*a6b803b3SArd Biesheuvel 159*a6b803b3SArd Biesheuvel if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { 160*a6b803b3SArd Biesheuvel unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); 161*a6b803b3SArd Biesheuvel 162*a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon) { 163*a6b803b3SArd Biesheuvel kernel_neon_begin(); 164*a6b803b3SArd Biesheuvel poly1305_blocks_neon(&dctx->h, src, len, 1); 165*a6b803b3SArd Biesheuvel kernel_neon_end(); 166*a6b803b3SArd Biesheuvel } else { 167*a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, src, len, 1); 168*a6b803b3SArd Biesheuvel } 169*a6b803b3SArd Biesheuvel src += len; 170*a6b803b3SArd Biesheuvel nbytes %= POLY1305_BLOCK_SIZE; 171*a6b803b3SArd Biesheuvel } 172*a6b803b3SArd Biesheuvel 173*a6b803b3SArd Biesheuvel if (unlikely(nbytes)) { 174*a6b803b3SArd Biesheuvel dctx->buflen = nbytes; 175*a6b803b3SArd Biesheuvel memcpy(dctx->buf, src, nbytes); 176*a6b803b3SArd Biesheuvel } 177*a6b803b3SArd Biesheuvel } 178*a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_update_arch); 179*a6b803b3SArd Biesheuvel 180*a6b803b3SArd Biesheuvel void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) 181*a6b803b3SArd Biesheuvel { 182*a6b803b3SArd Biesheuvel __le32 digest[4]; 183*a6b803b3SArd Biesheuvel u64 f = 0; 184*a6b803b3SArd Biesheuvel 185*a6b803b3SArd Biesheuvel if (unlikely(dctx->buflen)) { 186*a6b803b3SArd Biesheuvel dctx->buf[dctx->buflen++] = 1; 187*a6b803b3SArd Biesheuvel memset(dctx->buf + dctx->buflen, 0, 188*a6b803b3SArd Biesheuvel POLY1305_BLOCK_SIZE - dctx->buflen); 189*a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); 190*a6b803b3SArd Biesheuvel } 191*a6b803b3SArd Biesheuvel 192*a6b803b3SArd Biesheuvel poly1305_emit_arm(&dctx->h, digest, dctx->s); 193*a6b803b3SArd Biesheuvel 194*a6b803b3SArd Biesheuvel /* mac = (h + s) % (2^128) */ 195*a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[0]); 196*a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst); 197*a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[1]); 198*a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst + 4); 199*a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[2]); 200*a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst + 8); 201*a6b803b3SArd Biesheuvel f = (f >> 32) + le32_to_cpu(digest[3]); 202*a6b803b3SArd Biesheuvel put_unaligned_le32(f, dst + 12); 203*a6b803b3SArd Biesheuvel 204*a6b803b3SArd Biesheuvel *dctx = (struct poly1305_desc_ctx){}; 205*a6b803b3SArd Biesheuvel } 206*a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_final_arch); 207*a6b803b3SArd Biesheuvel 208*a6b803b3SArd Biesheuvel static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) 209*a6b803b3SArd Biesheuvel { 210*a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 211*a6b803b3SArd Biesheuvel 212*a6b803b3SArd Biesheuvel if (unlikely(!dctx->sset)) 213*a6b803b3SArd Biesheuvel return -ENOKEY; 214*a6b803b3SArd Biesheuvel 215*a6b803b3SArd Biesheuvel poly1305_final_arch(dctx, dst); 216*a6b803b3SArd Biesheuvel return 0; 217*a6b803b3SArd Biesheuvel } 218*a6b803b3SArd Biesheuvel 219*a6b803b3SArd Biesheuvel static struct shash_alg arm_poly1305_algs[] = {{ 220*a6b803b3SArd Biesheuvel .init = arm_poly1305_init, 221*a6b803b3SArd Biesheuvel .update = arm_poly1305_update, 222*a6b803b3SArd Biesheuvel .final = arm_poly1305_final, 223*a6b803b3SArd Biesheuvel .digestsize = POLY1305_DIGEST_SIZE, 224*a6b803b3SArd Biesheuvel .descsize = sizeof(struct poly1305_desc_ctx), 225*a6b803b3SArd Biesheuvel 226*a6b803b3SArd Biesheuvel .base.cra_name = "poly1305", 227*a6b803b3SArd Biesheuvel .base.cra_driver_name = "poly1305-arm", 228*a6b803b3SArd Biesheuvel .base.cra_priority = 150, 229*a6b803b3SArd Biesheuvel .base.cra_blocksize = POLY1305_BLOCK_SIZE, 230*a6b803b3SArd Biesheuvel .base.cra_module = THIS_MODULE, 231*a6b803b3SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON 232*a6b803b3SArd Biesheuvel }, { 233*a6b803b3SArd Biesheuvel .init = arm_poly1305_init, 234*a6b803b3SArd Biesheuvel .update = arm_poly1305_update_neon, 235*a6b803b3SArd Biesheuvel .final = arm_poly1305_final, 236*a6b803b3SArd Biesheuvel .digestsize = POLY1305_DIGEST_SIZE, 237*a6b803b3SArd Biesheuvel .descsize = sizeof(struct poly1305_desc_ctx), 238*a6b803b3SArd Biesheuvel 239*a6b803b3SArd Biesheuvel .base.cra_name = "poly1305", 240*a6b803b3SArd Biesheuvel .base.cra_driver_name = "poly1305-neon", 241*a6b803b3SArd Biesheuvel .base.cra_priority = 200, 242*a6b803b3SArd Biesheuvel .base.cra_blocksize = POLY1305_BLOCK_SIZE, 243*a6b803b3SArd Biesheuvel .base.cra_module = THIS_MODULE, 244*a6b803b3SArd Biesheuvel #endif 245*a6b803b3SArd Biesheuvel }}; 246*a6b803b3SArd Biesheuvel 247*a6b803b3SArd Biesheuvel static int __init arm_poly1305_mod_init(void) 248*a6b803b3SArd Biesheuvel { 249*a6b803b3SArd Biesheuvel if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 250*a6b803b3SArd Biesheuvel (elf_hwcap & HWCAP_NEON)) 251*a6b803b3SArd Biesheuvel static_branch_enable(&have_neon); 252*a6b803b3SArd Biesheuvel else 253*a6b803b3SArd Biesheuvel /* register only the first entry */ 254*a6b803b3SArd Biesheuvel return crypto_register_shash(&arm_poly1305_algs[0]); 255*a6b803b3SArd Biesheuvel 256*a6b803b3SArd Biesheuvel return crypto_register_shashes(arm_poly1305_algs, 257*a6b803b3SArd Biesheuvel ARRAY_SIZE(arm_poly1305_algs)); 258*a6b803b3SArd Biesheuvel } 259*a6b803b3SArd Biesheuvel 260*a6b803b3SArd Biesheuvel static void __exit arm_poly1305_mod_exit(void) 261*a6b803b3SArd Biesheuvel { 262*a6b803b3SArd Biesheuvel if (!static_branch_likely(&have_neon)) { 263*a6b803b3SArd Biesheuvel crypto_unregister_shash(&arm_poly1305_algs[0]); 264*a6b803b3SArd Biesheuvel return; 265*a6b803b3SArd Biesheuvel } 266*a6b803b3SArd Biesheuvel crypto_unregister_shashes(arm_poly1305_algs, 267*a6b803b3SArd Biesheuvel ARRAY_SIZE(arm_poly1305_algs)); 268*a6b803b3SArd Biesheuvel } 269*a6b803b3SArd Biesheuvel 270*a6b803b3SArd Biesheuvel module_init(arm_poly1305_mod_init); 271*a6b803b3SArd Biesheuvel module_exit(arm_poly1305_mod_exit); 272*a6b803b3SArd Biesheuvel 273*a6b803b3SArd Biesheuvel MODULE_LICENSE("GPL v2"); 274*a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305"); 275*a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-arm"); 276*a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-neon"); 277