1f1939f7cSShane Wang /* 2f1939f7cSShane Wang * Modified to interface to the Linux kernel 3f1939f7cSShane Wang * Copyright (c) 2009, Intel Corporation. 4f1939f7cSShane Wang * 5f1939f7cSShane Wang * This program is free software; you can redistribute it and/or modify it 6f1939f7cSShane Wang * under the terms and conditions of the GNU General Public License, 7f1939f7cSShane Wang * version 2, as published by the Free Software Foundation. 8f1939f7cSShane Wang * 9f1939f7cSShane Wang * This program is distributed in the hope it will be useful, but WITHOUT 10f1939f7cSShane Wang * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11f1939f7cSShane Wang * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12f1939f7cSShane Wang * more details. 13f1939f7cSShane Wang * 14f1939f7cSShane Wang * You should have received a copy of the GNU General Public License along with 15f1939f7cSShane Wang * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16f1939f7cSShane Wang * Place - Suite 330, Boston, MA 02111-1307 USA. 17f1939f7cSShane Wang */ 18f1939f7cSShane Wang 19f1939f7cSShane Wang /* -------------------------------------------------------------------------- 20f1939f7cSShane Wang * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. 21f1939f7cSShane Wang * This implementation is herby placed in the public domain. 22f1939f7cSShane Wang * The authors offers no warranty. Use at your own risk. 23f1939f7cSShane Wang * Please send bug reports to the authors. 24f1939f7cSShane Wang * Last modified: 17 APR 08, 1700 PDT 25f1939f7cSShane Wang * ----------------------------------------------------------------------- */ 26f1939f7cSShane Wang 27f1939f7cSShane Wang #include <linux/init.h> 28f1939f7cSShane Wang #include <linux/types.h> 29f1939f7cSShane Wang #include <linux/crypto.h> 30f1939f7cSShane Wang #include <linux/scatterlist.h> 31f1939f7cSShane Wang #include <asm/byteorder.h> 32f1939f7cSShane Wang #include <crypto/scatterwalk.h> 33f1939f7cSShane Wang #include <crypto/vmac.h> 34f1939f7cSShane Wang #include <crypto/internal/hash.h> 35f1939f7cSShane Wang 36f1939f7cSShane Wang /* 37f1939f7cSShane Wang * Constants and masks 38f1939f7cSShane Wang */ 39f1939f7cSShane Wang #define UINT64_C(x) x##ULL 40f1939f7cSShane Wang const u64 p64 = UINT64_C(0xfffffffffffffeff); /* 2^64 - 257 prime */ 41f1939f7cSShane Wang const u64 m62 = UINT64_C(0x3fffffffffffffff); /* 62-bit mask */ 42f1939f7cSShane Wang const u64 m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */ 43f1939f7cSShane Wang const u64 m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */ 44f1939f7cSShane Wang const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ 45f1939f7cSShane Wang 46304a204eSShane Wang #define pe64_to_cpup le64_to_cpup /* Prefer little endian */ 47304a204eSShane Wang 48f1939f7cSShane Wang #ifdef __LITTLE_ENDIAN 49f1939f7cSShane Wang #define INDEX_HIGH 1 50f1939f7cSShane Wang #define INDEX_LOW 0 51f1939f7cSShane Wang #else 52f1939f7cSShane Wang #define INDEX_HIGH 0 53f1939f7cSShane Wang #define INDEX_LOW 1 54f1939f7cSShane Wang #endif 55f1939f7cSShane Wang 56f1939f7cSShane Wang /* 57f1939f7cSShane Wang * The following routines are used in this implementation. They are 58f1939f7cSShane Wang * written via macros to simulate zero-overhead call-by-reference. 59f1939f7cSShane Wang * 60f1939f7cSShane Wang * MUL64: 64x64->128-bit multiplication 61f1939f7cSShane Wang * PMUL64: assumes top bits cleared on inputs 62f1939f7cSShane Wang * ADD128: 128x128->128-bit addition 63f1939f7cSShane Wang */ 64f1939f7cSShane Wang 65f1939f7cSShane Wang #define ADD128(rh, rl, ih, il) \ 66f1939f7cSShane Wang do { \ 67f1939f7cSShane Wang u64 _il = (il); \ 68f1939f7cSShane Wang (rl) += (_il); \ 69f1939f7cSShane Wang if ((rl) < (_il)) \ 70f1939f7cSShane Wang (rh)++; \ 71f1939f7cSShane Wang (rh) += (ih); \ 72f1939f7cSShane Wang } while (0) 73f1939f7cSShane Wang 74f1939f7cSShane Wang #define MUL32(i1, i2) ((u64)(u32)(i1)*(u32)(i2)) 75f1939f7cSShane Wang 76f1939f7cSShane Wang #define PMUL64(rh, rl, i1, i2) /* Assumes m doesn't overflow */ \ 77f1939f7cSShane Wang do { \ 78f1939f7cSShane Wang u64 _i1 = (i1), _i2 = (i2); \ 79f1939f7cSShane Wang u64 m = MUL32(_i1, _i2>>32) + MUL32(_i1>>32, _i2); \ 80f1939f7cSShane Wang rh = MUL32(_i1>>32, _i2>>32); \ 81f1939f7cSShane Wang rl = MUL32(_i1, _i2); \ 82f1939f7cSShane Wang ADD128(rh, rl, (m >> 32), (m << 32)); \ 83f1939f7cSShane Wang } while (0) 84f1939f7cSShane Wang 85f1939f7cSShane Wang #define MUL64(rh, rl, i1, i2) \ 86f1939f7cSShane Wang do { \ 87f1939f7cSShane Wang u64 _i1 = (i1), _i2 = (i2); \ 88f1939f7cSShane Wang u64 m1 = MUL32(_i1, _i2>>32); \ 89f1939f7cSShane Wang u64 m2 = MUL32(_i1>>32, _i2); \ 90f1939f7cSShane Wang rh = MUL32(_i1>>32, _i2>>32); \ 91f1939f7cSShane Wang rl = MUL32(_i1, _i2); \ 92f1939f7cSShane Wang ADD128(rh, rl, (m1 >> 32), (m1 << 32)); \ 93f1939f7cSShane Wang ADD128(rh, rl, (m2 >> 32), (m2 << 32)); \ 94f1939f7cSShane Wang } while (0) 95f1939f7cSShane Wang 96f1939f7cSShane Wang /* 97f1939f7cSShane Wang * For highest performance the L1 NH and L2 polynomial hashes should be 98*25985edcSLucas De Marchi * carefully implemented to take advantage of one's target architecture. 99f1939f7cSShane Wang * Here these two hash functions are defined multiple time; once for 100f1939f7cSShane Wang * 64-bit architectures, once for 32-bit SSE2 architectures, and once 101f1939f7cSShane Wang * for the rest (32-bit) architectures. 102f1939f7cSShane Wang * For each, nh_16 *must* be defined (works on multiples of 16 bytes). 103f1939f7cSShane Wang * Optionally, nh_vmac_nhbytes can be defined (for multiples of 104f1939f7cSShane Wang * VMAC_NHBYTES), and nh_16_2 and nh_vmac_nhbytes_2 (versions that do two 105f1939f7cSShane Wang * NH computations at once). 106f1939f7cSShane Wang */ 107f1939f7cSShane Wang 108f1939f7cSShane Wang #ifdef CONFIG_64BIT 109f1939f7cSShane Wang 110f1939f7cSShane Wang #define nh_16(mp, kp, nw, rh, rl) \ 111f1939f7cSShane Wang do { \ 112f1939f7cSShane Wang int i; u64 th, tl; \ 113f1939f7cSShane Wang rh = rl = 0; \ 114f1939f7cSShane Wang for (i = 0; i < nw; i += 2) { \ 115304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 116304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 117f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 118f1939f7cSShane Wang } \ 119f1939f7cSShane Wang } while (0) 120f1939f7cSShane Wang 121f1939f7cSShane Wang #define nh_16_2(mp, kp, nw, rh, rl, rh1, rl1) \ 122f1939f7cSShane Wang do { \ 123f1939f7cSShane Wang int i; u64 th, tl; \ 124f1939f7cSShane Wang rh1 = rl1 = rh = rl = 0; \ 125f1939f7cSShane Wang for (i = 0; i < nw; i += 2) { \ 126304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 127304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 128f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 129304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ 130304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ 131f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 132f1939f7cSShane Wang } \ 133f1939f7cSShane Wang } while (0) 134f1939f7cSShane Wang 135f1939f7cSShane Wang #if (VMAC_NHBYTES >= 64) /* These versions do 64-bytes of message at a time */ 136f1939f7cSShane Wang #define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \ 137f1939f7cSShane Wang do { \ 138f1939f7cSShane Wang int i; u64 th, tl; \ 139f1939f7cSShane Wang rh = rl = 0; \ 140f1939f7cSShane Wang for (i = 0; i < nw; i += 8) { \ 141304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 142304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 143f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 144304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ 145304a204eSShane Wang pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ 146f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 147304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ 148304a204eSShane Wang pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ 149f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 150304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ 151304a204eSShane Wang pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ 152f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 153f1939f7cSShane Wang } \ 154f1939f7cSShane Wang } while (0) 155f1939f7cSShane Wang 156f1939f7cSShane Wang #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh1, rl1) \ 157f1939f7cSShane Wang do { \ 158f1939f7cSShane Wang int i; u64 th, tl; \ 159f1939f7cSShane Wang rh1 = rl1 = rh = rl = 0; \ 160f1939f7cSShane Wang for (i = 0; i < nw; i += 8) { \ 161304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 162304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 163f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 164304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ 165304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ 166f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 167304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ 168304a204eSShane Wang pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ 169f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 170304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+4], \ 171304a204eSShane Wang pe64_to_cpup((mp)+i+3)+(kp)[i+5]); \ 172f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 173304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ 174304a204eSShane Wang pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ 175f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 176304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+6], \ 177304a204eSShane Wang pe64_to_cpup((mp)+i+5)+(kp)[i+7]); \ 178f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 179304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ 180304a204eSShane Wang pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ 181f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 182304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+8], \ 183304a204eSShane Wang pe64_to_cpup((mp)+i+7)+(kp)[i+9]); \ 184f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 185f1939f7cSShane Wang } \ 186f1939f7cSShane Wang } while (0) 187f1939f7cSShane Wang #endif 188f1939f7cSShane Wang 189f1939f7cSShane Wang #define poly_step(ah, al, kh, kl, mh, ml) \ 190f1939f7cSShane Wang do { \ 191f1939f7cSShane Wang u64 t1h, t1l, t2h, t2l, t3h, t3l, z = 0; \ 192f1939f7cSShane Wang /* compute ab*cd, put bd into result registers */ \ 193f1939f7cSShane Wang PMUL64(t3h, t3l, al, kh); \ 194f1939f7cSShane Wang PMUL64(t2h, t2l, ah, kl); \ 195f1939f7cSShane Wang PMUL64(t1h, t1l, ah, 2*kh); \ 196f1939f7cSShane Wang PMUL64(ah, al, al, kl); \ 197f1939f7cSShane Wang /* add 2 * ac to result */ \ 198f1939f7cSShane Wang ADD128(ah, al, t1h, t1l); \ 199f1939f7cSShane Wang /* add together ad + bc */ \ 200f1939f7cSShane Wang ADD128(t2h, t2l, t3h, t3l); \ 201f1939f7cSShane Wang /* now (ah,al), (t2l,2*t2h) need summing */ \ 202f1939f7cSShane Wang /* first add the high registers, carrying into t2h */ \ 203f1939f7cSShane Wang ADD128(t2h, ah, z, t2l); \ 204f1939f7cSShane Wang /* double t2h and add top bit of ah */ \ 205f1939f7cSShane Wang t2h = 2 * t2h + (ah >> 63); \ 206f1939f7cSShane Wang ah &= m63; \ 207f1939f7cSShane Wang /* now add the low registers */ \ 208f1939f7cSShane Wang ADD128(ah, al, mh, ml); \ 209f1939f7cSShane Wang ADD128(ah, al, z, t2h); \ 210f1939f7cSShane Wang } while (0) 211f1939f7cSShane Wang 212f1939f7cSShane Wang #else /* ! CONFIG_64BIT */ 213f1939f7cSShane Wang 214f1939f7cSShane Wang #ifndef nh_16 215f1939f7cSShane Wang #define nh_16(mp, kp, nw, rh, rl) \ 216f1939f7cSShane Wang do { \ 217f1939f7cSShane Wang u64 t1, t2, m1, m2, t; \ 218f1939f7cSShane Wang int i; \ 219f1939f7cSShane Wang rh = rl = t = 0; \ 220f1939f7cSShane Wang for (i = 0; i < nw; i += 2) { \ 221304a204eSShane Wang t1 = pe64_to_cpup(mp+i) + kp[i]; \ 222304a204eSShane Wang t2 = pe64_to_cpup(mp+i+1) + kp[i+1]; \ 223f1939f7cSShane Wang m2 = MUL32(t1 >> 32, t2); \ 224f1939f7cSShane Wang m1 = MUL32(t1, t2 >> 32); \ 225f1939f7cSShane Wang ADD128(rh, rl, MUL32(t1 >> 32, t2 >> 32), \ 226f1939f7cSShane Wang MUL32(t1, t2)); \ 227f1939f7cSShane Wang rh += (u64)(u32)(m1 >> 32) \ 228f1939f7cSShane Wang + (u32)(m2 >> 32); \ 229f1939f7cSShane Wang t += (u64)(u32)m1 + (u32)m2; \ 230f1939f7cSShane Wang } \ 231f1939f7cSShane Wang ADD128(rh, rl, (t >> 32), (t << 32)); \ 232f1939f7cSShane Wang } while (0) 233f1939f7cSShane Wang #endif 234f1939f7cSShane Wang 235f1939f7cSShane Wang static void poly_step_func(u64 *ahi, u64 *alo, 236f1939f7cSShane Wang const u64 *kh, const u64 *kl, 237f1939f7cSShane Wang const u64 *mh, const u64 *ml) 238f1939f7cSShane Wang { 239f1939f7cSShane Wang #define a0 (*(((u32 *)alo)+INDEX_LOW)) 240f1939f7cSShane Wang #define a1 (*(((u32 *)alo)+INDEX_HIGH)) 241f1939f7cSShane Wang #define a2 (*(((u32 *)ahi)+INDEX_LOW)) 242f1939f7cSShane Wang #define a3 (*(((u32 *)ahi)+INDEX_HIGH)) 243f1939f7cSShane Wang #define k0 (*(((u32 *)kl)+INDEX_LOW)) 244f1939f7cSShane Wang #define k1 (*(((u32 *)kl)+INDEX_HIGH)) 245f1939f7cSShane Wang #define k2 (*(((u32 *)kh)+INDEX_LOW)) 246f1939f7cSShane Wang #define k3 (*(((u32 *)kh)+INDEX_HIGH)) 247f1939f7cSShane Wang 248f1939f7cSShane Wang u64 p, q, t; 249f1939f7cSShane Wang u32 t2; 250f1939f7cSShane Wang 251f1939f7cSShane Wang p = MUL32(a3, k3); 252f1939f7cSShane Wang p += p; 253f1939f7cSShane Wang p += *(u64 *)mh; 254f1939f7cSShane Wang p += MUL32(a0, k2); 255f1939f7cSShane Wang p += MUL32(a1, k1); 256f1939f7cSShane Wang p += MUL32(a2, k0); 257f1939f7cSShane Wang t = (u32)(p); 258f1939f7cSShane Wang p >>= 32; 259f1939f7cSShane Wang p += MUL32(a0, k3); 260f1939f7cSShane Wang p += MUL32(a1, k2); 261f1939f7cSShane Wang p += MUL32(a2, k1); 262f1939f7cSShane Wang p += MUL32(a3, k0); 263f1939f7cSShane Wang t |= ((u64)((u32)p & 0x7fffffff)) << 32; 264f1939f7cSShane Wang p >>= 31; 265f1939f7cSShane Wang p += (u64)(((u32 *)ml)[INDEX_LOW]); 266f1939f7cSShane Wang p += MUL32(a0, k0); 267f1939f7cSShane Wang q = MUL32(a1, k3); 268f1939f7cSShane Wang q += MUL32(a2, k2); 269f1939f7cSShane Wang q += MUL32(a3, k1); 270f1939f7cSShane Wang q += q; 271f1939f7cSShane Wang p += q; 272f1939f7cSShane Wang t2 = (u32)(p); 273f1939f7cSShane Wang p >>= 32; 274f1939f7cSShane Wang p += (u64)(((u32 *)ml)[INDEX_HIGH]); 275f1939f7cSShane Wang p += MUL32(a0, k1); 276f1939f7cSShane Wang p += MUL32(a1, k0); 277f1939f7cSShane Wang q = MUL32(a2, k3); 278f1939f7cSShane Wang q += MUL32(a3, k2); 279f1939f7cSShane Wang q += q; 280f1939f7cSShane Wang p += q; 281f1939f7cSShane Wang *(u64 *)(alo) = (p << 32) | t2; 282f1939f7cSShane Wang p >>= 32; 283f1939f7cSShane Wang *(u64 *)(ahi) = p + t; 284f1939f7cSShane Wang 285f1939f7cSShane Wang #undef a0 286f1939f7cSShane Wang #undef a1 287f1939f7cSShane Wang #undef a2 288f1939f7cSShane Wang #undef a3 289f1939f7cSShane Wang #undef k0 290f1939f7cSShane Wang #undef k1 291f1939f7cSShane Wang #undef k2 292f1939f7cSShane Wang #undef k3 293f1939f7cSShane Wang } 294f1939f7cSShane Wang 295f1939f7cSShane Wang #define poly_step(ah, al, kh, kl, mh, ml) \ 296f1939f7cSShane Wang poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml)) 297f1939f7cSShane Wang 298f1939f7cSShane Wang #endif /* end of specialized NH and poly definitions */ 299f1939f7cSShane Wang 300f1939f7cSShane Wang /* At least nh_16 is defined. Defined others as needed here */ 301f1939f7cSShane Wang #ifndef nh_16_2 302f1939f7cSShane Wang #define nh_16_2(mp, kp, nw, rh, rl, rh2, rl2) \ 303f1939f7cSShane Wang do { \ 304f1939f7cSShane Wang nh_16(mp, kp, nw, rh, rl); \ 305f1939f7cSShane Wang nh_16(mp, ((kp)+2), nw, rh2, rl2); \ 306f1939f7cSShane Wang } while (0) 307f1939f7cSShane Wang #endif 308f1939f7cSShane Wang #ifndef nh_vmac_nhbytes 309f1939f7cSShane Wang #define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \ 310f1939f7cSShane Wang nh_16(mp, kp, nw, rh, rl) 311f1939f7cSShane Wang #endif 312f1939f7cSShane Wang #ifndef nh_vmac_nhbytes_2 313f1939f7cSShane Wang #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh2, rl2) \ 314f1939f7cSShane Wang do { \ 315f1939f7cSShane Wang nh_vmac_nhbytes(mp, kp, nw, rh, rl); \ 316f1939f7cSShane Wang nh_vmac_nhbytes(mp, ((kp)+2), nw, rh2, rl2); \ 317f1939f7cSShane Wang } while (0) 318f1939f7cSShane Wang #endif 319f1939f7cSShane Wang 320f1939f7cSShane Wang static void vhash_abort(struct vmac_ctx *ctx) 321f1939f7cSShane Wang { 322f1939f7cSShane Wang ctx->polytmp[0] = ctx->polykey[0] ; 323f1939f7cSShane Wang ctx->polytmp[1] = ctx->polykey[1] ; 324f1939f7cSShane Wang ctx->first_block_processed = 0; 325f1939f7cSShane Wang } 326f1939f7cSShane Wang 327304a204eSShane Wang static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) 328f1939f7cSShane Wang { 329f1939f7cSShane Wang u64 rh, rl, t, z = 0; 330f1939f7cSShane Wang 331f1939f7cSShane Wang /* fully reduce (p1,p2)+(len,0) mod p127 */ 332f1939f7cSShane Wang t = p1 >> 63; 333f1939f7cSShane Wang p1 &= m63; 334f1939f7cSShane Wang ADD128(p1, p2, len, t); 335f1939f7cSShane Wang /* At this point, (p1,p2) is at most 2^127+(len<<64) */ 336f1939f7cSShane Wang t = (p1 > m63) + ((p1 == m63) && (p2 == m64)); 337f1939f7cSShane Wang ADD128(p1, p2, z, t); 338f1939f7cSShane Wang p1 &= m63; 339f1939f7cSShane Wang 340f1939f7cSShane Wang /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */ 341f1939f7cSShane Wang t = p1 + (p2 >> 32); 342f1939f7cSShane Wang t += (t >> 32); 343f1939f7cSShane Wang t += (u32)t > 0xfffffffeu; 344f1939f7cSShane Wang p1 += (t >> 32); 345f1939f7cSShane Wang p2 += (p1 << 32); 346f1939f7cSShane Wang 347f1939f7cSShane Wang /* compute (p1+k1)%p64 and (p2+k2)%p64 */ 348f1939f7cSShane Wang p1 += k1; 349f1939f7cSShane Wang p1 += (0 - (p1 < k1)) & 257; 350f1939f7cSShane Wang p2 += k2; 351f1939f7cSShane Wang p2 += (0 - (p2 < k2)) & 257; 352f1939f7cSShane Wang 353f1939f7cSShane Wang /* compute (p1+k1)*(p2+k2)%p64 */ 354f1939f7cSShane Wang MUL64(rh, rl, p1, p2); 355f1939f7cSShane Wang t = rh >> 56; 356f1939f7cSShane Wang ADD128(t, rl, z, rh); 357f1939f7cSShane Wang rh <<= 8; 358f1939f7cSShane Wang ADD128(t, rl, z, rh); 359f1939f7cSShane Wang t += t << 8; 360f1939f7cSShane Wang rl += t; 361f1939f7cSShane Wang rl += (0 - (rl < t)) & 257; 362f1939f7cSShane Wang rl += (0 - (rl > p64-1)) & 257; 363f1939f7cSShane Wang return rl; 364f1939f7cSShane Wang } 365f1939f7cSShane Wang 366f1939f7cSShane Wang static void vhash_update(const unsigned char *m, 367f1939f7cSShane Wang unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */ 368f1939f7cSShane Wang struct vmac_ctx *ctx) 369f1939f7cSShane Wang { 370f1939f7cSShane Wang u64 rh, rl, *mptr; 371f1939f7cSShane Wang const u64 *kptr = (u64 *)ctx->nhkey; 372f1939f7cSShane Wang int i; 373f1939f7cSShane Wang u64 ch, cl; 374f1939f7cSShane Wang u64 pkh = ctx->polykey[0]; 375f1939f7cSShane Wang u64 pkl = ctx->polykey[1]; 376f1939f7cSShane Wang 377f1939f7cSShane Wang mptr = (u64 *)m; 378f1939f7cSShane Wang i = mbytes / VMAC_NHBYTES; /* Must be non-zero */ 379f1939f7cSShane Wang 380f1939f7cSShane Wang ch = ctx->polytmp[0]; 381f1939f7cSShane Wang cl = ctx->polytmp[1]; 382f1939f7cSShane Wang 383f1939f7cSShane Wang if (!ctx->first_block_processed) { 384f1939f7cSShane Wang ctx->first_block_processed = 1; 385f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); 386f1939f7cSShane Wang rh &= m62; 387f1939f7cSShane Wang ADD128(ch, cl, rh, rl); 388f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 389f1939f7cSShane Wang i--; 390f1939f7cSShane Wang } 391f1939f7cSShane Wang 392f1939f7cSShane Wang while (i--) { 393f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); 394f1939f7cSShane Wang rh &= m62; 395f1939f7cSShane Wang poly_step(ch, cl, pkh, pkl, rh, rl); 396f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 397f1939f7cSShane Wang } 398f1939f7cSShane Wang 399f1939f7cSShane Wang ctx->polytmp[0] = ch; 400f1939f7cSShane Wang ctx->polytmp[1] = cl; 401f1939f7cSShane Wang } 402f1939f7cSShane Wang 403f1939f7cSShane Wang static u64 vhash(unsigned char m[], unsigned int mbytes, 404f1939f7cSShane Wang u64 *tagl, struct vmac_ctx *ctx) 405f1939f7cSShane Wang { 406f1939f7cSShane Wang u64 rh, rl, *mptr; 407f1939f7cSShane Wang const u64 *kptr = (u64 *)ctx->nhkey; 408f1939f7cSShane Wang int i, remaining; 409f1939f7cSShane Wang u64 ch, cl; 410f1939f7cSShane Wang u64 pkh = ctx->polykey[0]; 411f1939f7cSShane Wang u64 pkl = ctx->polykey[1]; 412f1939f7cSShane Wang 413f1939f7cSShane Wang mptr = (u64 *)m; 414f1939f7cSShane Wang i = mbytes / VMAC_NHBYTES; 415f1939f7cSShane Wang remaining = mbytes % VMAC_NHBYTES; 416f1939f7cSShane Wang 417f1939f7cSShane Wang if (ctx->first_block_processed) { 418f1939f7cSShane Wang ch = ctx->polytmp[0]; 419f1939f7cSShane Wang cl = ctx->polytmp[1]; 420f1939f7cSShane Wang } else if (i) { 421f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, ch, cl); 422f1939f7cSShane Wang ch &= m62; 423f1939f7cSShane Wang ADD128(ch, cl, pkh, pkl); 424f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 425f1939f7cSShane Wang i--; 426f1939f7cSShane Wang } else if (remaining) { 427f1939f7cSShane Wang nh_16(mptr, kptr, 2*((remaining+15)/16), ch, cl); 428f1939f7cSShane Wang ch &= m62; 429f1939f7cSShane Wang ADD128(ch, cl, pkh, pkl); 430f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 431f1939f7cSShane Wang goto do_l3; 432f1939f7cSShane Wang } else {/* Empty String */ 433f1939f7cSShane Wang ch = pkh; cl = pkl; 434f1939f7cSShane Wang goto do_l3; 435f1939f7cSShane Wang } 436f1939f7cSShane Wang 437f1939f7cSShane Wang while (i--) { 438f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); 439f1939f7cSShane Wang rh &= m62; 440f1939f7cSShane Wang poly_step(ch, cl, pkh, pkl, rh, rl); 441f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 442f1939f7cSShane Wang } 443f1939f7cSShane Wang if (remaining) { 444f1939f7cSShane Wang nh_16(mptr, kptr, 2*((remaining+15)/16), rh, rl); 445f1939f7cSShane Wang rh &= m62; 446f1939f7cSShane Wang poly_step(ch, cl, pkh, pkl, rh, rl); 447f1939f7cSShane Wang } 448f1939f7cSShane Wang 449f1939f7cSShane Wang do_l3: 450f1939f7cSShane Wang vhash_abort(ctx); 451f1939f7cSShane Wang remaining *= 8; 452f1939f7cSShane Wang return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining); 453f1939f7cSShane Wang } 454f1939f7cSShane Wang 455f1939f7cSShane Wang static u64 vmac(unsigned char m[], unsigned int mbytes, 456f1939f7cSShane Wang unsigned char n[16], u64 *tagl, 457f1939f7cSShane Wang struct vmac_ctx_t *ctx) 458f1939f7cSShane Wang { 459f1939f7cSShane Wang u64 *in_n, *out_p; 460f1939f7cSShane Wang u64 p, h; 461f1939f7cSShane Wang int i; 462f1939f7cSShane Wang 463f1939f7cSShane Wang in_n = ctx->__vmac_ctx.cached_nonce; 464f1939f7cSShane Wang out_p = ctx->__vmac_ctx.cached_aes; 465f1939f7cSShane Wang 466f1939f7cSShane Wang i = n[15] & 1; 467f1939f7cSShane Wang if ((*(u64 *)(n+8) != in_n[1]) || (*(u64 *)(n) != in_n[0])) { 468f1939f7cSShane Wang in_n[0] = *(u64 *)(n); 469f1939f7cSShane Wang in_n[1] = *(u64 *)(n+8); 470f1939f7cSShane Wang ((unsigned char *)in_n)[15] &= 0xFE; 471f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 472f1939f7cSShane Wang (unsigned char *)out_p, (unsigned char *)in_n); 473f1939f7cSShane Wang 474f1939f7cSShane Wang ((unsigned char *)in_n)[15] |= (unsigned char)(1-i); 475f1939f7cSShane Wang } 476f1939f7cSShane Wang p = be64_to_cpup(out_p + i); 477f1939f7cSShane Wang h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx); 478304a204eSShane Wang return le64_to_cpu(p + h); 479f1939f7cSShane Wang } 480f1939f7cSShane Wang 481f1939f7cSShane Wang static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx) 482f1939f7cSShane Wang { 483f1939f7cSShane Wang u64 in[2] = {0}, out[2]; 484f1939f7cSShane Wang unsigned i; 485f1939f7cSShane Wang int err = 0; 486f1939f7cSShane Wang 487f1939f7cSShane Wang err = crypto_cipher_setkey(ctx->child, user_key, VMAC_KEY_LEN); 488f1939f7cSShane Wang if (err) 489f1939f7cSShane Wang return err; 490f1939f7cSShane Wang 491f1939f7cSShane Wang /* Fill nh key */ 492f1939f7cSShane Wang ((unsigned char *)in)[0] = 0x80; 493f1939f7cSShane Wang for (i = 0; i < sizeof(ctx->__vmac_ctx.nhkey)/8; i += 2) { 494f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 495f1939f7cSShane Wang (unsigned char *)out, (unsigned char *)in); 496f1939f7cSShane Wang ctx->__vmac_ctx.nhkey[i] = be64_to_cpup(out); 497f1939f7cSShane Wang ctx->__vmac_ctx.nhkey[i+1] = be64_to_cpup(out+1); 498f1939f7cSShane Wang ((unsigned char *)in)[15] += 1; 499f1939f7cSShane Wang } 500f1939f7cSShane Wang 501f1939f7cSShane Wang /* Fill poly key */ 502f1939f7cSShane Wang ((unsigned char *)in)[0] = 0xC0; 503f1939f7cSShane Wang in[1] = 0; 504f1939f7cSShane Wang for (i = 0; i < sizeof(ctx->__vmac_ctx.polykey)/8; i += 2) { 505f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 506f1939f7cSShane Wang (unsigned char *)out, (unsigned char *)in); 507f1939f7cSShane Wang ctx->__vmac_ctx.polytmp[i] = 508f1939f7cSShane Wang ctx->__vmac_ctx.polykey[i] = 509f1939f7cSShane Wang be64_to_cpup(out) & mpoly; 510f1939f7cSShane Wang ctx->__vmac_ctx.polytmp[i+1] = 511f1939f7cSShane Wang ctx->__vmac_ctx.polykey[i+1] = 512f1939f7cSShane Wang be64_to_cpup(out+1) & mpoly; 513f1939f7cSShane Wang ((unsigned char *)in)[15] += 1; 514f1939f7cSShane Wang } 515f1939f7cSShane Wang 516f1939f7cSShane Wang /* Fill ip key */ 517f1939f7cSShane Wang ((unsigned char *)in)[0] = 0xE0; 518f1939f7cSShane Wang in[1] = 0; 519f1939f7cSShane Wang for (i = 0; i < sizeof(ctx->__vmac_ctx.l3key)/8; i += 2) { 520f1939f7cSShane Wang do { 521f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 522f1939f7cSShane Wang (unsigned char *)out, (unsigned char *)in); 523f1939f7cSShane Wang ctx->__vmac_ctx.l3key[i] = be64_to_cpup(out); 524f1939f7cSShane Wang ctx->__vmac_ctx.l3key[i+1] = be64_to_cpup(out+1); 525f1939f7cSShane Wang ((unsigned char *)in)[15] += 1; 526f1939f7cSShane Wang } while (ctx->__vmac_ctx.l3key[i] >= p64 527f1939f7cSShane Wang || ctx->__vmac_ctx.l3key[i+1] >= p64); 528f1939f7cSShane Wang } 529f1939f7cSShane Wang 530f1939f7cSShane Wang /* Invalidate nonce/aes cache and reset other elements */ 531f1939f7cSShane Wang ctx->__vmac_ctx.cached_nonce[0] = (u64)-1; /* Ensure illegal nonce */ 532f1939f7cSShane Wang ctx->__vmac_ctx.cached_nonce[1] = (u64)0; /* Ensure illegal nonce */ 533f1939f7cSShane Wang ctx->__vmac_ctx.first_block_processed = 0; 534f1939f7cSShane Wang 535f1939f7cSShane Wang return err; 536f1939f7cSShane Wang } 537f1939f7cSShane Wang 538f1939f7cSShane Wang static int vmac_setkey(struct crypto_shash *parent, 539f1939f7cSShane Wang const u8 *key, unsigned int keylen) 540f1939f7cSShane Wang { 541f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); 542f1939f7cSShane Wang 543f1939f7cSShane Wang if (keylen != VMAC_KEY_LEN) { 544f1939f7cSShane Wang crypto_shash_set_flags(parent, CRYPTO_TFM_RES_BAD_KEY_LEN); 545f1939f7cSShane Wang return -EINVAL; 546f1939f7cSShane Wang } 547f1939f7cSShane Wang 548f1939f7cSShane Wang return vmac_set_key((u8 *)key, ctx); 549f1939f7cSShane Wang } 550f1939f7cSShane Wang 551f1939f7cSShane Wang static int vmac_init(struct shash_desc *pdesc) 552f1939f7cSShane Wang { 553f1939f7cSShane Wang return 0; 554f1939f7cSShane Wang } 555f1939f7cSShane Wang 556f1939f7cSShane Wang static int vmac_update(struct shash_desc *pdesc, const u8 *p, 557f1939f7cSShane Wang unsigned int len) 558f1939f7cSShane Wang { 559f1939f7cSShane Wang struct crypto_shash *parent = pdesc->tfm; 560f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); 561f1939f7cSShane Wang 562f1939f7cSShane Wang vhash_update(p, len, &ctx->__vmac_ctx); 563f1939f7cSShane Wang 564f1939f7cSShane Wang return 0; 565f1939f7cSShane Wang } 566f1939f7cSShane Wang 567f1939f7cSShane Wang static int vmac_final(struct shash_desc *pdesc, u8 *out) 568f1939f7cSShane Wang { 569f1939f7cSShane Wang struct crypto_shash *parent = pdesc->tfm; 570f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); 571f1939f7cSShane Wang vmac_t mac; 572f1939f7cSShane Wang u8 nonce[16] = {}; 573f1939f7cSShane Wang 574f1939f7cSShane Wang mac = vmac(NULL, 0, nonce, NULL, ctx); 575f1939f7cSShane Wang memcpy(out, &mac, sizeof(vmac_t)); 576f1939f7cSShane Wang memset(&mac, 0, sizeof(vmac_t)); 577f1939f7cSShane Wang memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); 578f1939f7cSShane Wang return 0; 579f1939f7cSShane Wang } 580f1939f7cSShane Wang 581f1939f7cSShane Wang static int vmac_init_tfm(struct crypto_tfm *tfm) 582f1939f7cSShane Wang { 583f1939f7cSShane Wang struct crypto_cipher *cipher; 584f1939f7cSShane Wang struct crypto_instance *inst = (void *)tfm->__crt_alg; 585f1939f7cSShane Wang struct crypto_spawn *spawn = crypto_instance_ctx(inst); 586f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); 587f1939f7cSShane Wang 588f1939f7cSShane Wang cipher = crypto_spawn_cipher(spawn); 589f1939f7cSShane Wang if (IS_ERR(cipher)) 590f1939f7cSShane Wang return PTR_ERR(cipher); 591f1939f7cSShane Wang 592f1939f7cSShane Wang ctx->child = cipher; 593f1939f7cSShane Wang return 0; 594f1939f7cSShane Wang } 595f1939f7cSShane Wang 596f1939f7cSShane Wang static void vmac_exit_tfm(struct crypto_tfm *tfm) 597f1939f7cSShane Wang { 598f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); 599f1939f7cSShane Wang crypto_free_cipher(ctx->child); 600f1939f7cSShane Wang } 601f1939f7cSShane Wang 602f1939f7cSShane Wang static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) 603f1939f7cSShane Wang { 604f1939f7cSShane Wang struct shash_instance *inst; 605f1939f7cSShane Wang struct crypto_alg *alg; 606f1939f7cSShane Wang int err; 607f1939f7cSShane Wang 608f1939f7cSShane Wang err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH); 609f1939f7cSShane Wang if (err) 610f1939f7cSShane Wang return err; 611f1939f7cSShane Wang 612f1939f7cSShane Wang alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, 613f1939f7cSShane Wang CRYPTO_ALG_TYPE_MASK); 614f1939f7cSShane Wang if (IS_ERR(alg)) 615f1939f7cSShane Wang return PTR_ERR(alg); 616f1939f7cSShane Wang 617f1939f7cSShane Wang inst = shash_alloc_instance("vmac", alg); 618f1939f7cSShane Wang err = PTR_ERR(inst); 619f1939f7cSShane Wang if (IS_ERR(inst)) 620f1939f7cSShane Wang goto out_put_alg; 621f1939f7cSShane Wang 622f1939f7cSShane Wang err = crypto_init_spawn(shash_instance_ctx(inst), alg, 623f1939f7cSShane Wang shash_crypto_instance(inst), 624f1939f7cSShane Wang CRYPTO_ALG_TYPE_MASK); 625f1939f7cSShane Wang if (err) 626f1939f7cSShane Wang goto out_free_inst; 627f1939f7cSShane Wang 628f1939f7cSShane Wang inst->alg.base.cra_priority = alg->cra_priority; 629f1939f7cSShane Wang inst->alg.base.cra_blocksize = alg->cra_blocksize; 630f1939f7cSShane Wang inst->alg.base.cra_alignmask = alg->cra_alignmask; 631f1939f7cSShane Wang 632f1939f7cSShane Wang inst->alg.digestsize = sizeof(vmac_t); 633f1939f7cSShane Wang inst->alg.base.cra_ctxsize = sizeof(struct vmac_ctx_t); 634f1939f7cSShane Wang inst->alg.base.cra_init = vmac_init_tfm; 635f1939f7cSShane Wang inst->alg.base.cra_exit = vmac_exit_tfm; 636f1939f7cSShane Wang 637f1939f7cSShane Wang inst->alg.init = vmac_init; 638f1939f7cSShane Wang inst->alg.update = vmac_update; 639f1939f7cSShane Wang inst->alg.final = vmac_final; 640f1939f7cSShane Wang inst->alg.setkey = vmac_setkey; 641f1939f7cSShane Wang 642f1939f7cSShane Wang err = shash_register_instance(tmpl, inst); 643f1939f7cSShane Wang if (err) { 644f1939f7cSShane Wang out_free_inst: 645f1939f7cSShane Wang shash_free_instance(shash_crypto_instance(inst)); 646f1939f7cSShane Wang } 647f1939f7cSShane Wang 648f1939f7cSShane Wang out_put_alg: 649f1939f7cSShane Wang crypto_mod_put(alg); 650f1939f7cSShane Wang return err; 651f1939f7cSShane Wang } 652f1939f7cSShane Wang 653f1939f7cSShane Wang static struct crypto_template vmac_tmpl = { 654f1939f7cSShane Wang .name = "vmac", 655f1939f7cSShane Wang .create = vmac_create, 656f1939f7cSShane Wang .free = shash_free_instance, 657f1939f7cSShane Wang .module = THIS_MODULE, 658f1939f7cSShane Wang }; 659f1939f7cSShane Wang 660f1939f7cSShane Wang static int __init vmac_module_init(void) 661f1939f7cSShane Wang { 662f1939f7cSShane Wang return crypto_register_template(&vmac_tmpl); 663f1939f7cSShane Wang } 664f1939f7cSShane Wang 665f1939f7cSShane Wang static void __exit vmac_module_exit(void) 666f1939f7cSShane Wang { 667f1939f7cSShane Wang crypto_unregister_template(&vmac_tmpl); 668f1939f7cSShane Wang } 669f1939f7cSShane Wang 670f1939f7cSShane Wang module_init(vmac_module_init); 671f1939f7cSShane Wang module_exit(vmac_module_exit); 672f1939f7cSShane Wang 673f1939f7cSShane Wang MODULE_LICENSE("GPL"); 674f1939f7cSShane Wang MODULE_DESCRIPTION("VMAC hash algorithm"); 675f1939f7cSShane Wang 676