1f1939f7cSShane Wang /* 2f1939f7cSShane Wang * Modified to interface to the Linux kernel 3f1939f7cSShane Wang * Copyright (c) 2009, Intel Corporation. 4f1939f7cSShane Wang * 5f1939f7cSShane Wang * This program is free software; you can redistribute it and/or modify it 6f1939f7cSShane Wang * under the terms and conditions of the GNU General Public License, 7f1939f7cSShane Wang * version 2, as published by the Free Software Foundation. 8f1939f7cSShane Wang * 9f1939f7cSShane Wang * This program is distributed in the hope it will be useful, but WITHOUT 10f1939f7cSShane Wang * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11f1939f7cSShane Wang * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12f1939f7cSShane Wang * more details. 13f1939f7cSShane Wang * 14f1939f7cSShane Wang * You should have received a copy of the GNU General Public License along with 15f1939f7cSShane Wang * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16f1939f7cSShane Wang * Place - Suite 330, Boston, MA 02111-1307 USA. 17f1939f7cSShane Wang */ 18f1939f7cSShane Wang 19f1939f7cSShane Wang /* -------------------------------------------------------------------------- 20f1939f7cSShane Wang * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. 21f1939f7cSShane Wang * This implementation is herby placed in the public domain. 22f1939f7cSShane Wang * The authors offers no warranty. Use at your own risk. 23f1939f7cSShane Wang * Please send bug reports to the authors. 24f1939f7cSShane Wang * Last modified: 17 APR 08, 1700 PDT 25f1939f7cSShane Wang * ----------------------------------------------------------------------- */ 26f1939f7cSShane Wang 27f1939f7cSShane Wang #include <linux/init.h> 28f1939f7cSShane Wang #include <linux/types.h> 29f1939f7cSShane Wang #include <linux/crypto.h> 304bb33cc8SPaul Gortmaker #include <linux/module.h> 31f1939f7cSShane Wang #include <linux/scatterlist.h> 32f1939f7cSShane Wang #include <asm/byteorder.h> 33f1939f7cSShane Wang #include <crypto/scatterwalk.h> 34f1939f7cSShane Wang #include <crypto/vmac.h> 35f1939f7cSShane Wang #include <crypto/internal/hash.h> 36f1939f7cSShane Wang 37f1939f7cSShane Wang /* 38f1939f7cSShane Wang * Constants and masks 39f1939f7cSShane Wang */ 40f1939f7cSShane Wang #define UINT64_C(x) x##ULL 4166ce0b0fSJussi Kivilinna static const u64 p64 = UINT64_C(0xfffffffffffffeff); /* 2^64 - 257 prime */ 4266ce0b0fSJussi Kivilinna static const u64 m62 = UINT64_C(0x3fffffffffffffff); /* 62-bit mask */ 4366ce0b0fSJussi Kivilinna static const u64 m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */ 4466ce0b0fSJussi Kivilinna static const u64 m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */ 4566ce0b0fSJussi Kivilinna static const u64 mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ 46f1939f7cSShane Wang 47304a204eSShane Wang #define pe64_to_cpup le64_to_cpup /* Prefer little endian */ 48304a204eSShane Wang 49f1939f7cSShane Wang #ifdef __LITTLE_ENDIAN 50f1939f7cSShane Wang #define INDEX_HIGH 1 51f1939f7cSShane Wang #define INDEX_LOW 0 52f1939f7cSShane Wang #else 53f1939f7cSShane Wang #define INDEX_HIGH 0 54f1939f7cSShane Wang #define INDEX_LOW 1 55f1939f7cSShane Wang #endif 56f1939f7cSShane Wang 57f1939f7cSShane Wang /* 58f1939f7cSShane Wang * The following routines are used in this implementation. They are 59f1939f7cSShane Wang * written via macros to simulate zero-overhead call-by-reference. 60f1939f7cSShane Wang * 61f1939f7cSShane Wang * MUL64: 64x64->128-bit multiplication 62f1939f7cSShane Wang * PMUL64: assumes top bits cleared on inputs 63f1939f7cSShane Wang * ADD128: 128x128->128-bit addition 64f1939f7cSShane Wang */ 65f1939f7cSShane Wang 66f1939f7cSShane Wang #define ADD128(rh, rl, ih, il) \ 67f1939f7cSShane Wang do { \ 68f1939f7cSShane Wang u64 _il = (il); \ 69f1939f7cSShane Wang (rl) += (_il); \ 70f1939f7cSShane Wang if ((rl) < (_il)) \ 71f1939f7cSShane Wang (rh)++; \ 72f1939f7cSShane Wang (rh) += (ih); \ 73f1939f7cSShane Wang } while (0) 74f1939f7cSShane Wang 75f1939f7cSShane Wang #define MUL32(i1, i2) ((u64)(u32)(i1)*(u32)(i2)) 76f1939f7cSShane Wang 77f1939f7cSShane Wang #define PMUL64(rh, rl, i1, i2) /* Assumes m doesn't overflow */ \ 78f1939f7cSShane Wang do { \ 79f1939f7cSShane Wang u64 _i1 = (i1), _i2 = (i2); \ 80f1939f7cSShane Wang u64 m = MUL32(_i1, _i2>>32) + MUL32(_i1>>32, _i2); \ 81f1939f7cSShane Wang rh = MUL32(_i1>>32, _i2>>32); \ 82f1939f7cSShane Wang rl = MUL32(_i1, _i2); \ 83f1939f7cSShane Wang ADD128(rh, rl, (m >> 32), (m << 32)); \ 84f1939f7cSShane Wang } while (0) 85f1939f7cSShane Wang 86f1939f7cSShane Wang #define MUL64(rh, rl, i1, i2) \ 87f1939f7cSShane Wang do { \ 88f1939f7cSShane Wang u64 _i1 = (i1), _i2 = (i2); \ 89f1939f7cSShane Wang u64 m1 = MUL32(_i1, _i2>>32); \ 90f1939f7cSShane Wang u64 m2 = MUL32(_i1>>32, _i2); \ 91f1939f7cSShane Wang rh = MUL32(_i1>>32, _i2>>32); \ 92f1939f7cSShane Wang rl = MUL32(_i1, _i2); \ 93f1939f7cSShane Wang ADD128(rh, rl, (m1 >> 32), (m1 << 32)); \ 94f1939f7cSShane Wang ADD128(rh, rl, (m2 >> 32), (m2 << 32)); \ 95f1939f7cSShane Wang } while (0) 96f1939f7cSShane Wang 97f1939f7cSShane Wang /* 98f1939f7cSShane Wang * For highest performance the L1 NH and L2 polynomial hashes should be 9925985edcSLucas De Marchi * carefully implemented to take advantage of one's target architecture. 100f1939f7cSShane Wang * Here these two hash functions are defined multiple time; once for 101f1939f7cSShane Wang * 64-bit architectures, once for 32-bit SSE2 architectures, and once 102f1939f7cSShane Wang * for the rest (32-bit) architectures. 103f1939f7cSShane Wang * For each, nh_16 *must* be defined (works on multiples of 16 bytes). 104f1939f7cSShane Wang * Optionally, nh_vmac_nhbytes can be defined (for multiples of 105f1939f7cSShane Wang * VMAC_NHBYTES), and nh_16_2 and nh_vmac_nhbytes_2 (versions that do two 106f1939f7cSShane Wang * NH computations at once). 107f1939f7cSShane Wang */ 108f1939f7cSShane Wang 109f1939f7cSShane Wang #ifdef CONFIG_64BIT 110f1939f7cSShane Wang 111f1939f7cSShane Wang #define nh_16(mp, kp, nw, rh, rl) \ 112f1939f7cSShane Wang do { \ 113f1939f7cSShane Wang int i; u64 th, tl; \ 114f1939f7cSShane Wang rh = rl = 0; \ 115f1939f7cSShane Wang for (i = 0; i < nw; i += 2) { \ 116304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 117304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 118f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 119f1939f7cSShane Wang } \ 120f1939f7cSShane Wang } while (0) 121f1939f7cSShane Wang 122f1939f7cSShane Wang #define nh_16_2(mp, kp, nw, rh, rl, rh1, rl1) \ 123f1939f7cSShane Wang do { \ 124f1939f7cSShane Wang int i; u64 th, tl; \ 125f1939f7cSShane Wang rh1 = rl1 = rh = rl = 0; \ 126f1939f7cSShane Wang for (i = 0; i < nw; i += 2) { \ 127304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 128304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 129f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 130304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ 131304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ 132f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 133f1939f7cSShane Wang } \ 134f1939f7cSShane Wang } while (0) 135f1939f7cSShane Wang 136f1939f7cSShane Wang #if (VMAC_NHBYTES >= 64) /* These versions do 64-bytes of message at a time */ 137f1939f7cSShane Wang #define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \ 138f1939f7cSShane Wang do { \ 139f1939f7cSShane Wang int i; u64 th, tl; \ 140f1939f7cSShane Wang rh = rl = 0; \ 141f1939f7cSShane Wang for (i = 0; i < nw; i += 8) { \ 142304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 143304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 144f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 145304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ 146304a204eSShane Wang pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ 147f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 148304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ 149304a204eSShane Wang pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ 150f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 151304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ 152304a204eSShane Wang pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ 153f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 154f1939f7cSShane Wang } \ 155f1939f7cSShane Wang } while (0) 156f1939f7cSShane Wang 157f1939f7cSShane Wang #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh1, rl1) \ 158f1939f7cSShane Wang do { \ 159f1939f7cSShane Wang int i; u64 th, tl; \ 160f1939f7cSShane Wang rh1 = rl1 = rh = rl = 0; \ 161f1939f7cSShane Wang for (i = 0; i < nw; i += 8) { \ 162304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \ 163304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+1]); \ 164f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 165304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2], \ 166304a204eSShane Wang pe64_to_cpup((mp)+i+1)+(kp)[i+3]); \ 167f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 168304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \ 169304a204eSShane Wang pe64_to_cpup((mp)+i+3)+(kp)[i+3]); \ 170f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 171304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+4], \ 172304a204eSShane Wang pe64_to_cpup((mp)+i+3)+(kp)[i+5]); \ 173f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 174304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \ 175304a204eSShane Wang pe64_to_cpup((mp)+i+5)+(kp)[i+5]); \ 176f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 177304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+6], \ 178304a204eSShane Wang pe64_to_cpup((mp)+i+5)+(kp)[i+7]); \ 179f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 180304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \ 181304a204eSShane Wang pe64_to_cpup((mp)+i+7)+(kp)[i+7]); \ 182f1939f7cSShane Wang ADD128(rh, rl, th, tl); \ 183304a204eSShane Wang MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+8], \ 184304a204eSShane Wang pe64_to_cpup((mp)+i+7)+(kp)[i+9]); \ 185f1939f7cSShane Wang ADD128(rh1, rl1, th, tl); \ 186f1939f7cSShane Wang } \ 187f1939f7cSShane Wang } while (0) 188f1939f7cSShane Wang #endif 189f1939f7cSShane Wang 190f1939f7cSShane Wang #define poly_step(ah, al, kh, kl, mh, ml) \ 191f1939f7cSShane Wang do { \ 192f1939f7cSShane Wang u64 t1h, t1l, t2h, t2l, t3h, t3l, z = 0; \ 193f1939f7cSShane Wang /* compute ab*cd, put bd into result registers */ \ 194f1939f7cSShane Wang PMUL64(t3h, t3l, al, kh); \ 195f1939f7cSShane Wang PMUL64(t2h, t2l, ah, kl); \ 196f1939f7cSShane Wang PMUL64(t1h, t1l, ah, 2*kh); \ 197f1939f7cSShane Wang PMUL64(ah, al, al, kl); \ 198f1939f7cSShane Wang /* add 2 * ac to result */ \ 199f1939f7cSShane Wang ADD128(ah, al, t1h, t1l); \ 200f1939f7cSShane Wang /* add together ad + bc */ \ 201f1939f7cSShane Wang ADD128(t2h, t2l, t3h, t3l); \ 202f1939f7cSShane Wang /* now (ah,al), (t2l,2*t2h) need summing */ \ 203f1939f7cSShane Wang /* first add the high registers, carrying into t2h */ \ 204f1939f7cSShane Wang ADD128(t2h, ah, z, t2l); \ 205f1939f7cSShane Wang /* double t2h and add top bit of ah */ \ 206f1939f7cSShane Wang t2h = 2 * t2h + (ah >> 63); \ 207f1939f7cSShane Wang ah &= m63; \ 208f1939f7cSShane Wang /* now add the low registers */ \ 209f1939f7cSShane Wang ADD128(ah, al, mh, ml); \ 210f1939f7cSShane Wang ADD128(ah, al, z, t2h); \ 211f1939f7cSShane Wang } while (0) 212f1939f7cSShane Wang 213f1939f7cSShane Wang #else /* ! CONFIG_64BIT */ 214f1939f7cSShane Wang 215f1939f7cSShane Wang #ifndef nh_16 216f1939f7cSShane Wang #define nh_16(mp, kp, nw, rh, rl) \ 217f1939f7cSShane Wang do { \ 218f1939f7cSShane Wang u64 t1, t2, m1, m2, t; \ 219f1939f7cSShane Wang int i; \ 220f1939f7cSShane Wang rh = rl = t = 0; \ 221f1939f7cSShane Wang for (i = 0; i < nw; i += 2) { \ 222304a204eSShane Wang t1 = pe64_to_cpup(mp+i) + kp[i]; \ 223304a204eSShane Wang t2 = pe64_to_cpup(mp+i+1) + kp[i+1]; \ 224f1939f7cSShane Wang m2 = MUL32(t1 >> 32, t2); \ 225f1939f7cSShane Wang m1 = MUL32(t1, t2 >> 32); \ 226f1939f7cSShane Wang ADD128(rh, rl, MUL32(t1 >> 32, t2 >> 32), \ 227f1939f7cSShane Wang MUL32(t1, t2)); \ 228f1939f7cSShane Wang rh += (u64)(u32)(m1 >> 32) \ 229f1939f7cSShane Wang + (u32)(m2 >> 32); \ 230f1939f7cSShane Wang t += (u64)(u32)m1 + (u32)m2; \ 231f1939f7cSShane Wang } \ 232f1939f7cSShane Wang ADD128(rh, rl, (t >> 32), (t << 32)); \ 233f1939f7cSShane Wang } while (0) 234f1939f7cSShane Wang #endif 235f1939f7cSShane Wang 236f1939f7cSShane Wang static void poly_step_func(u64 *ahi, u64 *alo, 237f1939f7cSShane Wang const u64 *kh, const u64 *kl, 238f1939f7cSShane Wang const u64 *mh, const u64 *ml) 239f1939f7cSShane Wang { 240f1939f7cSShane Wang #define a0 (*(((u32 *)alo)+INDEX_LOW)) 241f1939f7cSShane Wang #define a1 (*(((u32 *)alo)+INDEX_HIGH)) 242f1939f7cSShane Wang #define a2 (*(((u32 *)ahi)+INDEX_LOW)) 243f1939f7cSShane Wang #define a3 (*(((u32 *)ahi)+INDEX_HIGH)) 244f1939f7cSShane Wang #define k0 (*(((u32 *)kl)+INDEX_LOW)) 245f1939f7cSShane Wang #define k1 (*(((u32 *)kl)+INDEX_HIGH)) 246f1939f7cSShane Wang #define k2 (*(((u32 *)kh)+INDEX_LOW)) 247f1939f7cSShane Wang #define k3 (*(((u32 *)kh)+INDEX_HIGH)) 248f1939f7cSShane Wang 249f1939f7cSShane Wang u64 p, q, t; 250f1939f7cSShane Wang u32 t2; 251f1939f7cSShane Wang 252f1939f7cSShane Wang p = MUL32(a3, k3); 253f1939f7cSShane Wang p += p; 254f1939f7cSShane Wang p += *(u64 *)mh; 255f1939f7cSShane Wang p += MUL32(a0, k2); 256f1939f7cSShane Wang p += MUL32(a1, k1); 257f1939f7cSShane Wang p += MUL32(a2, k0); 258f1939f7cSShane Wang t = (u32)(p); 259f1939f7cSShane Wang p >>= 32; 260f1939f7cSShane Wang p += MUL32(a0, k3); 261f1939f7cSShane Wang p += MUL32(a1, k2); 262f1939f7cSShane Wang p += MUL32(a2, k1); 263f1939f7cSShane Wang p += MUL32(a3, k0); 264f1939f7cSShane Wang t |= ((u64)((u32)p & 0x7fffffff)) << 32; 265f1939f7cSShane Wang p >>= 31; 266f1939f7cSShane Wang p += (u64)(((u32 *)ml)[INDEX_LOW]); 267f1939f7cSShane Wang p += MUL32(a0, k0); 268f1939f7cSShane Wang q = MUL32(a1, k3); 269f1939f7cSShane Wang q += MUL32(a2, k2); 270f1939f7cSShane Wang q += MUL32(a3, k1); 271f1939f7cSShane Wang q += q; 272f1939f7cSShane Wang p += q; 273f1939f7cSShane Wang t2 = (u32)(p); 274f1939f7cSShane Wang p >>= 32; 275f1939f7cSShane Wang p += (u64)(((u32 *)ml)[INDEX_HIGH]); 276f1939f7cSShane Wang p += MUL32(a0, k1); 277f1939f7cSShane Wang p += MUL32(a1, k0); 278f1939f7cSShane Wang q = MUL32(a2, k3); 279f1939f7cSShane Wang q += MUL32(a3, k2); 280f1939f7cSShane Wang q += q; 281f1939f7cSShane Wang p += q; 282f1939f7cSShane Wang *(u64 *)(alo) = (p << 32) | t2; 283f1939f7cSShane Wang p >>= 32; 284f1939f7cSShane Wang *(u64 *)(ahi) = p + t; 285f1939f7cSShane Wang 286f1939f7cSShane Wang #undef a0 287f1939f7cSShane Wang #undef a1 288f1939f7cSShane Wang #undef a2 289f1939f7cSShane Wang #undef a3 290f1939f7cSShane Wang #undef k0 291f1939f7cSShane Wang #undef k1 292f1939f7cSShane Wang #undef k2 293f1939f7cSShane Wang #undef k3 294f1939f7cSShane Wang } 295f1939f7cSShane Wang 296f1939f7cSShane Wang #define poly_step(ah, al, kh, kl, mh, ml) \ 297f1939f7cSShane Wang poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml)) 298f1939f7cSShane Wang 299f1939f7cSShane Wang #endif /* end of specialized NH and poly definitions */ 300f1939f7cSShane Wang 301f1939f7cSShane Wang /* At least nh_16 is defined. Defined others as needed here */ 302f1939f7cSShane Wang #ifndef nh_16_2 303f1939f7cSShane Wang #define nh_16_2(mp, kp, nw, rh, rl, rh2, rl2) \ 304f1939f7cSShane Wang do { \ 305f1939f7cSShane Wang nh_16(mp, kp, nw, rh, rl); \ 306f1939f7cSShane Wang nh_16(mp, ((kp)+2), nw, rh2, rl2); \ 307f1939f7cSShane Wang } while (0) 308f1939f7cSShane Wang #endif 309f1939f7cSShane Wang #ifndef nh_vmac_nhbytes 310f1939f7cSShane Wang #define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \ 311f1939f7cSShane Wang nh_16(mp, kp, nw, rh, rl) 312f1939f7cSShane Wang #endif 313f1939f7cSShane Wang #ifndef nh_vmac_nhbytes_2 314f1939f7cSShane Wang #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh2, rl2) \ 315f1939f7cSShane Wang do { \ 316f1939f7cSShane Wang nh_vmac_nhbytes(mp, kp, nw, rh, rl); \ 317f1939f7cSShane Wang nh_vmac_nhbytes(mp, ((kp)+2), nw, rh2, rl2); \ 318f1939f7cSShane Wang } while (0) 319f1939f7cSShane Wang #endif 320f1939f7cSShane Wang 321f1939f7cSShane Wang static void vhash_abort(struct vmac_ctx *ctx) 322f1939f7cSShane Wang { 323f1939f7cSShane Wang ctx->polytmp[0] = ctx->polykey[0] ; 324f1939f7cSShane Wang ctx->polytmp[1] = ctx->polykey[1] ; 325f1939f7cSShane Wang ctx->first_block_processed = 0; 326f1939f7cSShane Wang } 327f1939f7cSShane Wang 328304a204eSShane Wang static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) 329f1939f7cSShane Wang { 330f1939f7cSShane Wang u64 rh, rl, t, z = 0; 331f1939f7cSShane Wang 332f1939f7cSShane Wang /* fully reduce (p1,p2)+(len,0) mod p127 */ 333f1939f7cSShane Wang t = p1 >> 63; 334f1939f7cSShane Wang p1 &= m63; 335f1939f7cSShane Wang ADD128(p1, p2, len, t); 336f1939f7cSShane Wang /* At this point, (p1,p2) is at most 2^127+(len<<64) */ 337f1939f7cSShane Wang t = (p1 > m63) + ((p1 == m63) && (p2 == m64)); 338f1939f7cSShane Wang ADD128(p1, p2, z, t); 339f1939f7cSShane Wang p1 &= m63; 340f1939f7cSShane Wang 341f1939f7cSShane Wang /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */ 342f1939f7cSShane Wang t = p1 + (p2 >> 32); 343f1939f7cSShane Wang t += (t >> 32); 344f1939f7cSShane Wang t += (u32)t > 0xfffffffeu; 345f1939f7cSShane Wang p1 += (t >> 32); 346f1939f7cSShane Wang p2 += (p1 << 32); 347f1939f7cSShane Wang 348f1939f7cSShane Wang /* compute (p1+k1)%p64 and (p2+k2)%p64 */ 349f1939f7cSShane Wang p1 += k1; 350f1939f7cSShane Wang p1 += (0 - (p1 < k1)) & 257; 351f1939f7cSShane Wang p2 += k2; 352f1939f7cSShane Wang p2 += (0 - (p2 < k2)) & 257; 353f1939f7cSShane Wang 354f1939f7cSShane Wang /* compute (p1+k1)*(p2+k2)%p64 */ 355f1939f7cSShane Wang MUL64(rh, rl, p1, p2); 356f1939f7cSShane Wang t = rh >> 56; 357f1939f7cSShane Wang ADD128(t, rl, z, rh); 358f1939f7cSShane Wang rh <<= 8; 359f1939f7cSShane Wang ADD128(t, rl, z, rh); 360f1939f7cSShane Wang t += t << 8; 361f1939f7cSShane Wang rl += t; 362f1939f7cSShane Wang rl += (0 - (rl < t)) & 257; 363f1939f7cSShane Wang rl += (0 - (rl > p64-1)) & 257; 364f1939f7cSShane Wang return rl; 365f1939f7cSShane Wang } 366f1939f7cSShane Wang 367f1939f7cSShane Wang static void vhash_update(const unsigned char *m, 368f1939f7cSShane Wang unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */ 369f1939f7cSShane Wang struct vmac_ctx *ctx) 370f1939f7cSShane Wang { 371f1939f7cSShane Wang u64 rh, rl, *mptr; 372f1939f7cSShane Wang const u64 *kptr = (u64 *)ctx->nhkey; 373f1939f7cSShane Wang int i; 374f1939f7cSShane Wang u64 ch, cl; 375f1939f7cSShane Wang u64 pkh = ctx->polykey[0]; 376f1939f7cSShane Wang u64 pkl = ctx->polykey[1]; 377f1939f7cSShane Wang 378f1939f7cSShane Wang mptr = (u64 *)m; 379f1939f7cSShane Wang i = mbytes / VMAC_NHBYTES; /* Must be non-zero */ 380f1939f7cSShane Wang 381f1939f7cSShane Wang ch = ctx->polytmp[0]; 382f1939f7cSShane Wang cl = ctx->polytmp[1]; 383f1939f7cSShane Wang 384f1939f7cSShane Wang if (!ctx->first_block_processed) { 385f1939f7cSShane Wang ctx->first_block_processed = 1; 386f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); 387f1939f7cSShane Wang rh &= m62; 388f1939f7cSShane Wang ADD128(ch, cl, rh, rl); 389f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 390f1939f7cSShane Wang i--; 391f1939f7cSShane Wang } 392f1939f7cSShane Wang 393f1939f7cSShane Wang while (i--) { 394f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); 395f1939f7cSShane Wang rh &= m62; 396f1939f7cSShane Wang poly_step(ch, cl, pkh, pkl, rh, rl); 397f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 398f1939f7cSShane Wang } 399f1939f7cSShane Wang 400f1939f7cSShane Wang ctx->polytmp[0] = ch; 401f1939f7cSShane Wang ctx->polytmp[1] = cl; 402f1939f7cSShane Wang } 403f1939f7cSShane Wang 404f1939f7cSShane Wang static u64 vhash(unsigned char m[], unsigned int mbytes, 405f1939f7cSShane Wang u64 *tagl, struct vmac_ctx *ctx) 406f1939f7cSShane Wang { 407f1939f7cSShane Wang u64 rh, rl, *mptr; 408f1939f7cSShane Wang const u64 *kptr = (u64 *)ctx->nhkey; 409f1939f7cSShane Wang int i, remaining; 410f1939f7cSShane Wang u64 ch, cl; 411f1939f7cSShane Wang u64 pkh = ctx->polykey[0]; 412f1939f7cSShane Wang u64 pkl = ctx->polykey[1]; 413f1939f7cSShane Wang 414f1939f7cSShane Wang mptr = (u64 *)m; 415f1939f7cSShane Wang i = mbytes / VMAC_NHBYTES; 416f1939f7cSShane Wang remaining = mbytes % VMAC_NHBYTES; 417f1939f7cSShane Wang 418f1939f7cSShane Wang if (ctx->first_block_processed) { 419f1939f7cSShane Wang ch = ctx->polytmp[0]; 420f1939f7cSShane Wang cl = ctx->polytmp[1]; 421f1939f7cSShane Wang } else if (i) { 422f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, ch, cl); 423f1939f7cSShane Wang ch &= m62; 424f1939f7cSShane Wang ADD128(ch, cl, pkh, pkl); 425f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 426f1939f7cSShane Wang i--; 427f1939f7cSShane Wang } else if (remaining) { 428f1939f7cSShane Wang nh_16(mptr, kptr, 2*((remaining+15)/16), ch, cl); 429f1939f7cSShane Wang ch &= m62; 430f1939f7cSShane Wang ADD128(ch, cl, pkh, pkl); 431f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 432f1939f7cSShane Wang goto do_l3; 433f1939f7cSShane Wang } else {/* Empty String */ 434f1939f7cSShane Wang ch = pkh; cl = pkl; 435f1939f7cSShane Wang goto do_l3; 436f1939f7cSShane Wang } 437f1939f7cSShane Wang 438f1939f7cSShane Wang while (i--) { 439f1939f7cSShane Wang nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl); 440f1939f7cSShane Wang rh &= m62; 441f1939f7cSShane Wang poly_step(ch, cl, pkh, pkl, rh, rl); 442f1939f7cSShane Wang mptr += (VMAC_NHBYTES/sizeof(u64)); 443f1939f7cSShane Wang } 444f1939f7cSShane Wang if (remaining) { 445f1939f7cSShane Wang nh_16(mptr, kptr, 2*((remaining+15)/16), rh, rl); 446f1939f7cSShane Wang rh &= m62; 447f1939f7cSShane Wang poly_step(ch, cl, pkh, pkl, rh, rl); 448f1939f7cSShane Wang } 449f1939f7cSShane Wang 450f1939f7cSShane Wang do_l3: 451f1939f7cSShane Wang vhash_abort(ctx); 452f1939f7cSShane Wang remaining *= 8; 453f1939f7cSShane Wang return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining); 454f1939f7cSShane Wang } 455f1939f7cSShane Wang 456f1939f7cSShane Wang static u64 vmac(unsigned char m[], unsigned int mbytes, 457f1939f7cSShane Wang unsigned char n[16], u64 *tagl, 458f1939f7cSShane Wang struct vmac_ctx_t *ctx) 459f1939f7cSShane Wang { 460f1939f7cSShane Wang u64 *in_n, *out_p; 461f1939f7cSShane Wang u64 p, h; 462f1939f7cSShane Wang int i; 463f1939f7cSShane Wang 464f1939f7cSShane Wang in_n = ctx->__vmac_ctx.cached_nonce; 465f1939f7cSShane Wang out_p = ctx->__vmac_ctx.cached_aes; 466f1939f7cSShane Wang 467f1939f7cSShane Wang i = n[15] & 1; 468f1939f7cSShane Wang if ((*(u64 *)(n+8) != in_n[1]) || (*(u64 *)(n) != in_n[0])) { 469f1939f7cSShane Wang in_n[0] = *(u64 *)(n); 470f1939f7cSShane Wang in_n[1] = *(u64 *)(n+8); 471f1939f7cSShane Wang ((unsigned char *)in_n)[15] &= 0xFE; 472f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 473f1939f7cSShane Wang (unsigned char *)out_p, (unsigned char *)in_n); 474f1939f7cSShane Wang 475f1939f7cSShane Wang ((unsigned char *)in_n)[15] |= (unsigned char)(1-i); 476f1939f7cSShane Wang } 477f1939f7cSShane Wang p = be64_to_cpup(out_p + i); 478f1939f7cSShane Wang h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx); 479304a204eSShane Wang return le64_to_cpu(p + h); 480f1939f7cSShane Wang } 481f1939f7cSShane Wang 482f1939f7cSShane Wang static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx) 483f1939f7cSShane Wang { 484f1939f7cSShane Wang u64 in[2] = {0}, out[2]; 485f1939f7cSShane Wang unsigned i; 486f1939f7cSShane Wang int err = 0; 487f1939f7cSShane Wang 488f1939f7cSShane Wang err = crypto_cipher_setkey(ctx->child, user_key, VMAC_KEY_LEN); 489f1939f7cSShane Wang if (err) 490f1939f7cSShane Wang return err; 491f1939f7cSShane Wang 492f1939f7cSShane Wang /* Fill nh key */ 493f1939f7cSShane Wang ((unsigned char *)in)[0] = 0x80; 494f1939f7cSShane Wang for (i = 0; i < sizeof(ctx->__vmac_ctx.nhkey)/8; i += 2) { 495f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 496f1939f7cSShane Wang (unsigned char *)out, (unsigned char *)in); 497f1939f7cSShane Wang ctx->__vmac_ctx.nhkey[i] = be64_to_cpup(out); 498f1939f7cSShane Wang ctx->__vmac_ctx.nhkey[i+1] = be64_to_cpup(out+1); 499f1939f7cSShane Wang ((unsigned char *)in)[15] += 1; 500f1939f7cSShane Wang } 501f1939f7cSShane Wang 502f1939f7cSShane Wang /* Fill poly key */ 503f1939f7cSShane Wang ((unsigned char *)in)[0] = 0xC0; 504f1939f7cSShane Wang in[1] = 0; 505f1939f7cSShane Wang for (i = 0; i < sizeof(ctx->__vmac_ctx.polykey)/8; i += 2) { 506f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 507f1939f7cSShane Wang (unsigned char *)out, (unsigned char *)in); 508f1939f7cSShane Wang ctx->__vmac_ctx.polytmp[i] = 509f1939f7cSShane Wang ctx->__vmac_ctx.polykey[i] = 510f1939f7cSShane Wang be64_to_cpup(out) & mpoly; 511f1939f7cSShane Wang ctx->__vmac_ctx.polytmp[i+1] = 512f1939f7cSShane Wang ctx->__vmac_ctx.polykey[i+1] = 513f1939f7cSShane Wang be64_to_cpup(out+1) & mpoly; 514f1939f7cSShane Wang ((unsigned char *)in)[15] += 1; 515f1939f7cSShane Wang } 516f1939f7cSShane Wang 517f1939f7cSShane Wang /* Fill ip key */ 518f1939f7cSShane Wang ((unsigned char *)in)[0] = 0xE0; 519f1939f7cSShane Wang in[1] = 0; 520f1939f7cSShane Wang for (i = 0; i < sizeof(ctx->__vmac_ctx.l3key)/8; i += 2) { 521f1939f7cSShane Wang do { 522f1939f7cSShane Wang crypto_cipher_encrypt_one(ctx->child, 523f1939f7cSShane Wang (unsigned char *)out, (unsigned char *)in); 524f1939f7cSShane Wang ctx->__vmac_ctx.l3key[i] = be64_to_cpup(out); 525f1939f7cSShane Wang ctx->__vmac_ctx.l3key[i+1] = be64_to_cpup(out+1); 526f1939f7cSShane Wang ((unsigned char *)in)[15] += 1; 527f1939f7cSShane Wang } while (ctx->__vmac_ctx.l3key[i] >= p64 528f1939f7cSShane Wang || ctx->__vmac_ctx.l3key[i+1] >= p64); 529f1939f7cSShane Wang } 530f1939f7cSShane Wang 531f1939f7cSShane Wang /* Invalidate nonce/aes cache and reset other elements */ 532f1939f7cSShane Wang ctx->__vmac_ctx.cached_nonce[0] = (u64)-1; /* Ensure illegal nonce */ 533f1939f7cSShane Wang ctx->__vmac_ctx.cached_nonce[1] = (u64)0; /* Ensure illegal nonce */ 534f1939f7cSShane Wang ctx->__vmac_ctx.first_block_processed = 0; 535f1939f7cSShane Wang 536f1939f7cSShane Wang return err; 537f1939f7cSShane Wang } 538f1939f7cSShane Wang 539f1939f7cSShane Wang static int vmac_setkey(struct crypto_shash *parent, 540f1939f7cSShane Wang const u8 *key, unsigned int keylen) 541f1939f7cSShane Wang { 542f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); 543f1939f7cSShane Wang 544f1939f7cSShane Wang if (keylen != VMAC_KEY_LEN) { 545f1939f7cSShane Wang crypto_shash_set_flags(parent, CRYPTO_TFM_RES_BAD_KEY_LEN); 546f1939f7cSShane Wang return -EINVAL; 547f1939f7cSShane Wang } 548f1939f7cSShane Wang 549f1939f7cSShane Wang return vmac_set_key((u8 *)key, ctx); 550f1939f7cSShane Wang } 551f1939f7cSShane Wang 552f1939f7cSShane Wang static int vmac_init(struct shash_desc *pdesc) 553f1939f7cSShane Wang { 554f1939f7cSShane Wang return 0; 555f1939f7cSShane Wang } 556f1939f7cSShane Wang 557f1939f7cSShane Wang static int vmac_update(struct shash_desc *pdesc, const u8 *p, 558f1939f7cSShane Wang unsigned int len) 559f1939f7cSShane Wang { 560f1939f7cSShane Wang struct crypto_shash *parent = pdesc->tfm; 561f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); 562f1939f7cSShane Wang 563f1939f7cSShane Wang vhash_update(p, len, &ctx->__vmac_ctx); 564f1939f7cSShane Wang 565f1939f7cSShane Wang return 0; 566f1939f7cSShane Wang } 567f1939f7cSShane Wang 568f1939f7cSShane Wang static int vmac_final(struct shash_desc *pdesc, u8 *out) 569f1939f7cSShane Wang { 570f1939f7cSShane Wang struct crypto_shash *parent = pdesc->tfm; 571f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); 572f1939f7cSShane Wang vmac_t mac; 573f1939f7cSShane Wang u8 nonce[16] = {}; 574f1939f7cSShane Wang 575f1939f7cSShane Wang mac = vmac(NULL, 0, nonce, NULL, ctx); 576f1939f7cSShane Wang memcpy(out, &mac, sizeof(vmac_t)); 577f1939f7cSShane Wang memset(&mac, 0, sizeof(vmac_t)); 578f1939f7cSShane Wang memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); 579f1939f7cSShane Wang return 0; 580f1939f7cSShane Wang } 581f1939f7cSShane Wang 582f1939f7cSShane Wang static int vmac_init_tfm(struct crypto_tfm *tfm) 583f1939f7cSShane Wang { 584f1939f7cSShane Wang struct crypto_cipher *cipher; 585f1939f7cSShane Wang struct crypto_instance *inst = (void *)tfm->__crt_alg; 586f1939f7cSShane Wang struct crypto_spawn *spawn = crypto_instance_ctx(inst); 587f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); 588f1939f7cSShane Wang 589f1939f7cSShane Wang cipher = crypto_spawn_cipher(spawn); 590f1939f7cSShane Wang if (IS_ERR(cipher)) 591f1939f7cSShane Wang return PTR_ERR(cipher); 592f1939f7cSShane Wang 593f1939f7cSShane Wang ctx->child = cipher; 594f1939f7cSShane Wang return 0; 595f1939f7cSShane Wang } 596f1939f7cSShane Wang 597f1939f7cSShane Wang static void vmac_exit_tfm(struct crypto_tfm *tfm) 598f1939f7cSShane Wang { 599f1939f7cSShane Wang struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm); 600f1939f7cSShane Wang crypto_free_cipher(ctx->child); 601f1939f7cSShane Wang } 602f1939f7cSShane Wang 603f1939f7cSShane Wang static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) 604f1939f7cSShane Wang { 605f1939f7cSShane Wang struct shash_instance *inst; 606f1939f7cSShane Wang struct crypto_alg *alg; 607f1939f7cSShane Wang int err; 608f1939f7cSShane Wang 609f1939f7cSShane Wang err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH); 610f1939f7cSShane Wang if (err) 611f1939f7cSShane Wang return err; 612f1939f7cSShane Wang 613f1939f7cSShane Wang alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, 614f1939f7cSShane Wang CRYPTO_ALG_TYPE_MASK); 615f1939f7cSShane Wang if (IS_ERR(alg)) 616f1939f7cSShane Wang return PTR_ERR(alg); 617f1939f7cSShane Wang 618f1939f7cSShane Wang inst = shash_alloc_instance("vmac", alg); 619f1939f7cSShane Wang err = PTR_ERR(inst); 620f1939f7cSShane Wang if (IS_ERR(inst)) 621f1939f7cSShane Wang goto out_put_alg; 622f1939f7cSShane Wang 623f1939f7cSShane Wang err = crypto_init_spawn(shash_instance_ctx(inst), alg, 624f1939f7cSShane Wang shash_crypto_instance(inst), 625f1939f7cSShane Wang CRYPTO_ALG_TYPE_MASK); 626f1939f7cSShane Wang if (err) 627f1939f7cSShane Wang goto out_free_inst; 628f1939f7cSShane Wang 629f1939f7cSShane Wang inst->alg.base.cra_priority = alg->cra_priority; 630f1939f7cSShane Wang inst->alg.base.cra_blocksize = alg->cra_blocksize; 631f1939f7cSShane Wang inst->alg.base.cra_alignmask = alg->cra_alignmask; 632f1939f7cSShane Wang 633f1939f7cSShane Wang inst->alg.digestsize = sizeof(vmac_t); 634f1939f7cSShane Wang inst->alg.base.cra_ctxsize = sizeof(struct vmac_ctx_t); 635f1939f7cSShane Wang inst->alg.base.cra_init = vmac_init_tfm; 636f1939f7cSShane Wang inst->alg.base.cra_exit = vmac_exit_tfm; 637f1939f7cSShane Wang 638f1939f7cSShane Wang inst->alg.init = vmac_init; 639f1939f7cSShane Wang inst->alg.update = vmac_update; 640f1939f7cSShane Wang inst->alg.final = vmac_final; 641f1939f7cSShane Wang inst->alg.setkey = vmac_setkey; 642f1939f7cSShane Wang 643f1939f7cSShane Wang err = shash_register_instance(tmpl, inst); 644f1939f7cSShane Wang if (err) { 645f1939f7cSShane Wang out_free_inst: 646f1939f7cSShane Wang shash_free_instance(shash_crypto_instance(inst)); 647f1939f7cSShane Wang } 648f1939f7cSShane Wang 649f1939f7cSShane Wang out_put_alg: 650f1939f7cSShane Wang crypto_mod_put(alg); 651f1939f7cSShane Wang return err; 652f1939f7cSShane Wang } 653f1939f7cSShane Wang 654f1939f7cSShane Wang static struct crypto_template vmac_tmpl = { 655f1939f7cSShane Wang .name = "vmac", 656f1939f7cSShane Wang .create = vmac_create, 657f1939f7cSShane Wang .free = shash_free_instance, 658f1939f7cSShane Wang .module = THIS_MODULE, 659f1939f7cSShane Wang }; 660f1939f7cSShane Wang 661f1939f7cSShane Wang static int __init vmac_module_init(void) 662f1939f7cSShane Wang { 663f1939f7cSShane Wang return crypto_register_template(&vmac_tmpl); 664f1939f7cSShane Wang } 665f1939f7cSShane Wang 666f1939f7cSShane Wang static void __exit vmac_module_exit(void) 667f1939f7cSShane Wang { 668f1939f7cSShane Wang crypto_unregister_template(&vmac_tmpl); 669f1939f7cSShane Wang } 670f1939f7cSShane Wang 671f1939f7cSShane Wang module_init(vmac_module_init); 672f1939f7cSShane Wang module_exit(vmac_module_exit); 673f1939f7cSShane Wang 674f1939f7cSShane Wang MODULE_LICENSE("GPL"); 675f1939f7cSShane Wang MODULE_DESCRIPTION("VMAC hash algorithm"); 676f1939f7cSShane Wang 677