161c581a4SArd Biesheuvel /* gf128mul.c - GF(2^128) multiplication functions
261c581a4SArd Biesheuvel *
361c581a4SArd Biesheuvel * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
461c581a4SArd Biesheuvel * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
561c581a4SArd Biesheuvel *
661c581a4SArd Biesheuvel * Based on Dr Brian Gladman's (GPL'd) work published at
761c581a4SArd Biesheuvel * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
861c581a4SArd Biesheuvel * See the original copyright notice below.
961c581a4SArd Biesheuvel *
1061c581a4SArd Biesheuvel * This program is free software; you can redistribute it and/or modify it
1161c581a4SArd Biesheuvel * under the terms of the GNU General Public License as published by the Free
1261c581a4SArd Biesheuvel * Software Foundation; either version 2 of the License, or (at your option)
1361c581a4SArd Biesheuvel * any later version.
1461c581a4SArd Biesheuvel */
1561c581a4SArd Biesheuvel
1661c581a4SArd Biesheuvel /*
1761c581a4SArd Biesheuvel ---------------------------------------------------------------------------
1861c581a4SArd Biesheuvel Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
1961c581a4SArd Biesheuvel
2061c581a4SArd Biesheuvel LICENSE TERMS
2161c581a4SArd Biesheuvel
2261c581a4SArd Biesheuvel The free distribution and use of this software in both source and binary
2361c581a4SArd Biesheuvel form is allowed (with or without changes) provided that:
2461c581a4SArd Biesheuvel
2561c581a4SArd Biesheuvel 1. distributions of this source code include the above copyright
2661c581a4SArd Biesheuvel notice, this list of conditions and the following disclaimer;
2761c581a4SArd Biesheuvel
2861c581a4SArd Biesheuvel 2. distributions in binary form include the above copyright
2961c581a4SArd Biesheuvel notice, this list of conditions and the following disclaimer
3061c581a4SArd Biesheuvel in the documentation and/or other associated materials;
3161c581a4SArd Biesheuvel
3261c581a4SArd Biesheuvel 3. the copyright holder's name is not used to endorse products
3361c581a4SArd Biesheuvel built using this software without specific written permission.
3461c581a4SArd Biesheuvel
3561c581a4SArd Biesheuvel ALTERNATIVELY, provided that this notice is retained in full, this product
3661c581a4SArd Biesheuvel may be distributed under the terms of the GNU General Public License (GPL),
3761c581a4SArd Biesheuvel in which case the provisions of the GPL apply INSTEAD OF those given above.
3861c581a4SArd Biesheuvel
3961c581a4SArd Biesheuvel DISCLAIMER
4061c581a4SArd Biesheuvel
4161c581a4SArd Biesheuvel This software is provided 'as is' with no explicit or implied warranties
4261c581a4SArd Biesheuvel in respect of its properties, including, but not limited to, correctness
4361c581a4SArd Biesheuvel and/or fitness for purpose.
4461c581a4SArd Biesheuvel ---------------------------------------------------------------------------
4561c581a4SArd Biesheuvel Issue 31/01/2006
4661c581a4SArd Biesheuvel
4761c581a4SArd Biesheuvel This file provides fast multiplication in GF(2^128) as required by several
4861c581a4SArd Biesheuvel cryptographic authentication modes
4961c581a4SArd Biesheuvel */
5061c581a4SArd Biesheuvel
5161c581a4SArd Biesheuvel #include <crypto/gf128mul.h>
5261c581a4SArd Biesheuvel #include <linux/kernel.h>
5361c581a4SArd Biesheuvel #include <linux/module.h>
5461c581a4SArd Biesheuvel #include <linux/slab.h>
5561c581a4SArd Biesheuvel
5661c581a4SArd Biesheuvel #define gf128mul_dat(q) { \
5761c581a4SArd Biesheuvel q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
5861c581a4SArd Biesheuvel q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
5961c581a4SArd Biesheuvel q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
6061c581a4SArd Biesheuvel q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
6161c581a4SArd Biesheuvel q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
6261c581a4SArd Biesheuvel q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
6361c581a4SArd Biesheuvel q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
6461c581a4SArd Biesheuvel q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
6561c581a4SArd Biesheuvel q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
6661c581a4SArd Biesheuvel q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
6761c581a4SArd Biesheuvel q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
6861c581a4SArd Biesheuvel q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
6961c581a4SArd Biesheuvel q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
7061c581a4SArd Biesheuvel q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
7161c581a4SArd Biesheuvel q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
7261c581a4SArd Biesheuvel q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
7361c581a4SArd Biesheuvel q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
7461c581a4SArd Biesheuvel q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
7561c581a4SArd Biesheuvel q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
7661c581a4SArd Biesheuvel q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
7761c581a4SArd Biesheuvel q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
7861c581a4SArd Biesheuvel q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
7961c581a4SArd Biesheuvel q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
8061c581a4SArd Biesheuvel q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
8161c581a4SArd Biesheuvel q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
8261c581a4SArd Biesheuvel q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
8361c581a4SArd Biesheuvel q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
8461c581a4SArd Biesheuvel q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
8561c581a4SArd Biesheuvel q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
8661c581a4SArd Biesheuvel q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
8761c581a4SArd Biesheuvel q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
8861c581a4SArd Biesheuvel q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
8961c581a4SArd Biesheuvel }
9061c581a4SArd Biesheuvel
9161c581a4SArd Biesheuvel /*
9261c581a4SArd Biesheuvel * Given a value i in 0..255 as the byte overflow when a field element
9361c581a4SArd Biesheuvel * in GF(2^128) is multiplied by x^8, the following macro returns the
9461c581a4SArd Biesheuvel * 16-bit value that must be XOR-ed into the low-degree end of the
9561c581a4SArd Biesheuvel * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
9661c581a4SArd Biesheuvel *
9761c581a4SArd Biesheuvel * There are two versions of the macro, and hence two tables: one for
9861c581a4SArd Biesheuvel * the "be" convention where the highest-order bit is the coefficient of
9961c581a4SArd Biesheuvel * the highest-degree polynomial term, and one for the "le" convention
10061c581a4SArd Biesheuvel * where the highest-order bit is the coefficient of the lowest-degree
10161c581a4SArd Biesheuvel * polynomial term. In both cases the values are stored in CPU byte
10261c581a4SArd Biesheuvel * endianness such that the coefficients are ordered consistently across
10361c581a4SArd Biesheuvel * bytes, i.e. in the "be" table bits 15..0 of the stored value
10461c581a4SArd Biesheuvel * correspond to the coefficients of x^15..x^0, and in the "le" table
10561c581a4SArd Biesheuvel * bits 15..0 correspond to the coefficients of x^0..x^15.
10661c581a4SArd Biesheuvel *
10761c581a4SArd Biesheuvel * Therefore, provided that the appropriate byte endianness conversions
10861c581a4SArd Biesheuvel * are done by the multiplication functions (and these must be in place
10961c581a4SArd Biesheuvel * anyway to support both little endian and big endian CPUs), the "be"
11061c581a4SArd Biesheuvel * table can be used for multiplications of both "bbe" and "ble"
11161c581a4SArd Biesheuvel * elements, and the "le" table can be used for multiplications of both
11261c581a4SArd Biesheuvel * "lle" and "lbe" elements.
11361c581a4SArd Biesheuvel */
11461c581a4SArd Biesheuvel
11561c581a4SArd Biesheuvel #define xda_be(i) ( \
11661c581a4SArd Biesheuvel (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
11761c581a4SArd Biesheuvel (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
11861c581a4SArd Biesheuvel (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
11961c581a4SArd Biesheuvel (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
12061c581a4SArd Biesheuvel )
12161c581a4SArd Biesheuvel
12261c581a4SArd Biesheuvel #define xda_le(i) ( \
12361c581a4SArd Biesheuvel (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
12461c581a4SArd Biesheuvel (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
12561c581a4SArd Biesheuvel (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
12661c581a4SArd Biesheuvel (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
12761c581a4SArd Biesheuvel )
12861c581a4SArd Biesheuvel
12961c581a4SArd Biesheuvel static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
13061c581a4SArd Biesheuvel static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
13161c581a4SArd Biesheuvel
13261c581a4SArd Biesheuvel /*
13361c581a4SArd Biesheuvel * The following functions multiply a field element by x^8 in
13461c581a4SArd Biesheuvel * the polynomial field representation. They use 64-bit word operations
13561c581a4SArd Biesheuvel * to gain speed but compensate for machine endianness and hence work
13661c581a4SArd Biesheuvel * correctly on both styles of machine.
13761c581a4SArd Biesheuvel */
13861c581a4SArd Biesheuvel
gf128mul_x8_lle(be128 * x)13961c581a4SArd Biesheuvel static void gf128mul_x8_lle(be128 *x)
14061c581a4SArd Biesheuvel {
14161c581a4SArd Biesheuvel u64 a = be64_to_cpu(x->a);
14261c581a4SArd Biesheuvel u64 b = be64_to_cpu(x->b);
14361c581a4SArd Biesheuvel u64 _tt = gf128mul_table_le[b & 0xff];
14461c581a4SArd Biesheuvel
14561c581a4SArd Biesheuvel x->b = cpu_to_be64((b >> 8) | (a << 56));
14661c581a4SArd Biesheuvel x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
14761c581a4SArd Biesheuvel }
14861c581a4SArd Biesheuvel
149*b67ce439SArd Biesheuvel /* time invariant version of gf128mul_x8_lle */
gf128mul_x8_lle_ti(be128 * x)150*b67ce439SArd Biesheuvel static void gf128mul_x8_lle_ti(be128 *x)
151*b67ce439SArd Biesheuvel {
152*b67ce439SArd Biesheuvel u64 a = be64_to_cpu(x->a);
153*b67ce439SArd Biesheuvel u64 b = be64_to_cpu(x->b);
154*b67ce439SArd Biesheuvel u64 _tt = xda_le(b & 0xff); /* avoid table lookup */
155*b67ce439SArd Biesheuvel
156*b67ce439SArd Biesheuvel x->b = cpu_to_be64((b >> 8) | (a << 56));
157*b67ce439SArd Biesheuvel x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
158*b67ce439SArd Biesheuvel }
159*b67ce439SArd Biesheuvel
gf128mul_x8_bbe(be128 * x)16061c581a4SArd Biesheuvel static void gf128mul_x8_bbe(be128 *x)
16161c581a4SArd Biesheuvel {
16261c581a4SArd Biesheuvel u64 a = be64_to_cpu(x->a);
16361c581a4SArd Biesheuvel u64 b = be64_to_cpu(x->b);
16461c581a4SArd Biesheuvel u64 _tt = gf128mul_table_be[a >> 56];
16561c581a4SArd Biesheuvel
16661c581a4SArd Biesheuvel x->a = cpu_to_be64((a << 8) | (b >> 56));
16761c581a4SArd Biesheuvel x->b = cpu_to_be64((b << 8) ^ _tt);
16861c581a4SArd Biesheuvel }
16961c581a4SArd Biesheuvel
gf128mul_x8_ble(le128 * r,const le128 * x)17061c581a4SArd Biesheuvel void gf128mul_x8_ble(le128 *r, const le128 *x)
17161c581a4SArd Biesheuvel {
17261c581a4SArd Biesheuvel u64 a = le64_to_cpu(x->a);
17361c581a4SArd Biesheuvel u64 b = le64_to_cpu(x->b);
17461c581a4SArd Biesheuvel u64 _tt = gf128mul_table_be[a >> 56];
17561c581a4SArd Biesheuvel
17661c581a4SArd Biesheuvel r->a = cpu_to_le64((a << 8) | (b >> 56));
17761c581a4SArd Biesheuvel r->b = cpu_to_le64((b << 8) ^ _tt);
17861c581a4SArd Biesheuvel }
17961c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_x8_ble);
18061c581a4SArd Biesheuvel
gf128mul_lle(be128 * r,const be128 * b)18161c581a4SArd Biesheuvel void gf128mul_lle(be128 *r, const be128 *b)
18261c581a4SArd Biesheuvel {
183*b67ce439SArd Biesheuvel /*
184*b67ce439SArd Biesheuvel * The p array should be aligned to twice the size of its element type,
185*b67ce439SArd Biesheuvel * so that every even/odd pair is guaranteed to share a cacheline
186*b67ce439SArd Biesheuvel * (assuming a cacheline size of 32 bytes or more, which is by far the
187*b67ce439SArd Biesheuvel * most common). This ensures that each be128_xor() call in the loop
188*b67ce439SArd Biesheuvel * takes the same amount of time regardless of the value of 'ch', which
189*b67ce439SArd Biesheuvel * is derived from function parameter 'b', which is commonly used as a
190*b67ce439SArd Biesheuvel * key, e.g., for GHASH. The odd array elements are all set to zero,
191*b67ce439SArd Biesheuvel * making each be128_xor() a NOP if its associated bit in 'ch' is not
192*b67ce439SArd Biesheuvel * set, and this is equivalent to calling be128_xor() conditionally.
193*b67ce439SArd Biesheuvel * This approach aims to avoid leaking information about such keys
194*b67ce439SArd Biesheuvel * through execution time variances.
195*b67ce439SArd Biesheuvel *
196*b67ce439SArd Biesheuvel * Unfortunately, __aligned(16) or higher does not work on x86 for
197*b67ce439SArd Biesheuvel * variables on the stack so we need to perform the alignment by hand.
198*b67ce439SArd Biesheuvel */
199*b67ce439SArd Biesheuvel be128 array[16 + 3] = {};
200*b67ce439SArd Biesheuvel be128 *p = PTR_ALIGN(&array[0], 2 * sizeof(be128));
20161c581a4SArd Biesheuvel int i;
20261c581a4SArd Biesheuvel
20361c581a4SArd Biesheuvel p[0] = *r;
20461c581a4SArd Biesheuvel for (i = 0; i < 7; ++i)
205*b67ce439SArd Biesheuvel gf128mul_x_lle(&p[2 * i + 2], &p[2 * i]);
20661c581a4SArd Biesheuvel
20761c581a4SArd Biesheuvel memset(r, 0, sizeof(*r));
20861c581a4SArd Biesheuvel for (i = 0;;) {
20961c581a4SArd Biesheuvel u8 ch = ((u8 *)b)[15 - i];
21061c581a4SArd Biesheuvel
211*b67ce439SArd Biesheuvel be128_xor(r, r, &p[ 0 + !(ch & 0x80)]);
212*b67ce439SArd Biesheuvel be128_xor(r, r, &p[ 2 + !(ch & 0x40)]);
213*b67ce439SArd Biesheuvel be128_xor(r, r, &p[ 4 + !(ch & 0x20)]);
214*b67ce439SArd Biesheuvel be128_xor(r, r, &p[ 6 + !(ch & 0x10)]);
215*b67ce439SArd Biesheuvel be128_xor(r, r, &p[ 8 + !(ch & 0x08)]);
216*b67ce439SArd Biesheuvel be128_xor(r, r, &p[10 + !(ch & 0x04)]);
217*b67ce439SArd Biesheuvel be128_xor(r, r, &p[12 + !(ch & 0x02)]);
218*b67ce439SArd Biesheuvel be128_xor(r, r, &p[14 + !(ch & 0x01)]);
21961c581a4SArd Biesheuvel
22061c581a4SArd Biesheuvel if (++i >= 16)
22161c581a4SArd Biesheuvel break;
22261c581a4SArd Biesheuvel
223*b67ce439SArd Biesheuvel gf128mul_x8_lle_ti(r); /* use the time invariant version */
22461c581a4SArd Biesheuvel }
22561c581a4SArd Biesheuvel }
22661c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_lle);
22761c581a4SArd Biesheuvel
gf128mul_bbe(be128 * r,const be128 * b)22861c581a4SArd Biesheuvel void gf128mul_bbe(be128 *r, const be128 *b)
22961c581a4SArd Biesheuvel {
23061c581a4SArd Biesheuvel be128 p[8];
23161c581a4SArd Biesheuvel int i;
23261c581a4SArd Biesheuvel
23361c581a4SArd Biesheuvel p[0] = *r;
23461c581a4SArd Biesheuvel for (i = 0; i < 7; ++i)
23561c581a4SArd Biesheuvel gf128mul_x_bbe(&p[i + 1], &p[i]);
23661c581a4SArd Biesheuvel
23761c581a4SArd Biesheuvel memset(r, 0, sizeof(*r));
23861c581a4SArd Biesheuvel for (i = 0;;) {
23961c581a4SArd Biesheuvel u8 ch = ((u8 *)b)[i];
24061c581a4SArd Biesheuvel
24161c581a4SArd Biesheuvel if (ch & 0x80)
24261c581a4SArd Biesheuvel be128_xor(r, r, &p[7]);
24361c581a4SArd Biesheuvel if (ch & 0x40)
24461c581a4SArd Biesheuvel be128_xor(r, r, &p[6]);
24561c581a4SArd Biesheuvel if (ch & 0x20)
24661c581a4SArd Biesheuvel be128_xor(r, r, &p[5]);
24761c581a4SArd Biesheuvel if (ch & 0x10)
24861c581a4SArd Biesheuvel be128_xor(r, r, &p[4]);
24961c581a4SArd Biesheuvel if (ch & 0x08)
25061c581a4SArd Biesheuvel be128_xor(r, r, &p[3]);
25161c581a4SArd Biesheuvel if (ch & 0x04)
25261c581a4SArd Biesheuvel be128_xor(r, r, &p[2]);
25361c581a4SArd Biesheuvel if (ch & 0x02)
25461c581a4SArd Biesheuvel be128_xor(r, r, &p[1]);
25561c581a4SArd Biesheuvel if (ch & 0x01)
25661c581a4SArd Biesheuvel be128_xor(r, r, &p[0]);
25761c581a4SArd Biesheuvel
25861c581a4SArd Biesheuvel if (++i >= 16)
25961c581a4SArd Biesheuvel break;
26061c581a4SArd Biesheuvel
26161c581a4SArd Biesheuvel gf128mul_x8_bbe(r);
26261c581a4SArd Biesheuvel }
26361c581a4SArd Biesheuvel }
26461c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_bbe);
26561c581a4SArd Biesheuvel
26661c581a4SArd Biesheuvel /* This version uses 64k bytes of table space.
26761c581a4SArd Biesheuvel A 16 byte buffer has to be multiplied by a 16 byte key
26861c581a4SArd Biesheuvel value in GF(2^128). If we consider a GF(2^128) value in
26961c581a4SArd Biesheuvel the buffer's lowest byte, we can construct a table of
27061c581a4SArd Biesheuvel the 256 16 byte values that result from the 256 values
27161c581a4SArd Biesheuvel of this byte. This requires 4096 bytes. But we also
27261c581a4SArd Biesheuvel need tables for each of the 16 higher bytes in the
27361c581a4SArd Biesheuvel buffer as well, which makes 64 kbytes in total.
27461c581a4SArd Biesheuvel */
27561c581a4SArd Biesheuvel /* additional explanation
27661c581a4SArd Biesheuvel * t[0][BYTE] contains g*BYTE
27761c581a4SArd Biesheuvel * t[1][BYTE] contains g*x^8*BYTE
27861c581a4SArd Biesheuvel * ..
27961c581a4SArd Biesheuvel * t[15][BYTE] contains g*x^120*BYTE */
gf128mul_init_64k_bbe(const be128 * g)28061c581a4SArd Biesheuvel struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
28161c581a4SArd Biesheuvel {
28261c581a4SArd Biesheuvel struct gf128mul_64k *t;
28361c581a4SArd Biesheuvel int i, j, k;
28461c581a4SArd Biesheuvel
28561c581a4SArd Biesheuvel t = kzalloc(sizeof(*t), GFP_KERNEL);
28661c581a4SArd Biesheuvel if (!t)
28761c581a4SArd Biesheuvel goto out;
28861c581a4SArd Biesheuvel
28961c581a4SArd Biesheuvel for (i = 0; i < 16; i++) {
29061c581a4SArd Biesheuvel t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
29161c581a4SArd Biesheuvel if (!t->t[i]) {
29261c581a4SArd Biesheuvel gf128mul_free_64k(t);
29361c581a4SArd Biesheuvel t = NULL;
29461c581a4SArd Biesheuvel goto out;
29561c581a4SArd Biesheuvel }
29661c581a4SArd Biesheuvel }
29761c581a4SArd Biesheuvel
29861c581a4SArd Biesheuvel t->t[0]->t[1] = *g;
29961c581a4SArd Biesheuvel for (j = 1; j <= 64; j <<= 1)
30061c581a4SArd Biesheuvel gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
30161c581a4SArd Biesheuvel
30261c581a4SArd Biesheuvel for (i = 0;;) {
30361c581a4SArd Biesheuvel for (j = 2; j < 256; j += j)
30461c581a4SArd Biesheuvel for (k = 1; k < j; ++k)
30561c581a4SArd Biesheuvel be128_xor(&t->t[i]->t[j + k],
30661c581a4SArd Biesheuvel &t->t[i]->t[j], &t->t[i]->t[k]);
30761c581a4SArd Biesheuvel
30861c581a4SArd Biesheuvel if (++i >= 16)
30961c581a4SArd Biesheuvel break;
31061c581a4SArd Biesheuvel
31161c581a4SArd Biesheuvel for (j = 128; j > 0; j >>= 1) {
31261c581a4SArd Biesheuvel t->t[i]->t[j] = t->t[i - 1]->t[j];
31361c581a4SArd Biesheuvel gf128mul_x8_bbe(&t->t[i]->t[j]);
31461c581a4SArd Biesheuvel }
31561c581a4SArd Biesheuvel }
31661c581a4SArd Biesheuvel
31761c581a4SArd Biesheuvel out:
31861c581a4SArd Biesheuvel return t;
31961c581a4SArd Biesheuvel }
32061c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_init_64k_bbe);
32161c581a4SArd Biesheuvel
gf128mul_free_64k(struct gf128mul_64k * t)32261c581a4SArd Biesheuvel void gf128mul_free_64k(struct gf128mul_64k *t)
32361c581a4SArd Biesheuvel {
32461c581a4SArd Biesheuvel int i;
32561c581a4SArd Biesheuvel
32661c581a4SArd Biesheuvel for (i = 0; i < 16; i++)
32761c581a4SArd Biesheuvel kfree_sensitive(t->t[i]);
32861c581a4SArd Biesheuvel kfree_sensitive(t);
32961c581a4SArd Biesheuvel }
33061c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_free_64k);
33161c581a4SArd Biesheuvel
gf128mul_64k_bbe(be128 * a,const struct gf128mul_64k * t)33261c581a4SArd Biesheuvel void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
33361c581a4SArd Biesheuvel {
33461c581a4SArd Biesheuvel u8 *ap = (u8 *)a;
33561c581a4SArd Biesheuvel be128 r[1];
33661c581a4SArd Biesheuvel int i;
33761c581a4SArd Biesheuvel
33861c581a4SArd Biesheuvel *r = t->t[0]->t[ap[15]];
33961c581a4SArd Biesheuvel for (i = 1; i < 16; ++i)
34061c581a4SArd Biesheuvel be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
34161c581a4SArd Biesheuvel *a = *r;
34261c581a4SArd Biesheuvel }
34361c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_64k_bbe);
34461c581a4SArd Biesheuvel
34561c581a4SArd Biesheuvel /* This version uses 4k bytes of table space.
34661c581a4SArd Biesheuvel A 16 byte buffer has to be multiplied by a 16 byte key
34761c581a4SArd Biesheuvel value in GF(2^128). If we consider a GF(2^128) value in a
34861c581a4SArd Biesheuvel single byte, we can construct a table of the 256 16 byte
34961c581a4SArd Biesheuvel values that result from the 256 values of this byte.
35061c581a4SArd Biesheuvel This requires 4096 bytes. If we take the highest byte in
35161c581a4SArd Biesheuvel the buffer and use this table to get the result, we then
35261c581a4SArd Biesheuvel have to multiply by x^120 to get the final value. For the
35361c581a4SArd Biesheuvel next highest byte the result has to be multiplied by x^112
35461c581a4SArd Biesheuvel and so on. But we can do this by accumulating the result
35561c581a4SArd Biesheuvel in an accumulator starting with the result for the top
35661c581a4SArd Biesheuvel byte. We repeatedly multiply the accumulator value by
35761c581a4SArd Biesheuvel x^8 and then add in (i.e. xor) the 16 bytes of the next
35861c581a4SArd Biesheuvel lower byte in the buffer, stopping when we reach the
35961c581a4SArd Biesheuvel lowest byte. This requires a 4096 byte table.
36061c581a4SArd Biesheuvel */
gf128mul_init_4k_lle(const be128 * g)36161c581a4SArd Biesheuvel struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
36261c581a4SArd Biesheuvel {
36361c581a4SArd Biesheuvel struct gf128mul_4k *t;
36461c581a4SArd Biesheuvel int j, k;
36561c581a4SArd Biesheuvel
36661c581a4SArd Biesheuvel t = kzalloc(sizeof(*t), GFP_KERNEL);
36761c581a4SArd Biesheuvel if (!t)
36861c581a4SArd Biesheuvel goto out;
36961c581a4SArd Biesheuvel
37061c581a4SArd Biesheuvel t->t[128] = *g;
37161c581a4SArd Biesheuvel for (j = 64; j > 0; j >>= 1)
37261c581a4SArd Biesheuvel gf128mul_x_lle(&t->t[j], &t->t[j+j]);
37361c581a4SArd Biesheuvel
37461c581a4SArd Biesheuvel for (j = 2; j < 256; j += j)
37561c581a4SArd Biesheuvel for (k = 1; k < j; ++k)
37661c581a4SArd Biesheuvel be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
37761c581a4SArd Biesheuvel
37861c581a4SArd Biesheuvel out:
37961c581a4SArd Biesheuvel return t;
38061c581a4SArd Biesheuvel }
38161c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_init_4k_lle);
38261c581a4SArd Biesheuvel
gf128mul_init_4k_bbe(const be128 * g)38361c581a4SArd Biesheuvel struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
38461c581a4SArd Biesheuvel {
38561c581a4SArd Biesheuvel struct gf128mul_4k *t;
38661c581a4SArd Biesheuvel int j, k;
38761c581a4SArd Biesheuvel
38861c581a4SArd Biesheuvel t = kzalloc(sizeof(*t), GFP_KERNEL);
38961c581a4SArd Biesheuvel if (!t)
39061c581a4SArd Biesheuvel goto out;
39161c581a4SArd Biesheuvel
39261c581a4SArd Biesheuvel t->t[1] = *g;
39361c581a4SArd Biesheuvel for (j = 1; j <= 64; j <<= 1)
39461c581a4SArd Biesheuvel gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
39561c581a4SArd Biesheuvel
39661c581a4SArd Biesheuvel for (j = 2; j < 256; j += j)
39761c581a4SArd Biesheuvel for (k = 1; k < j; ++k)
39861c581a4SArd Biesheuvel be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
39961c581a4SArd Biesheuvel
40061c581a4SArd Biesheuvel out:
40161c581a4SArd Biesheuvel return t;
40261c581a4SArd Biesheuvel }
40361c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_init_4k_bbe);
40461c581a4SArd Biesheuvel
gf128mul_4k_lle(be128 * a,const struct gf128mul_4k * t)40561c581a4SArd Biesheuvel void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
40661c581a4SArd Biesheuvel {
40761c581a4SArd Biesheuvel u8 *ap = (u8 *)a;
40861c581a4SArd Biesheuvel be128 r[1];
40961c581a4SArd Biesheuvel int i = 15;
41061c581a4SArd Biesheuvel
41161c581a4SArd Biesheuvel *r = t->t[ap[15]];
41261c581a4SArd Biesheuvel while (i--) {
41361c581a4SArd Biesheuvel gf128mul_x8_lle(r);
41461c581a4SArd Biesheuvel be128_xor(r, r, &t->t[ap[i]]);
41561c581a4SArd Biesheuvel }
41661c581a4SArd Biesheuvel *a = *r;
41761c581a4SArd Biesheuvel }
41861c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_4k_lle);
41961c581a4SArd Biesheuvel
gf128mul_4k_bbe(be128 * a,const struct gf128mul_4k * t)42061c581a4SArd Biesheuvel void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
42161c581a4SArd Biesheuvel {
42261c581a4SArd Biesheuvel u8 *ap = (u8 *)a;
42361c581a4SArd Biesheuvel be128 r[1];
42461c581a4SArd Biesheuvel int i = 0;
42561c581a4SArd Biesheuvel
42661c581a4SArd Biesheuvel *r = t->t[ap[0]];
42761c581a4SArd Biesheuvel while (++i < 16) {
42861c581a4SArd Biesheuvel gf128mul_x8_bbe(r);
42961c581a4SArd Biesheuvel be128_xor(r, r, &t->t[ap[i]]);
43061c581a4SArd Biesheuvel }
43161c581a4SArd Biesheuvel *a = *r;
43261c581a4SArd Biesheuvel }
43361c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_4k_bbe);
43461c581a4SArd Biesheuvel
43561c581a4SArd Biesheuvel MODULE_LICENSE("GPL");
43661c581a4SArd Biesheuvel MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
437