xref: /openbmc/linux/lib/crypto/gf128mul.c (revision b67ce439)
161c581a4SArd Biesheuvel /* gf128mul.c - GF(2^128) multiplication functions
261c581a4SArd Biesheuvel  *
361c581a4SArd Biesheuvel  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
461c581a4SArd Biesheuvel  * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
561c581a4SArd Biesheuvel  *
661c581a4SArd Biesheuvel  * Based on Dr Brian Gladman's (GPL'd) work published at
761c581a4SArd Biesheuvel  * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
861c581a4SArd Biesheuvel  * See the original copyright notice below.
961c581a4SArd Biesheuvel  *
1061c581a4SArd Biesheuvel  * This program is free software; you can redistribute it and/or modify it
1161c581a4SArd Biesheuvel  * under the terms of the GNU General Public License as published by the Free
1261c581a4SArd Biesheuvel  * Software Foundation; either version 2 of the License, or (at your option)
1361c581a4SArd Biesheuvel  * any later version.
1461c581a4SArd Biesheuvel  */
1561c581a4SArd Biesheuvel 
1661c581a4SArd Biesheuvel /*
1761c581a4SArd Biesheuvel  ---------------------------------------------------------------------------
1861c581a4SArd Biesheuvel  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
1961c581a4SArd Biesheuvel 
2061c581a4SArd Biesheuvel  LICENSE TERMS
2161c581a4SArd Biesheuvel 
2261c581a4SArd Biesheuvel  The free distribution and use of this software in both source and binary
2361c581a4SArd Biesheuvel  form is allowed (with or without changes) provided that:
2461c581a4SArd Biesheuvel 
2561c581a4SArd Biesheuvel    1. distributions of this source code include the above copyright
2661c581a4SArd Biesheuvel       notice, this list of conditions and the following disclaimer;
2761c581a4SArd Biesheuvel 
2861c581a4SArd Biesheuvel    2. distributions in binary form include the above copyright
2961c581a4SArd Biesheuvel       notice, this list of conditions and the following disclaimer
3061c581a4SArd Biesheuvel       in the documentation and/or other associated materials;
3161c581a4SArd Biesheuvel 
3261c581a4SArd Biesheuvel    3. the copyright holder's name is not used to endorse products
3361c581a4SArd Biesheuvel       built using this software without specific written permission.
3461c581a4SArd Biesheuvel 
3561c581a4SArd Biesheuvel  ALTERNATIVELY, provided that this notice is retained in full, this product
3661c581a4SArd Biesheuvel  may be distributed under the terms of the GNU General Public License (GPL),
3761c581a4SArd Biesheuvel  in which case the provisions of the GPL apply INSTEAD OF those given above.
3861c581a4SArd Biesheuvel 
3961c581a4SArd Biesheuvel  DISCLAIMER
4061c581a4SArd Biesheuvel 
4161c581a4SArd Biesheuvel  This software is provided 'as is' with no explicit or implied warranties
4261c581a4SArd Biesheuvel  in respect of its properties, including, but not limited to, correctness
4361c581a4SArd Biesheuvel  and/or fitness for purpose.
4461c581a4SArd Biesheuvel  ---------------------------------------------------------------------------
4561c581a4SArd Biesheuvel  Issue 31/01/2006
4661c581a4SArd Biesheuvel 
4761c581a4SArd Biesheuvel  This file provides fast multiplication in GF(2^128) as required by several
4861c581a4SArd Biesheuvel  cryptographic authentication modes
4961c581a4SArd Biesheuvel */
5061c581a4SArd Biesheuvel 
5161c581a4SArd Biesheuvel #include <crypto/gf128mul.h>
5261c581a4SArd Biesheuvel #include <linux/kernel.h>
5361c581a4SArd Biesheuvel #include <linux/module.h>
5461c581a4SArd Biesheuvel #include <linux/slab.h>
5561c581a4SArd Biesheuvel 
5661c581a4SArd Biesheuvel #define gf128mul_dat(q) { \
5761c581a4SArd Biesheuvel 	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
5861c581a4SArd Biesheuvel 	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
5961c581a4SArd Biesheuvel 	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
6061c581a4SArd Biesheuvel 	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
6161c581a4SArd Biesheuvel 	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
6261c581a4SArd Biesheuvel 	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
6361c581a4SArd Biesheuvel 	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
6461c581a4SArd Biesheuvel 	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
6561c581a4SArd Biesheuvel 	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
6661c581a4SArd Biesheuvel 	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
6761c581a4SArd Biesheuvel 	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
6861c581a4SArd Biesheuvel 	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
6961c581a4SArd Biesheuvel 	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
7061c581a4SArd Biesheuvel 	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
7161c581a4SArd Biesheuvel 	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
7261c581a4SArd Biesheuvel 	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
7361c581a4SArd Biesheuvel 	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
7461c581a4SArd Biesheuvel 	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
7561c581a4SArd Biesheuvel 	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
7661c581a4SArd Biesheuvel 	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
7761c581a4SArd Biesheuvel 	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
7861c581a4SArd Biesheuvel 	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
7961c581a4SArd Biesheuvel 	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
8061c581a4SArd Biesheuvel 	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
8161c581a4SArd Biesheuvel 	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
8261c581a4SArd Biesheuvel 	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
8361c581a4SArd Biesheuvel 	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
8461c581a4SArd Biesheuvel 	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
8561c581a4SArd Biesheuvel 	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
8661c581a4SArd Biesheuvel 	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
8761c581a4SArd Biesheuvel 	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
8861c581a4SArd Biesheuvel 	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
8961c581a4SArd Biesheuvel }
9061c581a4SArd Biesheuvel 
9161c581a4SArd Biesheuvel /*
9261c581a4SArd Biesheuvel  * Given a value i in 0..255 as the byte overflow when a field element
9361c581a4SArd Biesheuvel  * in GF(2^128) is multiplied by x^8, the following macro returns the
9461c581a4SArd Biesheuvel  * 16-bit value that must be XOR-ed into the low-degree end of the
9561c581a4SArd Biesheuvel  * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
9661c581a4SArd Biesheuvel  *
9761c581a4SArd Biesheuvel  * There are two versions of the macro, and hence two tables: one for
9861c581a4SArd Biesheuvel  * the "be" convention where the highest-order bit is the coefficient of
9961c581a4SArd Biesheuvel  * the highest-degree polynomial term, and one for the "le" convention
10061c581a4SArd Biesheuvel  * where the highest-order bit is the coefficient of the lowest-degree
10161c581a4SArd Biesheuvel  * polynomial term.  In both cases the values are stored in CPU byte
10261c581a4SArd Biesheuvel  * endianness such that the coefficients are ordered consistently across
10361c581a4SArd Biesheuvel  * bytes, i.e. in the "be" table bits 15..0 of the stored value
10461c581a4SArd Biesheuvel  * correspond to the coefficients of x^15..x^0, and in the "le" table
10561c581a4SArd Biesheuvel  * bits 15..0 correspond to the coefficients of x^0..x^15.
10661c581a4SArd Biesheuvel  *
10761c581a4SArd Biesheuvel  * Therefore, provided that the appropriate byte endianness conversions
10861c581a4SArd Biesheuvel  * are done by the multiplication functions (and these must be in place
10961c581a4SArd Biesheuvel  * anyway to support both little endian and big endian CPUs), the "be"
11061c581a4SArd Biesheuvel  * table can be used for multiplications of both "bbe" and "ble"
11161c581a4SArd Biesheuvel  * elements, and the "le" table can be used for multiplications of both
11261c581a4SArd Biesheuvel  * "lle" and "lbe" elements.
11361c581a4SArd Biesheuvel  */
11461c581a4SArd Biesheuvel 
11561c581a4SArd Biesheuvel #define xda_be(i) ( \
11661c581a4SArd Biesheuvel 	(i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
11761c581a4SArd Biesheuvel 	(i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
11861c581a4SArd Biesheuvel 	(i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
11961c581a4SArd Biesheuvel 	(i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
12061c581a4SArd Biesheuvel )
12161c581a4SArd Biesheuvel 
12261c581a4SArd Biesheuvel #define xda_le(i) ( \
12361c581a4SArd Biesheuvel 	(i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
12461c581a4SArd Biesheuvel 	(i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
12561c581a4SArd Biesheuvel 	(i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
12661c581a4SArd Biesheuvel 	(i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
12761c581a4SArd Biesheuvel )
12861c581a4SArd Biesheuvel 
12961c581a4SArd Biesheuvel static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
13061c581a4SArd Biesheuvel static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
13161c581a4SArd Biesheuvel 
13261c581a4SArd Biesheuvel /*
13361c581a4SArd Biesheuvel  * The following functions multiply a field element by x^8 in
13461c581a4SArd Biesheuvel  * the polynomial field representation.  They use 64-bit word operations
13561c581a4SArd Biesheuvel  * to gain speed but compensate for machine endianness and hence work
13661c581a4SArd Biesheuvel  * correctly on both styles of machine.
13761c581a4SArd Biesheuvel  */
13861c581a4SArd Biesheuvel 
gf128mul_x8_lle(be128 * x)13961c581a4SArd Biesheuvel static void gf128mul_x8_lle(be128 *x)
14061c581a4SArd Biesheuvel {
14161c581a4SArd Biesheuvel 	u64 a = be64_to_cpu(x->a);
14261c581a4SArd Biesheuvel 	u64 b = be64_to_cpu(x->b);
14361c581a4SArd Biesheuvel 	u64 _tt = gf128mul_table_le[b & 0xff];
14461c581a4SArd Biesheuvel 
14561c581a4SArd Biesheuvel 	x->b = cpu_to_be64((b >> 8) | (a << 56));
14661c581a4SArd Biesheuvel 	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
14761c581a4SArd Biesheuvel }
14861c581a4SArd Biesheuvel 
149*b67ce439SArd Biesheuvel /* time invariant version of gf128mul_x8_lle */
gf128mul_x8_lle_ti(be128 * x)150*b67ce439SArd Biesheuvel static void gf128mul_x8_lle_ti(be128 *x)
151*b67ce439SArd Biesheuvel {
152*b67ce439SArd Biesheuvel 	u64 a = be64_to_cpu(x->a);
153*b67ce439SArd Biesheuvel 	u64 b = be64_to_cpu(x->b);
154*b67ce439SArd Biesheuvel 	u64 _tt = xda_le(b & 0xff); /* avoid table lookup */
155*b67ce439SArd Biesheuvel 
156*b67ce439SArd Biesheuvel 	x->b = cpu_to_be64((b >> 8) | (a << 56));
157*b67ce439SArd Biesheuvel 	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
158*b67ce439SArd Biesheuvel }
159*b67ce439SArd Biesheuvel 
gf128mul_x8_bbe(be128 * x)16061c581a4SArd Biesheuvel static void gf128mul_x8_bbe(be128 *x)
16161c581a4SArd Biesheuvel {
16261c581a4SArd Biesheuvel 	u64 a = be64_to_cpu(x->a);
16361c581a4SArd Biesheuvel 	u64 b = be64_to_cpu(x->b);
16461c581a4SArd Biesheuvel 	u64 _tt = gf128mul_table_be[a >> 56];
16561c581a4SArd Biesheuvel 
16661c581a4SArd Biesheuvel 	x->a = cpu_to_be64((a << 8) | (b >> 56));
16761c581a4SArd Biesheuvel 	x->b = cpu_to_be64((b << 8) ^ _tt);
16861c581a4SArd Biesheuvel }
16961c581a4SArd Biesheuvel 
gf128mul_x8_ble(le128 * r,const le128 * x)17061c581a4SArd Biesheuvel void gf128mul_x8_ble(le128 *r, const le128 *x)
17161c581a4SArd Biesheuvel {
17261c581a4SArd Biesheuvel 	u64 a = le64_to_cpu(x->a);
17361c581a4SArd Biesheuvel 	u64 b = le64_to_cpu(x->b);
17461c581a4SArd Biesheuvel 	u64 _tt = gf128mul_table_be[a >> 56];
17561c581a4SArd Biesheuvel 
17661c581a4SArd Biesheuvel 	r->a = cpu_to_le64((a << 8) | (b >> 56));
17761c581a4SArd Biesheuvel 	r->b = cpu_to_le64((b << 8) ^ _tt);
17861c581a4SArd Biesheuvel }
17961c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_x8_ble);
18061c581a4SArd Biesheuvel 
gf128mul_lle(be128 * r,const be128 * b)18161c581a4SArd Biesheuvel void gf128mul_lle(be128 *r, const be128 *b)
18261c581a4SArd Biesheuvel {
183*b67ce439SArd Biesheuvel 	/*
184*b67ce439SArd Biesheuvel 	 * The p array should be aligned to twice the size of its element type,
185*b67ce439SArd Biesheuvel 	 * so that every even/odd pair is guaranteed to share a cacheline
186*b67ce439SArd Biesheuvel 	 * (assuming a cacheline size of 32 bytes or more, which is by far the
187*b67ce439SArd Biesheuvel 	 * most common). This ensures that each be128_xor() call in the loop
188*b67ce439SArd Biesheuvel 	 * takes the same amount of time regardless of the value of 'ch', which
189*b67ce439SArd Biesheuvel 	 * is derived from function parameter 'b', which is commonly used as a
190*b67ce439SArd Biesheuvel 	 * key, e.g., for GHASH. The odd array elements are all set to zero,
191*b67ce439SArd Biesheuvel 	 * making each be128_xor() a NOP if its associated bit in 'ch' is not
192*b67ce439SArd Biesheuvel 	 * set, and this is equivalent to calling be128_xor() conditionally.
193*b67ce439SArd Biesheuvel 	 * This approach aims to avoid leaking information about such keys
194*b67ce439SArd Biesheuvel 	 * through execution time variances.
195*b67ce439SArd Biesheuvel 	 *
196*b67ce439SArd Biesheuvel 	 * Unfortunately, __aligned(16) or higher does not work on x86 for
197*b67ce439SArd Biesheuvel 	 * variables on the stack so we need to perform the alignment by hand.
198*b67ce439SArd Biesheuvel 	 */
199*b67ce439SArd Biesheuvel 	be128 array[16 + 3] = {};
200*b67ce439SArd Biesheuvel 	be128 *p = PTR_ALIGN(&array[0], 2 * sizeof(be128));
20161c581a4SArd Biesheuvel 	int i;
20261c581a4SArd Biesheuvel 
20361c581a4SArd Biesheuvel 	p[0] = *r;
20461c581a4SArd Biesheuvel 	for (i = 0; i < 7; ++i)
205*b67ce439SArd Biesheuvel 		gf128mul_x_lle(&p[2 * i + 2], &p[2 * i]);
20661c581a4SArd Biesheuvel 
20761c581a4SArd Biesheuvel 	memset(r, 0, sizeof(*r));
20861c581a4SArd Biesheuvel 	for (i = 0;;) {
20961c581a4SArd Biesheuvel 		u8 ch = ((u8 *)b)[15 - i];
21061c581a4SArd Biesheuvel 
211*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[ 0 + !(ch & 0x80)]);
212*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[ 2 + !(ch & 0x40)]);
213*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[ 4 + !(ch & 0x20)]);
214*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[ 6 + !(ch & 0x10)]);
215*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[ 8 + !(ch & 0x08)]);
216*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[10 + !(ch & 0x04)]);
217*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[12 + !(ch & 0x02)]);
218*b67ce439SArd Biesheuvel 		be128_xor(r, r, &p[14 + !(ch & 0x01)]);
21961c581a4SArd Biesheuvel 
22061c581a4SArd Biesheuvel 		if (++i >= 16)
22161c581a4SArd Biesheuvel 			break;
22261c581a4SArd Biesheuvel 
223*b67ce439SArd Biesheuvel 		gf128mul_x8_lle_ti(r); /* use the time invariant version */
22461c581a4SArd Biesheuvel 	}
22561c581a4SArd Biesheuvel }
22661c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_lle);
22761c581a4SArd Biesheuvel 
gf128mul_bbe(be128 * r,const be128 * b)22861c581a4SArd Biesheuvel void gf128mul_bbe(be128 *r, const be128 *b)
22961c581a4SArd Biesheuvel {
23061c581a4SArd Biesheuvel 	be128 p[8];
23161c581a4SArd Biesheuvel 	int i;
23261c581a4SArd Biesheuvel 
23361c581a4SArd Biesheuvel 	p[0] = *r;
23461c581a4SArd Biesheuvel 	for (i = 0; i < 7; ++i)
23561c581a4SArd Biesheuvel 		gf128mul_x_bbe(&p[i + 1], &p[i]);
23661c581a4SArd Biesheuvel 
23761c581a4SArd Biesheuvel 	memset(r, 0, sizeof(*r));
23861c581a4SArd Biesheuvel 	for (i = 0;;) {
23961c581a4SArd Biesheuvel 		u8 ch = ((u8 *)b)[i];
24061c581a4SArd Biesheuvel 
24161c581a4SArd Biesheuvel 		if (ch & 0x80)
24261c581a4SArd Biesheuvel 			be128_xor(r, r, &p[7]);
24361c581a4SArd Biesheuvel 		if (ch & 0x40)
24461c581a4SArd Biesheuvel 			be128_xor(r, r, &p[6]);
24561c581a4SArd Biesheuvel 		if (ch & 0x20)
24661c581a4SArd Biesheuvel 			be128_xor(r, r, &p[5]);
24761c581a4SArd Biesheuvel 		if (ch & 0x10)
24861c581a4SArd Biesheuvel 			be128_xor(r, r, &p[4]);
24961c581a4SArd Biesheuvel 		if (ch & 0x08)
25061c581a4SArd Biesheuvel 			be128_xor(r, r, &p[3]);
25161c581a4SArd Biesheuvel 		if (ch & 0x04)
25261c581a4SArd Biesheuvel 			be128_xor(r, r, &p[2]);
25361c581a4SArd Biesheuvel 		if (ch & 0x02)
25461c581a4SArd Biesheuvel 			be128_xor(r, r, &p[1]);
25561c581a4SArd Biesheuvel 		if (ch & 0x01)
25661c581a4SArd Biesheuvel 			be128_xor(r, r, &p[0]);
25761c581a4SArd Biesheuvel 
25861c581a4SArd Biesheuvel 		if (++i >= 16)
25961c581a4SArd Biesheuvel 			break;
26061c581a4SArd Biesheuvel 
26161c581a4SArd Biesheuvel 		gf128mul_x8_bbe(r);
26261c581a4SArd Biesheuvel 	}
26361c581a4SArd Biesheuvel }
26461c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_bbe);
26561c581a4SArd Biesheuvel 
26661c581a4SArd Biesheuvel /*      This version uses 64k bytes of table space.
26761c581a4SArd Biesheuvel     A 16 byte buffer has to be multiplied by a 16 byte key
26861c581a4SArd Biesheuvel     value in GF(2^128).  If we consider a GF(2^128) value in
26961c581a4SArd Biesheuvel     the buffer's lowest byte, we can construct a table of
27061c581a4SArd Biesheuvel     the 256 16 byte values that result from the 256 values
27161c581a4SArd Biesheuvel     of this byte.  This requires 4096 bytes. But we also
27261c581a4SArd Biesheuvel     need tables for each of the 16 higher bytes in the
27361c581a4SArd Biesheuvel     buffer as well, which makes 64 kbytes in total.
27461c581a4SArd Biesheuvel */
27561c581a4SArd Biesheuvel /* additional explanation
27661c581a4SArd Biesheuvel  * t[0][BYTE] contains g*BYTE
27761c581a4SArd Biesheuvel  * t[1][BYTE] contains g*x^8*BYTE
27861c581a4SArd Biesheuvel  *  ..
27961c581a4SArd Biesheuvel  * t[15][BYTE] contains g*x^120*BYTE */
gf128mul_init_64k_bbe(const be128 * g)28061c581a4SArd Biesheuvel struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
28161c581a4SArd Biesheuvel {
28261c581a4SArd Biesheuvel 	struct gf128mul_64k *t;
28361c581a4SArd Biesheuvel 	int i, j, k;
28461c581a4SArd Biesheuvel 
28561c581a4SArd Biesheuvel 	t = kzalloc(sizeof(*t), GFP_KERNEL);
28661c581a4SArd Biesheuvel 	if (!t)
28761c581a4SArd Biesheuvel 		goto out;
28861c581a4SArd Biesheuvel 
28961c581a4SArd Biesheuvel 	for (i = 0; i < 16; i++) {
29061c581a4SArd Biesheuvel 		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
29161c581a4SArd Biesheuvel 		if (!t->t[i]) {
29261c581a4SArd Biesheuvel 			gf128mul_free_64k(t);
29361c581a4SArd Biesheuvel 			t = NULL;
29461c581a4SArd Biesheuvel 			goto out;
29561c581a4SArd Biesheuvel 		}
29661c581a4SArd Biesheuvel 	}
29761c581a4SArd Biesheuvel 
29861c581a4SArd Biesheuvel 	t->t[0]->t[1] = *g;
29961c581a4SArd Biesheuvel 	for (j = 1; j <= 64; j <<= 1)
30061c581a4SArd Biesheuvel 		gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
30161c581a4SArd Biesheuvel 
30261c581a4SArd Biesheuvel 	for (i = 0;;) {
30361c581a4SArd Biesheuvel 		for (j = 2; j < 256; j += j)
30461c581a4SArd Biesheuvel 			for (k = 1; k < j; ++k)
30561c581a4SArd Biesheuvel 				be128_xor(&t->t[i]->t[j + k],
30661c581a4SArd Biesheuvel 					  &t->t[i]->t[j], &t->t[i]->t[k]);
30761c581a4SArd Biesheuvel 
30861c581a4SArd Biesheuvel 		if (++i >= 16)
30961c581a4SArd Biesheuvel 			break;
31061c581a4SArd Biesheuvel 
31161c581a4SArd Biesheuvel 		for (j = 128; j > 0; j >>= 1) {
31261c581a4SArd Biesheuvel 			t->t[i]->t[j] = t->t[i - 1]->t[j];
31361c581a4SArd Biesheuvel 			gf128mul_x8_bbe(&t->t[i]->t[j]);
31461c581a4SArd Biesheuvel 		}
31561c581a4SArd Biesheuvel 	}
31661c581a4SArd Biesheuvel 
31761c581a4SArd Biesheuvel out:
31861c581a4SArd Biesheuvel 	return t;
31961c581a4SArd Biesheuvel }
32061c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_init_64k_bbe);
32161c581a4SArd Biesheuvel 
gf128mul_free_64k(struct gf128mul_64k * t)32261c581a4SArd Biesheuvel void gf128mul_free_64k(struct gf128mul_64k *t)
32361c581a4SArd Biesheuvel {
32461c581a4SArd Biesheuvel 	int i;
32561c581a4SArd Biesheuvel 
32661c581a4SArd Biesheuvel 	for (i = 0; i < 16; i++)
32761c581a4SArd Biesheuvel 		kfree_sensitive(t->t[i]);
32861c581a4SArd Biesheuvel 	kfree_sensitive(t);
32961c581a4SArd Biesheuvel }
33061c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_free_64k);
33161c581a4SArd Biesheuvel 
gf128mul_64k_bbe(be128 * a,const struct gf128mul_64k * t)33261c581a4SArd Biesheuvel void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
33361c581a4SArd Biesheuvel {
33461c581a4SArd Biesheuvel 	u8 *ap = (u8 *)a;
33561c581a4SArd Biesheuvel 	be128 r[1];
33661c581a4SArd Biesheuvel 	int i;
33761c581a4SArd Biesheuvel 
33861c581a4SArd Biesheuvel 	*r = t->t[0]->t[ap[15]];
33961c581a4SArd Biesheuvel 	for (i = 1; i < 16; ++i)
34061c581a4SArd Biesheuvel 		be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
34161c581a4SArd Biesheuvel 	*a = *r;
34261c581a4SArd Biesheuvel }
34361c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_64k_bbe);
34461c581a4SArd Biesheuvel 
34561c581a4SArd Biesheuvel /*      This version uses 4k bytes of table space.
34661c581a4SArd Biesheuvel     A 16 byte buffer has to be multiplied by a 16 byte key
34761c581a4SArd Biesheuvel     value in GF(2^128).  If we consider a GF(2^128) value in a
34861c581a4SArd Biesheuvel     single byte, we can construct a table of the 256 16 byte
34961c581a4SArd Biesheuvel     values that result from the 256 values of this byte.
35061c581a4SArd Biesheuvel     This requires 4096 bytes. If we take the highest byte in
35161c581a4SArd Biesheuvel     the buffer and use this table to get the result, we then
35261c581a4SArd Biesheuvel     have to multiply by x^120 to get the final value. For the
35361c581a4SArd Biesheuvel     next highest byte the result has to be multiplied by x^112
35461c581a4SArd Biesheuvel     and so on. But we can do this by accumulating the result
35561c581a4SArd Biesheuvel     in an accumulator starting with the result for the top
35661c581a4SArd Biesheuvel     byte.  We repeatedly multiply the accumulator value by
35761c581a4SArd Biesheuvel     x^8 and then add in (i.e. xor) the 16 bytes of the next
35861c581a4SArd Biesheuvel     lower byte in the buffer, stopping when we reach the
35961c581a4SArd Biesheuvel     lowest byte. This requires a 4096 byte table.
36061c581a4SArd Biesheuvel */
gf128mul_init_4k_lle(const be128 * g)36161c581a4SArd Biesheuvel struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
36261c581a4SArd Biesheuvel {
36361c581a4SArd Biesheuvel 	struct gf128mul_4k *t;
36461c581a4SArd Biesheuvel 	int j, k;
36561c581a4SArd Biesheuvel 
36661c581a4SArd Biesheuvel 	t = kzalloc(sizeof(*t), GFP_KERNEL);
36761c581a4SArd Biesheuvel 	if (!t)
36861c581a4SArd Biesheuvel 		goto out;
36961c581a4SArd Biesheuvel 
37061c581a4SArd Biesheuvel 	t->t[128] = *g;
37161c581a4SArd Biesheuvel 	for (j = 64; j > 0; j >>= 1)
37261c581a4SArd Biesheuvel 		gf128mul_x_lle(&t->t[j], &t->t[j+j]);
37361c581a4SArd Biesheuvel 
37461c581a4SArd Biesheuvel 	for (j = 2; j < 256; j += j)
37561c581a4SArd Biesheuvel 		for (k = 1; k < j; ++k)
37661c581a4SArd Biesheuvel 			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
37761c581a4SArd Biesheuvel 
37861c581a4SArd Biesheuvel out:
37961c581a4SArd Biesheuvel 	return t;
38061c581a4SArd Biesheuvel }
38161c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_init_4k_lle);
38261c581a4SArd Biesheuvel 
gf128mul_init_4k_bbe(const be128 * g)38361c581a4SArd Biesheuvel struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
38461c581a4SArd Biesheuvel {
38561c581a4SArd Biesheuvel 	struct gf128mul_4k *t;
38661c581a4SArd Biesheuvel 	int j, k;
38761c581a4SArd Biesheuvel 
38861c581a4SArd Biesheuvel 	t = kzalloc(sizeof(*t), GFP_KERNEL);
38961c581a4SArd Biesheuvel 	if (!t)
39061c581a4SArd Biesheuvel 		goto out;
39161c581a4SArd Biesheuvel 
39261c581a4SArd Biesheuvel 	t->t[1] = *g;
39361c581a4SArd Biesheuvel 	for (j = 1; j <= 64; j <<= 1)
39461c581a4SArd Biesheuvel 		gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
39561c581a4SArd Biesheuvel 
39661c581a4SArd Biesheuvel 	for (j = 2; j < 256; j += j)
39761c581a4SArd Biesheuvel 		for (k = 1; k < j; ++k)
39861c581a4SArd Biesheuvel 			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
39961c581a4SArd Biesheuvel 
40061c581a4SArd Biesheuvel out:
40161c581a4SArd Biesheuvel 	return t;
40261c581a4SArd Biesheuvel }
40361c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_init_4k_bbe);
40461c581a4SArd Biesheuvel 
gf128mul_4k_lle(be128 * a,const struct gf128mul_4k * t)40561c581a4SArd Biesheuvel void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
40661c581a4SArd Biesheuvel {
40761c581a4SArd Biesheuvel 	u8 *ap = (u8 *)a;
40861c581a4SArd Biesheuvel 	be128 r[1];
40961c581a4SArd Biesheuvel 	int i = 15;
41061c581a4SArd Biesheuvel 
41161c581a4SArd Biesheuvel 	*r = t->t[ap[15]];
41261c581a4SArd Biesheuvel 	while (i--) {
41361c581a4SArd Biesheuvel 		gf128mul_x8_lle(r);
41461c581a4SArd Biesheuvel 		be128_xor(r, r, &t->t[ap[i]]);
41561c581a4SArd Biesheuvel 	}
41661c581a4SArd Biesheuvel 	*a = *r;
41761c581a4SArd Biesheuvel }
41861c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_4k_lle);
41961c581a4SArd Biesheuvel 
gf128mul_4k_bbe(be128 * a,const struct gf128mul_4k * t)42061c581a4SArd Biesheuvel void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
42161c581a4SArd Biesheuvel {
42261c581a4SArd Biesheuvel 	u8 *ap = (u8 *)a;
42361c581a4SArd Biesheuvel 	be128 r[1];
42461c581a4SArd Biesheuvel 	int i = 0;
42561c581a4SArd Biesheuvel 
42661c581a4SArd Biesheuvel 	*r = t->t[ap[0]];
42761c581a4SArd Biesheuvel 	while (++i < 16) {
42861c581a4SArd Biesheuvel 		gf128mul_x8_bbe(r);
42961c581a4SArd Biesheuvel 		be128_xor(r, r, &t->t[ap[i]]);
43061c581a4SArd Biesheuvel 	}
43161c581a4SArd Biesheuvel 	*a = *r;
43261c581a4SArd Biesheuvel }
43361c581a4SArd Biesheuvel EXPORT_SYMBOL(gf128mul_4k_bbe);
43461c581a4SArd Biesheuvel 
43561c581a4SArd Biesheuvel MODULE_LICENSE("GPL");
43661c581a4SArd Biesheuvel MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
437