xref: /openbmc/linux/include/crypto/gf128mul.h (revision 453431a5)
1c494e070SRik Snel /* gf128mul.h - GF(2^128) multiplication functions
2c494e070SRik Snel  *
3c494e070SRik Snel  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
4c494e070SRik Snel  * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org>
5c494e070SRik Snel  *
6c494e070SRik Snel  * Based on Dr Brian Gladman's (GPL'd) work published at
7c494e070SRik Snel  * http://fp.gladman.plus.com/cryptography_technology/index.htm
8c494e070SRik Snel  * See the original copyright notice below.
9c494e070SRik Snel  *
10c494e070SRik Snel  * This program is free software; you can redistribute it and/or modify it
11c494e070SRik Snel  * under the terms of the GNU General Public License as published by the Free
12c494e070SRik Snel  * Software Foundation; either version 2 of the License, or (at your option)
13c494e070SRik Snel  * any later version.
14c494e070SRik Snel  */
15c494e070SRik Snel /*
16c494e070SRik Snel  ---------------------------------------------------------------------------
17c494e070SRik Snel  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
18c494e070SRik Snel 
19c494e070SRik Snel  LICENSE TERMS
20c494e070SRik Snel 
21c494e070SRik Snel  The free distribution and use of this software in both source and binary
22c494e070SRik Snel  form is allowed (with or without changes) provided that:
23c494e070SRik Snel 
24c494e070SRik Snel    1. distributions of this source code include the above copyright
25c494e070SRik Snel       notice, this list of conditions and the following disclaimer;
26c494e070SRik Snel 
27c494e070SRik Snel    2. distributions in binary form include the above copyright
28c494e070SRik Snel       notice, this list of conditions and the following disclaimer
29c494e070SRik Snel       in the documentation and/or other associated materials;
30c494e070SRik Snel 
31c494e070SRik Snel    3. the copyright holder's name is not used to endorse products
32c494e070SRik Snel       built using this software without specific written permission.
33c494e070SRik Snel 
34c494e070SRik Snel  ALTERNATIVELY, provided that this notice is retained in full, this product
35c494e070SRik Snel  may be distributed under the terms of the GNU General Public License (GPL),
36c494e070SRik Snel  in which case the provisions of the GPL apply INSTEAD OF those given above.
37c494e070SRik Snel 
38c494e070SRik Snel  DISCLAIMER
39c494e070SRik Snel 
40c494e070SRik Snel  This software is provided 'as is' with no explicit or implied warranties
41c494e070SRik Snel  in respect of its properties, including, but not limited to, correctness
42c494e070SRik Snel  and/or fitness for purpose.
43c494e070SRik Snel  ---------------------------------------------------------------------------
44c494e070SRik Snel  Issue Date: 31/01/2006
45c494e070SRik Snel 
4663be5b53SEric Biggers  An implementation of field multiplication in Galois Field GF(2^128)
47c494e070SRik Snel */
48c494e070SRik Snel 
49c494e070SRik Snel #ifndef _CRYPTO_GF128MUL_H
50c494e070SRik Snel #define _CRYPTO_GF128MUL_H
51c494e070SRik Snel 
52acb9b159SOndrej Mosnáček #include <asm/byteorder.h>
53c494e070SRik Snel #include <crypto/b128ops.h>
54c494e070SRik Snel #include <linux/slab.h>
55c494e070SRik Snel 
56c494e070SRik Snel /* Comment by Rik:
57c494e070SRik Snel  *
58631dd1a8SJustin P. Mattock  * For some background on GF(2^128) see for example:
59631dd1a8SJustin P. Mattock  * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf
60c494e070SRik Snel  *
61c494e070SRik Snel  * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can
62c494e070SRik Snel  * be mapped to computer memory in a variety of ways. Let's examine
63c494e070SRik Snel  * three common cases.
64c494e070SRik Snel  *
65c494e070SRik Snel  * Take a look at the 16 binary octets below in memory order. The msb's
66c494e070SRik Snel  * are left and the lsb's are right. char b[16] is an array and b[0] is
67c494e070SRik Snel  * the first octet.
68c494e070SRik Snel  *
6963be5b53SEric Biggers  * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
70c494e070SRik Snel  *   b[0]     b[1]     b[2]     b[3]          b[13]    b[14]    b[15]
71c494e070SRik Snel  *
72c494e070SRik Snel  * Every bit is a coefficient of some power of X. We can store the bits
73c494e070SRik Snel  * in every byte in little-endian order and the bytes themselves also in
74c494e070SRik Snel  * little endian order. I will call this lle (little-little-endian).
75c494e070SRik Snel  * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks
76c494e070SRik Snel  * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }.
77c494e070SRik Snel  * This format was originally implemented in gf128mul and is used
78c494e070SRik Snel  * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length).
79c494e070SRik Snel  *
80c494e070SRik Snel  * Another convention says: store the bits in bigendian order and the
81c494e070SRik Snel  * bytes also. This is bbe (big-big-endian). Now the buffer above
82c494e070SRik Snel  * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111,
83c494e070SRik Snel  * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe
84c494e070SRik Snel  * is partly implemented.
85c494e070SRik Snel  *
86c494e070SRik Snel  * Both of the above formats are easy to implement on big-endian
87c494e070SRik Snel  * machines.
88c494e070SRik Snel  *
8963be5b53SEric Biggers  * XTS and EME (the latter of which is patent encumbered) use the ble
9063be5b53SEric Biggers  * format (bits are stored in big endian order and the bytes in little
9163be5b53SEric Biggers  * endian). The above buffer represents X^7 in this case and the
9263be5b53SEric Biggers  * primitive polynomial is b[0] = 0x87.
93c494e070SRik Snel  *
94c494e070SRik Snel  * The common machine word-size is smaller than 128 bits, so to make
95c494e070SRik Snel  * an efficient implementation we must split into machine word sizes.
9663be5b53SEric Biggers  * This implementation uses 64-bit words for the moment. Machine
9763be5b53SEric Biggers  * endianness comes into play. The lle format in relation to machine
9863be5b53SEric Biggers  * endianness is discussed below by the original author of gf128mul Dr
9963be5b53SEric Biggers  * Brian Gladman.
100c494e070SRik Snel  *
101c494e070SRik Snel  * Let's look at the bbe and ble format on a little endian machine.
102c494e070SRik Snel  *
103c494e070SRik Snel  * bbe on a little endian machine u32 x[4]:
104c494e070SRik Snel  *
105c494e070SRik Snel  *  MS            x[0]           LS  MS            x[1]		  LS
106c494e070SRik Snel  *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
107c494e070SRik Snel  *  103..96 111.104 119.112 127.120  71...64 79...72 87...80 95...88
108c494e070SRik Snel  *
109c494e070SRik Snel  *  MS            x[2]           LS  MS            x[3]		  LS
110c494e070SRik Snel  *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
111c494e070SRik Snel  *  39...32 47...40 55...48 63...56  07...00 15...08 23...16 31...24
112c494e070SRik Snel  *
113c494e070SRik Snel  * ble on a little endian machine
114c494e070SRik Snel  *
115c494e070SRik Snel  *  MS            x[0]           LS  MS            x[1]		  LS
116c494e070SRik Snel  *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
117c494e070SRik Snel  *  31...24 23...16 15...08 07...00  63...56 55...48 47...40 39...32
118c494e070SRik Snel  *
119c494e070SRik Snel  *  MS            x[2]           LS  MS            x[3]		  LS
120c494e070SRik Snel  *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
121c494e070SRik Snel  *  95...88 87...80 79...72 71...64  127.120 199.112 111.104 103..96
122c494e070SRik Snel  *
123c494e070SRik Snel  * Multiplications in GF(2^128) are mostly bit-shifts, so you see why
124c494e070SRik Snel  * ble (and lbe also) are easier to implement on a little-endian
125c494e070SRik Snel  * machine than on a big-endian machine. The converse holds for bbe
126c494e070SRik Snel  * and lle.
127c494e070SRik Snel  *
128c494e070SRik Snel  * Note: to have good alignment, it seems to me that it is sufficient
129c494e070SRik Snel  * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize
130c494e070SRik Snel  * machines this will automatically aligned to wordsize and on a 64-bit
131c494e070SRik Snel  * machine also.
132c494e070SRik Snel  */
13363be5b53SEric Biggers /*	Multiply a GF(2^128) field element by x. Field elements are
13463be5b53SEric Biggers     held in arrays of bytes in which field bits 8n..8n + 7 are held in
13563be5b53SEric Biggers     byte[n], with lower indexed bits placed in the more numerically
13663be5b53SEric Biggers     significant bit positions within bytes.
137c494e070SRik Snel 
138c494e070SRik Snel     On little endian machines the bit indexes translate into the bit
139c494e070SRik Snel     positions within four 32-bit words in the following way
140c494e070SRik Snel 
141c494e070SRik Snel     MS            x[0]           LS  MS            x[1]		  LS
142c494e070SRik Snel     ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
143c494e070SRik Snel     24...31 16...23 08...15 00...07  56...63 48...55 40...47 32...39
144c494e070SRik Snel 
145c494e070SRik Snel     MS            x[2]           LS  MS            x[3]		  LS
146c494e070SRik Snel     ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
147c494e070SRik Snel     88...95 80...87 72...79 64...71  120.127 112.119 104.111 96..103
148c494e070SRik Snel 
149c494e070SRik Snel     On big endian machines the bit indexes translate into the bit
150c494e070SRik Snel     positions within four 32-bit words in the following way
151c494e070SRik Snel 
152c494e070SRik Snel     MS            x[0]           LS  MS            x[1]		  LS
153c494e070SRik Snel     ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
154c494e070SRik Snel     00...07 08...15 16...23 24...31  32...39 40...47 48...55 56...63
155c494e070SRik Snel 
156c494e070SRik Snel     MS            x[2]           LS  MS            x[3]		  LS
157c494e070SRik Snel     ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
158c494e070SRik Snel     64...71 72...79 80...87 88...95  96..103 104.111 112.119 120.127
159c494e070SRik Snel */
160c494e070SRik Snel 
161c494e070SRik Snel /*	A slow generic version of gf_mul, implemented for lle and bbe
162c494e070SRik Snel  * 	It multiplies a and b and puts the result in a */
163c494e070SRik Snel void gf128mul_lle(be128 *a, const be128 *b);
164c494e070SRik Snel 
165c494e070SRik Snel void gf128mul_bbe(be128 *a, const be128 *b);
166c494e070SRik Snel 
167acb9b159SOndrej Mosnáček /*
168acb9b159SOndrej Mosnáček  * The following functions multiply a field element by x in
169acb9b159SOndrej Mosnáček  * the polynomial field representation.  They use 64-bit word operations
170acb9b159SOndrej Mosnáček  * to gain speed but compensate for machine endianness and hence work
171acb9b159SOndrej Mosnáček  * correctly on both styles of machine.
172acb9b159SOndrej Mosnáček  *
173acb9b159SOndrej Mosnáček  * They are defined here for performance.
174acb9b159SOndrej Mosnáček  */
175acb9b159SOndrej Mosnáček 
gf128mul_mask_from_bit(u64 x,int which)176acb9b159SOndrej Mosnáček static inline u64 gf128mul_mask_from_bit(u64 x, int which)
177acb9b159SOndrej Mosnáček {
178acb9b159SOndrej Mosnáček 	/* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */
179acb9b159SOndrej Mosnáček 	return ((s64)(x << (63 - which)) >> 63);
180acb9b159SOndrej Mosnáček }
181acb9b159SOndrej Mosnáček 
gf128mul_x_lle(be128 * r,const be128 * x)182acb9b159SOndrej Mosnáček static inline void gf128mul_x_lle(be128 *r, const be128 *x)
183acb9b159SOndrej Mosnáček {
184acb9b159SOndrej Mosnáček 	u64 a = be64_to_cpu(x->a);
185acb9b159SOndrej Mosnáček 	u64 b = be64_to_cpu(x->b);
186acb9b159SOndrej Mosnáček 
187acb9b159SOndrej Mosnáček 	/* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48
188acb9b159SOndrej Mosnáček 	 * (see crypto/gf128mul.c): */
189acb9b159SOndrej Mosnáček 	u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56);
190acb9b159SOndrej Mosnáček 
191acb9b159SOndrej Mosnáček 	r->b = cpu_to_be64((b >> 1) | (a << 63));
192acb9b159SOndrej Mosnáček 	r->a = cpu_to_be64((a >> 1) ^ _tt);
193acb9b159SOndrej Mosnáček }
194acb9b159SOndrej Mosnáček 
gf128mul_x_bbe(be128 * r,const be128 * x)195acb9b159SOndrej Mosnáček static inline void gf128mul_x_bbe(be128 *r, const be128 *x)
196acb9b159SOndrej Mosnáček {
197acb9b159SOndrej Mosnáček 	u64 a = be64_to_cpu(x->a);
198acb9b159SOndrej Mosnáček 	u64 b = be64_to_cpu(x->b);
199acb9b159SOndrej Mosnáček 
200acb9b159SOndrej Mosnáček 	/* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */
201acb9b159SOndrej Mosnáček 	u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87;
202acb9b159SOndrej Mosnáček 
203acb9b159SOndrej Mosnáček 	r->a = cpu_to_be64((a << 1) | (b >> 63));
204acb9b159SOndrej Mosnáček 	r->b = cpu_to_be64((b << 1) ^ _tt);
205acb9b159SOndrej Mosnáček }
206acb9b159SOndrej Mosnáček 
207acb9b159SOndrej Mosnáček /* needed by XTS */
gf128mul_x_ble(le128 * r,const le128 * x)208e55318c8SOndrej Mosnáček static inline void gf128mul_x_ble(le128 *r, const le128 *x)
209acb9b159SOndrej Mosnáček {
210acb9b159SOndrej Mosnáček 	u64 a = le64_to_cpu(x->a);
211acb9b159SOndrej Mosnáček 	u64 b = le64_to_cpu(x->b);
212acb9b159SOndrej Mosnáček 
213acb9b159SOndrej Mosnáček 	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
214e55318c8SOndrej Mosnáček 	u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87;
215acb9b159SOndrej Mosnáček 
216e55318c8SOndrej Mosnáček 	r->a = cpu_to_le64((a << 1) | (b >> 63));
217e55318c8SOndrej Mosnáček 	r->b = cpu_to_le64((b << 1) ^ _tt);
218acb9b159SOndrej Mosnáček }
219c494e070SRik Snel 
220c494e070SRik Snel /* 4k table optimization */
221c494e070SRik Snel 
222c494e070SRik Snel struct gf128mul_4k {
223c494e070SRik Snel 	be128 t[256];
224c494e070SRik Snel };
225c494e070SRik Snel 
226c494e070SRik Snel struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g);
227c494e070SRik Snel struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g);
2283ea996ddSEric Biggers void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t);
2293ea996ddSEric Biggers void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t);
230acfc5878SHarsh Jain void gf128mul_x8_ble(le128 *r, const le128 *x);
gf128mul_free_4k(struct gf128mul_4k * t)231c494e070SRik Snel static inline void gf128mul_free_4k(struct gf128mul_4k *t)
232c494e070SRik Snel {
233453431a5SWaiman Long 	kfree_sensitive(t);
234c494e070SRik Snel }
235c494e070SRik Snel 
236c494e070SRik Snel 
237d266f44bSAlex Cope /* 64k table optimization, implemented for bbe */
238c494e070SRik Snel 
239c494e070SRik Snel struct gf128mul_64k {
240c494e070SRik Snel 	struct gf128mul_4k *t[16];
241c494e070SRik Snel };
242c494e070SRik Snel 
243d266f44bSAlex Cope /* First initialize with the constant factor with which you
244d266f44bSAlex Cope  * want to multiply and then call gf128mul_64k_bbe with the other
245d266f44bSAlex Cope  * factor in the first argument, and the table in the second.
246d266f44bSAlex Cope  * Afterwards, the result is stored in *a.
247d266f44bSAlex Cope  */
248c494e070SRik Snel struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
249c494e070SRik Snel void gf128mul_free_64k(struct gf128mul_64k *t);
2503ea996ddSEric Biggers void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t);
251c494e070SRik Snel 
252c494e070SRik Snel #endif /* _CRYPTO_GF128MUL_H */
253