1c494e070SRik Snel /* gf128mul.h - GF(2^128) multiplication functions 2c494e070SRik Snel * 3c494e070SRik Snel * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. 4c494e070SRik Snel * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> 5c494e070SRik Snel * 6c494e070SRik Snel * Based on Dr Brian Gladman's (GPL'd) work published at 7c494e070SRik Snel * http://fp.gladman.plus.com/cryptography_technology/index.htm 8c494e070SRik Snel * See the original copyright notice below. 9c494e070SRik Snel * 10c494e070SRik Snel * This program is free software; you can redistribute it and/or modify it 11c494e070SRik Snel * under the terms of the GNU General Public License as published by the Free 12c494e070SRik Snel * Software Foundation; either version 2 of the License, or (at your option) 13c494e070SRik Snel * any later version. 14c494e070SRik Snel */ 15c494e070SRik Snel /* 16c494e070SRik Snel --------------------------------------------------------------------------- 17c494e070SRik Snel Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. 18c494e070SRik Snel 19c494e070SRik Snel LICENSE TERMS 20c494e070SRik Snel 21c494e070SRik Snel The free distribution and use of this software in both source and binary 22c494e070SRik Snel form is allowed (with or without changes) provided that: 23c494e070SRik Snel 24c494e070SRik Snel 1. distributions of this source code include the above copyright 25c494e070SRik Snel notice, this list of conditions and the following disclaimer; 26c494e070SRik Snel 27c494e070SRik Snel 2. distributions in binary form include the above copyright 28c494e070SRik Snel notice, this list of conditions and the following disclaimer 29c494e070SRik Snel in the documentation and/or other associated materials; 30c494e070SRik Snel 31c494e070SRik Snel 3. the copyright holder's name is not used to endorse products 32c494e070SRik Snel built using this software without specific written permission. 33c494e070SRik Snel 34c494e070SRik Snel ALTERNATIVELY, provided that this notice is retained in full, this product 35c494e070SRik Snel may be distributed under the terms of the GNU General Public License (GPL), 36c494e070SRik Snel in which case the provisions of the GPL apply INSTEAD OF those given above. 37c494e070SRik Snel 38c494e070SRik Snel DISCLAIMER 39c494e070SRik Snel 40c494e070SRik Snel This software is provided 'as is' with no explicit or implied warranties 41c494e070SRik Snel in respect of its properties, including, but not limited to, correctness 42c494e070SRik Snel and/or fitness for purpose. 43c494e070SRik Snel --------------------------------------------------------------------------- 44c494e070SRik Snel Issue Date: 31/01/2006 45c494e070SRik Snel 4663be5b53SEric Biggers An implementation of field multiplication in Galois Field GF(2^128) 47c494e070SRik Snel */ 48c494e070SRik Snel 49c494e070SRik Snel #ifndef _CRYPTO_GF128MUL_H 50c494e070SRik Snel #define _CRYPTO_GF128MUL_H 51c494e070SRik Snel 52acb9b159SOndrej Mosnáček #include <asm/byteorder.h> 53c494e070SRik Snel #include <crypto/b128ops.h> 54c494e070SRik Snel #include <linux/slab.h> 55c494e070SRik Snel 56c494e070SRik Snel /* Comment by Rik: 57c494e070SRik Snel * 58631dd1a8SJustin P. Mattock * For some background on GF(2^128) see for example: 59631dd1a8SJustin P. Mattock * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf 60c494e070SRik Snel * 61c494e070SRik Snel * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can 62c494e070SRik Snel * be mapped to computer memory in a variety of ways. Let's examine 63c494e070SRik Snel * three common cases. 64c494e070SRik Snel * 65c494e070SRik Snel * Take a look at the 16 binary octets below in memory order. The msb's 66c494e070SRik Snel * are left and the lsb's are right. char b[16] is an array and b[0] is 67c494e070SRik Snel * the first octet. 68c494e070SRik Snel * 6963be5b53SEric Biggers * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 70c494e070SRik Snel * b[0] b[1] b[2] b[3] b[13] b[14] b[15] 71c494e070SRik Snel * 72c494e070SRik Snel * Every bit is a coefficient of some power of X. We can store the bits 73c494e070SRik Snel * in every byte in little-endian order and the bytes themselves also in 74c494e070SRik Snel * little endian order. I will call this lle (little-little-endian). 75c494e070SRik Snel * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks 76c494e070SRik Snel * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. 77c494e070SRik Snel * This format was originally implemented in gf128mul and is used 78c494e070SRik Snel * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). 79c494e070SRik Snel * 80c494e070SRik Snel * Another convention says: store the bits in bigendian order and the 81c494e070SRik Snel * bytes also. This is bbe (big-big-endian). Now the buffer above 82c494e070SRik Snel * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, 83c494e070SRik Snel * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe 84c494e070SRik Snel * is partly implemented. 85c494e070SRik Snel * 86c494e070SRik Snel * Both of the above formats are easy to implement on big-endian 87c494e070SRik Snel * machines. 88c494e070SRik Snel * 8963be5b53SEric Biggers * XTS and EME (the latter of which is patent encumbered) use the ble 9063be5b53SEric Biggers * format (bits are stored in big endian order and the bytes in little 9163be5b53SEric Biggers * endian). The above buffer represents X^7 in this case and the 9263be5b53SEric Biggers * primitive polynomial is b[0] = 0x87. 93c494e070SRik Snel * 94c494e070SRik Snel * The common machine word-size is smaller than 128 bits, so to make 95c494e070SRik Snel * an efficient implementation we must split into machine word sizes. 9663be5b53SEric Biggers * This implementation uses 64-bit words for the moment. Machine 9763be5b53SEric Biggers * endianness comes into play. The lle format in relation to machine 9863be5b53SEric Biggers * endianness is discussed below by the original author of gf128mul Dr 9963be5b53SEric Biggers * Brian Gladman. 100c494e070SRik Snel * 101c494e070SRik Snel * Let's look at the bbe and ble format on a little endian machine. 102c494e070SRik Snel * 103c494e070SRik Snel * bbe on a little endian machine u32 x[4]: 104c494e070SRik Snel * 105c494e070SRik Snel * MS x[0] LS MS x[1] LS 106c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 107c494e070SRik Snel * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 108c494e070SRik Snel * 109c494e070SRik Snel * MS x[2] LS MS x[3] LS 110c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 111c494e070SRik Snel * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 112c494e070SRik Snel * 113c494e070SRik Snel * ble on a little endian machine 114c494e070SRik Snel * 115c494e070SRik Snel * MS x[0] LS MS x[1] LS 116c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 117c494e070SRik Snel * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 118c494e070SRik Snel * 119c494e070SRik Snel * MS x[2] LS MS x[3] LS 120c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 121c494e070SRik Snel * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 122c494e070SRik Snel * 123c494e070SRik Snel * Multiplications in GF(2^128) are mostly bit-shifts, so you see why 124c494e070SRik Snel * ble (and lbe also) are easier to implement on a little-endian 125c494e070SRik Snel * machine than on a big-endian machine. The converse holds for bbe 126c494e070SRik Snel * and lle. 127c494e070SRik Snel * 128c494e070SRik Snel * Note: to have good alignment, it seems to me that it is sufficient 129c494e070SRik Snel * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize 130c494e070SRik Snel * machines this will automatically aligned to wordsize and on a 64-bit 131c494e070SRik Snel * machine also. 132c494e070SRik Snel */ 13363be5b53SEric Biggers /* Multiply a GF(2^128) field element by x. Field elements are 13463be5b53SEric Biggers held in arrays of bytes in which field bits 8n..8n + 7 are held in 13563be5b53SEric Biggers byte[n], with lower indexed bits placed in the more numerically 13663be5b53SEric Biggers significant bit positions within bytes. 137c494e070SRik Snel 138c494e070SRik Snel On little endian machines the bit indexes translate into the bit 139c494e070SRik Snel positions within four 32-bit words in the following way 140c494e070SRik Snel 141c494e070SRik Snel MS x[0] LS MS x[1] LS 142c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 143c494e070SRik Snel 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 144c494e070SRik Snel 145c494e070SRik Snel MS x[2] LS MS x[3] LS 146c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 147c494e070SRik Snel 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 148c494e070SRik Snel 149c494e070SRik Snel On big endian machines the bit indexes translate into the bit 150c494e070SRik Snel positions within four 32-bit words in the following way 151c494e070SRik Snel 152c494e070SRik Snel MS x[0] LS MS x[1] LS 153c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 154c494e070SRik Snel 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 155c494e070SRik Snel 156c494e070SRik Snel MS x[2] LS MS x[3] LS 157c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 158c494e070SRik Snel 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 159c494e070SRik Snel */ 160c494e070SRik Snel 161c494e070SRik Snel /* A slow generic version of gf_mul, implemented for lle and bbe 162c494e070SRik Snel * It multiplies a and b and puts the result in a */ 163c494e070SRik Snel void gf128mul_lle(be128 *a, const be128 *b); 164c494e070SRik Snel 165c494e070SRik Snel void gf128mul_bbe(be128 *a, const be128 *b); 166c494e070SRik Snel 167acb9b159SOndrej Mosnáček /* 168acb9b159SOndrej Mosnáček * The following functions multiply a field element by x in 169acb9b159SOndrej Mosnáček * the polynomial field representation. They use 64-bit word operations 170acb9b159SOndrej Mosnáček * to gain speed but compensate for machine endianness and hence work 171acb9b159SOndrej Mosnáček * correctly on both styles of machine. 172acb9b159SOndrej Mosnáček * 173acb9b159SOndrej Mosnáček * They are defined here for performance. 174acb9b159SOndrej Mosnáček */ 175acb9b159SOndrej Mosnáček 176acb9b159SOndrej Mosnáček static inline u64 gf128mul_mask_from_bit(u64 x, int which) 177acb9b159SOndrej Mosnáček { 178acb9b159SOndrej Mosnáček /* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */ 179acb9b159SOndrej Mosnáček return ((s64)(x << (63 - which)) >> 63); 180acb9b159SOndrej Mosnáček } 181acb9b159SOndrej Mosnáček 182acb9b159SOndrej Mosnáček static inline void gf128mul_x_lle(be128 *r, const be128 *x) 183acb9b159SOndrej Mosnáček { 184acb9b159SOndrej Mosnáček u64 a = be64_to_cpu(x->a); 185acb9b159SOndrej Mosnáček u64 b = be64_to_cpu(x->b); 186acb9b159SOndrej Mosnáček 187acb9b159SOndrej Mosnáček /* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48 188acb9b159SOndrej Mosnáček * (see crypto/gf128mul.c): */ 189acb9b159SOndrej Mosnáček u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56); 190acb9b159SOndrej Mosnáček 191acb9b159SOndrej Mosnáček r->b = cpu_to_be64((b >> 1) | (a << 63)); 192acb9b159SOndrej Mosnáček r->a = cpu_to_be64((a >> 1) ^ _tt); 193acb9b159SOndrej Mosnáček } 194acb9b159SOndrej Mosnáček 195acb9b159SOndrej Mosnáček static inline void gf128mul_x_bbe(be128 *r, const be128 *x) 196acb9b159SOndrej Mosnáček { 197acb9b159SOndrej Mosnáček u64 a = be64_to_cpu(x->a); 198acb9b159SOndrej Mosnáček u64 b = be64_to_cpu(x->b); 199acb9b159SOndrej Mosnáček 200acb9b159SOndrej Mosnáček /* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */ 201acb9b159SOndrej Mosnáček u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; 202acb9b159SOndrej Mosnáček 203acb9b159SOndrej Mosnáček r->a = cpu_to_be64((a << 1) | (b >> 63)); 204acb9b159SOndrej Mosnáček r->b = cpu_to_be64((b << 1) ^ _tt); 205acb9b159SOndrej Mosnáček } 206acb9b159SOndrej Mosnáček 207acb9b159SOndrej Mosnáček /* needed by XTS */ 208e55318c8SOndrej Mosnáček static inline void gf128mul_x_ble(le128 *r, const le128 *x) 209acb9b159SOndrej Mosnáček { 210acb9b159SOndrej Mosnáček u64 a = le64_to_cpu(x->a); 211acb9b159SOndrej Mosnáček u64 b = le64_to_cpu(x->b); 212acb9b159SOndrej Mosnáček 213acb9b159SOndrej Mosnáček /* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */ 214e55318c8SOndrej Mosnáček u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; 215acb9b159SOndrej Mosnáček 216e55318c8SOndrej Mosnáček r->a = cpu_to_le64((a << 1) | (b >> 63)); 217e55318c8SOndrej Mosnáček r->b = cpu_to_le64((b << 1) ^ _tt); 218acb9b159SOndrej Mosnáček } 219c494e070SRik Snel 220c494e070SRik Snel /* 4k table optimization */ 221c494e070SRik Snel 222c494e070SRik Snel struct gf128mul_4k { 223c494e070SRik Snel be128 t[256]; 224c494e070SRik Snel }; 225c494e070SRik Snel 226c494e070SRik Snel struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); 227c494e070SRik Snel struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); 2283ea996ddSEric Biggers void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t); 2293ea996ddSEric Biggers void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t); 230c494e070SRik Snel 231c494e070SRik Snel static inline void gf128mul_free_4k(struct gf128mul_4k *t) 232c494e070SRik Snel { 23375aa0a7cSAlex Cope kzfree(t); 234c494e070SRik Snel } 235c494e070SRik Snel 236c494e070SRik Snel 237d266f44bSAlex Cope /* 64k table optimization, implemented for bbe */ 238c494e070SRik Snel 239c494e070SRik Snel struct gf128mul_64k { 240c494e070SRik Snel struct gf128mul_4k *t[16]; 241c494e070SRik Snel }; 242c494e070SRik Snel 243d266f44bSAlex Cope /* First initialize with the constant factor with which you 244d266f44bSAlex Cope * want to multiply and then call gf128mul_64k_bbe with the other 245d266f44bSAlex Cope * factor in the first argument, and the table in the second. 246d266f44bSAlex Cope * Afterwards, the result is stored in *a. 247d266f44bSAlex Cope */ 248c494e070SRik Snel struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); 249c494e070SRik Snel void gf128mul_free_64k(struct gf128mul_64k *t); 2503ea996ddSEric Biggers void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t); 251c494e070SRik Snel 252c494e070SRik Snel #endif /* _CRYPTO_GF128MUL_H */ 253