1c494e070SRik Snel /* gf128mul.h - GF(2^128) multiplication functions 2c494e070SRik Snel * 3c494e070SRik Snel * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. 4c494e070SRik Snel * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> 5c494e070SRik Snel * 6c494e070SRik Snel * Based on Dr Brian Gladman's (GPL'd) work published at 7c494e070SRik Snel * http://fp.gladman.plus.com/cryptography_technology/index.htm 8c494e070SRik Snel * See the original copyright notice below. 9c494e070SRik Snel * 10c494e070SRik Snel * This program is free software; you can redistribute it and/or modify it 11c494e070SRik Snel * under the terms of the GNU General Public License as published by the Free 12c494e070SRik Snel * Software Foundation; either version 2 of the License, or (at your option) 13c494e070SRik Snel * any later version. 14c494e070SRik Snel */ 15c494e070SRik Snel /* 16c494e070SRik Snel --------------------------------------------------------------------------- 17c494e070SRik Snel Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. 18c494e070SRik Snel 19c494e070SRik Snel LICENSE TERMS 20c494e070SRik Snel 21c494e070SRik Snel The free distribution and use of this software in both source and binary 22c494e070SRik Snel form is allowed (with or without changes) provided that: 23c494e070SRik Snel 24c494e070SRik Snel 1. distributions of this source code include the above copyright 25c494e070SRik Snel notice, this list of conditions and the following disclaimer; 26c494e070SRik Snel 27c494e070SRik Snel 2. distributions in binary form include the above copyright 28c494e070SRik Snel notice, this list of conditions and the following disclaimer 29c494e070SRik Snel in the documentation and/or other associated materials; 30c494e070SRik Snel 31c494e070SRik Snel 3. the copyright holder's name is not used to endorse products 32c494e070SRik Snel built using this software without specific written permission. 33c494e070SRik Snel 34c494e070SRik Snel ALTERNATIVELY, provided that this notice is retained in full, this product 35c494e070SRik Snel may be distributed under the terms of the GNU General Public License (GPL), 36c494e070SRik Snel in which case the provisions of the GPL apply INSTEAD OF those given above. 37c494e070SRik Snel 38c494e070SRik Snel DISCLAIMER 39c494e070SRik Snel 40c494e070SRik Snel This software is provided 'as is' with no explicit or implied warranties 41c494e070SRik Snel in respect of its properties, including, but not limited to, correctness 42c494e070SRik Snel and/or fitness for purpose. 43c494e070SRik Snel --------------------------------------------------------------------------- 44c494e070SRik Snel Issue Date: 31/01/2006 45c494e070SRik Snel 46c494e070SRik Snel An implementation of field multiplication in Galois Field GF(128) 47c494e070SRik Snel */ 48c494e070SRik Snel 49c494e070SRik Snel #ifndef _CRYPTO_GF128MUL_H 50c494e070SRik Snel #define _CRYPTO_GF128MUL_H 51c494e070SRik Snel 52c494e070SRik Snel #include <crypto/b128ops.h> 53c494e070SRik Snel #include <linux/slab.h> 54c494e070SRik Snel 55c494e070SRik Snel /* Comment by Rik: 56c494e070SRik Snel * 57631dd1a8SJustin P. Mattock * For some background on GF(2^128) see for example: 58631dd1a8SJustin P. Mattock * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf 59c494e070SRik Snel * 60c494e070SRik Snel * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can 61c494e070SRik Snel * be mapped to computer memory in a variety of ways. Let's examine 62c494e070SRik Snel * three common cases. 63c494e070SRik Snel * 64c494e070SRik Snel * Take a look at the 16 binary octets below in memory order. The msb's 65c494e070SRik Snel * are left and the lsb's are right. char b[16] is an array and b[0] is 66c494e070SRik Snel * the first octet. 67c494e070SRik Snel * 68c494e070SRik Snel * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 69c494e070SRik Snel * b[0] b[1] b[2] b[3] b[13] b[14] b[15] 70c494e070SRik Snel * 71c494e070SRik Snel * Every bit is a coefficient of some power of X. We can store the bits 72c494e070SRik Snel * in every byte in little-endian order and the bytes themselves also in 73c494e070SRik Snel * little endian order. I will call this lle (little-little-endian). 74c494e070SRik Snel * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks 75c494e070SRik Snel * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. 76c494e070SRik Snel * This format was originally implemented in gf128mul and is used 77c494e070SRik Snel * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). 78c494e070SRik Snel * 79c494e070SRik Snel * Another convention says: store the bits in bigendian order and the 80c494e070SRik Snel * bytes also. This is bbe (big-big-endian). Now the buffer above 81c494e070SRik Snel * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, 82c494e070SRik Snel * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe 83c494e070SRik Snel * is partly implemented. 84c494e070SRik Snel * 85c494e070SRik Snel * Both of the above formats are easy to implement on big-endian 86c494e070SRik Snel * machines. 87c494e070SRik Snel * 88c494e070SRik Snel * EME (which is patent encumbered) uses the ble format (bits are stored 89c494e070SRik Snel * in big endian order and the bytes in little endian). The above buffer 90c494e070SRik Snel * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. 91c494e070SRik Snel * 92c494e070SRik Snel * The common machine word-size is smaller than 128 bits, so to make 93c494e070SRik Snel * an efficient implementation we must split into machine word sizes. 94c494e070SRik Snel * This file uses one 32bit for the moment. Machine endianness comes into 95c494e070SRik Snel * play. The lle format in relation to machine endianness is discussed 96c494e070SRik Snel * below by the original author of gf128mul Dr Brian Gladman. 97c494e070SRik Snel * 98c494e070SRik Snel * Let's look at the bbe and ble format on a little endian machine. 99c494e070SRik Snel * 100c494e070SRik Snel * bbe on a little endian machine u32 x[4]: 101c494e070SRik Snel * 102c494e070SRik Snel * MS x[0] LS MS x[1] LS 103c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 104c494e070SRik Snel * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 105c494e070SRik Snel * 106c494e070SRik Snel * MS x[2] LS MS x[3] LS 107c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 108c494e070SRik Snel * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 109c494e070SRik Snel * 110c494e070SRik Snel * ble on a little endian machine 111c494e070SRik Snel * 112c494e070SRik Snel * MS x[0] LS MS x[1] LS 113c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 114c494e070SRik Snel * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 115c494e070SRik Snel * 116c494e070SRik Snel * MS x[2] LS MS x[3] LS 117c494e070SRik Snel * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 118c494e070SRik Snel * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 119c494e070SRik Snel * 120c494e070SRik Snel * Multiplications in GF(2^128) are mostly bit-shifts, so you see why 121c494e070SRik Snel * ble (and lbe also) are easier to implement on a little-endian 122c494e070SRik Snel * machine than on a big-endian machine. The converse holds for bbe 123c494e070SRik Snel * and lle. 124c494e070SRik Snel * 125c494e070SRik Snel * Note: to have good alignment, it seems to me that it is sufficient 126c494e070SRik Snel * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize 127c494e070SRik Snel * machines this will automatically aligned to wordsize and on a 64-bit 128c494e070SRik Snel * machine also. 129c494e070SRik Snel */ 130c494e070SRik Snel /* Multiply a GF128 field element by x. Field elements are held in arrays 131c494e070SRik Snel of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower 132c494e070SRik Snel indexed bits placed in the more numerically significant bit positions 133c494e070SRik Snel within bytes. 134c494e070SRik Snel 135c494e070SRik Snel On little endian machines the bit indexes translate into the bit 136c494e070SRik Snel positions within four 32-bit words in the following way 137c494e070SRik Snel 138c494e070SRik Snel MS x[0] LS MS x[1] LS 139c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 140c494e070SRik Snel 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 141c494e070SRik Snel 142c494e070SRik Snel MS x[2] LS MS x[3] LS 143c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 144c494e070SRik Snel 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 145c494e070SRik Snel 146c494e070SRik Snel On big endian machines the bit indexes translate into the bit 147c494e070SRik Snel positions within four 32-bit words in the following way 148c494e070SRik Snel 149c494e070SRik Snel MS x[0] LS MS x[1] LS 150c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 151c494e070SRik Snel 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 152c494e070SRik Snel 153c494e070SRik Snel MS x[2] LS MS x[3] LS 154c494e070SRik Snel ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 155c494e070SRik Snel 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 156c494e070SRik Snel */ 157c494e070SRik Snel 158c494e070SRik Snel /* A slow generic version of gf_mul, implemented for lle and bbe 159c494e070SRik Snel * It multiplies a and b and puts the result in a */ 160c494e070SRik Snel void gf128mul_lle(be128 *a, const be128 *b); 161c494e070SRik Snel 162c494e070SRik Snel void gf128mul_bbe(be128 *a, const be128 *b); 163c494e070SRik Snel 164f19f5111SRik Snel /* multiply by x in ble format, needed by XTS */ 165f19f5111SRik Snel void gf128mul_x_ble(be128 *a, const be128 *b); 166c494e070SRik Snel 167c494e070SRik Snel /* 4k table optimization */ 168c494e070SRik Snel 169c494e070SRik Snel struct gf128mul_4k { 170c494e070SRik Snel be128 t[256]; 171c494e070SRik Snel }; 172c494e070SRik Snel 173c494e070SRik Snel struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); 174c494e070SRik Snel struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); 175c494e070SRik Snel void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); 176c494e070SRik Snel void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); 177c494e070SRik Snel 178c494e070SRik Snel static inline void gf128mul_free_4k(struct gf128mul_4k *t) 179c494e070SRik Snel { 18075aa0a7cSAlex Cope kzfree(t); 181c494e070SRik Snel } 182c494e070SRik Snel 183c494e070SRik Snel 184d266f44bSAlex Cope /* 64k table optimization, implemented for bbe */ 185c494e070SRik Snel 186c494e070SRik Snel struct gf128mul_64k { 187c494e070SRik Snel struct gf128mul_4k *t[16]; 188c494e070SRik Snel }; 189c494e070SRik Snel 190d266f44bSAlex Cope /* First initialize with the constant factor with which you 191d266f44bSAlex Cope * want to multiply and then call gf128mul_64k_bbe with the other 192d266f44bSAlex Cope * factor in the first argument, and the table in the second. 193d266f44bSAlex Cope * Afterwards, the result is stored in *a. 194d266f44bSAlex Cope */ 195c494e070SRik Snel struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); 196c494e070SRik Snel void gf128mul_free_64k(struct gf128mul_64k *t); 197c494e070SRik Snel void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); 198c494e070SRik Snel 199c494e070SRik Snel #endif /* _CRYPTO_GF128MUL_H */ 200