include/crypto/gf128mul.h

c494e070SRik Snel/* gf128mul.h - GF(2^128) multiplication functions
c494e070SRik Snel *
c494e070SRik Snel * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
c494e070SRik Snel * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org>
c494e070SRik Snel *
c494e070SRik Snel * Based on Dr Brian Gladman's (GPL'd) work published at
c494e070SRik Snel * http://fp.gladman.plus.com/cryptography_technology/index.htm
c494e070SRik Snel * See the original copyright notice below.
c494e070SRik Snel *
c494e070SRik Snel * This program is free software; you can redistribute it and/or modify it
c494e070SRik Snel * under the terms of the GNU General Public License as published by the Free
c494e070SRik Snel * Software Foundation; either version 2 of the License, or (at your option)
c494e070SRik Snel * any later version.
c494e070SRik Snel */
c494e070SRik Snel/*
c494e070SRik Snel ---------------------------------------------------------------------------
c494e070SRik Snel Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
c494e070SRik Snel
c494e070SRik Snel LICENSE TERMS
c494e070SRik Snel
c494e070SRik Snel The free distribution and use of this software in both source and binary
c494e070SRik Snel form is allowed (with or without changes) provided that:
c494e070SRik Snel
c494e070SRik Snel   1. distributions of this source code include the above copyright
c494e070SRik Snel      notice, this list of conditions and the following disclaimer;
c494e070SRik Snel
c494e070SRik Snel   2. distributions in binary form include the above copyright
c494e070SRik Snel      notice, this list of conditions and the following disclaimer
c494e070SRik Snel      in the documentation and/or other associated materials;
c494e070SRik Snel
c494e070SRik Snel   3. the copyright holder's name is not used to endorse products
c494e070SRik Snel      built using this software without specific written permission.
c494e070SRik Snel
c494e070SRik Snel ALTERNATIVELY, provided that this notice is retained in full, this product
c494e070SRik Snel may be distributed under the terms of the GNU General Public License (GPL),
c494e070SRik Snel in which case the provisions of the GPL apply INSTEAD OF those given above.
c494e070SRik Snel
c494e070SRik Snel DISCLAIMER
c494e070SRik Snel
c494e070SRik Snel This software is provided 'as is' with no explicit or implied warranties
c494e070SRik Snel in respect of its properties, including, but not limited to, correctness
c494e070SRik Snel and/or fitness for purpose.
c494e070SRik Snel ---------------------------------------------------------------------------
c494e070SRik Snel Issue Date: 31/01/2006
c494e070SRik Snel
c494e070SRik Snel An implementation of field multiplication in Galois Field GF(2^128)
c494e070SRik Snel*/
c494e070SRik Snel
c494e070SRik Snel#ifndef _CRYPTO_GF128MUL_H
c494e070SRik Snel#define _CRYPTO_GF128MUL_H
c494e070SRik Snel
c494e070SRik Snel#include <crypto/b128ops.h>
c494e070SRik Snel#include <linux/slab.h>
c494e070SRik Snel
c494e070SRik Snel/* Comment by Rik:
c494e070SRik Snel *
631dd1a8SJustin P. Mattock * For some background on GF(2^128) see for example:
631dd1a8SJustin P. Mattock * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf
c494e070SRik Snel *
c494e070SRik Snel * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can
c494e070SRik Snel * be mapped to computer memory in a variety of ways. Let's examine
c494e070SRik Snel * three common cases.
c494e070SRik Snel *
c494e070SRik Snel * Take a look at the 16 binary octets below in memory order. The msb's
c494e070SRik Snel * are left and the lsb's are right. char b[16] is an array and b[0] is
c494e070SRik Snel * the first octet.
c494e070SRik Snel *
c494e070SRik Snel * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
c494e070SRik Snel *   b[0]     b[1]     b[2]     b[3]          b[13]    b[14]    b[15]
c494e070SRik Snel *
c494e070SRik Snel * Every bit is a coefficient of some power of X. We can store the bits
c494e070SRik Snel * in every byte in little-endian order and the bytes themselves also in
c494e070SRik Snel * little endian order. I will call this lle (little-little-endian).
c494e070SRik Snel * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks
c494e070SRik Snel * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }.
c494e070SRik Snel * This format was originally implemented in gf128mul and is used
c494e070SRik Snel * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length).
c494e070SRik Snel *
c494e070SRik Snel * Another convention says: store the bits in big-endian order and the
c494e070SRik Snel * bytes also. This is bbe (big-big-endian). Now the buffer above
c494e070SRik Snel * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111,
c494e070SRik Snel * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe
c494e070SRik Snel * is partly implemented.
c494e070SRik Snel *
c494e070SRik Snel * Both of the above formats are easy to implement on big-endian
c494e070SRik Snel * machines.
c494e070SRik Snel *
c494e070SRik Snel * EME (which is patent encumbered) uses the ble format (bits are stored
c494e070SRik Snel * in big endian order and the bytes in little endian). The above buffer
c494e070SRik Snel * represents X^7 in this case and the primitive polynomial is b[0] = 0x87.
c494e070SRik Snel *
c494e070SRik Snel * The common machine word-size is smaller than 128 bits, so to make
c494e070SRik Snel * an efficient implementation we must split into machine word sizes.
c494e070SRik Snel * This file uses 32-bit words for the moment. Machine endianness comes into
c494e070SRik Snel * play. The lle format in relation to machine endianness is discussed
c494e070SRik Snel * below by the original author of gf128mul Dr Brian Gladman.
c494e070SRik Snel *
c494e070SRik Snel * Let's look at the bbe and ble format on a little endian machine.
c494e070SRik Snel *
c494e070SRik Snel * bbe on a little endian machine u32 x[4]:
c494e070SRik Snel *
c494e070SRik Snel *  MS            x[0]           LS  MS            x[1]		  LS
c494e070SRik Snel *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel *  103..96 111.104 119.112 127.120  71...64 79...72 87...80 95...88
c494e070SRik Snel *
c494e070SRik Snel *  MS            x[2]           LS  MS            x[3]		  LS
c494e070SRik Snel *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel *  39...32 47...40 55...48 63...56  07...00 15...08 23...16 31...24
c494e070SRik Snel *
c494e070SRik Snel * ble on a little endian machine
c494e070SRik Snel *
c494e070SRik Snel *  MS            x[0]           LS  MS            x[1]		  LS
c494e070SRik Snel *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel *  31...24 23...16 15...08 07...00  63...56 55...48 47...40 39...32
c494e070SRik Snel *
c494e070SRik Snel *  MS            x[2]           LS  MS            x[3]		  LS
c494e070SRik Snel *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel *  95...88 87...80 79...72 71...64  127.120 119.112 111.104 103..96
c494e070SRik Snel *
c494e070SRik Snel * Multiplications in GF(2^128) are mostly bit-shifts, so you see why
c494e070SRik Snel * ble (and lbe also) are easier to implement on a little-endian
c494e070SRik Snel * machine than on a big-endian machine. The converse holds for bbe
c494e070SRik Snel * and lle.
c494e070SRik Snel *
c494e070SRik Snel * Note: to have good alignment, it seems to me that it is sufficient
c494e070SRik Snel * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize
c494e070SRik Snel * machines this will automatically be aligned to wordsize and on a 64-bit
c494e070SRik Snel * machine also.
c494e070SRik Snel */
c494e070SRik Snel/*	Multiply a GF128 field element by x. Field elements are held in arrays
c494e070SRik Snel    of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower
c494e070SRik Snel    indexed bits placed in the more numerically significant bit positions
c494e070SRik Snel    within bytes.
c494e070SRik Snel
c494e070SRik Snel    On little endian machines the bit indexes translate into the bit
c494e070SRik Snel    positions within four 32-bit words in the following way
c494e070SRik Snel
c494e070SRik Snel    MS            x[0]           LS  MS            x[1]		  LS
c494e070SRik Snel    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel    24...31 16...23 08...15 00...07  56...63 48...55 40...47 32...39
c494e070SRik Snel
c494e070SRik Snel    MS            x[2]           LS  MS            x[3]		  LS
c494e070SRik Snel    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel    88...95 80...87 72...79 64...71  120.127 112.119 104.111 96..103
c494e070SRik Snel
c494e070SRik Snel    On big endian machines the bit indexes translate into the bit
c494e070SRik Snel    positions within four 32-bit words in the following way
c494e070SRik Snel
c494e070SRik Snel    MS            x[0]           LS  MS            x[1]		  LS
c494e070SRik Snel    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel    00...07 08...15 16...23 24...31  32...39 40...47 48...55 56...63
c494e070SRik Snel
c494e070SRik Snel    MS            x[2]           LS  MS            x[3]		  LS
c494e070SRik Snel    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
c494e070SRik Snel    64...71 72...79 80...87 88...95  96..103 104.111 112.119 120.127
c494e070SRik Snel*/
c494e070SRik Snel
c494e070SRik Snel/*	A slow generic version of gf_mul, implemented for lle and bbe.
c494e070SRik Snel * 	It multiplies *a by *b and stores the product back in *a. */
c494e070SRik Snelvoid gf128mul_lle(be128 *a, const be128 *b);
c494e070SRik Snel
c494e070SRik Snelvoid gf128mul_bbe(be128 *a, const be128 *b);
c494e070SRik Snel
f19f5111SRik Snel/* multiply *b by x in ble format (result presumably in *a), needed by XTS */
f19f5111SRik Snelvoid gf128mul_x_ble(be128 *a, const be128 *b);
c494e070SRik Snel
c494e070SRik Snel/* 4k table optimization: one lookup table of 256 be128 entries */
c494e070SRik Snel
c494e070SRik Snelstruct gf128mul_4k {
c494e070SRik Snel	be128 t[256];	/* 256 entries; 4k total if be128 is 16 bytes */
c494e070SRik Snel};
c494e070SRik Snel/* Build a table from g, then multiply *a (presumably in place) via t. */
c494e070SRik Snelstruct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g);
c494e070SRik Snelstruct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g);
c494e070SRik Snelvoid gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t);
c494e070SRik Snelvoid gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t);
c494e070SRik Snel/* Release a 4k table by handing it straight to kfree(). */
c494e070SRik Snelstatic inline void gf128mul_free_4k(struct gf128mul_4k *t)
c494e070SRik Snel{
c494e070SRik Snel	kfree(t);
c494e070SRik Snel}
c494e070SRik Snel
c494e070SRik Snel
c494e070SRik Snel/* 64k table optimization, implemented for lle and bbe */
c494e070SRik Snel
c494e070SRik Snelstruct gf128mul_64k {
c494e070SRik Snel	struct gf128mul_4k *t[16];	/* pointers to 16 4k tables => 64k */
c494e070SRik Snel};
c494e070SRik Snel
c494e070SRik Snel/* First initialize with the constant factor with which you
c494e070SRik Snel * want to multiply and then call gf128mul_64k_lle (or _bbe) with the
c494e070SRik Snel * other factor in the first argument and the table in the second.
c494e070SRik Snel * Afterwards the result is stored in *a. Free with gf128mul_free_64k. */
c494e070SRik Snelstruct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g);
c494e070SRik Snelstruct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
c494e070SRik Snelvoid gf128mul_free_64k(struct gf128mul_64k *t);
c494e070SRik Snelvoid gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t);
c494e070SRik Snelvoid gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t);
c494e070SRik Snel
c494e070SRik Snel#endif /* _CRYPTO_GF128MUL_H */