include/crypto/gf128mul.h

c494e070SRik Snel/* gf128mul.h - GF(2^128) multiplication functions
c494e070SRik Snel *
c494e070SRik Snel * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
c494e070SRik Snel * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org>
c494e070SRik Snel *
c494e070SRik Snel * Based on Dr Brian Gladman's (GPL'd) work published at
c494e070SRik Snel * http://fp.gladman.plus.com/cryptography_technology/index.htm
c494e070SRik Snel * See the original copyright notice below.
c494e070SRik Snel *
c494e070SRik Snel * This program is free software; you can redistribute it and/or modify it
c494e070SRik Snel * under the terms of the GNU General Public License as published by the Free
c494e070SRik Snel * Software Foundation; either version 2 of the License, or (at your option)
c494e070SRik Snel * any later version.
c494e070SRik Snel */
c494e070SRik Snel/*
c494e070SRik Snel ---------------------------------------------------------------------------
c494e070SRik Snel Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
c494e070SRik Snel
c494e070SRik Snel LICENSE TERMS
c494e070SRik Snel
c494e070SRik Snel The free distribution and use of this software in both source and binary
c494e070SRik Snel form is allowed (with or without changes) provided that:
c494e070SRik Snel
c494e070SRik Snel   1. distributions of this source code include the above copyright
c494e070SRik Snel      notice, this list of conditions and the following disclaimer;
c494e070SRik Snel
c494e070SRik Snel   2. distributions in binary form include the above copyright
c494e070SRik Snel      notice, this list of conditions and the following disclaimer
c494e070SRik Snel      in the documentation and/or other associated materials;
c494e070SRik Snel
c494e070SRik Snel   3. the copyright holder's name is not used to endorse products
c494e070SRik Snel      built using this software without specific written permission.
c494e070SRik Snel
c494e070SRik Snel ALTERNATIVELY, provided that this notice is retained in full, this product
c494e070SRik Snel may be distributed under the terms of the GNU General Public License (GPL),
c494e070SRik Snel in which case the provisions of the GPL apply INSTEAD OF those given above.
c494e070SRik Snel
c494e070SRik Snel DISCLAIMER
c494e070SRik Snel
c494e070SRik Snel This software is provided 'as is' with no explicit or implied warranties
c494e070SRik Snel in respect of its properties, including, but not limited to, correctness
c494e070SRik Snel and/or fitness for purpose.
c494e070SRik Snel ---------------------------------------------------------------------------
c494e070SRik Snel Issue Date: 31/01/2006
c494e070SRik Snel
63be5b53SEric Biggers An implementation of field multiplication in Galois Field GF(2^128)
c494e070SRik Snel*/
c494e070SRik Snel
c494e070SRik Snel#ifndef _CRYPTO_GF128MUL_H
c494e070SRik Snel#define _CRYPTO_GF128MUL_H
c494e070SRik Snel
acb9b159SOndrej Mosnáček#include <asm/byteorder.h>
c494e070SRik Snel#include <crypto/b128ops.h>
c494e070SRik Snel#include <linux/slab.h>
c494e070SRik Snel
/* Comment by Rik:
 *
 * For some background on GF(2^128) see for example:
 * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf
 *
 * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can
 * be mapped to computer memory in a variety of ways. Let's examine
 * three common cases.
 *
 * Take a look at the 16 binary octets below in memory order. The msb's
 * are left and the lsb's are right. char b[16] is an array and b[0] is
 * the first octet.
 *
 * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000
 *   b[0]     b[1]     b[2]     b[3]          b[13]    b[14]    b[15]
 *
 * Every bit is a coefficient of some power of X. We can store the bits
 * in every byte in little-endian order and the bytes themselves also in
 * little endian order. I will call this lle (little-little-endian).
 * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks
 * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }.
 * This format was originally implemented in gf128mul and is used
 * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length).
 *
 * Another convention says: store the bits in big-endian order and the
 * bytes also. This is bbe (big-big-endian). Now the buffer above
 * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111,
 * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe
 * is partly implemented.
 *
 * Both of the above formats are easy to implement on big-endian
 * machines.
 *
 * XTS and EME (the latter of which is patent encumbered) use the ble
 * format (bits are stored in big endian order and the bytes in little
 * endian). The above buffer represents X^7 in this case and the
 * primitive polynomial is b[0] = 0x87.
 *
 * The common machine word-size is smaller than 128 bits, so to make
 * an efficient implementation we must split into machine word sizes.
 * This implementation uses 64-bit words for the moment. Machine
 * endianness comes into play. The lle format in relation to machine
 * endianness is discussed below by the original author of gf128mul Dr
 * Brian Gladman.
 *
 * Let's look at the bbe and ble format on a little endian machine.
 *
 * bbe on a little endian machine u32 x[4]:
 *
 *  MS            x[0]           LS  MS            x[1]		  LS
 *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
 *  103..96 111.104 119.112 127.120  71...64 79...72 87...80 95...88
 *
 *  MS            x[2]           LS  MS            x[3]		  LS
 *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
 *  39...32 47...40 55...48 63...56  07...00 15...08 23...16 31...24
 *
 * ble on a little endian machine
 *
 *  MS            x[0]           LS  MS            x[1]		  LS
 *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
 *  31...24 23...16 15...08 07...00  63...56 55...48 47...40 39...32
 *
 *  MS            x[2]           LS  MS            x[3]		  LS
 *  ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
 *  95...88 87...80 79...72 71...64  127.120 119.112 111.104 103..96
 *
 * Multiplications in GF(2^128) are mostly bit-shifts, so you see why
 * ble (and lbe also) are easier to implement on a little-endian
 * machine than on a big-endian machine. The converse holds for bbe
 * and lle.
 *
 * Note: to have good alignment, it seems to me that it is sufficient
 * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize
 * machines this will automatically be aligned to wordsize and on a
 * 64-bit machine also.
 */
/*	Multiply a GF(2^128) field element by x. Field elements are
    held in arrays of bytes in which field bits 8n..8n + 7 are held in
    byte[n], with lower indexed bits placed in the more numerically
    significant bit positions within bytes.

    On little endian machines the bit indexes translate into the bit
    positions within four 32-bit words in the following way

    MS            x[0]           LS  MS            x[1]		  LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    24...31 16...23 08...15 00...07  56...63 48...55 40...47 32...39

    MS            x[2]           LS  MS            x[3]		  LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    88...95 80...87 72...79 64...71  120.127 112.119 104.111 96..103

    On big endian machines the bit indexes translate into the bit
    positions within four 32-bit words in the following way

    MS            x[0]           LS  MS            x[1]		  LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    00...07 08...15 16...23 24...31  32...39 40...47 48...55 56...63

    MS            x[2]           LS  MS            x[3]		  LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    64...71 72...79 80...87 88...95  96..103 104.111 112.119 120.127
*/
c494e070SRik Snel
c494e070SRik Snel/*	A slow generic version of gf_mul, implemented for lle and bbe
c494e070SRik Snel * 	It multiplies a and b and puts the result in a */
c494e070SRik Snelvoid gf128mul_lle(be128 *a, const be128 *b);
c494e070SRik Snel
c494e070SRik Snelvoid gf128mul_bbe(be128 *a, const be128 *b);
c494e070SRik Snel
acb9b159SOndrej Mosnáček/*
acb9b159SOndrej Mosnáček * The following functions multiply a field element by x in
acb9b159SOndrej Mosnáček * the polynomial field representation.  They use 64-bit word operations
acb9b159SOndrej Mosnáček * to gain speed but compensate for machine endianness and hence work
acb9b159SOndrej Mosnáček * correctly on both styles of machine.
acb9b159SOndrej Mosnáček *
acb9b159SOndrej Mosnáček * They are defined here for performance.
acb9b159SOndrej Mosnáček */
acb9b159SOndrej Mosnáček
acb9b159SOndrej Mosnáčekstatic inline u64 gf128mul_mask_from_bit(u64 x, int which)
acb9b159SOndrej Mosnáček{
acb9b159SOndrej Mosnáček	/* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */
acb9b159SOndrej Mosnáček	return ((s64)(x << (63 - which)) >> 63);
acb9b159SOndrej Mosnáček}
acb9b159SOndrej Mosnáček
/*
 * gf128mul_x_lle - multiply a field element by x, lle convention
 * @r: receives the product
 * @x: element to multiply
 *
 * Both halves of @x are loaded into locals before @r is written, so @r and
 * @x may point to the same element.
 */
static inline void gf128mul_x_lle(be128 *r, const be128 *x)
{
	u64 a = be64_to_cpu(x->a);
	u64 b = be64_to_cpu(x->b);

	/*
	 * Reduction term: 0xe1 in the top byte of the high word when bit 0
	 * of b is set (that bit is dropped by the shift below), else zero.
	 *
	 * equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48
	 * (see crypto/gf128mul.c)
	 */
	u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56);

	/* 128-bit right shift by one: bit 0 of a moves into bit 63 of b. */
	r->b = cpu_to_be64((b >> 1) | (a << 63));
	r->a = cpu_to_be64((a >> 1) ^ _tt);
}
acb9b159SOndrej Mosnáček
/*
 * gf128mul_x_bbe - multiply a field element by x, bbe convention
 * @r: receives the product
 * @x: element to multiply
 *
 * Both halves of @x are loaded into locals before @r is written, so @r and
 * @x may point to the same element.
 */
static inline void gf128mul_x_bbe(be128 *r, const be128 *x)
{
	u64 a = be64_to_cpu(x->a);
	u64 b = be64_to_cpu(x->b);

	/*
	 * Reduction term: 0x87 when bit 63 of a is set (that bit is dropped
	 * by the shift below), else zero.
	 *
	 * equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c)
	 */
	u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87;

	/* 128-bit left shift by one: bit 63 of b moves into bit 0 of a. */
	r->a = cpu_to_be64((a << 1) | (b >> 63));
	r->b = cpu_to_be64((b << 1) ^ _tt);
}
acb9b159SOndrej Mosnáček
acb9b159SOndrej Mosnáček/* needed by XTS */
e55318c8SOndrej Mosnáčekstatic inline void gf128mul_x_ble(le128 *r, const le128 *x)
acb9b159SOndrej Mosnáček{
acb9b159SOndrej Mosnáček	u64 a = le64_to_cpu(x->a);
acb9b159SOndrej Mosnáček	u64 b = le64_to_cpu(x->b);
acb9b159SOndrej Mosnáček
acb9b159SOndrej Mosnáček	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
e55318c8SOndrej Mosnáček	u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87;
acb9b159SOndrej Mosnáček
e55318c8SOndrej Mosnáček	r->a = cpu_to_le64((a << 1) | (b >> 63));
e55318c8SOndrej Mosnáček	r->b = cpu_to_le64((b << 1) ^ _tt);
acb9b159SOndrej Mosnáček}
c494e070SRik Snel
c494e070SRik Snel/* 4k table optimization */
c494e070SRik Snel
c494e070SRik Snelstruct gf128mul_4k {
c494e070SRik Snel	be128 t[256];
c494e070SRik Snel};
c494e070SRik Snel
c494e070SRik Snelstruct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g);
c494e070SRik Snelstruct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g);
3ea996ddSEric Biggersvoid gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t);
3ea996ddSEric Biggersvoid gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t);
c494e070SRik Snel
/*
 * gf128mul_free_4k - release a 4k multiplication table
 * @t: table to release
 *
 * The table is handed to kzfree(), the kernel's zeroing variant of
 * kfree(), rather than to plain kfree().
 */
static inline void gf128mul_free_4k(struct gf128mul_4k *t)
{
	kzfree(t);
}
c494e070SRik Snel
c494e070SRik Snel
/* 64k table optimization, implemented for bbe */

/*
 * Table for the 64k variant: 16 pointers, each to a separately allocated
 * struct gf128mul_4k.
 */
struct gf128mul_64k {
	struct gf128mul_4k *t[16];
};
c494e070SRik Snel
d266f44bSAlex Cope/* First initialize with the constant factor with which you
d266f44bSAlex Cope * want to multiply and then call gf128mul_64k_bbe with the other
d266f44bSAlex Cope * factor in the first argument, and the table in the second.
d266f44bSAlex Cope * Afterwards, the result is stored in *a.
d266f44bSAlex Cope */
c494e070SRik Snelstruct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
c494e070SRik Snelvoid gf128mul_free_64k(struct gf128mul_64k *t);
3ea996ddSEric Biggersvoid gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t);
c494e070SRik Snel
c494e070SRik Snel#endif /* _CRYPTO_GF128MUL_H */