/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains the accelerated part of the ghash
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

/*
 * Syntax: GNU as, AT&T operand order (src, dst), run through the C
 * preprocessor.  Target: x86-64; the exported entry points take their
 * arguments in %rdi, %rsi, %rdx/%edx, %rcx.
 *
 * None of the routines below saves or restores the XMM registers it uses
 * (xmm0-xmm6).
 */

#include <linux/linkage.h>
#include <asm/inst.h>

/*
 * The constants are only ever read, so keep them out of writable data.
 * The 16-byte alignment is required: they are used as memory operands of
 * movaps/pcmpeqd/pand.  Each .octa is 16 bytes, so aligning the first one
 * aligns all three.
 */
.section .rodata

.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f
.Lpoly:
	.octa 0xc2000000000000000000000000000001
.Ltwo_one:
	.octa 0x00000001000000000000000000000001

/* Register roles shared by all routines in this file. */
#define DATA	%xmm0
#define SHASH	%xmm1
#define T1	%xmm2
#define T2	%xmm3
#define T3	%xmm4
#define BSWAP	%xmm5
#define IN1	%xmm6

.text

/*
 * __clmul_gf128mul_ble:	internal ABI
 * input:
 *	DATA:			operand1
 *	SHASH:			operand2, hash_key << 1 mod poly
 * output:
 *	DATA:			operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * BSWAP, IN1 and all general purpose registers are left untouched, which
 * is what allows the callers to keep their state live across the call.
 */
__clmul_gf128mul_ble:
	movaps DATA, T1
	pshufd $0b01001110, DATA, T2	# T2 = DATA with 64-bit halves swapped
	pshufd $0b01001110, SHASH, T3	# T3 = SHASH with 64-bit halves swapped
	pxor DATA, T2			# low qword of T2 = a1 + a0
	pxor SHASH, T3			# low qword of T3 = b1 + b0

	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor DATA, T2
	pxor T1, T2			# T2 = a0 * b1 + a1 * b0

	# fold the middle term into the low and high halves of the product
	movaps T2, T3
	pslldq $8, T3
	psrldq $8, T2
	pxor T3, DATA
	pxor T2, T1			# <T1:DATA> is result of
					# carry-less multiplication

	# first phase of the reduction
	# per qword: T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57)
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2
	psrldq $8, T3
	pxor T2, DATA
	pxor T3, T1

	# second phase of the reduction
	# per qword: T2 = (DATA >> 7) ^ (DATA >> 2) ^ (DATA >> 1)
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA
	ret
ENDPROC(__clmul_gf128mul_ble)

/*
 * void clmul_ghash_mul(char *dst, const be128 *shash)
 *
 * In:	%rdi = dst	16 bytes, read and written back in place
 *			(unaligned access is used)
 *	%rsi = shash	16 bytes, passed to __clmul_gf128mul_ble as SHASH
 *
 * Byte-reverses *dst, multiplies it by *shash and stores the byte-reversed
 * product back to *dst.
 * Changed: DATA, SHASH, T1, T2, T3, BSWAP
 */
ENTRY(clmul_ghash_mul)
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask(%rip), BSWAP
	PSHUFB_XMM BSWAP DATA
	call __clmul_gf128mul_ble
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
	ret
ENDPROC(clmul_ghash_mul)

/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const be128 *shash);
 *
 * In:	%rdi = dst	16-byte running hash value, updated in place
 *	%rsi = src	input data
 *	%edx = srclen	input length in bytes
 *	%rcx = shash	16 bytes, passed to __clmul_gf128mul_ble as SHASH
 *
 * For every complete 16-byte block of src: dst = (dst ^ block) * shash.
 * Only whole blocks are consumed; a trailing srclen % 16 bytes is not
 * processed here, and *dst is left untouched when srclen < 16.
 *
 * srclen is a 32-bit unsigned argument, so only %edx is examined: the
 * upper half of %rdx is not defined by the calling convention.  For the
 * same reason the loop test is an unsigned one.
 *
 * Changed: DATA, SHASH, T1, T2, T3, BSWAP, IN1, %rsi, %rdx, flags
 */
ENTRY(clmul_ghash_update)
	cmp $16, %edx
	jb .Lupdate_just_ret	# check length
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
	# invariant: %edx >= 16, %rsi points at the next unread block
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1
	pxor IN1, DATA
	call __clmul_gf128mul_ble
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop	# unsigned: another full block remains
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	ret
ENDPROC(clmul_ghash_update)

/*
 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
 *
 * Calculate hash_key << 1 mod poly
 *
 * In:	%rdi = shash	16-byte output
 *	%rsi = key	16-byte input (unaligned access is used)
 *
 * Changed: %xmm0, %xmm1, %xmm2, BSWAP
 */
ENTRY(clmul_ghash_setkey)
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rsi), %xmm0
	PSHUFB_XMM BSWAP %xmm0

	# 128-bit shift left by one, built from two 64-bit shifts
	movaps %xmm0, %xmm1
	psllq $1, %xmm0
	psrlq $63, %xmm1	# bit shifted out of each qword
	movaps %xmm1, %xmm2
	pslldq $8, %xmm1	# low qword's carry moves into the high qword
	psrldq $8, %xmm2	# xmm2 bit 0 = bit shifted out of the top
	por %xmm1, %xmm0

	# reduction
	# Copy the carry into dwords 0 and 3 (dwords 1 and 2 are zero) and
	# compare with .Ltwo_one: the result is all-ones when the carry is
	# set; otherwise only dwords 1 and 2 match, and .Lpoly is zero
	# there.  The pand therefore yields .Lpoly or 0.
	pshufd $0b00100100, %xmm2, %xmm1
	pcmpeqd .Ltwo_one(%rip), %xmm1
	pand .Lpoly(%rip), %xmm1
	pxor %xmm1, %xmm0
	movups %xmm0, (%rdi)
	ret
ENDPROC(clmul_ghash_setkey)