/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains accelerated part of ghash
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 */

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * x86-64.  The exported functions read their arguments from %rdi, %rsi,
 * %rdx and %rcx in that order.
 */

#include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h>

/*
 * 16-byte shuffle control for PSHUFB_XMM that reverses the byte order of
 * an XMM register.  It is 16-byte aligned so that it can be loaded with
 * movaps.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* Register roles shared by every routine in this file. */
#define DATA	%xmm0	/* running hash value / multiplication operand1 */
#define SHASH	%xmm1	/* hash key operand (see __clmul_gf128mul_ble) */
#define T1	%xmm2	/* scratch */
#define T2	%xmm3	/* scratch */
#define T3	%xmm4	/* scratch */
#define BSWAP	%xmm5	/* byte-reversal mask loaded from .Lbswap_mask */
#define IN1	%xmm6	/* current 16-byte input block */

.text

/*
 * __clmul_gf128mul_ble:	internal ABI
 * input:
 *	DATA:			operand1
 *	SHASH:			operand2, hash_key << 1 mod poly
 * output:
 *	DATA:			operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * SHASH, BSWAP and IN1 are not written.  No general purpose register and
 * no memory is accessed, so callers may keep live values in them across
 * the call.
 *
 * With a1:a0 the 64-bit halves of DATA and b1:b0 the halves of SHASH, the
 * 256-bit carry-less product is built from three 64x64 multiplies
 * (a0 * b0, a1 * b1 and (a1 + a0) * (b1 + b0)) and is then folded back to
 * 128 bits in two reduction phases.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	movaps DATA, T1
	pshufd $0b01001110, DATA, T2	# T2 = DATA with 64-bit halves swapped
	pshufd $0b01001110, SHASH, T3	# T3 = SHASH with 64-bit halves swapped
	pxor DATA, T2			# T2 = a1 + a0 (in both halves)
	pxor SHASH, T3			# T3 = b1 + b0 (in both halves)

	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor DATA, T2
	pxor T1, T2			# T2 = a0 * b1 + a1 * b0

	/* Add the middle term, split across the low and high 128 bits. */
	movaps T2, T3
	pslldq $8, T3
	psrldq $8, T2
	pxor T3, DATA
	pxor T2, T1			# <T1:DATA> is result of
					# carry-less multiplication

	/*
	 * first phase of the reduction
	 *
	 * Per 64-bit lane the shift/xor chain below leaves
	 *	T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57)
	 * which is then folded into <T1:DATA> displaced by 64 bits.
	 */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2
	psrldq $8, T3
	pxor T2, DATA
	pxor T3, T1

	/*
	 * second phase of the reduction
	 *
	 * Per 64-bit lane the shift/xor chain below leaves
	 *	T2 = (DATA >> 7) ^ (DATA >> 2) ^ (DATA >> 1)
	 */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA			# DATA = reduced 128-bit product
	ret
SYM_FUNC_END(__clmul_gf128mul_ble)

/*
 * void clmul_ghash_mul(char *dst, const u128 *shash)
 *
 * input:
 *	%rdi:	dst, 16 bytes, read and then overwritten with the product
 *	%rsi:	shash, 16-byte operand2 for __clmul_gf128mul_ble
 * changed:
 *	DATA, SHASH, T1, T2, T3, BSWAP
 *
 * The value at dst is byte-reversed before the multiplication and the
 * product is byte-reversed again before it is stored.  dst and shash are
 * accessed with movups, so neither needs to be 16-byte aligned.
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask(%rip), BSWAP	# RIP-relative: no absolute reloc
	PSHUFB_XMM BSWAP DATA
	call __clmul_gf128mul_ble
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
	FRAME_END
	ret
SYM_FUNC_END(clmul_ghash_mul)

/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const u128 *shash);
 *
 * input:
 *	%rdi:	dst, 16-byte running hash, read and then overwritten
 *	%rsi:	src, input bytes
 *	%rdx:	srclen, number of input bytes
 *	%rcx:	shash, 16-byte operand2 for __clmul_gf128mul_ble
 * changed:
 *	%rsi, %rdx, flags, DATA, SHASH, T1, T2, T3, BSWAP, IN1
 *
 * For every complete 16-byte block of src: dst = (dst ^ block) * shash.
 * Only whole blocks are consumed; a tail of fewer than 16 bytes is left
 * unprocessed.  When srclen < 16 the function returns without reading or
 * writing dst.
 *
 * NOTE(review): srclen is declared as unsigned int but the full 64-bit
 * %rdx is compared below; this assumes the caller passes it zero-extended.
 * TODO confirm.
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	cmp $16, %rdx
	jb .Lupdate_just_ret	# check length
	movaps .Lbswap_mask(%rip), BSWAP	# RIP-relative: no absolute reloc
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
	/* Invariant: %rdx >= 16, %rsi points at the next unread block. */
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1
	pxor IN1, DATA
	call __clmul_gf128mul_ble
	sub $16, %rdx
	add $16, %rsi
	cmp $16, %rdx
	jae .Lupdate_loop	# unsigned, same as the entry check (was jge)
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	FRAME_END
	ret
SYM_FUNC_END(clmul_ghash_update)