/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2012 Xyratex Technology Limited
 *
 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
 * calculation.
 * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
 * at:
 * http://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2B: Instruction Set Reference, N-Z
 *
 * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
 *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
 */

#include <linux/linkage.h>


/*
 * Folding constants. Every entry is a 16-byte .octa that is loaded with
 * movdqa, so the table must stay 16-byte aligned and every entry must stay
 * 16 bytes wide.
 */
.section .rodata
.align 16
/*
 * [(x4*128+32 mod P(x) << 32)]' << 1   = 0x154442bd4
 * #define CONSTANT_R1  0x154442bd4LL
 *
 * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
 * #define CONSTANT_R2  0x1c6e41596LL
 */
.Lconstant_R2R1:
	.octa 0x00000001c6e415960000000154442bd4
/*
 * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
 * #define CONSTANT_R3  0x1751997d0LL
 *
 * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
 * #define CONSTANT_R4  0x0ccaa009eLL
 */
.Lconstant_R4R3:
	.octa 0x00000000ccaa009e00000001751997d0
/*
 * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
 * #define CONSTANT_R5  0x163cd6124LL
 */
.Lconstant_R5:
	.octa 0x00000000000000000000000163cd6124
/* Mask that keeps only the low 32 bits of an xmm register. */
.Lconstant_mask32:
	.octa 0x000000000000000000000000FFFFFFFF
/*
 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
 *
 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
 * #define CONSTANT_RU  0x1F7011641LL
 */
.Lconstant_RUpoly:
	.octa 0x00000001F701164100000001DB710641

/* Scratch register holding the currently active folding constant pair. */
#define CONSTANT %xmm0

/* Argument registers; they differ between the 64-bit and 32-bit builds. */
#ifdef __x86_64__
#define BUF     %rdi
#define LEN     %rsi
#define CRC     %edx
#else
#define BUF     %eax
#define LEN     %edx
#define CRC     %ecx
#endif



.text
/**
 *      Calculate crc32
 *      BUF - buffer (16 bytes aligned)
 *      LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
 *      CRC - initial crc32
 *      return %eax crc32
 *      uint crc32_pclmul_le_16(unsigned char const *buffer,
 *	                     size_t len, uint crc32)
 *
 *      The first 64 bytes are loaded unconditionally and every load uses
 *      movdqa, so the alignment and minimum-length requirements above are
 *      not checked here; the caller has to guarantee them.
 *      Uses %xmm0-%xmm7 (plus %xmm8 in the 64-bit build) as scratch.
 */

SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
	/* Load the first 64 bytes into four 128-bit accumulators. */
	movdqa  (BUF), %xmm1
	movdqa  0x10(BUF), %xmm2
	movdqa  0x20(BUF), %xmm3
	movdqa  0x30(BUF), %xmm4
	/* XOR the initial CRC into the low 32 bits of the first accumulator. */
	movd    CRC, CONSTANT
	pxor    CONSTANT, %xmm1
	sub     $0x40, LEN
	add     $0x40, BUF
	/* Fewer than 64 bytes left: skip the 64-byte folding loop. */
	cmp     $0x40, LEN
	jb      .Lless_64

#ifdef __x86_64__
	movdqa  .Lconstant_R2R1(%rip), CONSTANT
#else
	movdqa  .Lconstant_R2R1, CONSTANT
#endif

.Lloop_64:/*  64 bytes Full cache line folding */
	prefetchnta    0x40(BUF)
	/* Keep a copy of each accumulator for the high-qword product. */
	movdqa  %xmm1, %xmm5
	movdqa  %xmm2, %xmm6
	movdqa  %xmm3, %xmm7
#ifdef __x86_64__
	movdqa  %xmm4, %xmm8
#endif
	/* $0x00: low qword of the accumulator times low qword of CONSTANT (R1). */
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x00, CONSTANT, %xmm2
	pclmulqdq $0x00, CONSTANT, %xmm3
#ifdef __x86_64__
	pclmulqdq $0x00, CONSTANT, %xmm4
#endif
	/* $0x11: high qword of the copy times high qword of CONSTANT (R2). */
	pclmulqdq $0x11, CONSTANT, %xmm5
	pclmulqdq $0x11, CONSTANT, %xmm6
	pclmulqdq $0x11, CONSTANT, %xmm7
#ifdef __x86_64__
	pclmulqdq $0x11, CONSTANT, %xmm8
#endif
	pxor    %xmm5, %xmm1
	pxor    %xmm6, %xmm2
	pxor    %xmm7, %xmm3
#ifdef __x86_64__
	pxor    %xmm8, %xmm4
#else
	/*
	 * %xmm8 is only used in the 64-bit build; here the fourth
	 * accumulator is folded separately, reusing %xmm5 as the temporary.
	 */
	movdqa  %xmm4, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm4
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm4
#endif

	/* Mix the next 64 bytes of input into the folded accumulators. */
	pxor    (BUF), %xmm1
	pxor    0x10(BUF), %xmm2
	pxor    0x20(BUF), %xmm3
	pxor    0x30(BUF), %xmm4

	sub     $0x40, LEN
	add     $0x40, BUF
	cmp     $0x40, LEN
	/*
	 * LEN is a size_t, so use the unsigned condition, consistent with
	 * the jb checks on the same register.
	 */
	jae     .Lloop_64
.Lless_64:/*  Folding cache line into 128bit */
#ifdef __x86_64__
	movdqa  .Lconstant_R4R3(%rip), CONSTANT
#else
	movdqa  .Lconstant_R4R3, CONSTANT
#endif
	prefetchnta     (BUF)

	/* Fold %xmm1 into %xmm2. */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm2, %xmm1

	/* Fold the result into %xmm3. */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm3, %xmm1

	/* Fold the result into %xmm4; %xmm1 now holds the single 128-bit state. */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm4, %xmm1

	/* No complete 16-byte block left: go straight to the final reduction. */
	cmp     $0x10, LEN
	jb      .Lfold_64
.Lloop_16:/* Folding rest buffer into 128bit */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    (BUF), %xmm1
	sub     $0x10, LEN
	add     $0x10, BUF
	cmp     $0x10, LEN
	/* Unsigned condition for the size_t length, matching the jb above. */
	jae     .Lloop_16

.Lfold_64:
	/* perform the last 64 bit fold, also adds 32 zeroes
	 * to the input stream */
	pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
	psrldq  $0x08, %xmm1
	pxor    CONSTANT, %xmm1

	/* final 32-bit fold */
	movdqa  %xmm1, %xmm2
#ifdef __x86_64__
	movdqa  .Lconstant_R5(%rip), CONSTANT
	movdqa  .Lconstant_mask32(%rip), %xmm3
#else
	movdqa  .Lconstant_R5, CONSTANT
	movdqa  .Lconstant_mask32, %xmm3
#endif
	/* %xmm2 = state shifted right by 32 bits; %xmm1 = low 32 bits times R5. */
	psrldq  $0x04, %xmm2
	pand    %xmm3, %xmm1
	pclmulqdq $0x00, CONSTANT, %xmm1
	pxor    %xmm2, %xmm1

	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
	movdqa  .Lconstant_RUpoly(%rip), CONSTANT
#else
	movdqa  .Lconstant_RUpoly, CONSTANT
#endif
	movdqa  %xmm1, %xmm2
	pand    %xmm3, %xmm1
	/* $0x10: low qword of %xmm1 times high qword of CONSTANT (RU). */
	pclmulqdq $0x10, CONSTANT, %xmm1
	pand    %xmm3, %xmm1
	/* $0x00: low qword of %xmm1 times low qword of CONSTANT (the polynomial). */
	pclmulqdq $0x00, CONSTANT, %xmm1
	pxor    %xmm2, %xmm1
	/* The result is dword 1 (bits 32..63) of %xmm1. */
	pextrd  $0x01, %xmm1, %eax

	RET
SYM_FUNC_END(crc32_pclmul_le_16)