/*
 * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
 * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
 * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
 * http://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2A: Instruction Set Reference, A-M
 *
 * Copyright (C) 2008 Intel Corporation
 * Authors: Austin Zhang <austin_zhang@linux.intel.com>
 *          Kent Liu <kent.liu@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>

#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>

/* crc32c is a byte-stream checksum: block size 1 byte, 32-bit digest */
#define CHKSUM_BLOCK_SIZE	1
#define CHKSUM_DIGEST_SIZE	4

/* Bytes consumed per word-sized crc32 instruction in the bulk loop below */
#define SCALE_F	sizeof(unsigned long)

#ifdef CONFIG_X86_64
/* REX.W prefix byte: selects the 64-bit operand form of the crc32 opcode */
#define REX_PRE "0x48, "
#else
#define REX_PRE
#endif

#ifdef CONFIG_X86_64
/*
 * use carryless multiply version of crc32c when buffer
 * size is >= 512 to account
 * for fpu state save/restore overhead.
 */
#define CRC32C_PCL_BREAKEVEN	512

/* PCLMULQDQ-accelerated crc32c, implemented in assembly elsewhere */
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
				unsigned int crc_init);
#endif /* CONFIG_X86_64 */

/*
 * Fold @length single bytes into @crc with the 8-bit form of the SSE4.2
 * crc32 instruction.  The opcode is emitted as raw .byte values so the
 * code assembles even on toolchains without SSE4.2 support; the register
 * constraints ("=S"/"c") pin crc and the data byte to the registers the
 * hand-encoded modrm byte expects.  Used for sub-word tails.
 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
	while (length--) {
		__asm__ __volatile__(
			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
			:"=S"(crc)
			:"0"(crc), "c"(*data)
		);
		data++;
	}

	return crc;
}

/*
 * Hardware crc32c over @len bytes at @p: run the crc32 instruction one
 * machine word at a time for the bulk of the buffer, then hand any
 * remaining bytes to crc32c_intel_le_hw_byte().
 */
static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
{
	unsigned int iquotient = len / SCALE_F;		/* whole words */
	unsigned int iremainder = len % SCALE_F;	/* trailing bytes */
	unsigned long *ptmp = (unsigned long *)p;

	while (iquotient--) {
		__asm__ __volatile__(
			/* REX_PRE selects the 64-bit form on x86-64 */
			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
			:"=S"(crc)
			:"0"(crc), "c"(*ptmp)
		);
		ptmp++;
	}

	if (iremainder)
		crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
				 iremainder);

	return crc;
}

/*
 * Setting the seed allows arbitrary accumulators and flexible XOR policy
 * If your algorithm starts with ~0, then XOR with ~0 before you set
 * the seed.
 */
static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
			unsigned int keylen)
{
	u32 *mctx = crypto_shash_ctx(hash);

	/* The "key" is the 4-byte little-endian seed for the accumulator */
	if (keylen != sizeof(u32)) {
		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	*mctx = le32_to_cpup((__le32 *)key);
	return 0;
}

/* Start a new hash: load the per-request state from the tfm-wide seed */
static int crc32c_intel_init(struct shash_desc *desc)
{
	u32 *mctx = crypto_shash_ctx(desc->tfm);
	u32 *crcp = shash_desc_ctx(desc);

	*crcp = *mctx;

	return 0;
}

/* Fold @len more bytes into the running crc held in the descriptor */
static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	u32 *crcp = shash_desc_ctx(desc);

	*crcp = crc32c_intel_le_hw(*crcp, data, len);
	return 0;
}

/*
 * Common finup tail: process the final chunk, then store the bitwise
 * complement of the crc as a little-endian digest (crc32c convention).
 * Note *crcp is read but deliberately not written back.
 */
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	*(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
	return 0;
}

static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
}

/* Emit the digest: complement of the accumulated crc, little-endian */
static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
	u32 *crcp = shash_desc_ctx(desc);

	*(__le32 *)out = ~cpu_to_le32p(crcp);
	return 0;
}

/* One-shot digest: seed straight from the tfm context, skip init() */
static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
				    out);
}

/* Default seed is ~0, matching the standard crc32c starting value */
static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
{
	u32 *key = crypto_tfm_ctx(tfm);

	*key = ~0;

	return 0;
}

#ifdef CONFIG_X86_64
static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	u32 *crcp = shash_desc_ctx(desc);

	/*
	 * use faster PCL version if datasize is large enough to
	 * overcome kernel fpu state save/restore overhead
	 */
	if
 (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
		kernel_fpu_begin();
		*crcp = crc_pcl(data, len, *crcp);
		kernel_fpu_end();
	} else
		*crcp = crc32c_intel_le_hw(*crcp, data, len);
	return 0;
}

/*
 * PCL variant of finup: same break-even heuristic as update - use the
 * PCLMULQDQ path (inside a kernel_fpu_begin/end section) only when the
 * buffer is large enough and SIMD is usable in this context, otherwise
 * fall back to the plain crc32-instruction path.
 */
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
		kernel_fpu_begin();
		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
		kernel_fpu_end();
	} else
		*(__le32 *)out =
			~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
	return 0;
}

static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
}

/* One-shot digest via the PCL-aware finup, seeded from the tfm context */
static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
				out);
}
#endif /* CONFIG_X86_64 */

/*
 * Registered with the crc32-instruction handlers; mod_init may swap in
 * the PCL variants for update/finup/digest when PCLMULQDQ is present.
 */
static struct shash_alg alg = {
	.setkey			=	crc32c_intel_setkey,
	.init			=	crc32c_intel_init,
	.update			=	crc32c_intel_update,
	.final			=	crc32c_intel_final,
	.finup			=	crc32c_intel_finup,
	.digest			=	crc32c_intel_digest,
	.descsize		=	sizeof(u32),
	.digestsize		=	CHKSUM_DIGEST_SIZE,
	.base			=	{
		.cra_name		=	"crc32c",
		.cra_driver_name	=	"crc32c-intel",
		.cra_priority		=	200,
		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(u32),
		.cra_module		=	THIS_MODULE,
		.cra_init		=	crc32c_intel_cra_init,
	}
};

/* Only load on CPUs advertising SSE4.2 (which provides crc32) */
static const struct x86_cpu_id crc32c_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);

static int __init crc32c_intel_mod_init(void)
{
	if (!x86_match_cpu(crc32c_cpu_id))
		return -ENODEV;
#ifdef CONFIG_X86_64
	if
 (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
		/* CPU has carryless multiply: use the faster PCL paths */
		alg.update = crc32c_pcl_intel_update;
		alg.finup = crc32c_pcl_intel_finup;
		alg.digest = crc32c_pcl_intel_digest;
	}
#endif
	return crypto_register_shash(&alg);
}

static void __exit crc32c_intel_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(crc32c_intel_mod_init);
module_exit(crc32c_intel_mod_fini);

MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
MODULE_LICENSE("GPL");

MODULE_ALIAS_CRYPTO("crc32c");
MODULE_ALIAS_CRYPTO("crc32c-intel");