1 /* 2 * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> 3 * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! 4 * Code was from the public domain, copyright abandoned. Code was 5 * subsequently included in the kernel, thus was re-licensed under the 6 * GNU GPL v2. 7 * 8 * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> 9 * Same crc32 function was used in 5 other places in the kernel. 10 * I made one version, and deleted the others. 11 * There are various incantations of crc32(). Some use a seed of 0 or ~0. 12 * Some xor at the end with ~0. The generic crc32() function takes 13 * seed as an argument, and doesn't xor at the end. Then individual 14 * users can do whatever they need. 15 * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. 16 * fs/jffs2 uses seed 0, doesn't xor with ~0. 17 * fs/partitions/efi.c uses seed ~0, xor's with ~0. 18 * 19 * This source code is licensed under the GNU General Public License, 20 * Version 2. See the file COPYING for more details. 21 */ 22 23 #include <linux/crc32.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/compiler.h> 27 #include <linux/types.h> 28 #include <linux/init.h> 29 #include <linux/atomic.h> 30 #include "crc32defs.h" 31 #if CRC_LE_BITS == 8 32 # define tole(x) __constant_cpu_to_le32(x) 33 #else 34 # define tole(x) (x) 35 #endif 36 37 #if CRC_BE_BITS == 8 38 # define tobe(x) __constant_cpu_to_be32(x) 39 #else 40 # define tobe(x) (x) 41 #endif 42 #include "crc32table.h" 43 44 MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); 45 MODULE_DESCRIPTION("Ethernet CRC32 calculations"); 46 MODULE_LICENSE("GPL"); 47 48 #if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 49 50 static inline u32 51 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) 52 { 53 # ifdef __LITTLE_ENDIAN 54 # define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) 55 # define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ 56 tab[2][(crc >> 8) & 255] ^ \ 57 tab[1][(crc >> 16) & 255] ^ \ 58 tab[0][(crc >> 24) & 255] 59 # else 60 # define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8) 61 # define DO_CRC4 crc = tab[0][(crc) & 255] ^ \ 62 tab[1][(crc >> 8) & 255] ^ \ 63 tab[2][(crc >> 16) & 255] ^ \ 64 tab[3][(crc >> 24) & 255] 65 # endif 66 const u32 *b; 67 size_t rem_len; 68 69 /* Align it */ 70 if (unlikely((long)buf & 3 && len)) { 71 do { 72 DO_CRC(*buf++); 73 } while ((--len) && ((long)buf)&3); 74 } 75 rem_len = len & 3; 76 /* load data 32 bits wide, xor data 32 bits wide. */ 77 len = len >> 2; 78 b = (const u32 *)buf; 79 for (--b; len; --len) { 80 crc ^= *++b; /* use pre increment for speed */ 81 DO_CRC4; 82 } 83 len = rem_len; 84 /* And the last few bytes */ 85 if (len) { 86 u8 *p = (u8 *)(b + 1) - 1; 87 do { 88 DO_CRC(*++p); /* use pre increment for speed */ 89 } while (--len); 90 } 91 return crc; 92 #undef DO_CRC 93 #undef DO_CRC4 94 } 95 #endif 96 /** 97 * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 98 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for 99 * other uses, or the previous crc32 value if computing incrementally. 100 * @p: pointer to buffer over which CRC is run 101 * @len: length of buffer @p 102 */ 103 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); 104 105 #if CRC_LE_BITS == 1 106 /* 107 * In fact, the table-based code will work in this case, but it can be 108 * simplified by inlining the table in ?: form. 109 */ 110 111 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) 112 { 113 int i; 114 while (len--) { 115 crc ^= *p++; 116 for (i = 0; i < 8; i++) 117 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); 118 } 119 return crc; 120 } 121 #else /* Table-based approach */ 122 123 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) 124 { 125 # if CRC_LE_BITS == 8 126 const u32 (*tab)[] = crc32table_le; 127 128 crc = __cpu_to_le32(crc); 129 crc = crc32_body(crc, p, len, tab); 130 return __le32_to_cpu(crc); 131 # elif CRC_LE_BITS == 4 132 while (len--) { 133 crc ^= *p++; 134 crc = (crc >> 4) ^ crc32table_le[crc & 15]; 135 crc = (crc >> 4) ^ crc32table_le[crc & 15]; 136 } 137 return crc; 138 # elif CRC_LE_BITS == 2 139 while (len--) { 140 crc ^= *p++; 141 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 142 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 143 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 144 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 145 } 146 return crc; 147 # endif 148 } 149 #endif 150 151 /** 152 * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 153 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for 154 * other uses, or the previous crc32 value if computing incrementally. 155 * @p: pointer to buffer over which CRC is run 156 * @len: length of buffer @p 157 */ 158 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); 159 160 #if CRC_BE_BITS == 1 161 /* 162 * In fact, the table-based code will work in this case, but it can be 163 * simplified by inlining the table in ?: form. 164 */ 165 166 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) 167 { 168 int i; 169 while (len--) { 170 crc ^= *p++ << 24; 171 for (i = 0; i < 8; i++) 172 crc = 173 (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 174 0); 175 } 176 return crc; 177 } 178 179 #else /* Table-based approach */ 180 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) 181 { 182 # if CRC_BE_BITS == 8 183 const u32 (*tab)[] = crc32table_be; 184 185 crc = __cpu_to_be32(crc); 186 crc = crc32_body(crc, p, len, tab); 187 return __be32_to_cpu(crc); 188 # elif CRC_BE_BITS == 4 189 while (len--) { 190 crc ^= *p++ << 24; 191 crc = (crc << 4) ^ crc32table_be[crc >> 28]; 192 crc = (crc << 4) ^ crc32table_be[crc >> 28]; 193 } 194 return crc; 195 # elif CRC_BE_BITS == 2 196 while (len--) { 197 crc ^= *p++ << 24; 198 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 199 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 200 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 201 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 202 } 203 return crc; 204 # endif 205 } 206 #endif 207 208 EXPORT_SYMBOL(crc32_le); 209 EXPORT_SYMBOL(crc32_be); 210 211 /* 212 * A brief CRC tutorial. 213 * 214 * A CRC is a long-division remainder. You add the CRC to the message, 215 * and the whole thing (message+CRC) is a multiple of the given 216 * CRC polynomial. To check the CRC, you can either check that the 217 * CRC matches the recomputed value, *or* you can check that the 218 * remainder computed on the message+CRC is 0. This latter approach 219 * is used by a lot of hardware implementations, and is why so many 220 * protocols put the end-of-frame flag after the CRC. 221 * 222 * It's actually the same long division you learned in school, except that 223 * - We're working in binary, so the digits are only 0 and 1, and 224 * - When dividing polynomials, there are no carries. Rather than add and 225 * subtract, we just xor. Thus, we tend to get a bit sloppy about 226 * the difference between adding and subtracting. 227 * 228 * A 32-bit CRC polynomial is actually 33 bits long. But since it's 229 * 33 bits long, bit 32 is always going to be set, so usually the CRC 230 * is written in hex with the most significant bit omitted. (If you're 231 * familiar with the IEEE 754 floating-point format, it's the same idea.) 232 * 233 * Note that a CRC is computed over a string of *bits*, so you have 234 * to decide on the endianness of the bits within each byte. To get 235 * the best error-detecting properties, this should correspond to the 236 * order they're actually sent. For example, standard RS-232 serial is 237 * little-endian; the most significant bit (sometimes used for parity) 238 * is sent last. And when appending a CRC word to a message, you should 239 * do it in the right order, matching the endianness. 240 * 241 * Just like with ordinary division, the remainder is always smaller than 242 * the divisor (the CRC polynomial) you're dividing by. Each step of the 243 * division, you take one more digit (bit) of the dividend and append it 244 * to the current remainder. Then you figure out the appropriate multiple 245 * of the divisor to subtract to being the remainder back into range. 246 * In binary, it's easy - it has to be either 0 or 1, and to make the 247 * XOR cancel, it's just a copy of bit 32 of the remainder. 248 * 249 * When computing a CRC, we don't care about the quotient, so we can 250 * throw the quotient bit away, but subtract the appropriate multiple of 251 * the polynomial from the remainder and we're back to where we started, 252 * ready to process the next bit. 253 * 254 * A big-endian CRC written this way would be coded like: 255 * for (i = 0; i < input_bits; i++) { 256 * multiple = remainder & 0x80000000 ? CRCPOLY : 0; 257 * remainder = (remainder << 1 | next_input_bit()) ^ multiple; 258 * } 259 * Notice how, to get at bit 32 of the shifted remainder, we look 260 * at bit 31 of the remainder *before* shifting it. 261 * 262 * But also notice how the next_input_bit() bits we're shifting into 263 * the remainder don't actually affect any decision-making until 264 * 32 bits later. Thus, the first 32 cycles of this are pretty boring. 265 * Also, to add the CRC to a message, we need a 32-bit-long hole for it at 266 * the end, so we have to add 32 extra cycles shifting in zeros at the 267 * end of every message, 268 * 269 * So the standard trick is to rearrage merging in the next_input_bit() 270 * until the moment it's needed. Then the first 32 cycles can be precomputed, 271 * and merging in the final 32 zero bits to make room for the CRC can be 272 * skipped entirely. 273 * This changes the code to: 274 * for (i = 0; i < input_bits; i++) { 275 * remainder ^= next_input_bit() << 31; 276 * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; 277 * remainder = (remainder << 1) ^ multiple; 278 * } 279 * With this optimization, the little-endian code is simpler: 280 * for (i = 0; i < input_bits; i++) { 281 * remainder ^= next_input_bit(); 282 * multiple = (remainder & 1) ? CRCPOLY : 0; 283 * remainder = (remainder >> 1) ^ multiple; 284 * } 285 * 286 * Note that the other details of endianness have been hidden in CRCPOLY 287 * (which must be bit-reversed) and next_input_bit(). 288 * 289 * However, as long as next_input_bit is returning the bits in a sensible 290 * order, we can actually do the merging 8 or more bits at a time rather 291 * than one bit at a time: 292 * for (i = 0; i < input_bytes; i++) { 293 * remainder ^= next_input_byte() << 24; 294 * for (j = 0; j < 8; j++) { 295 * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; 296 * remainder = (remainder << 1) ^ multiple; 297 * } 298 * } 299 * Or in little-endian: 300 * for (i = 0; i < input_bytes; i++) { 301 * remainder ^= next_input_byte(); 302 * for (j = 0; j < 8; j++) { 303 * multiple = (remainder & 1) ? CRCPOLY : 0; 304 * remainder = (remainder << 1) ^ multiple; 305 * } 306 * } 307 * If the input is a multiple of 32 bits, you can even XOR in a 32-bit 308 * word at a time and increase the inner loop count to 32. 309 * 310 * You can also mix and match the two loop styles, for example doing the 311 * bulk of a message byte-at-a-time and adding bit-at-a-time processing 312 * for any fractional bytes at the end. 313 * 314 * The only remaining optimization is to the byte-at-a-time table method. 315 * Here, rather than just shifting one bit of the remainder to decide 316 * in the correct multiple to subtract, we can shift a byte at a time. 317 * This produces a 40-bit (rather than a 33-bit) intermediate remainder, 318 * but again the multiple of the polynomial to subtract depends only on 319 * the high bits, the high 8 bits in this case. 320 * 321 * The multiple we need in that case is the low 32 bits of a 40-bit 322 * value whose high 8 bits are given, and which is a multiple of the 323 * generator polynomial. This is simply the CRC-32 of the given 324 * one-byte message. 325 * 326 * Two more details: normally, appending zero bits to a message which 327 * is already a multiple of a polynomial produces a larger multiple of that 328 * polynomial. To enable a CRC to detect this condition, it's common to 329 * invert the CRC before appending it. This makes the remainder of the 330 * message+crc come out not as zero, but some fixed non-zero value. 331 * 332 * The same problem applies to zero bits prepended to the message, and 333 * a similar solution is used. Instead of starting with a remainder of 334 * 0, an initial remainder of all ones is used. As long as you start 335 * the same way on decoding, it doesn't make a difference. 336 */ 337 338 #ifdef UNITTEST 339 340 #include <stdlib.h> 341 #include <stdio.h> 342 343 #if 0 /*Not used at present */ 344 static void 345 buf_dump(char const *prefix, unsigned char const *buf, size_t len) 346 { 347 fputs(prefix, stdout); 348 while (len--) 349 printf(" %02x", *buf++); 350 putchar('\n'); 351 352 } 353 #endif 354 355 static void bytereverse(unsigned char *buf, size_t len) 356 { 357 while (len--) { 358 unsigned char x = bitrev8(*buf); 359 *buf++ = x; 360 } 361 } 362 363 static void random_garbage(unsigned char *buf, size_t len) 364 { 365 while (len--) 366 *buf++ = (unsigned char) random(); 367 } 368 369 #if 0 /* Not used at present */ 370 static void store_le(u32 x, unsigned char *buf) 371 { 372 buf[0] = (unsigned char) x; 373 buf[1] = (unsigned char) (x >> 8); 374 buf[2] = (unsigned char) (x >> 16); 375 buf[3] = (unsigned char) (x >> 24); 376 } 377 #endif 378 379 static void store_be(u32 x, unsigned char *buf) 380 { 381 buf[0] = (unsigned char) (x >> 24); 382 buf[1] = (unsigned char) (x >> 16); 383 buf[2] = (unsigned char) (x >> 8); 384 buf[3] = (unsigned char) x; 385 } 386 387 /* 388 * This checks that CRC(buf + CRC(buf)) = 0, and that 389 * CRC commutes with bit-reversal. This has the side effect 390 * of bytewise bit-reversing the input buffer, and returns 391 * the CRC of the reversed buffer. 392 */ 393 static u32 test_step(u32 init, unsigned char *buf, size_t len) 394 { 395 u32 crc1, crc2; 396 size_t i; 397 398 crc1 = crc32_be(init, buf, len); 399 store_be(crc1, buf + len); 400 crc2 = crc32_be(init, buf, len + 4); 401 if (crc2) 402 printf("\nCRC cancellation fail: 0x%08x should be 0\n", 403 crc2); 404 405 for (i = 0; i <= len + 4; i++) { 406 crc2 = crc32_be(init, buf, i); 407 crc2 = crc32_be(crc2, buf + i, len + 4 - i); 408 if (crc2) 409 printf("\nCRC split fail: 0x%08x\n", crc2); 410 } 411 412 /* Now swap it around for the other test */ 413 414 bytereverse(buf, len + 4); 415 init = bitrev32(init); 416 crc2 = bitrev32(crc1); 417 if (crc1 != bitrev32(crc2)) 418 printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n", 419 crc1, crc2, bitrev32(crc2)); 420 crc1 = crc32_le(init, buf, len); 421 if (crc1 != crc2) 422 printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1, 423 crc2); 424 crc2 = crc32_le(init, buf, len + 4); 425 if (crc2) 426 printf("\nCRC cancellation fail: 0x%08x should be 0\n", 427 crc2); 428 429 for (i = 0; i <= len + 4; i++) { 430 crc2 = crc32_le(init, buf, i); 431 crc2 = crc32_le(crc2, buf + i, len + 4 - i); 432 if (crc2) 433 printf("\nCRC split fail: 0x%08x\n", crc2); 434 } 435 436 return crc1; 437 } 438 439 #define SIZE 64 440 #define INIT1 0 441 #define INIT2 0 442 443 int main(void) 444 { 445 unsigned char buf1[SIZE + 4]; 446 unsigned char buf2[SIZE + 4]; 447 unsigned char buf3[SIZE + 4]; 448 int i, j; 449 u32 crc1, crc2, crc3; 450 451 for (i = 0; i <= SIZE; i++) { 452 printf("\rTesting length %d...", i); 453 fflush(stdout); 454 random_garbage(buf1, i); 455 random_garbage(buf2, i); 456 for (j = 0; j < i; j++) 457 buf3[j] = buf1[j] ^ buf2[j]; 458 459 crc1 = test_step(INIT1, buf1, i); 460 crc2 = test_step(INIT2, buf2, i); 461 /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */ 462 crc3 = test_step(INIT1 ^ INIT2, buf3, i); 463 if (crc3 != (crc1 ^ crc2)) 464 printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n", 465 crc3, crc1, crc2); 466 } 467 printf("\nAll test complete. No failures expected.\n"); 468 return 0; 469 } 470 471 #endif /* UNITTEST */ 472