1 /* 2 * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> 3 * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! 4 * Code was from the public domain, copyright abandoned. Code was 5 * subsequently included in the kernel, thus was re-licensed under the 6 * GNU GPL v2. 7 * 8 * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> 9 * Same crc32 function was used in 5 other places in the kernel. 10 * I made one version, and deleted the others. 11 * There are various incantations of crc32(). Some use a seed of 0 or ~0. 12 * Some xor at the end with ~0. The generic crc32() function takes 13 * seed as an argument, and doesn't xor at the end. Then individual 14 * users can do whatever they need. 15 * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. 16 * fs/jffs2 uses seed 0, doesn't xor with ~0. 17 * fs/partitions/efi.c uses seed ~0, xor's with ~0. 18 * 19 * This source code is licensed under the GNU General Public License, 20 * Version 2. See the file COPYING for more details. 21 */ 22 23 #include <linux/crc32.h> 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/compiler.h> 27 #include <linux/types.h> 28 #include <linux/slab.h> 29 #include <linux/init.h> 30 #include <asm/atomic.h> 31 #include "crc32defs.h" 32 #if CRC_LE_BITS == 8 33 #define tole(x) __constant_cpu_to_le32(x) 34 #define tobe(x) __constant_cpu_to_be32(x) 35 #else 36 #define tole(x) (x) 37 #define tobe(x) (x) 38 #endif 39 #include "crc32table.h" 40 41 MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); 42 MODULE_DESCRIPTION("Ethernet CRC32 calculations"); 43 MODULE_LICENSE("GPL"); 44 45 #if CRC_LE_BITS == 1 46 /* 47 * In fact, the table-based code will work in this case, but it can be 48 * simplified by inlining the table in ?: form. 49 */ 50 51 /** 52 * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 53 * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for 54 * other uses, or the previous crc32 value if computing incrementally. 55 * @p - pointer to buffer over which CRC is run 56 * @len - length of buffer @p 57 * 58 */ 59 u32 __attribute_pure__ crc32_le(u32 crc, unsigned char const *p, size_t len) 60 { 61 int i; 62 while (len--) { 63 crc ^= *p++; 64 for (i = 0; i < 8; i++) 65 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); 66 } 67 return crc; 68 } 69 #else /* Table-based approach */ 70 71 /** 72 * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 73 * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for 74 * other uses, or the previous crc32 value if computing incrementally. 75 * @p - pointer to buffer over which CRC is run 76 * @len - length of buffer @p 77 * 78 */ 79 u32 __attribute_pure__ crc32_le(u32 crc, unsigned char const *p, size_t len) 80 { 81 # if CRC_LE_BITS == 8 82 const u32 *b =(u32 *)p; 83 const u32 *tab = crc32table_le; 84 85 # ifdef __LITTLE_ENDIAN 86 # define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) 87 # else 88 # define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) 89 # endif 90 91 crc = __cpu_to_le32(crc); 92 /* Align it */ 93 if(unlikely(((long)b)&3 && len)){ 94 do { 95 u8 *p = (u8 *)b; 96 DO_CRC(*p++); 97 b = (void *)p; 98 } while ((--len) && ((long)b)&3 ); 99 } 100 if(likely(len >= 4)){ 101 /* load data 32 bits wide, xor data 32 bits wide. */ 102 size_t save_len = len & 3; 103 len = len >> 2; 104 --b; /* use pre increment below(*++b) for speed */ 105 do { 106 crc ^= *++b; 107 DO_CRC(0); 108 DO_CRC(0); 109 DO_CRC(0); 110 DO_CRC(0); 111 } while (--len); 112 b++; /* point to next byte(s) */ 113 len = save_len; 114 } 115 /* And the last few bytes */ 116 if(len){ 117 do { 118 u8 *p = (u8 *)b; 119 DO_CRC(*p++); 120 b = (void *)p; 121 } while (--len); 122 } 123 124 return __le32_to_cpu(crc); 125 #undef ENDIAN_SHIFT 126 #undef DO_CRC 127 128 # elif CRC_LE_BITS == 4 129 while (len--) { 130 crc ^= *p++; 131 crc = (crc >> 4) ^ crc32table_le[crc & 15]; 132 crc = (crc >> 4) ^ crc32table_le[crc & 15]; 133 } 134 return crc; 135 # elif CRC_LE_BITS == 2 136 while (len--) { 137 crc ^= *p++; 138 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 139 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 140 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 141 crc = (crc >> 2) ^ crc32table_le[crc & 3]; 142 } 143 return crc; 144 # endif 145 } 146 #endif 147 148 #if CRC_BE_BITS == 1 149 /* 150 * In fact, the table-based code will work in this case, but it can be 151 * simplified by inlining the table in ?: form. 152 */ 153 154 /** 155 * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 156 * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for 157 * other uses, or the previous crc32 value if computing incrementally. 158 * @p - pointer to buffer over which CRC is run 159 * @len - length of buffer @p 160 * 161 */ 162 u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len) 163 { 164 int i; 165 while (len--) { 166 crc ^= *p++ << 24; 167 for (i = 0; i < 8; i++) 168 crc = 169 (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 170 0); 171 } 172 return crc; 173 } 174 175 #else /* Table-based approach */ 176 /** 177 * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 178 * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for 179 * other uses, or the previous crc32 value if computing incrementally. 180 * @p - pointer to buffer over which CRC is run 181 * @len - length of buffer @p 182 * 183 */ 184 u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len) 185 { 186 # if CRC_BE_BITS == 8 187 const u32 *b =(u32 *)p; 188 const u32 *tab = crc32table_be; 189 190 # ifdef __LITTLE_ENDIAN 191 # define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8) 192 # else 193 # define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8) 194 # endif 195 196 crc = __cpu_to_be32(crc); 197 /* Align it */ 198 if(unlikely(((long)b)&3 && len)){ 199 do { 200 u8 *p = (u8 *)b; 201 DO_CRC(*p++); 202 b = (u32 *)p; 203 } while ((--len) && ((long)b)&3 ); 204 } 205 if(likely(len >= 4)){ 206 /* load data 32 bits wide, xor data 32 bits wide. */ 207 size_t save_len = len & 3; 208 len = len >> 2; 209 --b; /* use pre increment below(*++b) for speed */ 210 do { 211 crc ^= *++b; 212 DO_CRC(0); 213 DO_CRC(0); 214 DO_CRC(0); 215 DO_CRC(0); 216 } while (--len); 217 b++; /* point to next byte(s) */ 218 len = save_len; 219 } 220 /* And the last few bytes */ 221 if(len){ 222 do { 223 u8 *p = (u8 *)b; 224 DO_CRC(*p++); 225 b = (void *)p; 226 } while (--len); 227 } 228 return __be32_to_cpu(crc); 229 #undef ENDIAN_SHIFT 230 #undef DO_CRC 231 232 # elif CRC_BE_BITS == 4 233 while (len--) { 234 crc ^= *p++ << 24; 235 crc = (crc << 4) ^ crc32table_be[crc >> 28]; 236 crc = (crc << 4) ^ crc32table_be[crc >> 28]; 237 } 238 return crc; 239 # elif CRC_BE_BITS == 2 240 while (len--) { 241 crc ^= *p++ << 24; 242 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 243 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 244 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 245 crc = (crc << 2) ^ crc32table_be[crc >> 30]; 246 } 247 return crc; 248 # endif 249 } 250 #endif 251 252 u32 bitreverse(u32 x) 253 { 254 x = (x >> 16) | (x << 16); 255 x = (x >> 8 & 0x00ff00ff) | (x << 8 & 0xff00ff00); 256 x = (x >> 4 & 0x0f0f0f0f) | (x << 4 & 0xf0f0f0f0); 257 x = (x >> 2 & 0x33333333) | (x << 2 & 0xcccccccc); 258 x = (x >> 1 & 0x55555555) | (x << 1 & 0xaaaaaaaa); 259 return x; 260 } 261 262 EXPORT_SYMBOL(crc32_le); 263 EXPORT_SYMBOL(crc32_be); 264 EXPORT_SYMBOL(bitreverse); 265 266 /* 267 * A brief CRC tutorial. 268 * 269 * A CRC is a long-division remainder. You add the CRC to the message, 270 * and the whole thing (message+CRC) is a multiple of the given 271 * CRC polynomial. To check the CRC, you can either check that the 272 * CRC matches the recomputed value, *or* you can check that the 273 * remainder computed on the message+CRC is 0. This latter approach 274 * is used by a lot of hardware implementations, and is why so many 275 * protocols put the end-of-frame flag after the CRC. 276 * 277 * It's actually the same long division you learned in school, except that 278 * - We're working in binary, so the digits are only 0 and 1, and 279 * - When dividing polynomials, there are no carries. Rather than add and 280 * subtract, we just xor. Thus, we tend to get a bit sloppy about 281 * the difference between adding and subtracting. 282 * 283 * A 32-bit CRC polynomial is actually 33 bits long. But since it's 284 * 33 bits long, bit 32 is always going to be set, so usually the CRC 285 * is written in hex with the most significant bit omitted. (If you're 286 * familiar with the IEEE 754 floating-point format, it's the same idea.) 287 * 288 * Note that a CRC is computed over a string of *bits*, so you have 289 * to decide on the endianness of the bits within each byte. To get 290 * the best error-detecting properties, this should correspond to the 291 * order they're actually sent. For example, standard RS-232 serial is 292 * little-endian; the most significant bit (sometimes used for parity) 293 * is sent last. And when appending a CRC word to a message, you should 294 * do it in the right order, matching the endianness. 295 * 296 * Just like with ordinary division, the remainder is always smaller than 297 * the divisor (the CRC polynomial) you're dividing by. Each step of the 298 * division, you take one more digit (bit) of the dividend and append it 299 * to the current remainder. Then you figure out the appropriate multiple 300 * of the divisor to subtract to being the remainder back into range. 301 * In binary, it's easy - it has to be either 0 or 1, and to make the 302 * XOR cancel, it's just a copy of bit 32 of the remainder. 303 * 304 * When computing a CRC, we don't care about the quotient, so we can 305 * throw the quotient bit away, but subtract the appropriate multiple of 306 * the polynomial from the remainder and we're back to where we started, 307 * ready to process the next bit. 308 * 309 * A big-endian CRC written this way would be coded like: 310 * for (i = 0; i < input_bits; i++) { 311 * multiple = remainder & 0x80000000 ? CRCPOLY : 0; 312 * remainder = (remainder << 1 | next_input_bit()) ^ multiple; 313 * } 314 * Notice how, to get at bit 32 of the shifted remainder, we look 315 * at bit 31 of the remainder *before* shifting it. 316 * 317 * But also notice how the next_input_bit() bits we're shifting into 318 * the remainder don't actually affect any decision-making until 319 * 32 bits later. Thus, the first 32 cycles of this are pretty boring. 320 * Also, to add the CRC to a message, we need a 32-bit-long hole for it at 321 * the end, so we have to add 32 extra cycles shifting in zeros at the 322 * end of every message, 323 * 324 * So the standard trick is to rearrage merging in the next_input_bit() 325 * until the moment it's needed. Then the first 32 cycles can be precomputed, 326 * and merging in the final 32 zero bits to make room for the CRC can be 327 * skipped entirely. 328 * This changes the code to: 329 * for (i = 0; i < input_bits; i++) { 330 * remainder ^= next_input_bit() << 31; 331 * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; 332 * remainder = (remainder << 1) ^ multiple; 333 * } 334 * With this optimization, the little-endian code is simpler: 335 * for (i = 0; i < input_bits; i++) { 336 * remainder ^= next_input_bit(); 337 * multiple = (remainder & 1) ? CRCPOLY : 0; 338 * remainder = (remainder >> 1) ^ multiple; 339 * } 340 * 341 * Note that the other details of endianness have been hidden in CRCPOLY 342 * (which must be bit-reversed) and next_input_bit(). 343 * 344 * However, as long as next_input_bit is returning the bits in a sensible 345 * order, we can actually do the merging 8 or more bits at a time rather 346 * than one bit at a time: 347 * for (i = 0; i < input_bytes; i++) { 348 * remainder ^= next_input_byte() << 24; 349 * for (j = 0; j < 8; j++) { 350 * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; 351 * remainder = (remainder << 1) ^ multiple; 352 * } 353 * } 354 * Or in little-endian: 355 * for (i = 0; i < input_bytes; i++) { 356 * remainder ^= next_input_byte(); 357 * for (j = 0; j < 8; j++) { 358 * multiple = (remainder & 1) ? CRCPOLY : 0; 359 * remainder = (remainder << 1) ^ multiple; 360 * } 361 * } 362 * If the input is a multiple of 32 bits, you can even XOR in a 32-bit 363 * word at a time and increase the inner loop count to 32. 364 * 365 * You can also mix and match the two loop styles, for example doing the 366 * bulk of a message byte-at-a-time and adding bit-at-a-time processing 367 * for any fractional bytes at the end. 368 * 369 * The only remaining optimization is to the byte-at-a-time table method. 370 * Here, rather than just shifting one bit of the remainder to decide 371 * in the correct multiple to subtract, we can shift a byte at a time. 372 * This produces a 40-bit (rather than a 33-bit) intermediate remainder, 373 * but again the multiple of the polynomial to subtract depends only on 374 * the high bits, the high 8 bits in this case. 375 * 376 * The multile we need in that case is the low 32 bits of a 40-bit 377 * value whose high 8 bits are given, and which is a multiple of the 378 * generator polynomial. This is simply the CRC-32 of the given 379 * one-byte message. 380 * 381 * Two more details: normally, appending zero bits to a message which 382 * is already a multiple of a polynomial produces a larger multiple of that 383 * polynomial. To enable a CRC to detect this condition, it's common to 384 * invert the CRC before appending it. This makes the remainder of the 385 * message+crc come out not as zero, but some fixed non-zero value. 386 * 387 * The same problem applies to zero bits prepended to the message, and 388 * a similar solution is used. Instead of starting with a remainder of 389 * 0, an initial remainder of all ones is used. As long as you start 390 * the same way on decoding, it doesn't make a difference. 391 */ 392 393 #ifdef UNITTEST 394 395 #include <stdlib.h> 396 #include <stdio.h> 397 398 #if 0 /*Not used at present */ 399 static void 400 buf_dump(char const *prefix, unsigned char const *buf, size_t len) 401 { 402 fputs(prefix, stdout); 403 while (len--) 404 printf(" %02x", *buf++); 405 putchar('\n'); 406 407 } 408 #endif 409 410 static void bytereverse(unsigned char *buf, size_t len) 411 { 412 while (len--) { 413 unsigned char x = *buf; 414 x = (x >> 4) | (x << 4); 415 x = (x >> 2 & 0x33) | (x << 2 & 0xcc); 416 x = (x >> 1 & 0x55) | (x << 1 & 0xaa); 417 *buf++ = x; 418 } 419 } 420 421 static void random_garbage(unsigned char *buf, size_t len) 422 { 423 while (len--) 424 *buf++ = (unsigned char) random(); 425 } 426 427 #if 0 /* Not used at present */ 428 static void store_le(u32 x, unsigned char *buf) 429 { 430 buf[0] = (unsigned char) x; 431 buf[1] = (unsigned char) (x >> 8); 432 buf[2] = (unsigned char) (x >> 16); 433 buf[3] = (unsigned char) (x >> 24); 434 } 435 #endif 436 437 static void store_be(u32 x, unsigned char *buf) 438 { 439 buf[0] = (unsigned char) (x >> 24); 440 buf[1] = (unsigned char) (x >> 16); 441 buf[2] = (unsigned char) (x >> 8); 442 buf[3] = (unsigned char) x; 443 } 444 445 /* 446 * This checks that CRC(buf + CRC(buf)) = 0, and that 447 * CRC commutes with bit-reversal. This has the side effect 448 * of bytewise bit-reversing the input buffer, and returns 449 * the CRC of the reversed buffer. 450 */ 451 static u32 test_step(u32 init, unsigned char *buf, size_t len) 452 { 453 u32 crc1, crc2; 454 size_t i; 455 456 crc1 = crc32_be(init, buf, len); 457 store_be(crc1, buf + len); 458 crc2 = crc32_be(init, buf, len + 4); 459 if (crc2) 460 printf("\nCRC cancellation fail: 0x%08x should be 0\n", 461 crc2); 462 463 for (i = 0; i <= len + 4; i++) { 464 crc2 = crc32_be(init, buf, i); 465 crc2 = crc32_be(crc2, buf + i, len + 4 - i); 466 if (crc2) 467 printf("\nCRC split fail: 0x%08x\n", crc2); 468 } 469 470 /* Now swap it around for the other test */ 471 472 bytereverse(buf, len + 4); 473 init = bitreverse(init); 474 crc2 = bitreverse(crc1); 475 if (crc1 != bitreverse(crc2)) 476 printf("\nBit reversal fail: 0x%08x -> %0x08x -> 0x%08x\n", 477 crc1, crc2, bitreverse(crc2)); 478 crc1 = crc32_le(init, buf, len); 479 if (crc1 != crc2) 480 printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1, 481 crc2); 482 crc2 = crc32_le(init, buf, len + 4); 483 if (crc2) 484 printf("\nCRC cancellation fail: 0x%08x should be 0\n", 485 crc2); 486 487 for (i = 0; i <= len + 4; i++) { 488 crc2 = crc32_le(init, buf, i); 489 crc2 = crc32_le(crc2, buf + i, len + 4 - i); 490 if (crc2) 491 printf("\nCRC split fail: 0x%08x\n", crc2); 492 } 493 494 return crc1; 495 } 496 497 #define SIZE 64 498 #define INIT1 0 499 #define INIT2 0 500 501 int main(void) 502 { 503 unsigned char buf1[SIZE + 4]; 504 unsigned char buf2[SIZE + 4]; 505 unsigned char buf3[SIZE + 4]; 506 int i, j; 507 u32 crc1, crc2, crc3; 508 509 for (i = 0; i <= SIZE; i++) { 510 printf("\rTesting length %d...", i); 511 fflush(stdout); 512 random_garbage(buf1, i); 513 random_garbage(buf2, i); 514 for (j = 0; j < i; j++) 515 buf3[j] = buf1[j] ^ buf2[j]; 516 517 crc1 = test_step(INIT1, buf1, i); 518 crc2 = test_step(INIT2, buf2, i); 519 /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */ 520 crc3 = test_step(INIT1 ^ INIT2, buf3, i); 521 if (crc3 != (crc1 ^ crc2)) 522 printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n", 523 crc3, crc1, crc2); 524 } 525 printf("\nAll test complete. No failures expected.\n"); 526 return 0; 527 } 528 529 #endif /* UNITTEST */ 530