Lines Matching +full:1 +full:w

3  * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental
17 … http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/
62 /* we keep a window of 64 pre-calculated w[i]+K values in a circular buffer */
67 * This macro implements the SHA-1 function's body for a single 64-byte block
109 * This macro implements 80 rounds of SHA-1 for one 64-byte block
123 .set i, (i+1)
127 1:
188 jne 1b
250 * RR does two rounds of SHA-1 back to back with W[] pre-calc
251 * t1 = F(b, c, d); e += w(i)
252 * e += t1; b <<= 30; d += w(i+1);
266 add WK(\round + 1), \d
269 W_PRECALC (\round + W_PRECALC_AHEAD + 1)
312 .set W, W0 define
320 .set W_minus_32, W
331 .set W_minus_04, W
332 .set W, W_minus_32 define
351 .elseif ((i & 3) == 1)
353 movdqa W_TMP1, W
364 * - calculating the last 32 w[i] values in 8 XMM registers
365 * - pre-calculating K+w[i] values and storing them to memory, for later load by ALU add
368 * some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3]
375 movdqa W_minus_12, W
376 palignr $8, W_minus_16, W # w[i-14]
378 psrldq $4, W_TMP1 # w[i-3]
379 pxor W_minus_08, W
380 .elseif ((i & 3) == 1)
382 pxor W_TMP1, W
383 movdqa W, W_TMP2
384 movdqa W, W_TMP1
387 psrld $31, W
388 pslld $1, W_TMP1
389 por W, W_TMP1
390 movdqa W_TMP2, W
392 pslld $2, W
394 pxor W, W_TMP1
396 movdqa W_TMP1, W
405 * in the SHA-1 specification: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
406 * instead we compute the equivalent: w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
407 * this allows more efficient vectorization since the w[i]=>w[i-3] dependency is broken
412 pxor W_minus_28, W # W is W_minus_32 before xor
414 .elseif ((i & 3) == 1)
415 pxor W_minus_16, W
416 pxor W_TMP1, W
417 movdqa W, W_TMP1
419 psrld $30, W
421 por W, W_TMP1
423 movdqa W_TMP1, W
489 .elseif ((i & 3) == 1)
490 vpshufb XMM_SHUFB_BSWAP, W_TMP1, W
492 vpaddd (K_BASE), W, W_TMP1
501 vpalignr $8, W_minus_16, W_minus_12, W # w[i-14]
502 vpsrldq $4, W_minus_04, W_TMP1 # w[i-3]
503 vpxor W_minus_08, W, W
505 .elseif ((i & 3) == 1)
506 vpxor W_TMP1, W, W
507 vpslldq $12, W, W_TMP2
508 vpslld $1, W, W_TMP1
510 vpsrld $31, W, W
511 vpor W, W_TMP1, W_TMP1
512 vpslld $2, W_TMP2, W
515 vpxor W, W_TMP1, W_TMP1
516 vpxor W_TMP2, W_TMP1, W
517 vpaddd K_XMM(K_BASE), W, W_TMP1
526 vpxor W_minus_28, W, W # W is W_minus_32 before xor
527 .elseif ((i & 3) == 1)
529 vpxor W_TMP1, W, W
531 vpslld $2, W, W_TMP1
532 vpsrld $30, W, W
533 vpor W, W_TMP1, W
535 vpaddd K_XMM(K_BASE), W, W_TMP1