
# Implement fast SHA-256 with AVX1 instructions. (x86_64)
#
# This software is available to you under a choice of one of two
# licenses: the GNU General Public License (GPL) Version 2, or the
# OpenIB.org BSD license, whose conditions include:
# - Redistributions of source code must retain the above
#   copyright notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above
#   copyright notice, this list of conditions and the following disclaimer
#   in the documentation and/or other materials provided with the distribution.
# The software is provided "as is", without the implied warranties of
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#
# This code is described in an Intel White-Paper:
# "Fast SHA-256 Implementations on Intel Architecture Processors"
#
# This code schedules 1 block at a time, with 4 lanes per block
# addm: add reg to mem using reg-mem add and store

# MY_ROR: rotate \p2 right by \p1 bits via shld with \p2 as both operands
shld $(32-(\p1)), \p2, \p2
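
For reference, the identity the MY_ROR macro relies on is easy to check in C. This is a minimal sketch, not part of the source; ror32 and ror32_via_shld are illustrative names.

#include <assert.h>
#include <stdint.h>

/* Reference rotate-right. */
static uint32_t ror32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* What `shld $(32-n), x, x` computes: a left shift by (32-n) whose
 * vacated low bits are filled from a second copy of x, i.e. a left
 * rotate by (32-n), which is the same as a right rotate by n. */
static uint32_t ror32_via_shld(uint32_t x, unsigned n)
{
        unsigned k = 32 - n;
        return (x << k) | (x >> (32 - k));
}

int main(void)
{
        for (unsigned n = 1; n < 32; n++)
                assert(ror32(0xdeadbeef, n) == ror32_via_shld(0xdeadbeef, n));
        return 0;
}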
SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
e = %edx
a = %eax

_XMM_SAVE_SIZE = 0
_INP_END = 0
# Rotate values of symbols a...h
f = e
e = d
b = a
a = TMP_
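
The symbol rotation means no data ever moves between registers at the end of a round; only the names move. A scalar C analogue of the effect (a sketch, with s[0] = a through s[7] = h as illustrative layout):

#include <stdint.h>

/* Scalar analogue of the symbol rotation: after a round produces its
 * two temporaries T1 and T2, every working variable shifts down one
 * slot.  The assembly gets this for free by redefining which register
 * each of a..h names instead of emitting mov instructions. */
static void rotate_state(uint32_t s[8], uint32_t t1, uint32_t t2)
{
        s[7] = s[6];            /* h = g */
        s[6] = s[5];            /* g = f */
        s[5] = s[4];            /* f = e */
        s[4] = s[3] + t1;       /* e = d + T1 */
        s[3] = s[2];            /* d = c */
        s[2] = s[1];            /* c = b */
        s[1] = s[0];            /* b = a */
        s[0] = t1 + t2;         /* a = T1 + T2 */
}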
## compute s0 four at a time and s1 two at a time
## compute W[-16] + W[-7] 4 at a time
mov e, y0 # y0 = e
MY_ROR (25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
xor a, y1 # y1 = a ^ (a >> (22-13))
vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
vpsrld $7, XTMP1, XTMP2 # XTMP2 = W[-15] >> 7
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpslld $(32-7), XTMP1, XTMP3 # XTMP3 = W[-15] << (32-7)
and b, y0 # y0 = (a|c)&b
vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
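
Interleaved with the schedule work above is one complete round computation on y0/y1/y2. Because the interleaving is dense, here is the same round in scalar C for reference (a sketch; sha256_round and k_plus_w are illustrative names, and K[t] + W[t] is the precomputed value the round consumes):

#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round, mirroring the comment algebra: S1 and CH feed
 * T1; S0 and MAJ feed T2.  MAJ uses the same ((a|c)&b)|(a&c) form as
 * the assembly, which equals the textbook (a&b)^(a&c)^(b&c). */
static void sha256_round(uint32_t s[8], uint32_t k_plus_w)
{
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

        uint32_t S1  = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
        uint32_t CH  = ((f ^ g) & e) ^ g;       /* the y2 trick */
        uint32_t T1  = h + S1 + CH + k_plus_w;  /* k_plus_w = K[t] + W[t] */
        uint32_t S0  = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
        uint32_t MAJ = ((a | c) & b) | (a & c);

        s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1;
        s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + S0 + MAJ;
}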
mov e, y0 # y0 = e
mov a, y1 # y1 = a
MY_ROR (25-11), y0 # y0 = e >> (25-11)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
vpslld $(32-18), XTMP1, XTMP1 # XTMP1 = W[-15] << (32-18)
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
and b, y0 # y0 = (a|c)&b
vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
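
The vector instructions threaded through the last two round groups build sigma0 of W[-15]. A scalar sketch of the same construction (sigma0 is an illustrative name):

#include <assert.h>
#include <stdint.h>

/* sigma0 built the way the vector code builds it: AVX1 has no packed
 * rotate, so each rotate is a shift-right, a shift-left and an OR/XOR
 * (the vpsrld / vpslld / vpor / vpxor sequence above). */
static uint32_t sigma0(uint32_t w15)
{
        uint32_t ror7  = (w15 >> 7)  | (w15 << (32 - 7));
        uint32_t ror18 = (w15 >> 18) | (w15 << (32 - 18));
        return ror7 ^ ror18 ^ (w15 >> 3);
}

int main(void)
{
        /* ror7(1) = 0x02000000, ror18(1) = 0x00004000, 1 >> 3 = 0 */
        assert(sigma0(1) == 0x02004000);
        return 0;
}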
mov e, y0 # y0 = e
mov a, y1 # y1 = a
MY_ROR (25-11), y0 # y0 = e >> (25-11)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA}
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA}
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
and b, y0 # y0 = (a|c)&b
vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
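
The vpsrlq pair is the clever part of sigma1: vpshufd $0b11111010 duplicated each W[-2] dword into both halves of a qword ({BBAA}), so a 64-bit logical right shift leaves a 32-bit rotate in the low dword of each lane. A scalar check of that identity (illustrative names):

#include <assert.h>
#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* Duplicating a dword into both halves of a qword turns a 64-bit
 * right shift into a 32-bit rotate: the bits shifted out of the high
 * copy land in the top of the low dword. */
static uint32_t sigma1_via_qword_shift(uint32_t w2)
{
        uint64_t dup = ((uint64_t)w2 << 32) | w2;  /* vpshufd duplication */
        uint32_t r17 = (uint32_t)(dup >> 17);      /* vpsrlq $17 */
        uint32_t r19 = (uint32_t)(dup >> 19);      /* vpsrlq $19 */
        return r17 ^ r19 ^ (w2 >> 10);             /* vpxor with vpsrld $10 */
}

int main(void)
{
        uint32_t w = 0x12345678;
        assert(sigma1_via_qword_shift(w) ==
               (ror32(w, 17) ^ ror32(w, 19) ^ (w >> 10)));
        return 0;
}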
mov e, y0 # y0 = e
MY_ROR (25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
xor a, y1 # y1 = a ^ (a >> (22-13))
vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC}
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and e, y2 # y2 = (f^g)&e
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC}
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]}
and b, y0 # y0 = (a|c)&b
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
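
With X0 now holding {W[3], W[2], W[1], W[0]}, the block above has evaluated the standard message-schedule recurrence four words at a time. The scalar form, for reference (sha256_schedule is an illustrative name):

#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned n)
{
        return (x >> n) | (x << (32 - n));
}

/* The recurrence the vector code evaluates four lanes at a time:
 * W[t] = W[t-16] + sigma0(W[t-15]) + W[t-7] + sigma1(W[t-2]). */
static void sha256_schedule(uint32_t w[64])
{
        for (int t = 16; t < 64; t++) {
                uint32_t s0 = ror32(w[t-15], 7) ^ ror32(w[t-15], 18) ^ (w[t-15] >> 3);
                uint32_t s1 = ror32(w[t-2], 17) ^ ror32(w[t-2], 19) ^ (w[t-2] >> 10);
                w[t] = w[t-16] + s0 + w[t-7] + s1;
        }
}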
mov e, y0 # y0 = e
MY_ROR (25-11), y0 # y0 = e >> (25-11)
mov a, y1 # y1 = a
xor e, y0 # y0 = e ^ (e >> (25-11))
MY_ROR (22-13), y1 # y1 = a >> (22-13)
xor a, y1 # y1 = a ^ (a >> (22-13))
MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
and e, y2 # y2 = (f^g)&e
xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
xor g, y2 # y2 = CH = ((f^g)&e)^g
MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
mov a, y0 # y0 = a
mov a, y2 # y2 = a
or c, y0 # y0 = a|c
and c, y2 # y2 = a&c
and b, y0 # y0 = (a|c)&b
or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
# load the current digest state from CTX
mov 4*0(CTX), a
mov 4*4(CTX), e
mov 4*7(CTX), h
# load input and byte-swap each dword to big-endian order
COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK
DO_ROUND 0 # first four-round group
DO_ROUND 0 # second four-round group
# fold the working variables back into the digest
addm (4*0)(CTX),a
addm (4*4)(CTX),e
addm (4*7)(CTX),h
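
The movs load the chaining value from CTX before the rounds, and the addm macros fold the working variables back in afterwards: the usual Davies-Meyer feed-forward. An outline of the block loop in C (a sketch; sha256_rounds is an assumed stand-in for the schedule and round macros, not part of the source):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Assumed stand-in for the 64 rounds above (schedule + DO_ROUND). */
void sha256_rounds(uint32_t s[8], const uint8_t block[64]);

void sha256_transform_blocks(uint32_t ctx[8], const uint8_t *inp, size_t nblocks)
{
        while (nblocks--) {
                uint32_t s[8];

                memcpy(s, ctx, sizeof(s));      /* mov 4*0(CTX),a ... mov 4*7(CTX),h */
                sha256_rounds(s, inp);
                for (int i = 0; i < 8; i++)
                        ctx[i] += s[i];         /* addm (4*i)(CTX), reg */
                inp += 64;
        }
}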
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
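
These 64 constants are the first 32 bits of the fractional parts of the cube roots of the first 64 primes (FIPS 180-4). A sketch that regenerates the table, assuming double precision is accurate enough for these values:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Regenerate K256: frac(cbrt(p)) * 2^32 for the first 64 primes. */
int main(void)
{
        int primes[64], n = 0;

        for (int p = 2; n < 64; p++) {
                int is_prime = 1;
                for (int d = 2; d * d <= p; d++)
                        if (p % d == 0) { is_prime = 0; break; }
                if (is_prime)
                        primes[n++] = p;
        }
        for (int i = 0; i < 64; i++) {
                double r = cbrt((double)primes[i]);
                uint32_t k = (uint32_t)((r - floor(r)) * 4294967296.0);
                printf("0x%08x%c", k, (i % 4 == 3) ? '\n' : ',');
        }
        return 0;
}

Its first output line, 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5, matches the first .long row above.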
# shuffle mask to byte-swap each dword (big-endian message load)
.octa 0x0c0d0e0f08090a0b0405060700010203

# shuffle xBxA -> 00BA
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100

# shuffle xDxC -> DC00
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
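
All three masks are operands for vpshufb, which picks one source byte per mask byte and writes zero wherever the mask byte has its high bit set; that is what the 0xFF bytes are for. A small emulation showing the xBxA -> 00BA case (illustrative; the mask array is the low qword of the 00BA .octa above, low byte first):

#include <stdint.h>
#include <stdio.h>

/* vpshufb semantics: dst[i] = (mask[i] high bit set) ? 0 : src[mask[i] & 15]. */
static void pshufb(uint8_t dst[16], const uint8_t src[16], const uint8_t mask[16])
{
        for (int i = 0; i < 16; i++)
                dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
}

int main(void)
{
        /* shuffle xBxA -> 00BA: keep dwords 0 and 2, zero the rest */
        const uint8_t shuf_00ba[16] = {
                0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        };
        const uint8_t src[16] = {               /* dwords: A, x, B, x */
                0xa0, 0xa1, 0xa2, 0xa3, 0x55, 0x55, 0x55, 0x55,
                0xb0, 0xb1, 0xb2, 0xb3, 0x55, 0x55, 0x55, 0x55,
        };
        uint8_t dst[16];

        pshufb(dst, src, shuf_00ba);
        for (int i = 0; i < 16; i++)            /* prints A bytes, B bytes, then zeros */
                printf("%02x%c", dst[i], i == 15 ? '\n' : ' ');
        return 0;
}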