Lines Matching refs:TMP2
195 .macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
198 movdqa SHUF_MASK(%rip), \TMP2
199 pshufb \TMP2, \TMP3
203 movdqa \TMP3, \TMP2
205 psrlq $63, \TMP2
206 movdqa \TMP2, \TMP1
207 pslldq $8, \TMP2
209 por \TMP2, \TMP3
213 pshufd $0x24, \TMP1, \TMP2
214 pcmpeqd TWOONE(%rip), \TMP2
215 pand POLY(%rip), \TMP2
216 pxor \TMP2, \TMP3
224 GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
232 GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
239 GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
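The PRECOMPUTE lines above byte-swap the hash key, double it (a 1-bit left shift spread across both quadwords) and, when a bit falls off the top, xor in POLY via the pshufd/pcmpeqd/pand mask trick, before deriving the higher hash-key powers with the repeated GHASH_MUL calls. A minimal sketch of that doubling-with-conditional-reduction step, written in the plain (non-reflected) GF(2^128) convention where the reduction constant is 0x87; the assembly works on bit-reflected data, so its POLY constant is the mirrored form:

    MASK128 = (1 << 128) - 1

    def gf128_double(h):
        # Multiply h by x in GF(2^128), plain polynomial convention.
        carry = h >> 127              # bit shifted out at the top
        h = (h << 1) & MASK128
        if carry:                     # conditional reduction; the assembly
            h ^= 0x87                 # does this branch-free (see below)
        return h

Instead of branching, the assembly turns the carried-out bit into a mask (pcmpeqd against TWOONE), ands it with POLY and xors the result in.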
510 .macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
512 pshufd $78, \GH, \TMP2
514 pxor \GH, \TMP2 # TMP2 = a1+a0
518 pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
519 pxor \GH, \TMP2
520 pxor \TMP1, \TMP2 # TMP2 = (a0*b1)+(a1*b0)
521 movdqa \TMP2, \TMP3
523 psrldq $8, \TMP2 # right shift TMP2 2 DWs
525 pxor \TMP2, \TMP1 # TMP1:GH holds the result of GH*HK
529 movdqa \GH, \TMP2
531 movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4
534 pslld $31, \TMP2 # packed left shift <<31
537 pxor \TMP3, \TMP2 # xor the shifted versions
538 pxor \TMP4, \TMP2
539 movdqa \TMP2, \TMP5
541 pslldq $12, \TMP2 # left shift TMP2 3 DWs
542 pxor \TMP2, \GH
546 movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4
551 psrld $1,\TMP2 # packed right shift >>1
554 pxor \TMP3,\TMP2 # xor the shifted versions
555 pxor \TMP4,\TMP2
556 pxor \TMP5, \TMP2
557 pxor \TMP2, \GH
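GHASH_MUL does the 128x128-bit carry-less multiply with three pclmulqdq operations instead of four (Karatsuba): a1*b1, a0*b0 and (a1+a0)*(b1+b0), with the pxor pair in the listing recovering the middle term before the shift-based reduction that follows. A minimal Python sketch of the identity those pxor fixups rely on, with clmul standing in for pclmulqdq:

    def clmul(a, b):
        # Carry-less multiply: polynomial multiplication over GF(2).
        r = 0
        while b:
            if b & 1:
                r ^= a
            a <<= 1
            b >>= 1
        return r

    a = 0x0123456789abcdeffedcba9876543210
    b = 0x0f1e2d3c4b5a69788796a5b4c3d2e1f0
    a1, a0 = a >> 64, a & ((1 << 64) - 1)
    b1, b0 = b >> 64, b & ((1 << 64) - 1)
    hi  = clmul(a1, b1)                       # pclmulqdq $0x11
    lo  = clmul(a0, b0)                       # pclmulqdq $0x00
    mid = clmul(a1 ^ a0, b1 ^ b0) ^ hi ^ lo   # the two pxor fixups
    assert (hi << 128) ^ (mid << 64) ^ lo == clmul(a, b)

The movdqa/pslldq/psrldq trio after the fixups splits mid across the low and high 128-bit halves of the product, exactly what the << 64 term does above.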
594 .macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
608 GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
624 GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
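CALC_AAD_HASH folds the additional authenticated data into the running hash one 16-byte block at a time: xor the block into the accumulator, then multiply by HASHKEY (the two GHASH_MUL calls cover the full blocks and the partial tail). A sketch of that accumulation in the plain polynomial convention, reusing clmul from the previous sketch; real GHASH additionally treats each block as bit-reflected, which this omits:

    def gf128_reduce(p):
        # Reduce an up-to-255-bit polynomial modulo x^128 + x^7 + x^2 + x + 1.
        for i in range(255, 127, -1):
            if (p >> i) & 1:
                p ^= (1 << i) ^ (0x87 << (i - 128))
        return p

    def ghash(h, blocks):
        acc = 0
        for x in blocks:                  # one 128-bit integer per 16-byte block
            acc = gf128_reduce(clmul(acc ^ x, h))
        return acc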
787 .macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
800 MOVADQ 0(%arg1),\TMP2
809 pxor \TMP2, %xmm\index
849 GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
851 GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
853 GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
856 GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
858 GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
861 GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
914 MOVADQ (%r10),\TMP2
916 aesenc \TMP2, %xmm\index
923 MOVADQ (%r10), \TMP2
924 aesenclast \TMP2, \XMM1
925 aesenclast \TMP2, \XMM2
926 aesenclast \TMP2, \XMM3
927 aesenclast \TMP2, \XMM4
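The INITIAL_BLOCKS_ENC_DEC lines show the inlined AES-CTR pipeline for the first few blocks: xor in the first round key loaded from 0(%arg1), run aesenc with each middle round key fetched through %r10, finish with aesenclast, then xor the resulting keystream into the data while GHASH_MUL folds the fresh ciphertext blocks into the hash. A structural sketch of that counter-mode flow; aes_encrypt_block is a stand-in callable for the hardware round sequence, not a routine from the source:

    def ctr_blocks(aes_encrypt_block, counter, blocks):
        # counter and blocks are 128-bit integers; GCM increments only the
        # low 32 bits of the counter (inc32), mirrored here.
        out = []
        for block in blocks:
            counter = (counter & ~0xffffffff) | ((counter + 1) & 0xffffffff)
            out.append(block ^ aes_encrypt_block(counter))   # keystream xor
        return out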
976 .macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
1023 pshufd $78, \XMM6, \TMP2
1024 pxor \XMM6, \TMP2
1039 pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1048 pxor \TMP2, \TMP6
1050 pshufd $78, \XMM7, \TMP2
1051 pxor \XMM7, \TMP2
1069 pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1078 pxor \TMP2, \TMP6
1084 pshufd $78, \XMM8, \TMP2
1085 pxor \XMM8, \TMP2
1116 pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1136 pxor \TMP6, \TMP2
1137 pxor \TMP1, \TMP2
1138 pxor \XMM5, \TMP2
1139 movdqa \TMP2, \TMP3
1141 psrldq $8, \TMP2 # right shift TMP2 2 DWs
1143 pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
1147 movdqa \XMM5, \TMP2
1150 # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
1151 pslld $31, \TMP2 # packed left shift << 31
1154 pxor \TMP3, \TMP2 # xor the shifted versions
1155 pxor \TMP4, \TMP2
1156 movdqa \TMP2, \TMP5
1158 pslldq $12, \TMP2 # left shift TMP2 3 DWs
1159 pxor \TMP2, \XMM5
1163 movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
1166 psrld $1, \TMP2 # packed right shift >>1
1169 pxor \TMP3,\TMP2 # xor the shifted versions
1170 pxor \TMP4,\TMP2
1171 pxor \TMP5, \TMP2
1172 pxor \TMP2, \XMM5
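GHASH_4_ENCRYPT_4_PARALLEL_enc interleaves four AES streams with four Karatsuba multiplies against HashKey_4 down to HashKey, xors the unreduced partial products together (the pxor chains into TMP6, TMP1 and XMM5 above) and reduces only once at the end. That deferred, aggregated reduction is valid because reduction modulo the field polynomial is linear over GF(2). A check of the property, reusing clmul, gf128_reduce and ghash from the sketches above; the key and data values are arbitrary examples:

    H  = 0x66e94bd4ef8a2c3b884cfa59ca342b2e      # example hash key
    X  = [0x01, 0x02, 0x03, 0x04]                # four example blocks
    H2 = gf128_reduce(clmul(H, H))
    H3 = gf128_reduce(clmul(H2, H))
    H4 = gf128_reduce(clmul(H3, H))

    # One reduction over the xor of four unreduced 256-bit products ...
    aggregated = gf128_reduce(clmul(X[0], H4) ^ clmul(X[1], H3) ^
                              clmul(X[2], H2) ^ clmul(X[3], H))
    # ... equals the sequential per-block (Horner) GHASH evaluation.
    assert aggregated == ghash(H, X)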
1184 .macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
1231 pshufd $78, \XMM6, \TMP2
1232 pxor \XMM6, \TMP2
1247 pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1256 pxor \TMP2, \TMP6
1258 pshufd $78, \XMM7, \TMP2
1259 pxor \XMM7, \TMP2
1277 pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1286 pxor \TMP2, \TMP6
1292 pshufd $78, \XMM8, \TMP2
1293 pxor \XMM8, \TMP2
1324 pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1348 pxor \TMP6, \TMP2
1349 pxor \TMP1, \TMP2
1350 pxor \XMM5, \TMP2
1351 movdqa \TMP2, \TMP3
1353 psrldq $8, \TMP2 # right shift TMP2 2 DWs
1355 pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
1359 movdqa \XMM5, \TMP2
1362 # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
1363 pslld $31, \TMP2 # packed left shift << 31
1366 pxor \TMP3, \TMP2 # xor the shifted versions
1367 pxor \TMP4, \TMP2
1368 movdqa \TMP2, \TMP5
1370 pslldq $12, \TMP2 # left shift TMP2 3 DWs
1371 pxor \TMP2, \XMM5
1375 movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
1378 psrld $1, \TMP2 # packed right shift >>1
1381 pxor \TMP3,\TMP2 # xor the shifted versions
1382 pxor \TMP4,\TMP2
1383 pxor \TMP5, \TMP2
1384 pxor \TMP2, \XMM5
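The _dec variant above repeats the _enc arithmetic line for line; only which side of the xor gets hashed differs, because CTR keystream generation is identical for encryption and decryption. A short check using ctr_blocks from the INITIAL_BLOCKS sketch, with a dummy stand-in for the block cipher:

    dummy_cipher = lambda ctr: (ctr * 0x9e3779b97f4a7c15) & ((1 << 128) - 1)
    pt = [0x00112233445566778899aabbccddeeff, 0x0f0e0d0c0b0a09080706050403020100]
    ct = ctr_blocks(dummy_cipher, 1, pt)
    assert ctr_blocks(dummy_cipher, 1, ct) == pt   # decrypting is re-encrypting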
1391 .macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
1397 pshufd $78, \XMM1, \TMP2
1398 pxor \XMM1, \TMP2
1403 pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1405 movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
1410 pshufd $78, \XMM2, \TMP2
1411 pxor \XMM2, \TMP2
1416 pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1419 pxor \TMP2, \XMM1
1425 pshufd $78, \XMM3, \TMP2
1426 pxor \XMM3, \TMP2
1431 pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1434 pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
1438 pshufd $78, \XMM4, \TMP2
1439 pxor \XMM4, \TMP2
1444 pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
1447 pxor \XMM1, \TMP2
1448 pxor \TMP6, \TMP2
1449 pxor \XMMDst, \TMP2
1451 movdqa \TMP2, \TMP4
1453 psrldq $8, \TMP2 # right shift TMP2 2 DWs
1455 pxor \TMP2, \TMP6
1458 movdqa \XMMDst, \TMP2
1461 # move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
1462 pslld $31, \TMP2 # packed left shifting << 31
1465 pxor \TMP3, \TMP2 # xor the shifted versions
1466 pxor \TMP4, \TMP2
1467 movdqa \TMP2, \TMP7
1469 pslldq $12, \TMP2 # left shift TMP2 3 DWs
1470 pxor \TMP2, \XMMDst
1473 movdqa \XMMDst, \TMP2
1477 psrld $1, \TMP2 # packed right shift >> 1
1480 pxor \TMP3, \TMP2 # xor the shifted versions
1481 pxor \TMP4, \TMP2
1482 pxor \TMP7, \TMP2
1483 pxor \TMP2, \XMMDst
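GHASH_LAST_4 multiplies the four per-lane accumulators XMM1..XMM4 by HashKey_4 down to HashKey, xors everything into TMP6:XMMDst and then runs the final reduction in two phases; the exponents 1, 2 and 7 of the GHASH polynomial are what produce the pslld $31 / psrld $1 shifts visible here (together with the 30/25 and 2/7 shifts on the TMP3/TMP4 copies in the full source). In the plain, non-reflected convention the same reduction is a fold of the high 128 bits through x^128 ≡ x^7 + x^2 + x + 1, plus a second small fold for the few bits that spill past bit 127; the assembly operates on bit-reflected data, so its shift pattern is the mirrored form. A sketch of the two-phase fold, checked against clmul and the bit-by-bit gf128_reduce from the earlier sketches:

    MASK128 = (1 << 128) - 1

    def gf128_reduce_2phase(p):
        # p is an up-to-256-bit carry-less product.
        lo, hi = p & MASK128, p >> 128
        t = hi ^ (hi << 1) ^ (hi << 2) ^ (hi << 7)   # fold x^128 -> x^7+x^2+x+1
        spill = t >> 128                             # at most 7 bits spill over
        t = (t & MASK128) ^ spill ^ (spill << 1) ^ (spill << 2) ^ (spill << 7)
        return lo ^ t

    p = clmul(0x0123456789abcdef0011223344556677,
              0x66e94bd4ef8a2c3b884cfa59ca342b2e)
    assert gf128_reduce_2phase(p) == gf128_reduce(p)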