Lines Matching +full:0 +full:- +full:5

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26 bits limbs
12 # - Handle multiple 64-byte blocks.
16 # clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
35 # vs5 = [r1*5,...]
36 # vs6 = [r2*5,...]
37 # vs7 = [r3*5,...]
38 # vs8 = [r4*5,...]
42 # r0, r4*5, r3*5, r2*5, r1*5;
43 # r1, r0, r4*5, r3*5, r2*5;
44 # r2, r1, r0, r4*5, r3*5;
45 # r3, r2, r1, r0, r4*5;
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
93 mflr 0
94 std 0, 16(1)
95 stdu 1,-752(1)
117 SAVE_VRS 20, 0, 9
152 RESTORE_VRS 20, 0, 9
204 ld 0, 16(1)
205 mtlr 0
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
210 # p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
211 # p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
212 # p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
221 vmulouw 10, 5, 3
224 vmulouw 13, 8, 0
228 vmulouw 10, 5, 26
239 vmulouw 10, 5, 27
248 vmulouw 10, 5, 28
257 vmulouw 10, 5, 29
269 vmuleuw 10, 5, 3
272 vmuleuw 13, 8, 0
280 vmuleuw 10, 5, 26
291 vmuleuw 10, 5, 27
302 vmuleuw 10, 5, 28
313 vmuleuw 10, 5, 29
334 # vs5 = [r4*5,...]
335 # vs6 = [r3*5,...]
336 # vs7 = [r2*5,...]
337 # vs8 = [r1*5,...]
339 # r0, r4*5, r3*5, r2*5, r1*5;
340 # r1, r0, r4*5, r3*5, r2*5;
341 # r2, r1, r0, r4*5, r3*5;
342 # r3, r2, r1, r0, r4*5;
359 vmr 5, 27
364 xxpermdi 58, 58, 36, 0x3 # r0
365 xxpermdi 59, 59, 37, 0x3 # r1
366 xxpermdi 60, 60, 38, 0x3 # r2
367 xxpermdi 61, 61, 39, 0x3 # r3
368 xxpermdi 62, 62, 40, 0x3 # r4
369 xxpermdi 36, 36, 36, 0x3
370 xxpermdi 37, 37, 37, 0x3
371 xxpermdi 38, 38, 38, 0x3
372 xxpermdi 39, 39, 39, 0x3
373 xxpermdi 40, 40, 40, 0x3
379 vaddudm 0, 9, 27
386 vmrgow 27, 27, 5
395 vaddudm 0, 9, 27
401 xxlor 0, 58, 58
406 xxlor 5, 32, 32
431 vaddudm 0, 9, 27
453 vand 5, 15, 25
467 vaddudm 5, 5, 10
479 ld 11, 0(10)
486 lvx 25, 0, 10 # v25 - mask
502 extrdi 16, 9, 12, 0
503 mtvsrdd 58, 0, 14
505 mtvsrdd 59, 0, 15
507 mtvsrdd 60, 0, 16
508 extrdi 18, 10, 24, 0
509 mtvsrdd 61, 0, 17
510 mtvsrdd 62, 0, 18
512 # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
513 li 9, 5
514 mtvsrdd 36, 0, 9
515 vmulouw 0, 27, 4 # v0 = rr0
529 .align 5
530 cmpdi 5, 64
537 li 21, 0 # counter to message
543 ld 9, 0(3)
547 mtvsrdd 41, 0, 19
550 extrdi 16, 9, 12, 0
551 mtvsrdd 36, 0, 14
553 mtvsrdd 37, 0, 15
555 mtvsrdd 38, 0, 16
556 extrdi 18, 10, 24, 0
557 mtvsrdd 39, 0, 17
558 mtvsrdd 40, 0, 18
565 lxvw4x 43, 0, 20
567 lxvw4x 44, 0, 17
585 vaddudm 21, 5, 10
592 lxvw4x 43, 0, 17
594 lxvw4x 44, 0, 17
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
613 vmrgow 5, 10, 21
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9
625 cmpdi 31, 0
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
655 vand 5, 15, 25
669 vaddudm 5, 5, 10
676 lxvw4x 43, 0, 20
678 lxvw4x 44, 0, 17
682 lxvw4x 43, 0, 17
684 lxvw4x 44, 0, 17
716 vaddudm 5, 5, 21
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
723 vmrgow 5, 10, 5
729 addi 5, 5, -64 # len -= 64
735 xxlor 58, 0, 0
740 xxlor 32, 5, 5
750 xxpermdi 41, 31, 46, 0
751 xxpermdi 42, 31, 47, 0
754 vaddudm 5, 15, 10
756 xxpermdi 43, 31, 48, 0
759 xxpermdi 44, 31, 49, 0
762 xxpermdi 45, 31, 50, 0
774 vaddudm 5, 5, 10
776 vsrd 11, 5, 31
778 vand 5, 5, 25
792 vaddudm 5, 5, 10
793 vsrd 10, 5, 31
794 vand 5, 5, 25
803 vsld 5, 5, 31
804 vor 20, 4, 5
821 std 17, 0(3)
826 li 3, 0
833 li 3, 0
842 # mask 0x0FFFFFFC0FFFFFFC
843 # mask 0x0FFFFFFC0FFFFFFF
846 ld 11, 0(10)
857 add 19, 21, 10 # s1: r19 - (r1 >> 2) *5
860 li 25, 0
861 mtvsrdd 32+0, 9, 19 # r0, s1
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
905 srdi 22, 29, 0x2
906 sldi 23, 22, 0x2
907 add 23, 23, 22 # (h2 >> 2) * 5
910 andi. 29, 29, 0x3 # h2
923 # - no highbit if final leftover block (highbit = 0)
926 cmpdi 5, 0
929 mflr 0
930 std 0, 16(1)
931 stdu 1,-400(1)
955 li 25, 0 # offset to inp and outp
961 ld 27, 0(3)
966 divdu 31, 5, 30
975 ld 20, 0(11)
983 li 22, 0
993 std 27, 0(3)
997 li 3, 0
1019 ld 0, 16(1)
1020 mtlr 0
1025 li 3, 0
1034 ld 10, 0(3)
1039 # h + 5 + (-p)
1043 addic. 6, 6, 5
1047 cmpdi 9, 0
1054 ld 6, 0(4)
1060 std 10, 0(5)
1061 std 11, 8(5)
1066 .align 5
1068 .byte 0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f
1070 .long 0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000
1071 .long 0x1a, 0x00, 0x1a, 0x00
1072 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000
1073 .long 0x00010203, 0x04050607, 0x10111213, 0x14151617
1074 .long 0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f