Lines Matching +full:4 +full:- +full:31
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar code
11 # - 26-bit limbs
12 # - handles multiple 64-byte blocks
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
28 # setup r^4, r^3, r^2, r vectors
29 # vs [r^1, r^3, r^2, r^4]
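Concretely, the recurrence these header comments describe is a = (a + m_i) * r mod p over 16-byte blocks. A minimal C sketch with a toy modulus (poly_ref is a hypothetical helper, not code from this file; the real code keeps the accumulator in five 26-bit limbs mod p = 2^130 - 5):

#include <stdint.h>

/* Toy-modulus sketch of the serial Poly1305 recurrence; assumes the
 * blocks m[] are already reduced and p*p fits in 64 bits. */
uint64_t poly_ref(const uint64_t *m, int nblocks, uint64_t r, uint64_t p)
{
        uint64_t a = 0;

        for (int i = 0; i < nblocks; i++)
                a = (a + m[i]) % p * r % p;
        return a;
}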
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1) # allocate a 752-byte stack frame (store SP with update)
114 SAVE_GPR 31, 248, 1
128 SAVE_VRS 31, 176, 9
147 SAVE_VSX 31, 464, 9
163 RESTORE_VRS 31, 176, 9
182 RESTORE_VSX 31, 464, 9
201 RESTORE_GPR 31, 248, 1
213 # p[4] = a0*r4 + a1*r3 + a2*r2 + a3*r1 + a4*r0 ;
215 # [r^2, r^3, r^1, r^4]
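The five partial products of one such limb multiply, written out as a C sketch (not kernel code; limb_mul is hypothetical). p[4] matches the comment above, and sr[i] = 5*r[i] is the precomputed fold for the columns that overflow limb 4, using 2^130 ≡ 5 (mod p):

#include <stdint.h>

/* One h *= r step in radix 2^26: a[], r[] are 26-bit limbs, sr[i] = 5*r[i].
 * Each product is < 2^58, so the five-term sums fit in uint64_t. */
void limb_mul(uint64_t p[5], const uint32_t a[5],
              const uint32_t r[5], const uint32_t sr[5])
{
        p[0] = (uint64_t)a[0]*r[0] + (uint64_t)a[1]*sr[4] + (uint64_t)a[2]*sr[3]
             + (uint64_t)a[3]*sr[2] + (uint64_t)a[4]*sr[1];
        p[1] = (uint64_t)a[0]*r[1] + (uint64_t)a[1]*r[0]  + (uint64_t)a[2]*sr[4]
             + (uint64_t)a[3]*sr[3] + (uint64_t)a[4]*sr[2];
        p[2] = (uint64_t)a[0]*r[2] + (uint64_t)a[1]*r[1]  + (uint64_t)a[2]*r[0]
             + (uint64_t)a[3]*sr[4] + (uint64_t)a[4]*sr[3];
        p[3] = (uint64_t)a[0]*r[3] + (uint64_t)a[1]*r[2]  + (uint64_t)a[2]*r[1]
             + (uint64_t)a[3]*r[0]  + (uint64_t)a[4]*sr[4];
        p[4] = (uint64_t)a[0]*r[4] + (uint64_t)a[1]*r[3]  + (uint64_t)a[2]*r[2]
             + (uint64_t)a[3]*r[1]  + (uint64_t)a[4]*r[0];
}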
220 vmulouw 14, 4, 26
225 vmulouw 15, 4, 27
238 vmulouw 16, 4, 28
247 vmulouw 17, 4, 29
256 vmulouw 18, 4, 30
268 vmuleuw 9, 4, 26
279 vmuleuw 9, 4, 27
290 vmuleuw 9, 4, 28
301 vmuleuw 9, 4, 29
312 vmuleuw 9, 4, 30
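The vmulouw/vmuleuw pairs above are 32x32->64 multiplies on alternating word lanes, which is how one register carries two interleaved accumulator states. A plain-C sketch of their semantics (using the architectural, big-endian element numbering):

#include <stdint.h>

/* vmulouw multiplies the odd-numbered word elements (1 and 3),
 * vmuleuw the even-numbered ones (0 and 2); each instruction
 * produces two 64-bit products per register. */
static void emu_vmulouw(uint64_t d[2], const uint32_t a[4], const uint32_t b[4])
{
        d[0] = (uint64_t)a[1] * b[1];
        d[1] = (uint64_t)a[3] * b[3];
}

static void emu_vmuleuw(uint64_t d[2], const uint32_t a[4], const uint32_t b[4])
{
        d[0] = (uint64_t)a[0] * b[0];
        d[1] = (uint64_t)a[2] * b[2];
}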
327 # setup r^4, r^3, r^2, r vectors
328 # [r, r^3, r^2, r^4]
354 xxlxor 31, 31, 31 # vs31 = 0
356 # [r, r^3, r^2, r^4]
358 vmr 4, 26
384 bl do_mul # r^4 r^3
385 vmrgow 26, 26, 4
400 # r^2 r^4
405 xxlor 4, 62, 62
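In scalar terms, what this setup produces is the key-power ladder below (a sketch; poly_mul is a hypothetical stand-in for do_mul, h = a * b mod 2^130 - 5 on five 26-bit limbs). The asm then interleaves the results into the [r, r^3] / [r^2, r^4] word layout noted above:

#include <stdint.h>

/* Hypothetical scalar stand-in for what do_mul builds here. */
void poly_mul(uint32_t h[5], const uint32_t a[5], const uint32_t b[5]);

void setup_r_powers(uint32_t r2[5], uint32_t r3[5], uint32_t r4[5],
                    const uint32_t r[5])
{
        poly_mul(r2, r, r);      /* r^2 = r * r     */
        poly_mul(r3, r2, r);     /* r^3 = r^2 * r   */
        poly_mul(r4, r2, r2);    /* r^4 = r^2 * r^2 */
}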
443 vsrd 10, 14, 31 # carry out of limb 0: p0 >> 26
444 vsrd 11, 17, 31 # carry out of limb 3: p3 >> 26
446 vand 4, 14, 25 # h0 = p0 & 0x3ffffff
448 vsrd 12, 18, 31 # carry out of limb 4
451 vsrd 11, 15, 31 # carry out of limb 1
454 vaddudm 4, 4, 12 # h0 += carry4 (first part of carry4 * 5)
458 vsrd 13, 6, 31 # carry out of limb 2
460 vaddudm 4, 4, 10 # h0 += carry4 << 2 (completes carry4 * 5)
461 vsrd 10, 4, 31 # second-pass carry out of h0
464 vsrd 11, 7, 31 # second-pass carry out of h3
466 vand 4, 4, 25 # h0 &= 0x3ffffff
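The same carry pass in scalar C (a sketch; the asm above interleaves the carries across limbs for latency, but the per-limb data flow is this). The carry out of limb 4 re-enters limb 0 multiplied by 5, since 2^130 ≡ 5 (mod p), computed as c + (c << 2) exactly like the vaddudm/vsld pair:

#include <stdint.h>

#define LIMB_MASK 0x3ffffffULL   /* v25: 2^26 - 1 */

/* Reduce the 64-bit partial products p[0..4] back to 26-bit limbs. */
static void carry_reduce(uint64_t p[5])
{
        uint64_t c;

        c = p[0] >> 26; p[0] &= LIMB_MASK; p[1] += c;
        c = p[1] >> 26; p[1] &= LIMB_MASK; p[2] += c;
        c = p[2] >> 26; p[2] &= LIMB_MASK; p[3] += c;
        c = p[3] >> 26; p[3] &= LIMB_MASK; p[4] += c;
        c = p[4] >> 26; p[4] &= LIMB_MASK; p[0] += c + (c << 2);
        c = p[0] >> 26; p[0] &= LIMB_MASK; p[1] += c;   /* short 2nd pass */
}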
486 lvx 25, 0, 10 # v25 = 0x3ffffff (26-bit limb mask)
487 lvx 31, 14, 10 # v31 = 0x1a (26, the per-limb shift count)
515 vmulouw 0, 27, 4 # v0 = rr0
516 vmulouw 1, 28, 4 # v1 = rr1
517 vmulouw 2, 29, 4 # v2 = rr2
518 vmulouw 3, 30, 4 # v3 = rr3
562 add 20, 4, 21
571 vsrd 10, 14, 31 # >> 26
572 vsrd 11, 10, 31 # 12 bits left
581 vsrd 13, 12, 31 # >> 26, a4
584 vaddudm 20, 4, 9
598 vsrd 10, 14, 31 # >> 26
599 vsrd 11, 10, 31 # 12 bits left
608 vsrd 13, 12, 31 # >> 26, a4
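The ">> 26" / "12 bits left" steps above split each 16-byte block into five 26-bit limbs. A little-endian C sketch (block_to_limbs is a hypothetical helper; highbit is 1 for full blocks, carrying the 2^128 padding bit, and 0 for the final leftover block):

#include <stdint.h>
#include <string.h>

static void block_to_limbs(uint32_t a[5], const uint8_t m[16], uint32_t highbit)
{
        uint64_t lo, hi;

        memcpy(&lo, m, 8);        /* assumes a little-endian host */
        memcpy(&hi, m + 8, 8);
        a[0] =  lo        & 0x3ffffff;
        a[1] = (lo >> 26) & 0x3ffffff;
        a[2] = ((lo >> 52) | (hi << 12)) & 0x3ffffff;  /* 12 bits left in lo */
        a[3] = (hi >> 14) & 0x3ffffff;
        a[4] = (uint32_t)(hi >> 40) | (highbit << 24); /* bit 24 here = 2^128 */
}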
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
612 vmrgow 4, 9, 20
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9 # r31 = len / r9: number of loop passes
625 cmpdi 31, 0
628 mtctr 31 # CTR = loop count
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
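The identity behind this schedule can be checked in plain C with a toy modulus (a sketch; the kernel code performs the same algebra on 26-bit limb vectors mod 2^130 - 5). Four lanes each step by r^4, and the final fold applies r^4, r^3, r^2, r, matching the comment's last line:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        const uint64_t p = 1000003, r = 12345;
        const uint64_t m[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        uint64_t r2 = r * r % p, r3 = r2 * r % p, r4 = r2 * r2 % p;
        uint64_t h = 0, lane[4], v;
        int i;

        /* serial reference: h = (h + m_i) * r mod p */
        for (i = 0; i < 8; i++)
                h = (h + m[i]) * r % p;

        /* 4-lane version: each lane steps by r^4, then the lanes are
         * folded with r^4, r^3, r^2, r. */
        for (i = 0; i < 4; i++)
                lane[i] = m[i];
        for (i = 0; i < 4; i++)
                lane[i] = (lane[i] * r4 + m[4 + i]) % p;
        v = (lane[0] * r4 % p + lane[1] * r3 % p +
             lane[2] * r2 % p + lane[3] * r % p) % p;

        assert(v == h);
        return 0;
}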
645 vsrd 10, 14, 31 # carry out of limb 0: p0 >> 26
646 vsrd 11, 17, 31 # carry out of limb 3: p3 >> 26
648 vand 4, 14, 25 # h0 = p0 & 0x3ffffff
650 vsrd 12, 18, 31 # carry out of limb 4
653 vsrd 11, 15, 31 # carry out of limb 1
656 vaddudm 4, 4, 12 # h0 += carry4 (first part of carry4 * 5)
660 vsrd 13, 6, 31 # carry out of limb 2
662 vaddudm 4, 4, 10 # h0 += carry4 << 2 (completes carry4 * 5)
663 vsrd 10, 4, 31 # second-pass carry out of h0
666 vsrd 11, 7, 31 # second-pass carry out of h3
668 vand 4, 4, 25 # h0 &= 0x3ffffff
673 add 20, 4, 21
690 vsrd 21, 14, 31 # >> 26
691 vsrd 22, 21, 31 # 12 bits left
692 vsrd 10, 17, 31 # >> 26
693 vsrd 11, 10, 31 # 12 bits left
709 vsrd 24, 23, 31 # >> 26, a4
712 vsrd 13, 12, 31 # >> 26, a4
715 vaddudm 4, 4, 20
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
722 vmrgow 4, 9, 4
729 addi 5, 5, -64 # len -= 64
739 xxlor 62, 4, 4
750 xxpermdi 41, 31, 46, 0
751 xxpermdi 42, 31, 47, 0
752 vaddudm 4, 14, 9
753 xxpermdi 36, 31, 36, 3
755 xxpermdi 37, 31, 37, 3
756 xxpermdi 43, 31, 48, 0
758 xxpermdi 38, 31, 38, 3
759 xxpermdi 44, 31, 49, 0
761 xxpermdi 39, 31, 39, 3
762 xxpermdi 45, 31, 50, 0
764 xxpermdi 40, 31, 40, 3
768 vsrd 10, 4, 31 # carry out of h0
769 vsrd 11, 7, 31 # carry out of h3
771 vand 4, 4, 25 # h0 &= 0x3ffffff
773 vsrd 12, 8, 31 # carry out of h4
776 vsrd 11, 5, 31 # carry out of h1
779 vaddudm 4, 4, 12 # h0 += carry4 (first part of carry4 * 5)
783 vsrd 13, 6, 31 # carry out of h2
785 vaddudm 4, 4, 10 # h0 += carry4 << 2 (completes carry4 * 5)
786 vsrd 10, 4, 31 # second-pass carry out of h0
789 vsrd 11, 7, 31 # second-pass carry out of h3
791 vand 4, 4, 25 # h0 &= 0x3ffffff
793 vsrd 10, 5, 31 # carry out of h1
803 vsld 5, 5, 31 # h1 << 26
804 vor 20, 4, 5 # h0 | h1 << 26
807 vsld 6, 6, 31 # h2 << 26
808 vsld 6, 6, 31 # h2 << 52
815 vsld 8, 8, 31 # h4 << 26
857 add 19, 21, 10 # s1: r19 = r1 + (r1 >> 2) = (r1 >> 2) * 5
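Why that single add produces s1 (a sketch of the standard 2^64-radix trick, assuming the usual Poly1305 clamping): r1's low two bits are forced to zero, so the cross terms weighted by 2^128 fold down through 2^130 ≡ 5 (mod p):

#include <stdint.h>

/* r1 * 2^128 = (r1 >> 2) * 2^130 ≡ (r1 >> 2) * 5 (mod 2^130 - 5),
 * so the multiplier for those terms is s1 = 5 * (r1 >> 2), computed
 * with one add because (r1 & 3) == 0 after clamping. */
static inline uint64_t s1_from_r1(uint64_t r1)
{
        return r1 + (r1 >> 2);   /* == 5 * (r1 >> 2) */
}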
923 # - no highbit if final leftover block (highbit = 0)
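How a caller is expected to feed that final leftover block, per the standard Poly1305 padding (a sketch, not code from this file): full blocks carry an implicit 2^128 bit, while a short block is terminated with a 0x01 byte, zero-padded to 16 bytes, and processed with highbit = 0:

#include <stdint.h>
#include <string.h>

/* Pad a final block of len < 16 bytes; the 0x01 terminator stands in
 * for the 2^128 high bit that full blocks get. */
static void pad_final_block(uint8_t out[16], const uint8_t *msg, size_t len)
{
        memset(out, 0, 16);
        memcpy(out, msg, len);
        out[len] = 1;
}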
931 stdu 1,-400(1) # allocate a 400-byte stack frame (store SP with update)
950 SAVE_GPR 31, 248, 1
957 add 11, 25, 4
966 divdu 31, 5, 30 # r31 = len / r30: number of blocks
968 mtctr 31 # CTR = loop count
1016 RESTORE_GPR 31, 248, 1
1039 # h + 5 + (-p)
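The trick behind "h + 5 + (-p)": h mod p is either h or h - p, and h - p = h + 5 - 2^130, so adding 5 and testing bit 130 selects the right value. A scalar sketch over three 64-bit limbs (hypothetical helper; the two loads just below fetch the key half s, which is then added mod 2^128 to form the tag):

#include <stdint.h>

/* Final reduction of h = h0 + h1*2^64 + h2*2^128, h2 small. If h + 5
 * carries into bit 130, then h >= p and the carried value, truncated
 * below 2^130, is h - p; otherwise h is already reduced. */
static void final_reduce(uint64_t h[3])
{
        uint64_t g0, g1, g2, c;

        g0 = h[0] + 5; c = (g0 < h[0]);
        g1 = h[1] + c; c = (g1 < c);
        g2 = h[2] + c;
        if (g2 >> 2) {           /* bit 130 set: use h - p */
                h[0] = g0;
                h[1] = g1;
                h[2] = g2 & 3;
        }
}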
1054 ld 6, 0(4)
1055 ld 7, 8(4)