Lines Matching full:i
32 * for (i = 0; i < 256; ++i) {
35 * if ((i >> j) & 1) {
133 * for (i = 0; i < 256; ++i) {
135 * if (i & 0xaa) {
139 * if ((i >> j) & 1) {
143 * printf("[0x%x] = 0x%016lx,\n", i, m);
182 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
185 for (i = 0; i < opr_sz; ++i) { in HELPER()
186 d[i] = do_sqrdmlah_b(n[i], m[i], a[i], false, true); in HELPER()
193 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
196 for (i = 0; i < opr_sz; ++i) { in HELPER()
197 d[i] = do_sqrdmlah_b(n[i], m[i], a[i], true, true); in HELPER()
203 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
206 for (i = 0; i < opr_sz; ++i) { in HELPER()
207 d[i] = do_sqrdmlah_b(n[i], m[i], 0, false, false); in HELPER()
213 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
216 for (i = 0; i < opr_sz; ++i) { in HELPER()
217 d[i] = do_sqrdmlah_b(n[i], m[i], 0, false, true); in HELPER()
257 uintptr_t i; in HELPER() local
259 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
260 d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq); in HELPER()
282 uintptr_t i; in HELPER() local
284 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
285 d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq); in HELPER()
293 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
296 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
297 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq); in HELPER()
305 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
308 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
309 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq); in HELPER()
317 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
323 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
324 int16_t mm = m[i]; in HELPER()
326 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq); in HELPER()
335 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
341 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
342 int16_t mm = m[i]; in HELPER()
344 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq); in HELPER()
353 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
359 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
360 int16_t mm = m[i]; in HELPER()
362 d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], false, true, vq); in HELPER()
371 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
377 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
378 int16_t mm = m[i]; in HELPER()
380 d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], true, true, vq); in HELPER()
389 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
393 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
394 d[i] = do_sqrdmlah_h(n[i], m[i], a[i], false, true, &discard); in HELPER()
401 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
405 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
406 d[i] = do_sqrdmlah_h(n[i], m[i], a[i], true, true, &discard); in HELPER()
412 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
416 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
417 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, &discard); in HELPER()
423 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
427 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
428 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, &discard); in HELPER()
434 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
439 for (i = 0; i < opr_sz / 2; i += 16 / 2) { in HELPER()
440 int16_t mm = m[i]; in HELPER()
442 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, &discard); in HELPER()
449 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
454 for (i = 0; i < opr_sz / 2; i += 16 / 2) { in HELPER()
455 int16_t mm = m[i]; in HELPER()
457 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, &discard); in HELPER()
495 uintptr_t i; in HELPER() local
497 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
498 d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq); in HELPER()
517 uintptr_t i; in HELPER() local
519 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
520 d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq); in HELPER()
528 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
531 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
532 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq); in HELPER()
540 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
543 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
544 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq); in HELPER()
552 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
558 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
559 int32_t mm = m[i]; in HELPER()
561 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq); in HELPER()
570 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
576 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
577 int32_t mm = m[i]; in HELPER()
579 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq); in HELPER()
588 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
594 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
595 int32_t mm = m[i]; in HELPER()
597 d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], false, true, vq); in HELPER()
606 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
612 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
613 int32_t mm = m[i]; in HELPER()
615 d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], true, true, vq); in HELPER()
624 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
628 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
629 d[i] = do_sqrdmlah_s(n[i], m[i], a[i], false, true, &discard); in HELPER()
636 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
640 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
641 d[i] = do_sqrdmlah_s(n[i], m[i], a[i], true, true, &discard); in HELPER()
647 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
651 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
652 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, &discard); in HELPER()
658 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
662 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
663 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, &discard); in HELPER()
669 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
674 for (i = 0; i < opr_sz / 4; i += 16 / 4) { in HELPER()
675 int32_t mm = m[i]; in HELPER()
677 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, &discard); in HELPER()
684 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
689 for (i = 0; i < opr_sz / 4; i += 16 / 4) { in HELPER()
690 int32_t mm = m[i]; in HELPER()
692 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, &discard); in HELPER()
737 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
740 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
741 d[i] = do_sqrdmlah_d(n[i], m[i], a[i], false, true); in HELPER()
748 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
751 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
752 d[i] = do_sqrdmlah_d(n[i], m[i], a[i], true, true); in HELPER()
758 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
761 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
762 d[i] = do_sqrdmlah_d(n[i], m[i], 0, false, false); in HELPER()
768 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
771 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
772 d[i] = do_sqrdmlah_d(n[i], m[i], 0, false, true); in HELPER()
778 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
782 for (i = 0; i < opr_sz / 8; i += 16 / 8) { in HELPER()
783 int64_t mm = m[i]; in HELPER()
785 d[i + j] = do_sqrdmlah_d(n[i + j], mm, 0, false, false); in HELPER()
792 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
796 for (i = 0; i < opr_sz / 8; i += 16 / 8) { in HELPER()
797 int64_t mm = m[i]; in HELPER()
799 d[i + j] = do_sqrdmlah_d(n[i + j], mm, 0, false, true); in HELPER()
814 intptr_t i, opr_sz = simd_oprsz(desc); \
818 for (i = 0; i < opr_sz / sizeof(TYPED); ++i) { \
819 d[i] = (a[i] + \
820 (TYPED)n[i * 4 + 0] * m[i * 4 + 0] + \
821 (TYPED)n[i * 4 + 1] * m[i * 4 + 1] + \
822 (TYPED)n[i * 4 + 2] * m[i * 4 + 2] + \
823 (TYPED)n[i * 4 + 3] * m[i * 4 + 3]); \
837 intptr_t i = 0, opr_sz = simd_oprsz(desc); \ in DO_DOT()
852 TYPED m0 = m_indexed[i * 4 + 0]; \ in DO_DOT()
853 TYPED m1 = m_indexed[i * 4 + 1]; \ in DO_DOT()
854 TYPED m2 = m_indexed[i * 4 + 2]; \ in DO_DOT()
855 TYPED m3 = m_indexed[i * 4 + 3]; \ in DO_DOT()
857 d[i] = (a[i] + \ in DO_DOT()
858 n[i * 4 + 0] * m0 + \ in DO_DOT()
859 n[i * 4 + 1] * m1 + \ in DO_DOT()
860 n[i * 4 + 2] * m2 + \ in DO_DOT()
861 n[i * 4 + 3] * m3); \ in DO_DOT()
862 } while (++i < segend); \ in DO_DOT()
863 segend = i + (16 / sizeof(TYPED)); \ in DO_DOT()
864 } while (i < opr_sz_n); \ in DO_DOT()
882 intptr_t i, opr_sz = simd_oprsz(desc); \
886 for (i = 0; i < opr_sz / sizeof(TYPED); ++i) { \
887 d[i] = (a[i] + \
888 (TYPED)n[i * 2 + 0] * m[i * 2 + 0] + \
889 (TYPED)n[i * 2 + 1] * m[i * 2 + 1]); \
897 intptr_t i = 0, opr_sz = simd_oprsz(desc); \
905 TYPED m0 = m_indexed[i * 2 + 0]; \
906 TYPED m1 = m_indexed[i * 2 + 1]; \
908 d[i] = (a[i] + \
909 n[i * 2 + 0] * m0 + \
910 n[i * 2 + 1] * m1); \
911 } while (++i < segend); \
912 segend = i + (16 / sizeof(TYPED)); \
913 } while (i < opr_sz_n); \
935 uintptr_t i; local
937 for (i = 0; i < opr_sz / 2; i += 2) {
938 float16 e0 = n[H2(i)];
939 float16 e1 = m[H2(i + 1)];
940 float16 e2 = n[H2(i + 1)];
941 float16 e3 = m[H2(i)];
949 d[H2(i)] = float16_add(e0, e1, fpst);
950 d[H2(i + 1)] = float16_add(e2, e3, fpst);
964 uintptr_t i; in HELPER() local
966 for (i = 0; i < opr_sz / 4; i += 2) { in HELPER()
967 float32 e0 = n[H4(i)]; in HELPER()
968 float32 e1 = m[H4(i + 1)]; in HELPER()
969 float32 e2 = n[H4(i + 1)]; in HELPER()
970 float32 e3 = m[H4(i)]; in HELPER()
978 d[H4(i)] = float32_add(e0, e1, fpst); in HELPER()
979 d[H4(i + 1)] = float32_add(e2, e3, fpst); in HELPER()
993 uintptr_t i; in HELPER() local
995 for (i = 0; i < opr_sz / 8; i += 2) { in HELPER()
996 float64 e0 = n[i]; in HELPER()
997 float64 e1 = m[i + 1]; in HELPER()
998 float64 e2 = n[i + 1]; in HELPER()
999 float64 e3 = m[i]; in HELPER()
1007 d[i] = float64_add(e0, e1, fpst); in HELPER()
1008 d[i + 1] = float64_add(e2, e3, fpst); in HELPER()
1023 uintptr_t i; in HELPER() local
1031 for (i = 0; i < opr_sz / 2; i += 2) { in HELPER()
1032 float16 e2 = n[H2(i + flip)]; in HELPER()
1033 float16 e1 = m[H2(i + flip)] ^ negx_real; in HELPER()
1035 float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; in HELPER()
1037 d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); in HELPER()
1038 d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); in HELPER()
1056 intptr_t i, j; in HELPER() local
1064 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
1065 float16 mr = m[H2(i + 2 * index + 0)]; in HELPER()
1066 float16 mi = m[H2(i + 2 * index + 1)]; in HELPER()
1070 for (j = i; j < i + eltspersegment; j += 2) { in HELPER()
1091 uintptr_t i; in HELPER() local
1099 for (i = 0; i < opr_sz / 4; i += 2) { in HELPER()
1100 float32 e2 = n[H4(i + flip)]; in HELPER()
1101 float32 e1 = m[H4(i + flip)] ^ negx_real; in HELPER()
1103 float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; in HELPER()
1105 d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); in HELPER()
1106 d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); in HELPER()
1124 intptr_t i, j; in HELPER() local
1132 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
1133 float32 mr = m[H4(i + 2 * index + 0)]; in HELPER()
1134 float32 mi = m[H4(i + 2 * index + 1)]; in HELPER()
1138 for (j = i; j < i + eltspersegment; j += 2) { in HELPER()
1159 uintptr_t i; in HELPER() local
1167 for (i = 0; i < opr_sz / 8; i += 2) { in HELPER()
1168 float64 e2 = n[i + flip]; in HELPER()
1169 float64 e1 = m[i + flip] ^ negx_real; in HELPER()
1171 float64 e3 = m[i + 1 - flip] ^ negx_imag; in HELPER()
1173 d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); in HELPER()
1174 d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); in HELPER()
1280 intptr_t i, oprsz = simd_oprsz(desc); \
1282 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
1283 d[i] = FUNC(n[i], stat); \
1462 intptr_t i, oprsz = simd_oprsz(desc); \
1464 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
1465 d[i] = FUNC(n[i], m[i], stat); \
1686 intptr_t i, oprsz = simd_oprsz(desc); \
1688 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
1689 d[i] = FUNC(d[i], n[i], m[i], stat); \
1724 intptr_t i, j, oprsz = simd_oprsz(desc); \ in DO_MULADD()
1728 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ in DO_MULADD()
1729 TYPE mm = m[H(i + idx)]; \ in DO_MULADD()
1731 d[i + j] = n[i + j] * mm; \ in DO_MULADD()
1746 intptr_t i, j, oprsz = simd_oprsz(desc); \
1750 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
1751 TYPE mm = m[H(i + idx)]; \
1753 d[i + j] = a[i + j] OP n[i + j] * mm; \
1773 intptr_t i, j, oprsz = simd_oprsz(desc); \
1777 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
1778 TYPE mm = m[H(i + idx)]; \
1780 d[i + j] = ADD(d[i + j], MUL(n[i + j], mm, stat), stat); \
1818 intptr_t i, j, oprsz = simd_oprsz(desc); \
1822 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
1823 TYPE mm = m[H(i + idx)]; \
1825 d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
1826 a[i + j], NEGF, stat); \
1852 intptr_t i, oprsz = simd_oprsz(desc); \
1855 for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \
1856 WTYPE dd = (WTYPE)n[i] OP m[i]; \
1864 d[i] = dd; \
1902 intptr_t i, oprsz = simd_oprsz(desc); local
1906 for (i = 0; i < oprsz / 8; i++) {
1907 uint64_t nn = n[i], mm = m[i], dd = nn + mm;
1912 d[i] = dd;
1924 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1928 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1929 uint64_t nn = n[i], mm = m[i], dd = nn - mm; in HELPER()
1934 d[i] = dd; in HELPER()
1946 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1950 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1951 int64_t nn = n[i], mm = m[i], dd = nn + mm; in HELPER()
1956 d[i] = dd; in HELPER()
1968 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1972 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1973 int64_t nn = n[i], mm = m[i], dd = nn - mm; in HELPER()
1978 d[i] = dd; in HELPER()
1990 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1994 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1995 uint64_t nn = n[i]; in HELPER()
1996 int64_t mm = m[i]; in HELPER()
2010 d[i] = dd; in HELPER()
2022 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
2026 for (i = 0; i < oprsz / 8; i++) { in HELPER()
2027 int64_t nn = n[i]; in HELPER()
2028 uint64_t mm = m[i]; in HELPER()
2035 d[i] = dd; in HELPER()
2047 intptr_t i, oprsz = simd_oprsz(desc); \
2050 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2051 d[i] += n[i] >> shift; \
2071 intptr_t i, oprsz = simd_oprsz(desc); \ in DO_SRA()
2074 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ in DO_SRA()
2075 TYPE tmp = n[i] >> (shift - 1); \ in DO_SRA()
2076 d[i] = (tmp >> 1) + (tmp & 1); \ in DO_SRA()
2096 intptr_t i, oprsz = simd_oprsz(desc); \
2099 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2100 TYPE tmp = n[i] >> (shift - 1); \
2101 d[i] += (tmp >> 1) + (tmp & 1); \
2121 intptr_t i, oprsz = simd_oprsz(desc); \
2124 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2125 d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \
2140 intptr_t i, oprsz = simd_oprsz(desc); \
2143 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2144 d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \
2225 intptr_t i, oprsz = simd_oprsz(desc); in do_fmlal() local
2237 for (i = 0; i < oprsz / 4; i++) { in do_fmlal()
2238 float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); in do_fmlal()
2239 float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); in do_fmlal()
2240 d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); in do_fmlal()
2274 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
2290 for (i = 0; i < oprsz; i += sizeof(float32)) { in HELPER()
2291 float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; in HELPER()
2292 float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); in HELPER()
2295 float32 aa = *(float32 *)(va + H1_4(i)); in HELPER()
2297 *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); in HELPER()
2308 intptr_t i, oprsz = simd_oprsz(desc); in do_fmlal_idx() local
2322 for (i = 0; i < oprsz / 4; i++) { in do_fmlal_idx()
2323 float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); in do_fmlal_idx()
2324 d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); in do_fmlal_idx()
2358 intptr_t i, j, oprsz = simd_oprsz(desc); in HELPER() local
2374 for (i = 0; i < oprsz; i += 16) { in HELPER()
2375 float16 mm_16 = *(float16 *)(vm + i + idx); in HELPER()
2379 float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; in HELPER()
2381 float32 aa = *(float32 *)(va + H1_4(i + j)); in HELPER()
2383 *(float32 *)(vd + H1_4(i + j)) = in HELPER()
2391 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2394 for (i = 0; i < opr_sz; ++i) { in HELPER()
2395 int8_t mm = m[i]; in HELPER()
2396 int8_t nn = n[i]; in HELPER()
2405 d[i] = res; in HELPER()
2412 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2415 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2416 int8_t mm = m[i]; /* only 8 bits of shift are significant */ in HELPER()
2417 int16_t nn = n[i]; in HELPER()
2426 d[i] = res; in HELPER()
2433 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2436 for (i = 0; i < opr_sz; ++i) { in HELPER()
2437 int8_t mm = m[i]; in HELPER()
2438 uint8_t nn = n[i]; in HELPER()
2449 d[i] = res; in HELPER()
2456 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2459 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2460 int8_t mm = m[i]; /* only 8 bits of shift are significant */ in HELPER()
2461 uint16_t nn = n[i]; in HELPER()
2472 d[i] = res; in HELPER()
2488 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2491 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2492 d[i] = clmul_8x8_low(n[i], m[i]); in HELPER()
2504 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2508 for (i = 0; i < opr_sz / 8; i += 2) { in HELPER()
2509 Int128 r = clmul_64(n[i + hi], m[i + hi]); in HELPER()
2510 d[i] = int128_getlo(r); in HELPER()
2511 d[i + 1] = int128_gethi(r); in HELPER()
2534 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2537 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2538 d[i] = clmul_8x4_even(n[i] >> shift, m[i] >> shift); in HELPER()
2545 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2549 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2550 d[i] = clmul_32(n[2 * i + sel], m[2 * i + sel]); in HELPER()
2558 intptr_t i, opr_sz = simd_oprsz(desc); \
2559 for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
2560 TYPE nn = *(TYPE *)(vn + i); \
2561 *(TYPE *)(vd + i) = -(nn OP 0); \
2583 intptr_t i, opr_sz = simd_oprsz(desc); \
2586 for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
2587 d[i] = n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
2607 intptr_t i, opr_sz = simd_oprsz(desc); \ in DO_ABD()
2610 for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ in DO_ABD()
2611 d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \ in DO_ABD()
2639 for (intptr_t i = 0; i < half; ++i) { \
2640 d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)], stat); \
2642 for (intptr_t i = 0; i < half; ++i) { \
2643 d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)], stat); \
2690 for (intptr_t i = 0; i < half; ++i) { \
2691 d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)]); \
2693 for (intptr_t i = 0; i < half; ++i) { \
2694 d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)]); \
2727 intptr_t i, oprsz = simd_oprsz(desc); \
2731 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2732 d[i] = FUNC(n[i], shift, fpst); \
2756 intptr_t i, oprsz = simd_oprsz(desc); \
2761 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2762 d[i] = FUNC(n[i], 0, fpst); \
2780 intptr_t i, oprsz = simd_oprsz(desc); \
2785 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2786 d[i] = FUNC(n[i], fpst); \
2823 for (size_t i = 0; i < oprsz; ++i) { local
2824 uint32_t index = indices[H1(i)];
2835 result.b[H1(i)] = table[H1(index % 16)];
2852 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2855 for (i = 0; i < opr_sz; ++i) { in HELPER()
2856 d[i] = ((int32_t)n[i] * m[i]) >> 8; in HELPER()
2863 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2866 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2867 d[i] = ((int32_t)n[i] * m[i]) >> 16; in HELPER()
2874 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2877 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
2878 d[i] = ((int64_t)n[i] * m[i]) >> 32; in HELPER()
2885 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2889 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2890 muls64(&discard, &d[i], n[i], m[i]); in HELPER()
2897 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2900 for (i = 0; i < opr_sz; ++i) { in HELPER()
2901 d[i] = ((uint32_t)n[i] * m[i]) >> 8; in HELPER()
2908 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2911 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2912 d[i] = ((uint32_t)n[i] * m[i]) >> 16; in HELPER()
2919 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2922 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
2923 d[i] = ((uint64_t)n[i] * m[i]) >> 32; in HELPER()
2930 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2934 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2935 mulu64(&discard, &d[i], n[i], m[i]); in HELPER()
2942 intptr_t i, opr_sz = simd_oprsz(desc) / 8; in HELPER() local
2946 for (i = 0; i < opr_sz; ++i) { in HELPER()
2947 d[i] = ror64(n[i] ^ m[i], shr); in HELPER()
3002 * i j i j in do_mmla_b()
3148 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3154 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3155 d[i] = bfdotadd_ebf(a[i], n[i], m[i], &fpst, &fpst_odd); in HELPER()
3158 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3159 d[i] = bfdotadd(a[i], n[i], m[i], &fpst); in HELPER()
3168 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
3177 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
3178 uint32_t m_idx = m[i + H4(index)]; in HELPER()
3180 for (j = i; j < i + eltspersegment; j++) { in HELPER()
3185 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
3186 uint32_t m_idx = m[i + H4(index)]; in HELPER()
3188 for (j = i; j < i + eltspersegment; j++) { in HELPER()
3199 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
3211 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
3212 uint32_t m_idx = m[i + H4(idx)]; in HELPER()
3215 uint32_t nn = (n0[H2(2 * (i + j) + sel)]) in HELPER()
3216 | (n1[H2(2 * (i + j) + sel)] << 16); in HELPER()
3217 d[i + H4(j)] = bfdotadd_ebf(a[i + H4(j)], nn, m_idx, in HELPER()
3222 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
3223 uint32_t m_idx = m[i + H4(idx)]; in HELPER()
3226 uint32_t nn = (n0[H2(2 * (i + j) + sel)]) in HELPER()
3227 | (n1[H2(2 * (i + j) + sel)] << 16); in HELPER()
3228 d[i + H4(j)] = bfdotadd(a[i + H4(j)], nn, m_idx, &fpst); in HELPER()
3252 * i j i k j k in HELPER()
3284 * i j i k j k in HELPER()
3314 intptr_t i, opr_sz = simd_oprsz(desc); in do_bfmlal() local
3317 for (i = 0; i < opr_sz / 4; ++i) { in do_bfmlal()
3318 float32 nn = (negx ^ n[H2(i * 2 + sel)]) << 16; in do_bfmlal()
3319 float32 mm = m[H2(i * 2 + sel)] << 16; in do_bfmlal()
3320 d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], negf, stat); in do_bfmlal()
3346 intptr_t i, j, opr_sz = simd_oprsz(desc); in do_bfmlal_idx() local
3352 for (i = 0; i < elements; i += eltspersegment) { in do_bfmlal_idx()
3353 float32 m_idx = m[H2(2 * i + index)] << 16; in do_bfmlal_idx()
3355 for (j = i; j < i + eltspersegment; j++) { in do_bfmlal_idx()
3384 intptr_t i, opr_sz = simd_oprsz(desc); \
3385 for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
3386 TYPE aa = *(TYPE *)(a + i); \
3387 TYPE nn = *(TYPE *)(n + i); \
3388 TYPE mm = *(TYPE *)(m + i); \
3390 *(TYPE *)(d + i) = dd; \
3408 intptr_t i, opr_sz = simd_oprsz(desc); in DO_CLAMP() local
3411 for (i = 0; i < opr_sz; ++i) { in DO_CLAMP()
3412 d[i] = ctpop8(n[i]); in DO_CLAMP()
3420 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3423 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
3424 d[i] = revbit64(bswap64(n[i])); in HELPER()
3431 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3434 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3435 d[i] = helper_recpe_u32(n[i]); in HELPER()
3442 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3445 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3446 d[i] = helper_rsqrte_u32(n[i]); in HELPER()