Lines Matching full:row
53 * so each row is discontiguous within ZA[]. in helper_sme_zero()
68 * are interleaved, so if type T is size N bytes then row 1 of
69 * the tile is N rows away from row 0. The division by N to
83 * each row of the ZA storage has one byte of the vslice data,
84 * and (counting from 0) byte 8 goes in row 8 of the storage
85 * at offset (8 * row-size-in-bytes).
86 * If we have 8 bytes per element then each row of the ZA storage
88 * so byte 8 of the data goes into row 1 of the tile,
89 * which is again row 8 of the storage, so the offset is still
90 * (8 * row-size-in-bytes). Similarly for other element sizes.
819 intptr_t row, col, oprsz = simd_oprsz(desc) / 4; in DO_ST() local
823 for (row = 0; row < oprsz; ) { in DO_ST()
824 uint64_t pa = pn[row >> 4]; in DO_ST()
831 zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)]; in DO_ST()
838 } while (++row & 15); in DO_ST()
845 intptr_t row, col, oprsz = simd_oprsz(desc) / 8; in HELPER() local
849 for (row = 0; row < oprsz; ++row) { in HELPER()
850 if (pn[H1(row)] & 1) { in HELPER()
853 zda[tile_vslice_index(row) + col] += zn[col]; in HELPER()
863 intptr_t row, col, oprsz = simd_oprsz(desc) / 4; in HELPER() local
867 for (row = 0; row < oprsz; ) { in HELPER()
868 uint64_t pa = pn[row >> 4]; in HELPER()
871 uint32_t zn_row = zn[H4(row)]; in HELPER()
876 zda[tile_vslice_index(row) + H4(col)] += zn_row; in HELPER()
883 } while (++row & 15); in HELPER()
890 intptr_t row, col, oprsz = simd_oprsz(desc) / 8; in HELPER() local
894 for (row = 0; row < oprsz; ++row) { in HELPER()
895 if (pn[H1(row)] & 1) { in HELPER()
896 uint64_t zn_row = zn[row]; in HELPER()
899 zda[tile_vslice_index(row) + col] += zn_row; in HELPER()
909 intptr_t row, col, oprsz = simd_maxsz(desc); in HELPER() local
922 for (row = 0; row < oprsz; ) { in HELPER()
923 uint16_t pa = pn[H2(row >> 4)]; in HELPER()
926 void *vza_row = vza + tile_vslice_offset(row); in HELPER()
927 uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg; in HELPER()
942 row += 4; in HELPER()
944 } while (row & 15); in HELPER()
951 intptr_t row, col, oprsz = simd_oprsz(desc) / 8; in HELPER() local
959 for (row = 0; row < oprsz; ++row) { in HELPER()
960 if (pn[H1(row)] & 1) { in HELPER()
961 uint64_t *za_row = &za[tile_vslice_index(row)]; in HELPER()
962 uint64_t n = zn[row] ^ neg; in HELPER()
976 * and for NEG on an enabled row element.
1035 intptr_t row, col, oprsz = simd_maxsz(desc); in HELPER() local
1053 for (row = 0; row < oprsz; ) { in HELPER()
1054 uint16_t prow = pn[H2(row >> 4)]; in HELPER()
1056 void *vza_row = vza + tile_vslice_offset(row); in HELPER()
1057 uint32_t n = *(uint32_t *)(vzn + H1_4(row)); in HELPER()
1076 row += 4; in HELPER()
1078 } while (row & 15); in HELPER()
1085 intptr_t row, col, oprsz = simd_maxsz(desc); in HELPER() local
1091 for (row = 0; row < oprsz; ) { in HELPER()
1092 uint16_t prow = pn[H2(row >> 4)]; in HELPER()
1094 void *vza_row = vza + tile_vslice_offset(row); in HELPER()
1095 uint32_t n = *(uint32_t *)(vzn + H1_4(row)); in HELPER()
1113 row += 4; in HELPER()
1115 } while (row & 15); in HELPER()
1118 for (row = 0; row < oprsz; ) { in HELPER()
1119 uint16_t prow = pn[H2(row >> 4)]; in HELPER()
1121 void *vza_row = vza + tile_vslice_offset(row); in HELPER()
1122 uint32_t n = *(uint32_t *)(vzn + H1_4(row)); in HELPER()
1140 row += 4; in HELPER()
1142 } while (row & 15); in HELPER()
1152 intptr_t row, col, oprsz = simd_oprsz(desc) / 4; in do_imopa_s() local
1155 for (row = 0; row < oprsz; ++row) { in do_imopa_s()
1156 uint8_t pa = (pn[H1(row >> 1)] >> ((row & 1) * 4)) & 0xf; in do_imopa_s()
1157 uint32_t *za_row = &za[tile_vslice_index(row)]; in do_imopa_s()
1158 uint32_t n = zn[H4(row)]; in do_imopa_s()
1174 intptr_t row, col, oprsz = simd_oprsz(desc) / 8; in do_imopa_d() local
1177 for (row = 0; row < oprsz; ++row) { in do_imopa_d()
1178 uint8_t pa = pn[H1(row)]; in do_imopa_d()
1179 uint64_t *za_row = &za[tile_vslice_index(row)]; in do_imopa_d()
1180 uint64_t n = zn[row]; in do_imopa_d()