1 /*
2 * Test program for MSA instruction DPSUB_S.W
3 *
4 * Copyright (C) 2019 Wave Computing, Inc.
5 * Copyright (C) 2019 Aleksandar Markovic <amarkovic@wavecomp.com>
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 2 of the License, or
10 * (at your option) any later version.
11 *`
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
19 *
20 */
21
22 #include <sys/time.h>
23 #include <stdint.h>
24
25 #include "../../../../include/wrappers_msa.h"
26 #include "../../../../include/test_inputs_128.h"
27 #include "../../../../include/test_utils_128.h"
28
29 #define TEST_COUNT_TOTAL ( \
30 (PATTERN_INPUTS_SHORT_COUNT) * (PATTERN_INPUTS_SHORT_COUNT) + \
31 3 * (RANDOM_INPUTS_SHORT_COUNT) * (RANDOM_INPUTS_SHORT_COUNT))
32
33
main(void)34 int32_t main(void)
35 {
36 char *isa_ase_name = "MSA";
37 char *group_name = "Int Dot Product";
38 char *instruction_name = "DPSUB_S.W";
39 int32_t ret;
40 uint32_t i, j;
41 struct timeval start, end;
42 double elapsed_time;
43
44 uint64_t b128_result[TEST_COUNT_TOTAL][2];
45 uint64_t b128_expect[TEST_COUNT_TOTAL][2] = {
46 { 0xfffffffefffffffeULL, 0xfffffffefffffffeULL, }, /* 0 */
47 { 0xfffffffefffffffeULL, 0xfffffffefffffffeULL, },
48 { 0xffff5552ffff5552ULL, 0xffff5552ffff5552ULL, },
49 { 0xfffffffcfffffffcULL, 0xfffffffcfffffffcULL, },
50 { 0xffff9994ffff9994ULL, 0xffff9994ffff9994ULL, },
51 { 0xfffffffafffffffaULL, 0xfffffffafffffffaULL, },
52 { 0x00001c6bffff71c0ULL, 0xffffc71500001c6bULL, },
53 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
54 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, }, /* 8 */
55 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
56 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
57 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
58 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
59 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
60 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
61 { 0xfffffff8fffffff8ULL, 0xfffffff8fffffff8ULL, },
62 { 0xffff554cffff554cULL, 0xffff554cffff554cULL, }, /* 16 */
63 { 0xffff554cffff554cULL, 0xffff554cffff554cULL, },
64 { 0xc71ae384c71ae384ULL, 0xc71ae384c71ae384ULL, },
65 { 0xfffeaaa0fffeaaa0ULL, 0xfffeaaa0fffeaaa0ULL, },
66 { 0xdddbbbb0dddbbbb0ULL, 0xdddbbbb0dddbbbb0ULL, },
67 { 0xfffdfff4fffdfff4ULL, 0xfffdfff4fffdfff4ULL, },
68 { 0x097912ead094f678ULL, 0xed06da06097912eaULL, },
69 { 0xfffd5548fffd5548ULL, 0xfffd5548fffd5548ULL, },
70 { 0xfffdfff2fffdfff2ULL, 0xfffdfff2fffdfff2ULL, }, /* 24 */
71 { 0xfffdfff2fffdfff2ULL, 0xfffdfff2fffdfff2ULL, },
72 { 0x38e1c70e38e1c70eULL, 0x38e1c70e38e1c70eULL, },
73 { 0xfffeaa9cfffeaa9cULL, 0xfffeaa9cfffeaa9cULL, },
74 { 0x2221332422213324ULL, 0x2221332422213324ULL, },
75 { 0xffff5546ffff5546ULL, 0xffff5546ffff5546ULL, },
76 { 0xf6845ec12f67d088ULL, 0x12f6424ff6845ec1ULL, },
77 { 0xfffffff0fffffff0ULL, 0xfffffff0fffffff0ULL, },
78 { 0xffff9988ffff9988ULL, 0xffff9988ffff9988ULL, }, /* 32 */
79 { 0xffff9988ffff9988ULL, 0xffff9988ffff9988ULL, },
80 { 0xdddcaa98dddcaa98ULL, 0xdddcaa98dddcaa98ULL, },
81 { 0xffff3320ffff3320ULL, 0xffff3320ffff3320ULL, },
82 { 0xeb83ae00eb83ae00ULL, 0xeb83ae00eb83ae00ULL, },
83 { 0xfffeccb8fffeccb8ULL, 0xfffeccb8fffeccb8ULL, },
84 { 0x05af16ace38c5af0ULL, 0xf49d9f3405af16acULL, },
85 { 0xfffe6650fffe6650ULL, 0xfffe6650fffe6650ULL, },
86 { 0xfffeccb6fffeccb6ULL, 0xfffeccb6fffeccb6ULL, }, /* 40 */
87 { 0xfffeccb6fffeccb6ULL, 0xfffeccb6fffeccb6ULL, },
88 { 0x222110fa222110faULL, 0x222110fa222110faULL, },
89 { 0xffff331cffff331cULL, 0xffff331cffff331cULL, },
90 { 0x147a51d4147a51d4ULL, 0x147a51d4147a51d4ULL, },
91 { 0xffff9982ffff9982ULL, 0xffff9982ffff9982ULL, },
92 { 0xfa4f6bff1c717d10ULL, 0x0b608e21fa4f6bffULL, },
93 { 0xffffffe8ffffffe8ULL, 0xffffffe8ffffffe8ULL, },
94 { 0x00001c59ffff71aeULL, 0xffffc70300001c59ULL, }, /* 48 */
95 { 0x00001c59ffff71aeULL, 0xffffc70300001c59ULL, },
96 { 0x097b2f4fd0966832ULL, 0xed08a115097b2f4fULL, },
97 { 0x000038cafffee374ULL, 0xffff8e1e000038caULL, },
98 { 0x05b082bee38c71acULL, 0xf49e609a05b082beULL, },
99 { 0x0000553bfffe553aULL, 0xffff55390000553bULL, },
100 { 0xf033192eca430636ULL, 0xc0c90fb0f033192eULL, },
101 { 0x000071acfffdc700ULL, 0xffff1c54000071acULL, },
102 { 0x00005539fffe5538ULL, 0xffff553700005539ULL, }, /* 56 */
103 { 0x00005539fffe5538ULL, 0xffff553700005539ULL, },
104 { 0xf68497972f66b408ULL, 0x12f5d079f6849797ULL, },
105 { 0x000038c6fffee370ULL, 0xffff8e1a000038c6ULL, },
106 { 0xfa4f886a1c70eed0ULL, 0x0b605536fa4f886aULL, },
107 { 0x00001c53ffff71a8ULL, 0xffffc6fd00001c53ULL, },
108 { 0x0fcd74d135ba3272ULL, 0x3f35d3a10fcd74d1ULL, },
109 { 0xffffffe0ffffffe0ULL, 0xffffffe0ffffffe0ULL, },
110 { 0xc5a8016cdd3daa5cULL, 0xe94945ebe7053037ULL, }, /* 64 */
111 { 0xc3b493dce3f99616ULL, 0xe6c275fe01105522ULL, },
112 { 0x949f7b2015d7bcd8ULL, 0xdd8e1f740c23f089ULL, },
113 { 0xcb480f0e10df8c96ULL, 0x0470e12d02738253ULL, },
114 { 0xc954a17e179b7850ULL, 0x01ea11401c7ea73eULL, },
115 { 0xc9425a31f36c45a7ULL, 0xedf7684bffd4d9adULL, },
116 { 0xc7fda5a7eec474caULL, 0xdbac4bfdfada4b68ULL, },
117 { 0xc9d3363ecb9ded37ULL, 0xc40db8860b92e4aaULL, },
118 { 0x9abe1d82fd7c13f9ULL, 0xbad961fc16a68011ULL, }, /* 72 */
119 { 0x997968f8f8d4431cULL, 0xa88e45ae11abf1ccULL, },
120 { 0x644cd070b0912dbbULL, 0x95a94d6df030af03ULL, },
121 { 0x90151b88bce11a1cULL, 0x8ce173edd7b3566dULL, },
122 { 0xc6bdaf76b7e8e9daULL, 0xb3c435a6ce02e837ULL, },
123 { 0xc893400d94c26247ULL, 0x9c25a22fdebb8179ULL, },
124 { 0xf45b8b25a1124ea8ULL, 0x935dc8afc63e28e3ULL, },
125 { 0xc124ff9b7af87983ULL, 0x2916358ea57b0fdfULL, },
126 { 0xa3bdf52f3f1bc6d3ULL, 0x1a9b7790a9e67552ULL, }, /* 80 */
127 { 0xa2394ebc1f432fbaULL, 0x38d091638b040700ULL, },
128 { 0x9c98e9da3d8da28dULL, 0x17578e46633c7554ULL, },
129 { 0xca2304601c11139aULL, 0xecce6f4f9252c75cULL, },
130 { 0xb167fd62111ca498ULL, 0xed848a6b7ffb85a6ULL, },
131 { 0xb01a590af79618c4ULL, 0xcf3de0319d05b479ULL, },
132 { 0xb2490b42008cb27aULL, 0xcfbf82ea8729672eULL, },
133 { 0xd36607e1f75b1a82ULL, 0x8006f7ab6a0e64dcULL, },
134 { 0xbf56e259efe4672cULL, 0xa61769778a2f91d2ULL, }, /* 88 */
135 { 0xbe4f061a0bbba5e0ULL, 0xc922e830b7ade689ULL, },
136 { 0xaac85110e5ef76abULL, 0xcc5f9db0a366adc6ULL, },
137 { 0xc91b5b88fd4a93d2ULL, 0x879c58c17a96cfbaULL, },
138 { 0xb8799dfa21be5efeULL, 0xa721331f6c3d78f0ULL, },
139 { 0xb76ef97e2ca86ef4ULL, 0xbb78ca223c0de8adULL, },
140 { 0x9da743266b64f51cULL, 0xba24b1045354f4faULL, },
141 { 0xc2f3162f429e4870ULL, 0x764125c06e4d3512ULL, },
142 { 0xa89d5e1d1ffccbf4ULL, 0x51bf6a197f87f33bULL, }, /* 96 */
143 { 0x890f17ff2c462c7cULL, 0x34f589127c4cc49aULL, },
144 { 0x53dc26951679feb0ULL, 0x2aa458e36a7c8cdeULL, },
145 { 0x7ed4f0c1135e605eULL, 0x1a22c08d472920e2ULL, },
146 { 0x80f6d8c622f1e674ULL, 0x071f986d36987e53ULL, },
147 { 0x7ee91ba012abf971ULL, 0xeab87172091da737ULL, },
148 { 0x80fac8d20b8e2fb8ULL, 0x0ad43e562523cff0ULL, },
149 { 0x7ef3481012ac516eULL, 0x1acdbd0e31a33d13ULL, },
150 { 0xbf53a8023cd97b5aULL, 0x07b9c024393d8136ULL, }, /* 104 */
151 { 0x8e3cb38085aaebe3ULL, 0xf84dd1305e923ebfULL, },
152 { 0x50c22f685af8caedULL, 0xef14166874d2544dULL, },
153 { 0x7a3548245bc2dee5ULL, 0xf6b38ff08f52b803ULL, },
154 { 0x3e4f96f53628fefdULL, 0xbe65c7ed60e1faffULL, },
155 { 0x2c2056e3221de63fULL, 0x871151e081227a9dULL, },
156 { 0x113314bc1293f380ULL, 0x774bb8df643781b9ULL, },
157 { 0x07d911730a4b3a5dULL, 0x8b56a81c77aef6ebULL, },
158 };
159
160 reset_msa_registers();
161
162 gettimeofday(&start, NULL);
163
164 for (i = 0; i < PATTERN_INPUTS_SHORT_COUNT; i++) {
165 for (j = 0; j < PATTERN_INPUTS_SHORT_COUNT; j++) {
166 do_msa_DPSUB_S_W(b128_pattern[i], b128_pattern[j],
167 b128_result[PATTERN_INPUTS_SHORT_COUNT * i + j]);
168 }
169 }
170
171 for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
172 for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
173 do_msa_DPSUB_S_W(b128_random[i], b128_random[j],
174 b128_result[((PATTERN_INPUTS_SHORT_COUNT) *
175 (PATTERN_INPUTS_SHORT_COUNT)) +
176 RANDOM_INPUTS_SHORT_COUNT * i + j]);
177 }
178 }
179
180 for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
181 for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
182 do_msa_DPSUB_S_W__DDT(b128_random[i], b128_random[j],
183 b128_result[
184 ((PATTERN_INPUTS_SHORT_COUNT) *
185 (PATTERN_INPUTS_SHORT_COUNT)) +
186 ((RANDOM_INPUTS_SHORT_COUNT) *
187 (RANDOM_INPUTS_SHORT_COUNT)) +
188 RANDOM_INPUTS_SHORT_COUNT * i + j]);
189 }
190 }
191
192 for (i = 0; i < RANDOM_INPUTS_SHORT_COUNT; i++) {
193 for (j = 0; j < RANDOM_INPUTS_SHORT_COUNT; j++) {
194 do_msa_DPSUB_S_W__DSD(b128_random[i], b128_random[j],
195 b128_result[
196 ((PATTERN_INPUTS_SHORT_COUNT) *
197 (PATTERN_INPUTS_SHORT_COUNT)) +
198 (2 * (RANDOM_INPUTS_SHORT_COUNT) *
199 (RANDOM_INPUTS_SHORT_COUNT)) +
200 RANDOM_INPUTS_SHORT_COUNT * i + j]);
201 }
202 }
203
204 gettimeofday(&end, NULL);
205
206 elapsed_time = (end.tv_sec - start.tv_sec) * 1000.0;
207 elapsed_time += (end.tv_usec - start.tv_usec) / 1000.0;
208
209 ret = check_results_128(isa_ase_name, group_name, instruction_name,
210 TEST_COUNT_TOTAL, elapsed_time,
211 &b128_result[0][0], &b128_expect[0][0]);
212
213 return ret;
214 }
215