1 /* 2 * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include <stdio.h> 19 #include <stdint.h> 20 #include <stdbool.h> 21 #include <string.h> 22 #include <limits.h> 23 24 int err; 25 26 #include "hvx_misc.h" 27 28 #define fVROUND(VAL, SHAMT) \ 29 ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0)) 30 31 #define fVSATUB(VAL) \ 32 ((((VAL) & 0xffLL) == (VAL)) ? \ 33 (VAL) : \ 34 ((((int32_t)(VAL)) < 0) ? 0 : 0xff)) 35 36 #define fVSATUH(VAL) \ 37 ((((VAL) & 0xffffLL) == (VAL)) ? \ 38 (VAL) : \ 39 ((((int32_t)(VAL)) < 0) ? 0 : 0xffff)) 40 41 static void test_vasrvuhubrndsat(void) 42 { 43 void *p0 = buffer0; 44 void *p1 = buffer1; 45 void *pout = output; 46 47 memset(expect, 0xaa, sizeof(expect)); 48 memset(output, 0xbb, sizeof(output)); 49 50 for (int i = 0; i < BUFSIZE / 2; i++) { 51 asm("v4 = vmem(%0 + #0)\n\t" 52 "v5 = vmem(%0 + #1)\n\t" 53 "v6 = vmem(%1 + #0)\n\t" 54 "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t" 55 "vmem(%2) = v5\n\t" 56 : : "r"(p0), "r"(p1), "r"(pout) 57 : "v4", "v5", "v6", "memory"); 58 p0 += sizeof(MMVector) * 2; 59 p1 += sizeof(MMVector); 60 pout += sizeof(MMVector); 61 62 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { 63 int shamt; 64 uint8_t byte0; 65 uint8_t byte1; 66 67 shamt = buffer1[i].ub[2 * j + 0] & 0x7; 68 byte0 = fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt) >> shamt); 69 shamt = buffer1[i].ub[2 * j + 1] & 0x7; 70 byte1 = fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt) >> shamt); 71 expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff); 72 } 73 } 74 75 check_output_h(__LINE__, BUFSIZE / 2); 76 } 77 78 static void test_vasrvuhubsat(void) 79 { 80 void *p0 = buffer0; 81 void *p1 = buffer1; 82 void *pout = output; 83 84 memset(expect, 0xaa, sizeof(expect)); 85 memset(output, 0xbb, sizeof(output)); 86 87 for (int i = 0; i < BUFSIZE / 2; i++) { 88 asm("v4 = vmem(%0 + #0)\n\t" 89 "v5 = vmem(%0 + #1)\n\t" 90 "v6 = vmem(%1 + #0)\n\t" 91 "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t" 92 "vmem(%2) = v5\n\t" 93 : : "r"(p0), "r"(p1), "r"(pout) 94 : "v4", "v5", "v6", "memory"); 95 p0 += sizeof(MMVector) * 2; 96 p1 += sizeof(MMVector); 97 pout += sizeof(MMVector); 98 99 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { 100 int shamt; 101 uint8_t byte0; 102 uint8_t byte1; 103 104 shamt = buffer1[i].ub[2 * j + 0] & 0x7; 105 byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt); 106 shamt = buffer1[i].ub[2 * j + 1] & 0x7; 107 byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt); 108 expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff); 109 } 110 } 111 112 check_output_h(__LINE__, BUFSIZE / 2); 113 } 114 115 static void test_vasrvwuhrndsat(void) 116 { 117 void *p0 = buffer0; 118 void *p1 = buffer1; 119 void *pout = output; 120 121 memset(expect, 0xaa, sizeof(expect)); 122 memset(output, 0xbb, sizeof(output)); 123 124 for (int i = 0; i < BUFSIZE / 2; i++) { 125 asm("v4 = vmem(%0 + #0)\n\t" 126 "v5 = vmem(%0 + #1)\n\t" 127 "v6 = vmem(%1 + #0)\n\t" 128 "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t" 129 "vmem(%2) = v5\n\t" 130 : : "r"(p0), "r"(p1), "r"(pout) 131 : "v4", "v5", "v6", "memory"); 132 p0 += sizeof(MMVector) * 2; 133 p1 += sizeof(MMVector); 134 pout += sizeof(MMVector); 135 136 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { 137 int shamt; 138 uint16_t half0; 139 uint16_t half1; 140 141 shamt = buffer1[i].uh[2 * j + 0] & 0xf; 142 half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt); 143 shamt = buffer1[i].uh[2 * j + 1] & 0xf; 144 half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt); 145 expect[i].w[j] = (half1 << 16) | (half0 & 0xffff); 146 } 147 } 148 149 check_output_w(__LINE__, BUFSIZE / 2); 150 } 151 152 static void test_vasrvwuhsat(void) 153 { 154 void *p0 = buffer0; 155 void *p1 = buffer1; 156 void *pout = output; 157 158 memset(expect, 0xaa, sizeof(expect)); 159 memset(output, 0xbb, sizeof(output)); 160 161 for (int i = 0; i < BUFSIZE / 2; i++) { 162 asm("v4 = vmem(%0 + #0)\n\t" 163 "v5 = vmem(%0 + #1)\n\t" 164 "v6 = vmem(%1 + #0)\n\t" 165 "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t" 166 "vmem(%2) = v5\n\t" 167 : : "r"(p0), "r"(p1), "r"(pout) 168 : "v4", "v5", "v6", "memory"); 169 p0 += sizeof(MMVector) * 2; 170 p1 += sizeof(MMVector); 171 pout += sizeof(MMVector); 172 173 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { 174 int shamt; 175 uint16_t half0; 176 uint16_t half1; 177 178 shamt = buffer1[i].uh[2 * j + 0] & 0xf; 179 half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt); 180 shamt = buffer1[i].uh[2 * j + 1] & 0xf; 181 half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt); 182 expect[i].w[j] = (half1 << 16) | (half0 & 0xffff); 183 } 184 } 185 186 check_output_w(__LINE__, BUFSIZE / 2); 187 } 188 189 static void test_vassign_tmp(void) 190 { 191 void *p0 = buffer0; 192 void *pout = output; 193 194 memset(expect, 0xaa, sizeof(expect)); 195 memset(output, 0xbb, sizeof(output)); 196 197 for (int i = 0; i < BUFSIZE; i++) { 198 /* 199 * Assign into v12 as .tmp, then use it in the next packet 200 * Should get the new value within the same packet and 201 * the old value in the next packet 202 */ 203 asm("v3 = vmem(%0 + #0)\n\t" 204 "r1 = #1\n\t" 205 "v12 = vsplat(r1)\n\t" 206 "r1 = #2\n\t" 207 "v13 = vsplat(r1)\n\t" 208 "{\n\t" 209 " v12.tmp = v13\n\t" 210 " v4.w = vadd(v12.w, v3.w)\n\t" 211 "}\n\t" 212 "v4.w = vadd(v4.w, v12.w)\n\t" 213 "vmem(%1 + #0) = v4\n\t" 214 : : "r"(p0), "r"(pout) 215 : "r1", "v3", "v4", "v12", "v13", "memory"); 216 p0 += sizeof(MMVector); 217 pout += sizeof(MMVector); 218 219 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { 220 expect[i].w[j] = buffer0[i].w[j] + 3; 221 } 222 } 223 224 check_output_w(__LINE__, BUFSIZE); 225 } 226 227 static void test_vcombine_tmp(void) 228 { 229 void *p0 = buffer0; 230 void *p1 = buffer1; 231 void *pout = output; 232 233 memset(expect, 0xaa, sizeof(expect)); 234 memset(output, 0xbb, sizeof(output)); 235 236 for (int i = 0; i < BUFSIZE; i++) { 237 /* 238 * Combine into v13:12 as .tmp, then use it in the next packet 239 * Should get the new value within the same packet and 240 * the old value in the next packet 241 */ 242 asm("v3 = vmem(%0 + #0)\n\t" 243 "r1 = #1\n\t" 244 "v12 = vsplat(r1)\n\t" 245 "r1 = #2\n\t" 246 "v13 = vsplat(r1)\n\t" 247 "r1 = #3\n\t" 248 "v14 = vsplat(r1)\n\t" 249 "r1 = #4\n\t" 250 "v15 = vsplat(r1)\n\t" 251 "{\n\t" 252 " v13:12.tmp = vcombine(v15, v14)\n\t" 253 " v4.w = vadd(v12.w, v3.w)\n\t" 254 " v16 = v13\n\t" 255 "}\n\t" 256 "v4.w = vadd(v4.w, v12.w)\n\t" 257 "v4.w = vadd(v4.w, v13.w)\n\t" 258 "v4.w = vadd(v4.w, v16.w)\n\t" 259 "vmem(%2 + #0) = v4\n\t" 260 : : "r"(p0), "r"(p1), "r"(pout) 261 : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory"); 262 p0 += sizeof(MMVector); 263 p1 += sizeof(MMVector); 264 pout += sizeof(MMVector); 265 266 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { 267 expect[i].w[j] = buffer0[i].w[j] + 10; 268 } 269 } 270 271 check_output_w(__LINE__, BUFSIZE); 272 } 273 274 static void test_vmpyuhvs(void) 275 { 276 void *p0 = buffer0; 277 void *p1 = buffer1; 278 void *pout = output; 279 280 memset(expect, 0xaa, sizeof(expect)); 281 memset(output, 0xbb, sizeof(output)); 282 283 for (int i = 0; i < BUFSIZE; i++) { 284 asm("v4 = vmem(%0 + #0)\n\t" 285 "v5 = vmem(%1 + #0)\n\t" 286 "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t" 287 "vmem(%2) = v4\n\t" 288 : : "r"(p0), "r"(p1), "r"(pout) 289 : "v4", "v5", "memory"); 290 p0 += sizeof(MMVector); 291 p1 += sizeof(MMVector); 292 pout += sizeof(MMVector); 293 294 for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { 295 expect[i].uh[j] = (buffer0[i].uh[j] * buffer1[i].uh[j]) >> 16; 296 } 297 } 298 299 check_output_h(__LINE__, BUFSIZE); 300 } 301 302 int main() 303 { 304 init_buffers(); 305 306 test_vasrvuhubrndsat(); 307 test_vasrvuhubsat(); 308 test_vasrvwuhrndsat(); 309 test_vasrvwuhsat(); 310 311 test_vassign_tmp(); 312 test_vcombine_tmp(); 313 314 test_vmpyuhvs(); 315 316 puts(err ? "FAIL" : "PASS"); 317 return err ? 1 : 0; 318 } 319