1 #include <stdio.h> 2 #include <stdint.h> 3 #include <stdlib.h> 4 #include <string.h> 5 6 typedef void (*testfn)(void); 7 8 typedef struct { 9 uint64_t q0, q1; 10 } __attribute__((aligned(16))) v2di; 11 12 typedef struct { 13 uint64_t mm[8]; 14 v2di xmm[16]; 15 uint64_t r[16]; 16 uint64_t flags; 17 uint32_t ff; 18 uint64_t pad; 19 v2di mem[4]; 20 v2di mem0[4]; 21 } reg_state; 22 23 typedef struct { 24 int n; 25 testfn fn; 26 const char *s; 27 reg_state *init; 28 } TestDef; 29 30 reg_state initI; 31 reg_state initF32; 32 reg_state initF64; 33 34 static void dump_xmm(const char *name, int n, const v2di *r, int ff) 35 { 36 printf("%s%d = %016lx %016lx\n", 37 name, n, r->q1, r->q0); 38 if (ff == 64) { 39 double v[2]; 40 memcpy(v, r, sizeof(v)); 41 printf(" %16g %16g\n", 42 v[1], v[0]); 43 } else if (ff == 32) { 44 float v[4]; 45 memcpy(v, r, sizeof(v)); 46 printf(" %8g %8g %8g %8g\n", 47 v[3], v[2], v[1], v[0]); 48 } 49 } 50 51 static void dump_regs(reg_state *s) 52 { 53 int i; 54 55 for (i = 0; i < 16; i++) { 56 dump_xmm("xmm", i, &s->xmm[i], 0); 57 } 58 for (i = 0; i < 4; i++) { 59 dump_xmm("mem", i, &s->mem0[i], 0); 60 } 61 } 62 63 static void compare_state(const reg_state *a, const reg_state *b) 64 { 65 int i; 66 for (i = 0; i < 8; i++) { 67 if (a->mm[i] != b->mm[i]) { 68 printf("MM%d = %016lx\n", i, b->mm[i]); 69 } 70 } 71 for (i = 0; i < 16; i++) { 72 if (a->r[i] != b->r[i]) { 73 printf("r%d = %016lx\n", i, b->r[i]); 74 } 75 } 76 for (i = 0; i < 16; i++) { 77 if (memcmp(&a->xmm[i], &b->xmm[i], 16)) { 78 dump_xmm("xmm", i, &b->xmm[i], a->ff); 79 } 80 } 81 for (i = 0; i < 4; i++) { 82 if (memcmp(&a->mem0[i], &a->mem[i], 16)) { 83 dump_xmm("mem", i, &a->mem[i], a->ff); 84 } 85 } 86 if (a->flags != b->flags) { 87 printf("FLAGS = %016lx\n", b->flags); 88 } 89 } 90 91 #define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t" 92 #define LOADXMM(r, o) "movdqa " #r ", " #o "[%0]\n\t" 93 #define STOREMM(r, o) "movq " #o "[%1], " #r "\n\t" 94 #define STOREXMM(r, o) "movdqa " #o "[%1], " #r "\n\t" 95 #define MMREG(F) \ 96 F(mm0, 0x00) \ 97 F(mm1, 0x08) \ 98 F(mm2, 0x10) \ 99 F(mm3, 0x18) \ 100 F(mm4, 0x20) \ 101 F(mm5, 0x28) \ 102 F(mm6, 0x30) \ 103 F(mm7, 0x38) 104 #define XMMREG(F) \ 105 F(xmm0, 0x040) \ 106 F(xmm1, 0x050) \ 107 F(xmm2, 0x060) \ 108 F(xmm3, 0x070) \ 109 F(xmm4, 0x080) \ 110 F(xmm5, 0x090) \ 111 F(xmm6, 0x0a0) \ 112 F(xmm7, 0x0b0) \ 113 F(xmm8, 0x0c0) \ 114 F(xmm9, 0x0d0) \ 115 F(xmm10, 0x0e0) \ 116 F(xmm11, 0x0f0) \ 117 F(xmm12, 0x100) \ 118 F(xmm13, 0x110) \ 119 F(xmm14, 0x120) \ 120 F(xmm15, 0x130) 121 #define LOADREG(r, o) "mov " #r ", " #o "[rax]\n\t" 122 #define STOREREG(r, o) "mov " #o "[rax], " #r "\n\t" 123 #define REG(F) \ 124 F(rbx, 0x148) \ 125 F(rcx, 0x150) \ 126 F(rdx, 0x158) \ 127 F(rsi, 0x160) \ 128 F(rdi, 0x168) \ 129 F(r8, 0x180) \ 130 F(r9, 0x188) \ 131 F(r10, 0x190) \ 132 F(r11, 0x198) \ 133 F(r12, 0x1a0) \ 134 F(r13, 0x1a8) \ 135 F(r14, 0x1b0) \ 136 F(r15, 0x1b8) \ 137 138 static void run_test(const TestDef *t) 139 { 140 reg_state result; 141 reg_state *init = t->init; 142 memcpy(init->mem, init->mem0, sizeof(init->mem)); 143 printf("%5d %s\n", t->n, t->s); 144 asm volatile( 145 MMREG(LOADMM) 146 XMMREG(LOADXMM) 147 "sub rsp, 128\n\t" 148 "push rax\n\t" 149 "push rbx\n\t" 150 "push rcx\n\t" 151 "push rdx\n\t" 152 "push %1\n\t" 153 "push %2\n\t" 154 "mov rax, %0\n\t" 155 "pushf\n\t" 156 "pop rbx\n\t" 157 "shr rbx, 8\n\t" 158 "shl rbx, 8\n\t" 159 "mov rcx, 0x1c0[rax]\n\t" 160 "and rcx, 0xff\n\t" 161 "or rbx, rcx\n\t" 162 "push rbx\n\t" 163 "popf\n\t" 164 REG(LOADREG) 165 "mov rax, 0x140[rax]\n\t" 166 "call [rsp]\n\t" 167 "mov [rsp], rax\n\t" 168 "mov rax, 8[rsp]\n\t" 169 REG(STOREREG) 170 "mov rbx, [rsp]\n\t" 171 "mov 0x140[rax], rbx\n\t" 172 "mov rbx, 0\n\t" 173 "mov 0x170[rax], rbx\n\t" 174 "mov 0x178[rax], rbx\n\t" 175 "pushf\n\t" 176 "pop rbx\n\t" 177 "and rbx, 0xff\n\t" 178 "mov 0x1c0[rax], rbx\n\t" 179 "add rsp, 16\n\t" 180 "pop rdx\n\t" 181 "pop rcx\n\t" 182 "pop rbx\n\t" 183 "pop rax\n\t" 184 "add rsp, 128\n\t" 185 MMREG(STOREMM) 186 XMMREG(STOREXMM) 187 : : "r"(init), "r"(&result), "r"(t->fn) 188 : "memory", "cc", 189 "rsi", "rdi", 190 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 191 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", 192 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", 193 "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", 194 "xmm12", "xmm13", "xmm14", "xmm15" 195 ); 196 compare_state(init, &result); 197 } 198 199 #define TEST(n, cmd, type) \ 200 static void __attribute__((naked)) test_##n(void) \ 201 { \ 202 asm volatile(cmd); \ 203 asm volatile("ret"); \ 204 } 205 #include "test-avx.h" 206 207 208 static const TestDef test_table[] = { 209 #define TEST(n, cmd, type) {n, test_##n, cmd, &init##type}, 210 #include "test-avx.h" 211 {-1, NULL, "", NULL} 212 }; 213 214 static void run_all(void) 215 { 216 const TestDef *t; 217 for (t = test_table; t->fn; t++) { 218 run_test(t); 219 } 220 } 221 222 #define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) 223 224 float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3}; 225 double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5}; 226 v2di val_i64[] = { 227 {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu}, 228 {0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu}, 229 {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu}, 230 }; 231 232 v2di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull}; 233 v2di indexq = {0x000000000000001full, 0x000000000000008full}; 234 v2di indexd = {0x00000002000000efull, 0xfffffff500000010ull}; 235 236 void init_f32reg(v2di *r) 237 { 238 static int n; 239 float v[4]; 240 int i; 241 for (i = 0; i < 4; i++) { 242 v[i] = val_f32[n++]; 243 if (n == ARRAY_LEN(val_f32)) { 244 n = 0; 245 } 246 } 247 memcpy(r, v, sizeof(*r)); 248 } 249 250 void init_f64reg(v2di *r) 251 { 252 static int n; 253 double v[2]; 254 int i; 255 for (i = 0; i < 2; i++) { 256 v[i] = val_f64[n++]; 257 if (n == ARRAY_LEN(val_f64)) { 258 n = 0; 259 } 260 } 261 memcpy(r, v, sizeof(*r)); 262 } 263 264 void init_intreg(v2di *r) 265 { 266 static uint64_t mask; 267 static int n; 268 269 r->q0 = val_i64[n].q0 ^ mask; 270 r->q1 = val_i64[n].q1 ^ mask; 271 n++; 272 if (n == ARRAY_LEN(val_i64)) { 273 n = 0; 274 mask *= 0x104C11DB7; 275 } 276 } 277 278 static void init_all(reg_state *s) 279 { 280 int i; 281 282 s->r[3] = (uint64_t)&s->mem[0]; /* rdx */ 283 s->r[5] = (uint64_t)&s->mem[2]; /* rdi */ 284 s->flags = 2; 285 for (i = 0; i < 8; i++) { 286 s->xmm[i] = deadbeef; 287 } 288 s->xmm[13] = indexd; 289 s->xmm[14] = indexq; 290 for (i = 0; i < 2; i++) { 291 s->mem0[i] = deadbeef; 292 } 293 } 294 295 int main(int argc, char *argv[]) 296 { 297 init_all(&initI); 298 init_intreg(&initI.xmm[10]); 299 init_intreg(&initI.xmm[11]); 300 init_intreg(&initI.xmm[12]); 301 init_intreg(&initI.mem0[1]); 302 printf("Int:\n"); 303 dump_regs(&initI); 304 305 init_all(&initF32); 306 init_f32reg(&initF32.xmm[10]); 307 init_f32reg(&initF32.xmm[11]); 308 init_f32reg(&initF32.xmm[12]); 309 init_f32reg(&initF32.mem0[1]); 310 initF32.ff = 32; 311 printf("F32:\n"); 312 dump_regs(&initF32); 313 314 init_all(&initF64); 315 init_f64reg(&initF64.xmm[10]); 316 init_f64reg(&initF64.xmm[11]); 317 init_f64reg(&initF64.xmm[12]); 318 init_f64reg(&initF64.mem0[1]); 319 initF64.ff = 64; 320 printf("F64:\n"); 321 dump_regs(&initF64); 322 323 if (argc > 1) { 324 int n = atoi(argv[1]); 325 run_test(&test_table[n]); 326 } else { 327 run_all(); 328 } 329 return 0; 330 } 331