1 #include <stdio.h> 2 #include <stdint.h> 3 #include <stdlib.h> 4 #include <string.h> 5 6 #ifndef TEST_FILE 7 #define TEST_FILE "test-mmx.h" 8 #endif 9 #ifndef EMMS 10 #define EMMS "emms" 11 #endif 12 13 typedef void (*testfn)(void); 14 15 typedef struct { 16 uint64_t q0, q1; 17 } __attribute__((aligned(16))) v2di; 18 19 typedef struct { 20 uint64_t mm[8]; 21 v2di xmm[8]; 22 uint64_t r[16]; 23 uint64_t flags; 24 uint32_t ff; 25 uint64_t pad; 26 v2di mem[4]; 27 v2di mem0[4]; 28 } reg_state; 29 30 typedef struct { 31 int n; 32 testfn fn; 33 const char *s; 34 reg_state *init; 35 } TestDef; 36 37 reg_state initI; 38 reg_state initF32; 39 reg_state initF64; 40 41 static void dump_mmx(int n, const uint64_t *r, int ff) 42 { 43 if (ff == 32) { 44 float v[2]; 45 memcpy(v, r, sizeof(v)); 46 printf("MM%d = %016lx %8g %8g\n", n, *r, v[1], v[0]); 47 } else { 48 printf("MM%d = %016lx\n", n, *r); 49 } 50 } 51 52 static void dump_xmm(const char *name, int n, const v2di *r, int ff) 53 { 54 printf("%s%d = %016lx %016lx\n", 55 name, n, r->q1, r->q0); 56 if (ff == 32) { 57 float v[4]; 58 memcpy(v, r, sizeof(v)); 59 printf(" %8g %8g %8g %8g\n", 60 v[3], v[2], v[1], v[0]); 61 } 62 } 63 64 static void dump_regs(reg_state *s, int ff) 65 { 66 int i; 67 68 for (i = 0; i < 8; i++) { 69 dump_mmx(i, &s->mm[i], ff); 70 } 71 for (i = 0; i < 4; i++) { 72 dump_xmm("mem", i, &s->mem0[i], 0); 73 } 74 } 75 76 static void compare_state(const reg_state *a, const reg_state *b) 77 { 78 int i; 79 for (i = 0; i < 8; i++) { 80 if (a->mm[i] != b->mm[i]) { 81 printf("MM%d = %016lx\n", i, b->mm[i]); 82 } 83 } 84 for (i = 0; i < 16; i++) { 85 if (a->r[i] != b->r[i]) { 86 printf("r%d = %016lx\n", i, b->r[i]); 87 } 88 } 89 for (i = 0; i < 8; i++) { 90 if (memcmp(&a->xmm[i], &b->xmm[i], 8)) { 91 dump_xmm("xmm", i, &b->xmm[i], a->ff); 92 } 93 } 94 for (i = 0; i < 4; i++) { 95 if (memcmp(&a->mem0[i], &a->mem[i], 16)) { 96 dump_xmm("mem", i, &a->mem[i], a->ff); 97 } 98 } 99 if (a->flags != b->flags) { 100 printf("FLAGS = %016lx\n", b->flags); 101 } 102 } 103 104 #define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t" 105 #define LOADXMM(r, o) "movdqa " #r ", " #o "[%0]\n\t" 106 #define STOREMM(r, o) "movq " #o "[%1], " #r "\n\t" 107 #define STOREXMM(r, o) "movdqa " #o "[%1], " #r "\n\t" 108 #define MMREG(F) \ 109 F(mm0, 0x00) \ 110 F(mm1, 0x08) \ 111 F(mm2, 0x10) \ 112 F(mm3, 0x18) \ 113 F(mm4, 0x20) \ 114 F(mm5, 0x28) \ 115 F(mm6, 0x30) \ 116 F(mm7, 0x38) 117 #define XMMREG(F) \ 118 F(xmm0, 0x040) \ 119 F(xmm1, 0x050) \ 120 F(xmm2, 0x060) \ 121 F(xmm3, 0x070) \ 122 F(xmm4, 0x080) \ 123 F(xmm5, 0x090) \ 124 F(xmm6, 0x0a0) \ 125 F(xmm7, 0x0b0) 126 #define LOADREG(r, o) "mov " #r ", " #o "[rax]\n\t" 127 #define STOREREG(r, o) "mov " #o "[rax], " #r "\n\t" 128 #define REG(F) \ 129 F(rbx, 0xc8) \ 130 F(rcx, 0xd0) \ 131 F(rdx, 0xd8) \ 132 F(rsi, 0xe0) \ 133 F(rdi, 0xe8) \ 134 F(r8, 0x100) \ 135 F(r9, 0x108) \ 136 F(r10, 0x110) \ 137 F(r11, 0x118) \ 138 F(r12, 0x120) \ 139 F(r13, 0x128) \ 140 F(r14, 0x130) \ 141 F(r15, 0x138) \ 142 143 static void run_test(const TestDef *t) 144 { 145 reg_state result; 146 reg_state *init = t->init; 147 memcpy(init->mem, init->mem0, sizeof(init->mem)); 148 printf("%5d %s\n", t->n, t->s); 149 asm volatile( 150 MMREG(LOADMM) 151 XMMREG(LOADXMM) 152 "sub rsp, 128\n\t" 153 "push rax\n\t" 154 "push rbx\n\t" 155 "push rcx\n\t" 156 "push rdx\n\t" 157 "push %1\n\t" 158 "push %2\n\t" 159 "mov rax, %0\n\t" 160 "pushf\n\t" 161 "pop rbx\n\t" 162 "shr rbx, 8\n\t" 163 "shl rbx, 8\n\t" 164 "mov rcx, 0x140[rax]\n\t" 165 "and rcx, 0xff\n\t" 166 "or rbx, rcx\n\t" 167 "push rbx\n\t" 168 "popf\n\t" 169 REG(LOADREG) 170 "mov rax, 0xc0[rax]\n\t" 171 "call [rsp]\n\t" 172 "mov [rsp], rax\n\t" 173 "mov rax, 8[rsp]\n\t" 174 REG(STOREREG) 175 "mov rbx, [rsp]\n\t" 176 "mov 0xc0[rax], rbx\n\t" 177 "mov rbx, 0\n\t" 178 "mov 0xf0[rax], rbx\n\t" 179 "mov 0xf8[rax], rbx\n\t" 180 "pushf\n\t" 181 "pop rbx\n\t" 182 "and rbx, 0xff\n\t" 183 "mov 0x140[rax], rbx\n\t" 184 "add rsp, 16\n\t" 185 "pop rdx\n\t" 186 "pop rcx\n\t" 187 "pop rbx\n\t" 188 "pop rax\n\t" 189 "add rsp, 128\n\t" 190 MMREG(STOREMM) 191 EMMS "\n\t" 192 XMMREG(STOREXMM) 193 : : "r"(init), "r"(&result), "r"(t->fn) 194 : "memory", "cc", 195 "rsi", "rdi", 196 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", 197 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", 198 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", 199 "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", 200 "xmm12", "xmm13", "xmm14", "xmm15" 201 ); 202 compare_state(init, &result); 203 } 204 205 #define TEST(n, cmd, type) \ 206 static void __attribute__((naked)) test_##n(void) \ 207 { \ 208 asm volatile(cmd); \ 209 asm volatile("ret"); \ 210 } 211 #include TEST_FILE 212 213 214 static const TestDef test_table[] = { 215 #define TEST(n, cmd, type) {n, test_##n, cmd, &init##type}, 216 #include TEST_FILE 217 {-1, NULL, "", NULL} 218 }; 219 220 static void run_all(void) 221 { 222 const TestDef *t; 223 for (t = test_table; t->fn; t++) { 224 run_test(t); 225 } 226 } 227 228 #define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) 229 230 float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3}; 231 uint64_t val_i64[] = { 232 0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu, 233 0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu, 234 0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu, 235 }; 236 237 v2di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull}; 238 239 void init_f32reg(uint64_t *r) 240 { 241 static int n; 242 float v[2]; 243 int i; 244 for (i = 0; i < 2; i++) { 245 v[i] = val_f32[n++]; 246 if (n == ARRAY_LEN(val_f32)) { 247 n = 0; 248 } 249 } 250 memcpy(r, v, sizeof(*r)); 251 } 252 253 void init_intreg(uint64_t *r) 254 { 255 static uint64_t mask; 256 static int n; 257 258 *r = val_i64[n] ^ mask; 259 n++; 260 if (n == ARRAY_LEN(val_i64)) { 261 n = 0; 262 mask *= 0x104C11DB7; 263 } 264 } 265 266 static void init_all(reg_state *s) 267 { 268 int i; 269 270 for (i = 0; i < 16; i++) { 271 init_intreg(&s->r[i]); 272 } 273 s->r[3] = (uint64_t)&s->mem[0]; /* rdx */ 274 s->r[5] = (uint64_t)&s->mem[2]; /* rdi */ 275 s->r[6] = 0; 276 s->r[7] = 0; 277 s->flags = 2; 278 for (i = 0; i < 8; i++) { 279 s->xmm[i] = deadbeef; 280 memcpy(&s->mm[i], &s->xmm[i], sizeof(s->mm[i])); 281 } 282 for (i = 0; i < 2; i++) { 283 s->mem0[i] = deadbeef; 284 } 285 } 286 287 int main(int argc, char *argv[]) 288 { 289 init_all(&initI); 290 init_intreg(&initI.mm[5]); 291 init_intreg(&initI.mm[6]); 292 init_intreg(&initI.mm[7]); 293 init_intreg(&initI.mem0[1].q0); 294 init_intreg(&initI.mem0[1].q1); 295 printf("Int:\n"); 296 dump_regs(&initI, 0); 297 298 init_all(&initF32); 299 init_f32reg(&initF32.mm[5]); 300 init_f32reg(&initF32.mm[6]); 301 init_f32reg(&initF32.mm[7]); 302 init_f32reg(&initF32.mem0[1].q0); 303 init_f32reg(&initF32.mem0[1].q1); 304 initF32.ff = 32; 305 printf("F32:\n"); 306 dump_regs(&initF32, 32); 307 308 if (argc > 1) { 309 int n = atoi(argv[1]); 310 run_test(&test_table[n]); 311 } else { 312 run_all(); 313 } 314 return 0; 315 } 316