1 /* 2 * x86 CPU test 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #define _GNU_SOURCE 20 #include <stdlib.h> 21 #include <stdio.h> 22 #include <string.h> 23 #include <inttypes.h> 24 #include <math.h> 25 #include <signal.h> 26 #include <setjmp.h> 27 #include <errno.h> 28 #include <sys/ucontext.h> 29 #include <sys/mman.h> 30 31 #if !defined(__x86_64__) 32 //#define TEST_VM86 33 #define TEST_SEGS 34 #endif 35 //#define LINUX_VM86_IOPL_FIX 36 //#define TEST_P4_FLAGS 37 #ifdef __SSE__ 38 #define TEST_SSE 39 #define TEST_CMOV 1 40 #define TEST_FCOMI 1 41 #else 42 #undef TEST_SSE 43 #define TEST_CMOV 1 44 #define TEST_FCOMI 1 45 #endif 46 47 #if defined(__x86_64__) 48 #define FMT64X "%016lx" 49 #define FMTLX "%016lx" 50 #define X86_64_ONLY(x) x 51 #else 52 #define FMT64X "%016" PRIx64 53 #define FMTLX "%08lx" 54 #define X86_64_ONLY(x) 55 #endif 56 57 #ifdef TEST_VM86 58 #include <asm/vm86.h> 59 #endif 60 61 #define xglue(x, y) x ## y 62 #define glue(x, y) xglue(x, y) 63 #define stringify(s) tostring(s) 64 #define tostring(s) #s 65 66 #define CC_C 0x0001 67 #define CC_P 0x0004 68 #define CC_A 0x0010 69 #define CC_Z 0x0040 70 #define CC_S 0x0080 71 #define CC_O 0x0800 72 73 #define __init_call __attribute__ ((unused,__section__ ("initcall"))) 74 75 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) 76 77 #if defined(__x86_64__) 78 static inline long i2l(long v) 79 { 80 return v | ((v ^ 0xabcd) << 32); 81 } 82 #else 83 static inline long i2l(long v) 84 { 85 return v; 86 } 87 #endif 88 89 #define OP add 90 #include "test-i386.h" 91 92 #define OP sub 93 #include "test-i386.h" 94 95 #define OP xor 96 #include "test-i386.h" 97 98 #define OP and 99 #include "test-i386.h" 100 101 #define OP or 102 #include "test-i386.h" 103 104 #define OP cmp 105 #include "test-i386.h" 106 107 #define OP adc 108 #define OP_CC 109 #include "test-i386.h" 110 111 #define OP sbb 112 #define OP_CC 113 #include "test-i386.h" 114 115 #define OP inc 116 #define OP_CC 117 #define OP1 118 #include "test-i386.h" 119 120 #define OP dec 121 #define OP_CC 122 #define OP1 123 #include "test-i386.h" 124 125 #define OP neg 126 #define OP_CC 127 #define OP1 128 #include "test-i386.h" 129 130 #define OP not 131 #define OP_CC 132 #define OP1 133 #include "test-i386.h" 134 135 #undef CC_MASK 136 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O) 137 138 #define OP shl 139 #include "test-i386-shift.h" 140 141 #define OP shr 142 #include "test-i386-shift.h" 143 144 #define OP sar 145 #include "test-i386-shift.h" 146 147 #define OP rol 148 #include "test-i386-shift.h" 149 150 #define OP ror 151 #include "test-i386-shift.h" 152 153 #define OP rcr 154 #define OP_CC 155 #include "test-i386-shift.h" 156 157 #define OP rcl 158 #define OP_CC 159 #include "test-i386-shift.h" 160 161 #define OP shld 162 #define OP_SHIFTD 163 #define OP_NOBYTE 164 #include "test-i386-shift.h" 165 166 
#define OP shrd 167 #define OP_SHIFTD 168 #define OP_NOBYTE 169 #include "test-i386-shift.h" 170 171 /* XXX: should be more precise ? */ 172 #undef CC_MASK 173 #define CC_MASK (CC_C) 174 175 #define OP bt 176 #define OP_NOBYTE 177 #include "test-i386-shift.h" 178 179 #define OP bts 180 #define OP_NOBYTE 181 #include "test-i386-shift.h" 182 183 #define OP btr 184 #define OP_NOBYTE 185 #include "test-i386-shift.h" 186 187 #define OP btc 188 #define OP_NOBYTE 189 #include "test-i386-shift.h" 190 191 /* lea test (modrm support) */ 192 #define TEST_LEAQ(STR)\ 193 {\ 194 asm("lea " STR ", %0"\ 195 : "=r" (res)\ 196 : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ 197 printf("lea %s = " FMTLX "\n", STR, res);\ 198 } 199 200 #define TEST_LEA(STR)\ 201 {\ 202 asm("lea " STR ", %0"\ 203 : "=r" (res)\ 204 : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ 205 printf("lea %s = " FMTLX "\n", STR, res);\ 206 } 207 208 #define TEST_LEA16(STR)\ 209 {\ 210 asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\ 211 : "=r" (res)\ 212 : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ 213 printf("lea %s = %08lx\n", STR, res);\ 214 } 215 216 217 void test_lea(void) 218 { 219 long eax, ebx, ecx, edx, esi, edi, res; 220 eax = i2l(0x0001); 221 ebx = i2l(0x0002); 222 ecx = i2l(0x0004); 223 edx = i2l(0x0008); 224 esi = i2l(0x0010); 225 edi = i2l(0x0020); 226 227 TEST_LEA("0x4000"); 228 229 TEST_LEA("(%%eax)"); 230 TEST_LEA("(%%ebx)"); 231 TEST_LEA("(%%ecx)"); 232 TEST_LEA("(%%edx)"); 233 TEST_LEA("(%%esi)"); 234 TEST_LEA("(%%edi)"); 235 236 TEST_LEA("0x40(%%eax)"); 237 TEST_LEA("0x40(%%ebx)"); 238 TEST_LEA("0x40(%%ecx)"); 239 TEST_LEA("0x40(%%edx)"); 240 TEST_LEA("0x40(%%esi)"); 241 TEST_LEA("0x40(%%edi)"); 242 243 TEST_LEA("0x4000(%%eax)"); 244 TEST_LEA("0x4000(%%ebx)"); 245 TEST_LEA("0x4000(%%ecx)"); 246 TEST_LEA("0x4000(%%edx)"); 247 TEST_LEA("0x4000(%%esi)"); 248 TEST_LEA("0x4000(%%edi)"); 249 250 TEST_LEA("(%%eax, %%ecx)"); 251 TEST_LEA("(%%ebx, %%edx)"); 252 TEST_LEA("(%%ecx, %%ecx)"); 253 TEST_LEA("(%%edx, %%ecx)"); 254 TEST_LEA("(%%esi, %%ecx)"); 255 TEST_LEA("(%%edi, %%ecx)"); 256 257 TEST_LEA("0x40(%%eax, %%ecx)"); 258 TEST_LEA("0x4000(%%ebx, %%edx)"); 259 260 TEST_LEA("(%%ecx, %%ecx, 2)"); 261 TEST_LEA("(%%edx, %%ecx, 4)"); 262 TEST_LEA("(%%esi, %%ecx, 8)"); 263 264 TEST_LEA("(,%%eax, 2)"); 265 TEST_LEA("(,%%ebx, 4)"); 266 TEST_LEA("(,%%ecx, 8)"); 267 268 TEST_LEA("0x40(,%%eax, 2)"); 269 TEST_LEA("0x40(,%%ebx, 4)"); 270 TEST_LEA("0x40(,%%ecx, 8)"); 271 272 273 TEST_LEA("-10(%%ecx, %%ecx, 2)"); 274 TEST_LEA("-10(%%edx, %%ecx, 4)"); 275 TEST_LEA("-10(%%esi, %%ecx, 8)"); 276 277 TEST_LEA("0x4000(%%ecx, %%ecx, 2)"); 278 TEST_LEA("0x4000(%%edx, %%ecx, 4)"); 279 TEST_LEA("0x4000(%%esi, %%ecx, 8)"); 280 281 #if defined(__x86_64__) 282 TEST_LEAQ("0x4000"); 283 TEST_LEAQ("0x4000(%%rip)"); 284 285 TEST_LEAQ("(%%rax)"); 286 TEST_LEAQ("(%%rbx)"); 287 TEST_LEAQ("(%%rcx)"); 288 TEST_LEAQ("(%%rdx)"); 289 TEST_LEAQ("(%%rsi)"); 290 TEST_LEAQ("(%%rdi)"); 291 292 TEST_LEAQ("0x40(%%rax)"); 293 TEST_LEAQ("0x40(%%rbx)"); 294 TEST_LEAQ("0x40(%%rcx)"); 295 TEST_LEAQ("0x40(%%rdx)"); 296 TEST_LEAQ("0x40(%%rsi)"); 297 TEST_LEAQ("0x40(%%rdi)"); 298 299 TEST_LEAQ("0x4000(%%rax)"); 300 TEST_LEAQ("0x4000(%%rbx)"); 301 TEST_LEAQ("0x4000(%%rcx)"); 302 TEST_LEAQ("0x4000(%%rdx)"); 303 TEST_LEAQ("0x4000(%%rsi)"); 304 TEST_LEAQ("0x4000(%%rdi)"); 305 306 TEST_LEAQ("(%%rax, %%rcx)"); 307 TEST_LEAQ("(%%rbx, %%rdx)"); 308 TEST_LEAQ("(%%rcx, %%rcx)"); 309 TEST_LEAQ("(%%rdx, %%rcx)"); 
310 TEST_LEAQ("(%%rsi, %%rcx)"); 311 TEST_LEAQ("(%%rdi, %%rcx)"); 312 313 TEST_LEAQ("0x40(%%rax, %%rcx)"); 314 TEST_LEAQ("0x4000(%%rbx, %%rdx)"); 315 316 TEST_LEAQ("(%%rcx, %%rcx, 2)"); 317 TEST_LEAQ("(%%rdx, %%rcx, 4)"); 318 TEST_LEAQ("(%%rsi, %%rcx, 8)"); 319 320 TEST_LEAQ("(,%%rax, 2)"); 321 TEST_LEAQ("(,%%rbx, 4)"); 322 TEST_LEAQ("(,%%rcx, 8)"); 323 324 TEST_LEAQ("0x40(,%%rax, 2)"); 325 TEST_LEAQ("0x40(,%%rbx, 4)"); 326 TEST_LEAQ("0x40(,%%rcx, 8)"); 327 328 329 TEST_LEAQ("-10(%%rcx, %%rcx, 2)"); 330 TEST_LEAQ("-10(%%rdx, %%rcx, 4)"); 331 TEST_LEAQ("-10(%%rsi, %%rcx, 8)"); 332 333 TEST_LEAQ("0x4000(%%rcx, %%rcx, 2)"); 334 TEST_LEAQ("0x4000(%%rdx, %%rcx, 4)"); 335 TEST_LEAQ("0x4000(%%rsi, %%rcx, 8)"); 336 #else 337 /* limited 16 bit addressing test */ 338 TEST_LEA16("0x4000"); 339 TEST_LEA16("(%%bx)"); 340 TEST_LEA16("(%%si)"); 341 TEST_LEA16("(%%di)"); 342 TEST_LEA16("0x40(%%bx)"); 343 TEST_LEA16("0x40(%%si)"); 344 TEST_LEA16("0x40(%%di)"); 345 TEST_LEA16("0x4000(%%bx)"); 346 TEST_LEA16("0x4000(%%si)"); 347 TEST_LEA16("(%%bx,%%si)"); 348 TEST_LEA16("(%%bx,%%di)"); 349 TEST_LEA16("0x40(%%bx,%%si)"); 350 TEST_LEA16("0x40(%%bx,%%di)"); 351 TEST_LEA16("0x4000(%%bx,%%si)"); 352 TEST_LEA16("0x4000(%%bx,%%di)"); 353 #endif 354 } 355 356 #define TEST_JCC(JCC, v1, v2)\ 357 {\ 358 int res;\ 359 asm("movl $1, %0\n\t"\ 360 "cmpl %2, %1\n\t"\ 361 "j" JCC " 1f\n\t"\ 362 "movl $0, %0\n\t"\ 363 "1:\n\t"\ 364 : "=r" (res)\ 365 : "r" (v1), "r" (v2));\ 366 printf("%-10s %d\n", "j" JCC, res);\ 367 \ 368 asm("movl $0, %0\n\t"\ 369 "cmpl %2, %1\n\t"\ 370 "set" JCC " %b0\n\t"\ 371 : "=r" (res)\ 372 : "r" (v1), "r" (v2));\ 373 printf("%-10s %d\n", "set" JCC, res);\ 374 if (TEST_CMOV) {\ 375 long val = i2l(1);\ 376 long res = i2l(0x12345678);\ 377 X86_64_ONLY(\ 378 asm("cmpl %2, %1\n\t"\ 379 "cmov" JCC "q %3, %0\n\t"\ 380 : "=r" (res)\ 381 : "r" (v1), "r" (v2), "m" (val), "0" (res));\ 382 printf("%-10s R=" FMTLX "\n", "cmov" JCC "q", res);)\ 383 asm("cmpl %2, %1\n\t"\ 384 "cmov" JCC "l %k3, %k0\n\t"\ 385 : "=r" (res)\ 386 : "r" (v1), "r" (v2), "m" (val), "0" (res));\ 387 printf("%-10s R=" FMTLX "\n", "cmov" JCC "l", res);\ 388 asm("cmpl %2, %1\n\t"\ 389 "cmov" JCC "w %w3, %w0\n\t"\ 390 : "=r" (res)\ 391 : "r" (v1), "r" (v2), "r" (1), "0" (res));\ 392 printf("%-10s R=" FMTLX "\n", "cmov" JCC "w", res);\ 393 } \ 394 } 395 396 /* various jump tests */ 397 void test_jcc(void) 398 { 399 TEST_JCC("ne", 1, 1); 400 TEST_JCC("ne", 1, 0); 401 402 TEST_JCC("e", 1, 1); 403 TEST_JCC("e", 1, 0); 404 405 TEST_JCC("l", 1, 1); 406 TEST_JCC("l", 1, 0); 407 TEST_JCC("l", 1, -1); 408 409 TEST_JCC("le", 1, 1); 410 TEST_JCC("le", 1, 0); 411 TEST_JCC("le", 1, -1); 412 413 TEST_JCC("ge", 1, 1); 414 TEST_JCC("ge", 1, 0); 415 TEST_JCC("ge", -1, 1); 416 417 TEST_JCC("g", 1, 1); 418 TEST_JCC("g", 1, 0); 419 TEST_JCC("g", 1, -1); 420 421 TEST_JCC("b", 1, 1); 422 TEST_JCC("b", 1, 0); 423 TEST_JCC("b", 1, -1); 424 425 TEST_JCC("be", 1, 1); 426 TEST_JCC("be", 1, 0); 427 TEST_JCC("be", 1, -1); 428 429 TEST_JCC("ae", 1, 1); 430 TEST_JCC("ae", 1, 0); 431 TEST_JCC("ae", 1, -1); 432 433 TEST_JCC("a", 1, 1); 434 TEST_JCC("a", 1, 0); 435 TEST_JCC("a", 1, -1); 436 437 438 TEST_JCC("p", 1, 1); 439 TEST_JCC("p", 1, 0); 440 441 TEST_JCC("np", 1, 1); 442 TEST_JCC("np", 1, 0); 443 444 TEST_JCC("o", 0x7fffffff, 0); 445 TEST_JCC("o", 0x7fffffff, -1); 446 447 TEST_JCC("no", 0x7fffffff, 0); 448 TEST_JCC("no", 0x7fffffff, -1); 449 450 TEST_JCC("s", 0, 1); 451 TEST_JCC("s", 0, -1); 452 TEST_JCC("s", 0, 0); 453 454 TEST_JCC("ns", 0, 1); 455 TEST_JCC("ns", 0, 
-1); 456 TEST_JCC("ns", 0, 0); 457 } 458 459 #define TEST_LOOP(insn) \ 460 {\ 461 for(i = 0; i < sizeof(ecx_vals) / sizeof(long); i++) {\ 462 ecx = ecx_vals[i];\ 463 for(zf = 0; zf < 2; zf++) {\ 464 asm("test %2, %2\n\t"\ 465 "movl $1, %0\n\t"\ 466 insn " 1f\n\t" \ 467 "movl $0, %0\n\t"\ 468 "1:\n\t"\ 469 : "=a" (res)\ 470 : "c" (ecx), "b" (!zf)); \ 471 printf("%-10s ECX=" FMTLX " ZF=%ld r=%d\n", insn, ecx, zf, res); \ 472 }\ 473 }\ 474 } 475 476 void test_loop(void) 477 { 478 long ecx, zf; 479 const long ecx_vals[] = { 480 0, 481 1, 482 0x10000, 483 0x10001, 484 #if defined(__x86_64__) 485 0x100000000L, 486 0x100000001L, 487 #endif 488 }; 489 int i, res; 490 491 #if !defined(__x86_64__) 492 TEST_LOOP("jcxz"); 493 TEST_LOOP("loopw"); 494 TEST_LOOP("loopzw"); 495 TEST_LOOP("loopnzw"); 496 #endif 497 498 TEST_LOOP("jecxz"); 499 TEST_LOOP("loopl"); 500 TEST_LOOP("loopzl"); 501 TEST_LOOP("loopnzl"); 502 } 503 504 #undef CC_MASK 505 #ifdef TEST_P4_FLAGS 506 #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) 507 #else 508 #define CC_MASK (CC_O | CC_C) 509 #endif 510 511 #define OP mul 512 #include "test-i386-muldiv.h" 513 514 #define OP imul 515 #include "test-i386-muldiv.h" 516 517 void test_imulw2(long op0, long op1) 518 { 519 long res, s1, s0, flags; 520 s0 = op0; 521 s1 = op1; 522 res = s0; 523 flags = 0; 524 asm volatile ("push %4\n\t" 525 "popf\n\t" 526 "imulw %w2, %w0\n\t" 527 "pushf\n\t" 528 "pop %1\n\t" 529 : "=q" (res), "=g" (flags) 530 : "q" (s1), "0" (res), "1" (flags)); 531 printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n", 532 "imulw", s0, s1, res, flags & CC_MASK); 533 } 534 535 void test_imull2(long op0, long op1) 536 { 537 long res, s1, s0, flags; 538 s0 = op0; 539 s1 = op1; 540 res = s0; 541 flags = 0; 542 asm volatile ("push %4\n\t" 543 "popf\n\t" 544 "imull %k2, %k0\n\t" 545 "pushf\n\t" 546 "pop %1\n\t" 547 : "=q" (res), "=g" (flags) 548 : "q" (s1), "0" (res), "1" (flags)); 549 printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n", 550 "imull", s0, s1, res, flags & CC_MASK); 551 } 552 553 #if defined(__x86_64__) 554 void test_imulq2(long op0, long op1) 555 { 556 long res, s1, s0, flags; 557 s0 = op0; 558 s1 = op1; 559 res = s0; 560 flags = 0; 561 asm volatile ("push %4\n\t" 562 "popf\n\t" 563 "imulq %2, %0\n\t" 564 "pushf\n\t" 565 "pop %1\n\t" 566 : "=q" (res), "=g" (flags) 567 : "q" (s1), "0" (res), "1" (flags)); 568 printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n", 569 "imulq", s0, s1, res, flags & CC_MASK); 570 } 571 #endif 572 573 #define TEST_IMUL_IM(size, rsize, op0, op1)\ 574 {\ 575 long res, flags, s1;\ 576 flags = 0;\ 577 res = 0;\ 578 s1 = op1;\ 579 asm volatile ("push %3\n\t"\ 580 "popf\n\t"\ 581 "imul" size " $" #op0 ", %" rsize "2, %" rsize "0\n\t" \ 582 "pushf\n\t"\ 583 "pop %1\n\t"\ 584 : "=r" (res), "=g" (flags)\ 585 : "r" (s1), "1" (flags), "0" (res));\ 586 printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",\ 587 "imul" size " im", (long)op0, (long)op1, res, flags & CC_MASK);\ 588 } 589 590 591 #undef CC_MASK 592 #define CC_MASK (0) 593 594 #define OP div 595 #include "test-i386-muldiv.h" 596 597 #define OP idiv 598 #include "test-i386-muldiv.h" 599 600 void test_mul(void) 601 { 602 test_imulb(0x1234561d, 4); 603 test_imulb(3, -4); 604 test_imulb(0x80, 0x80); 605 test_imulb(0x10, 0x10); 606 607 test_imulw(0, 0x1234001d, 45); 608 test_imulw(0, 23, -45); 609 test_imulw(0, 0x8000, 0x8000); 610 test_imulw(0, 0x100, 0x100); 611 612 test_imull(0, 0x1234001d, 45); 613 test_imull(0, 23, -45); 614 test_imull(0, 
0x80000000, 0x80000000); 615 test_imull(0, 0x10000, 0x10000); 616 617 test_mulb(0x1234561d, 4); 618 test_mulb(3, -4); 619 test_mulb(0x80, 0x80); 620 test_mulb(0x10, 0x10); 621 622 test_mulw(0, 0x1234001d, 45); 623 test_mulw(0, 23, -45); 624 test_mulw(0, 0x8000, 0x8000); 625 test_mulw(0, 0x100, 0x100); 626 627 test_mull(0, 0x1234001d, 45); 628 test_mull(0, 23, -45); 629 test_mull(0, 0x80000000, 0x80000000); 630 test_mull(0, 0x10000, 0x10000); 631 632 test_imulw2(0x1234001d, 45); 633 test_imulw2(23, -45); 634 test_imulw2(0x8000, 0x8000); 635 test_imulw2(0x100, 0x100); 636 637 test_imull2(0x1234001d, 45); 638 test_imull2(23, -45); 639 test_imull2(0x80000000, 0x80000000); 640 test_imull2(0x10000, 0x10000); 641 642 TEST_IMUL_IM("w", "w", 45, 0x1234); 643 TEST_IMUL_IM("w", "w", -45, 23); 644 TEST_IMUL_IM("w", "w", 0x8000, 0x80000000); 645 TEST_IMUL_IM("w", "w", 0x7fff, 0x1000); 646 647 TEST_IMUL_IM("l", "k", 45, 0x1234); 648 TEST_IMUL_IM("l", "k", -45, 23); 649 TEST_IMUL_IM("l", "k", 0x8000, 0x80000000); 650 TEST_IMUL_IM("l", "k", 0x7fff, 0x1000); 651 652 test_idivb(0x12341678, 0x127e); 653 test_idivb(0x43210123, -5); 654 test_idivb(0x12340004, -1); 655 656 test_idivw(0, 0x12345678, 12347); 657 test_idivw(0, -23223, -45); 658 test_idivw(0, 0x12348000, -1); 659 test_idivw(0x12343, 0x12345678, 0x81238567); 660 661 test_idivl(0, 0x12345678, 12347); 662 test_idivl(0, -233223, -45); 663 test_idivl(0, 0x80000000, -1); 664 test_idivl(0x12343, 0x12345678, 0x81234567); 665 666 test_divb(0x12341678, 0x127e); 667 test_divb(0x43210123, -5); 668 test_divb(0x12340004, -1); 669 670 test_divw(0, 0x12345678, 12347); 671 test_divw(0, -23223, -45); 672 test_divw(0, 0x12348000, -1); 673 test_divw(0x12343, 0x12345678, 0x81238567); 674 675 test_divl(0, 0x12345678, 12347); 676 test_divl(0, -233223, -45); 677 test_divl(0, 0x80000000, -1); 678 test_divl(0x12343, 0x12345678, 0x81234567); 679 680 #if defined(__x86_64__) 681 test_imulq(0, 0x1234001d1234001d, 45); 682 test_imulq(0, 23, -45); 683 test_imulq(0, 0x8000000000000000, 0x8000000000000000); 684 test_imulq(0, 0x100000000, 0x100000000); 685 686 test_mulq(0, 0x1234001d1234001d, 45); 687 test_mulq(0, 23, -45); 688 test_mulq(0, 0x8000000000000000, 0x8000000000000000); 689 test_mulq(0, 0x100000000, 0x100000000); 690 691 test_imulq2(0x1234001d1234001d, 45); 692 test_imulq2(23, -45); 693 test_imulq2(0x8000000000000000, 0x8000000000000000); 694 test_imulq2(0x100000000, 0x100000000); 695 696 TEST_IMUL_IM("q", "", 45, 0x12341234); 697 TEST_IMUL_IM("q", "", -45, 23); 698 TEST_IMUL_IM("q", "", 0x8000, 0x8000000000000000); 699 TEST_IMUL_IM("q", "", 0x7fff, 0x10000000); 700 701 test_idivq(0, 0x12345678abcdef, 12347); 702 test_idivq(0, -233223, -45); 703 test_idivq(0, 0x8000000000000000, -1); 704 test_idivq(0x12343, 0x12345678, 0x81234567); 705 706 test_divq(0, 0x12345678abcdef, 12347); 707 test_divq(0, -233223, -45); 708 test_divq(0, 0x8000000000000000, -1); 709 test_divq(0x12343, 0x12345678, 0x81234567); 710 #endif 711 } 712 713 #define TEST_BSX(op, size, op0)\ 714 {\ 715 long res, val, resz;\ 716 val = op0;\ 717 asm("xor %1, %1\n"\ 718 "mov $0x12345678, %0\n"\ 719 #op " %" size "2, %" size "0 ; setz %b1" \ 720 : "=&r" (res), "=&q" (resz)\ 721 : "r" (val));\ 722 printf("%-10s A=" FMTLX " R=" FMTLX " %ld\n", #op, val, res, resz);\ 723 } 724 725 void test_bsx(void) 726 { 727 TEST_BSX(bsrw, "w", 0); 728 TEST_BSX(bsrw, "w", 0x12340128); 729 TEST_BSX(bsfw, "w", 0); 730 TEST_BSX(bsfw, "w", 0x12340128); 731 TEST_BSX(bsrl, "k", 0); 732 TEST_BSX(bsrl, "k", 0x00340128); 733 TEST_BSX(bsfl, 
"k", 0); 734 TEST_BSX(bsfl, "k", 0x00340128); 735 #if defined(__x86_64__) 736 TEST_BSX(bsrq, "", 0); 737 TEST_BSX(bsrq, "", 0x003401281234); 738 TEST_BSX(bsfq, "", 0); 739 TEST_BSX(bsfq, "", 0x003401281234); 740 #endif 741 } 742 743 /**********************************************/ 744 745 union float64u { 746 double d; 747 uint64_t l; 748 }; 749 750 union float64u q_nan = { .l = 0xFFF8000000000000LL }; 751 union float64u s_nan = { .l = 0xFFF0000000000000LL }; 752 753 void test_fops(double a, double b) 754 { 755 printf("a=%f b=%f a+b=%f\n", a, b, a + b); 756 printf("a=%f b=%f a-b=%f\n", a, b, a - b); 757 printf("a=%f b=%f a*b=%f\n", a, b, a * b); 758 printf("a=%f b=%f a/b=%f\n", a, b, a / b); 759 printf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b)); 760 printf("a=%f sqrt(a)=%f\n", a, sqrt(a)); 761 printf("a=%f sin(a)=%f\n", a, sin(a)); 762 printf("a=%f cos(a)=%f\n", a, cos(a)); 763 printf("a=%f tan(a)=%f\n", a, tan(a)); 764 printf("a=%f log(a)=%f\n", a, log(a)); 765 printf("a=%f exp(a)=%f\n", a, exp(a)); 766 printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b)); 767 /* just to test some op combining */ 768 printf("a=%f asin(sin(a))=%f\n", a, asin(sin(a))); 769 printf("a=%f acos(cos(a))=%f\n", a, acos(cos(a))); 770 printf("a=%f atan(tan(a))=%f\n", a, atan(tan(a))); 771 772 } 773 774 void fpu_clear_exceptions(void) 775 { 776 struct QEMU_PACKED { 777 uint16_t fpuc; 778 uint16_t dummy1; 779 uint16_t fpus; 780 uint16_t dummy2; 781 uint16_t fptag; 782 uint16_t dummy3; 783 uint32_t ignored[4]; 784 long double fpregs[8]; 785 } float_env32; 786 787 asm volatile ("fnstenv %0\n" : "=m" (float_env32)); 788 float_env32.fpus &= ~0x7f; 789 asm volatile ("fldenv %0\n" : : "m" (float_env32)); 790 } 791 792 /* XXX: display exception bits when supported */ 793 #define FPUS_EMASK 0x0000 794 //#define FPUS_EMASK 0x007f 795 796 void test_fcmp(double a, double b) 797 { 798 long eflags, fpus; 799 800 fpu_clear_exceptions(); 801 asm("fcom %2\n" 802 "fstsw %%ax\n" 803 : "=a" (fpus) 804 : "t" (a), "u" (b)); 805 printf("fcom(%f %f)=%04lx\n", 806 a, b, fpus & (0x4500 | FPUS_EMASK)); 807 fpu_clear_exceptions(); 808 asm("fucom %2\n" 809 "fstsw %%ax\n" 810 : "=a" (fpus) 811 : "t" (a), "u" (b)); 812 printf("fucom(%f %f)=%04lx\n", 813 a, b, fpus & (0x4500 | FPUS_EMASK)); 814 if (TEST_FCOMI) { 815 /* test f(u)comi instruction */ 816 fpu_clear_exceptions(); 817 asm("fcomi %3, %2\n" 818 "fstsw %%ax\n" 819 "pushf\n" 820 "pop %0\n" 821 : "=r" (eflags), "=a" (fpus) 822 : "t" (a), "u" (b)); 823 printf("fcomi(%f %f)=%04lx %02lx\n", 824 a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C)); 825 fpu_clear_exceptions(); 826 asm("fucomi %3, %2\n" 827 "fstsw %%ax\n" 828 "pushf\n" 829 "pop %0\n" 830 : "=r" (eflags), "=a" (fpus) 831 : "t" (a), "u" (b)); 832 printf("fucomi(%f %f)=%04lx %02lx\n", 833 a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C)); 834 } 835 fpu_clear_exceptions(); 836 asm volatile("fxam\n" 837 "fstsw %%ax\n" 838 : "=a" (fpus) 839 : "t" (a)); 840 printf("fxam(%f)=%04lx\n", a, fpus & 0x4700); 841 fpu_clear_exceptions(); 842 } 843 844 void test_fcvt(double a) 845 { 846 float fa; 847 long double la; 848 int16_t fpuc; 849 int i; 850 int64_t lla; 851 int ia; 852 int16_t wa; 853 double ra; 854 855 fa = a; 856 la = a; 857 printf("(float)%f = %f\n", a, fa); 858 printf("(long double)%f = %Lf\n", a, la); 859 printf("a=" FMT64X "\n", *(uint64_t *)&a); 860 printf("la=" FMT64X " %04x\n", *(uint64_t *)&la, 861 *(unsigned short *)((char *)(&la) + 8)); 862 863 /* test all roundings */ 864 asm volatile ("fstcw %0" : "=m" 
(fpuc)); 865 for(i=0;i<4;i++) { 866 uint16_t val16; 867 val16 = (fpuc & ~0x0c00) | (i << 10); 868 asm volatile ("fldcw %0" : : "m" (val16)); 869 asm volatile ("fist %0" : "=m" (wa) : "t" (a)); 870 asm volatile ("fistl %0" : "=m" (ia) : "t" (a)); 871 asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st"); 872 asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a)); 873 asm volatile ("fldcw %0" : : "m" (fpuc)); 874 printf("(short)a = %d\n", wa); 875 printf("(int)a = %d\n", ia); 876 printf("(int64_t)a = " FMT64X "\n", lla); 877 printf("rint(a) = %f\n", ra); 878 } 879 } 880 881 #define TEST(N) \ 882 asm("fld" #N : "=t" (a)); \ 883 printf("fld" #N "= %f\n", a); 884 885 void test_fconst(void) 886 { 887 double a; 888 TEST(1); 889 TEST(l2t); 890 TEST(l2e); 891 TEST(pi); 892 TEST(lg2); 893 TEST(ln2); 894 TEST(z); 895 } 896 897 void test_fbcd(double a) 898 { 899 unsigned short bcd[5]; 900 double b; 901 902 asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st"); 903 asm("fbld %1" : "=t" (b) : "m" (bcd[0])); 904 printf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n", 905 a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b); 906 } 907 908 #define TEST_ENV(env, save, restore)\ 909 {\ 910 memset((env), 0xaa, sizeof(*(env)));\ 911 for(i=0;i<5;i++)\ 912 asm volatile ("fldl %0" : : "m" (dtab[i]));\ 913 asm volatile (save " %0\n" : : "m" (*(env)));\ 914 asm volatile (restore " %0\n": : "m" (*(env)));\ 915 for(i=0;i<5;i++)\ 916 asm volatile ("fstpl %0" : "=m" (rtab[i]));\ 917 for(i=0;i<5;i++)\ 918 printf("res[%d]=%f\n", i, rtab[i]);\ 919 printf("fpuc=%04x fpus=%04x fptag=%04x\n",\ 920 (env)->fpuc,\ 921 (env)->fpus & 0xff00,\ 922 (env)->fptag);\ 923 } 924 925 void test_fenv(void) 926 { 927 struct __attribute__((__packed__)) { 928 uint16_t fpuc; 929 uint16_t dummy1; 930 uint16_t fpus; 931 uint16_t dummy2; 932 uint16_t fptag; 933 uint16_t dummy3; 934 uint32_t ignored[4]; 935 long double fpregs[8]; 936 } float_env32; 937 struct __attribute__((__packed__)) { 938 uint16_t fpuc; 939 uint16_t fpus; 940 uint16_t fptag; 941 uint16_t ignored[4]; 942 long double fpregs[8]; 943 } float_env16; 944 double dtab[8]; 945 double rtab[8]; 946 int i; 947 948 for(i=0;i<8;i++) 949 dtab[i] = i + 1; 950 951 TEST_ENV(&float_env16, "data16 fnstenv", "data16 fldenv"); 952 TEST_ENV(&float_env16, "data16 fnsave", "data16 frstor"); 953 TEST_ENV(&float_env32, "fnstenv", "fldenv"); 954 TEST_ENV(&float_env32, "fnsave", "frstor"); 955 956 /* test for ffree */ 957 for(i=0;i<5;i++) 958 asm volatile ("fldl %0" : : "m" (dtab[i])); 959 asm volatile("ffree %st(2)"); 960 asm volatile ("fnstenv %0\n" : : "m" (float_env32)); 961 asm volatile ("fninit"); 962 printf("fptag=%04x\n", float_env32.fptag); 963 } 964 965 966 #define TEST_FCMOV(a, b, eflags, CC)\ 967 {\ 968 double res;\ 969 asm("push %3\n"\ 970 "popf\n"\ 971 "fcmov" CC " %2, %0\n"\ 972 : "=t" (res)\ 973 : "0" (a), "u" (b), "g" (eflags));\ 974 printf("fcmov%s eflags=0x%04lx-> %f\n", \ 975 CC, (long)eflags, res);\ 976 } 977 978 void test_fcmov(void) 979 { 980 double a, b; 981 long eflags, i; 982 983 a = 1.0; 984 b = 2.0; 985 for(i = 0; i < 4; i++) { 986 eflags = 0; 987 if (i & 1) 988 eflags |= CC_C; 989 if (i & 2) 990 eflags |= CC_Z; 991 TEST_FCMOV(a, b, eflags, "b"); 992 TEST_FCMOV(a, b, eflags, "e"); 993 TEST_FCMOV(a, b, eflags, "be"); 994 TEST_FCMOV(a, b, eflags, "nb"); 995 TEST_FCMOV(a, b, eflags, "ne"); 996 TEST_FCMOV(a, b, eflags, "nbe"); 997 } 998 TEST_FCMOV(a, b, 0, "u"); 999 TEST_FCMOV(a, b, CC_P, "u"); 1000 TEST_FCMOV(a, b, 0, "nu"); 1001 TEST_FCMOV(a, b, CC_P, "nu"); 1002 } 1003 1004 void 
test_floats(void) 1005 { 1006 test_fops(2, 3); 1007 test_fops(1.4, -5); 1008 test_fcmp(2, -1); 1009 test_fcmp(2, 2); 1010 test_fcmp(2, 3); 1011 test_fcmp(2, q_nan.d); 1012 test_fcmp(q_nan.d, -1); 1013 test_fcmp(-1.0/0.0, -1); 1014 test_fcmp(1.0/0.0, -1); 1015 test_fcvt(0.5); 1016 test_fcvt(-0.5); 1017 test_fcvt(1.0/7.0); 1018 test_fcvt(-1.0/9.0); 1019 test_fcvt(32768); 1020 test_fcvt(-1e20); 1021 test_fcvt(-1.0/0.0); 1022 test_fcvt(1.0/0.0); 1023 test_fcvt(q_nan.d); 1024 test_fconst(); 1025 test_fbcd(1234567890123456.0); 1026 test_fbcd(-123451234567890.0); 1027 test_fenv(); 1028 if (TEST_CMOV) { 1029 test_fcmov(); 1030 } 1031 } 1032 1033 /**********************************************/ 1034 #if !defined(__x86_64__) 1035 1036 #define TEST_BCD(op, op0, cc_in, cc_mask)\ 1037 {\ 1038 int res, flags;\ 1039 res = op0;\ 1040 flags = cc_in;\ 1041 asm ("push %3\n\t"\ 1042 "popf\n\t"\ 1043 #op "\n\t"\ 1044 "pushf\n\t"\ 1045 "pop %1\n\t"\ 1046 : "=a" (res), "=g" (flags)\ 1047 : "0" (res), "1" (flags));\ 1048 printf("%-10s A=%08x R=%08x CCIN=%04x CC=%04x\n",\ 1049 #op, op0, res, cc_in, flags & cc_mask);\ 1050 } 1051 1052 void test_bcd(void) 1053 { 1054 TEST_BCD(daa, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1055 TEST_BCD(daa, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1056 TEST_BCD(daa, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1057 TEST_BCD(daa, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1058 TEST_BCD(daa, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1059 TEST_BCD(daa, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1060 TEST_BCD(daa, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1061 TEST_BCD(daa, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1062 TEST_BCD(daa, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1063 TEST_BCD(daa, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1064 TEST_BCD(daa, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1065 TEST_BCD(daa, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1066 TEST_BCD(daa, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1067 1068 TEST_BCD(das, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1069 TEST_BCD(das, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1070 TEST_BCD(das, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1071 TEST_BCD(das, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1072 TEST_BCD(das, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1073 TEST_BCD(das, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1074 TEST_BCD(das, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1075 TEST_BCD(das, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1076 TEST_BCD(das, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1077 TEST_BCD(das, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1078 TEST_BCD(das, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1079 TEST_BCD(das, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1080 TEST_BCD(das, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); 1081 1082 TEST_BCD(aaa, 0x12340205, CC_A, (CC_C | CC_A)); 1083 TEST_BCD(aaa, 0x12340306, CC_A, (CC_C | CC_A)); 1084 TEST_BCD(aaa, 0x1234040a, CC_A, (CC_C | CC_A)); 1085 TEST_BCD(aaa, 0x123405fa, CC_A, (CC_C | CC_A)); 1086 TEST_BCD(aaa, 0x12340205, 0, (CC_C | CC_A)); 1087 TEST_BCD(aaa, 0x12340306, 0, (CC_C | CC_A)); 1088 TEST_BCD(aaa, 0x1234040a, 0, (CC_C | CC_A)); 1089 TEST_BCD(aaa, 0x123405fa, 0, (CC_C | CC_A)); 1090 1091 TEST_BCD(aas, 0x12340205, 
CC_A, (CC_C | CC_A)); 1092 TEST_BCD(aas, 0x12340306, CC_A, (CC_C | CC_A)); 1093 TEST_BCD(aas, 0x1234040a, CC_A, (CC_C | CC_A)); 1094 TEST_BCD(aas, 0x123405fa, CC_A, (CC_C | CC_A)); 1095 TEST_BCD(aas, 0x12340205, 0, (CC_C | CC_A)); 1096 TEST_BCD(aas, 0x12340306, 0, (CC_C | CC_A)); 1097 TEST_BCD(aas, 0x1234040a, 0, (CC_C | CC_A)); 1098 TEST_BCD(aas, 0x123405fa, 0, (CC_C | CC_A)); 1099 1100 TEST_BCD(aam, 0x12340547, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)); 1101 TEST_BCD(aad, 0x12340407, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)); 1102 } 1103 #endif 1104 1105 #define TEST_XCHG(op, size, opconst)\ 1106 {\ 1107 long op0, op1;\ 1108 op0 = i2l(0x12345678);\ 1109 op1 = i2l(0xfbca7654);\ 1110 asm(#op " %" size "0, %" size "1" \ 1111 : "=q" (op0), opconst (op1) \ 1112 : "0" (op0));\ 1113 printf("%-10s A=" FMTLX " B=" FMTLX "\n",\ 1114 #op, op0, op1);\ 1115 } 1116 1117 #define TEST_CMPXCHG(op, size, opconst, eax)\ 1118 {\ 1119 long op0, op1, op2;\ 1120 op0 = i2l(0x12345678);\ 1121 op1 = i2l(0xfbca7654);\ 1122 op2 = i2l(eax);\ 1123 asm(#op " %" size "0, %" size "1" \ 1124 : "=q" (op0), opconst (op1) \ 1125 : "0" (op0), "a" (op2));\ 1126 printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX "\n",\ 1127 #op, op2, op0, op1);\ 1128 } 1129 1130 void test_xchg(void) 1131 { 1132 #if defined(__x86_64__) 1133 TEST_XCHG(xchgq, "", "+q"); 1134 #endif 1135 TEST_XCHG(xchgl, "k", "+q"); 1136 TEST_XCHG(xchgw, "w", "+q"); 1137 TEST_XCHG(xchgb, "b", "+q"); 1138 1139 #if defined(__x86_64__) 1140 TEST_XCHG(xchgq, "", "=m"); 1141 #endif 1142 TEST_XCHG(xchgl, "k", "+m"); 1143 TEST_XCHG(xchgw, "w", "+m"); 1144 TEST_XCHG(xchgb, "b", "+m"); 1145 1146 #if defined(__x86_64__) 1147 TEST_XCHG(xaddq, "", "+q"); 1148 #endif 1149 TEST_XCHG(xaddl, "k", "+q"); 1150 TEST_XCHG(xaddw, "w", "+q"); 1151 TEST_XCHG(xaddb, "b", "+q"); 1152 1153 { 1154 int res; 1155 res = 0x12345678; 1156 asm("xaddl %1, %0" : "=r" (res) : "0" (res)); 1157 printf("xaddl same res=%08x\n", res); 1158 } 1159 1160 #if defined(__x86_64__) 1161 TEST_XCHG(xaddq, "", "+m"); 1162 #endif 1163 TEST_XCHG(xaddl, "k", "+m"); 1164 TEST_XCHG(xaddw, "w", "+m"); 1165 TEST_XCHG(xaddb, "b", "+m"); 1166 1167 #if defined(__x86_64__) 1168 TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfbca7654); 1169 #endif 1170 TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfbca7654); 1171 TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfbca7654); 1172 TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfbca7654); 1173 1174 #if defined(__x86_64__) 1175 TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfffefdfc); 1176 #endif 1177 TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfffefdfc); 1178 TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfffefdfc); 1179 TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfffefdfc); 1180 1181 #if defined(__x86_64__) 1182 TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfbca7654); 1183 #endif 1184 TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfbca7654); 1185 TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfbca7654); 1186 TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfbca7654); 1187 1188 #if defined(__x86_64__) 1189 TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfffefdfc); 1190 #endif 1191 TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfffefdfc); 1192 TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfffefdfc); 1193 TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfffefdfc); 1194 1195 { 1196 uint64_t op0, op1, op2; 1197 long eax, edx; 1198 long i, eflags; 1199 1200 for(i = 0; i < 2; i++) { 1201 op0 = 0x123456789abcdLL; 1202 eax = i2l(op0 & 0xffffffff); 1203 edx = i2l(op0 >> 32); 1204 if (i == 0) 1205 op1 = 0xfbca765423456LL; 1206 else 1207 op1 = op0; 1208 op2 = 0x6532432432434LL; 1209 asm("cmpxchg8b %2\n" 1210 "pushf\n" 1211 "pop %3\n" 1212 : "=a" (eax), 
"=d" (edx), "=m" (op1), "=g" (eflags) 1213 : "0" (eax), "1" (edx), "m" (op1), "b" ((int)op2), "c" ((int)(op2 >> 32))); 1214 printf("cmpxchg8b: eax=" FMTLX " edx=" FMTLX " op1=" FMT64X " CC=%02lx\n", 1215 eax, edx, op1, eflags & CC_Z); 1216 } 1217 } 1218 } 1219 1220 #ifdef TEST_SEGS 1221 /**********************************************/ 1222 /* segmentation tests */ 1223 1224 #include <sys/syscall.h> 1225 #include <unistd.h> 1226 #include <asm/ldt.h> 1227 #include <linux/version.h> 1228 1229 static inline int modify_ldt(int func, void * ptr, unsigned long bytecount) 1230 { 1231 return syscall(__NR_modify_ldt, func, ptr, bytecount); 1232 } 1233 1234 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66) 1235 #define modify_ldt_ldt_s user_desc 1236 #endif 1237 1238 #define MK_SEL(n) (((n) << 3) | 7) 1239 1240 uint8_t seg_data1[4096]; 1241 uint8_t seg_data2[4096]; 1242 1243 #define TEST_LR(op, size, seg, mask)\ 1244 {\ 1245 int res, res2;\ 1246 uint16_t mseg = seg;\ 1247 res = 0x12345678;\ 1248 asm (op " %" size "2, %" size "0\n" \ 1249 "movl $0, %1\n"\ 1250 "jnz 1f\n"\ 1251 "movl $1, %1\n"\ 1252 "1:\n"\ 1253 : "=r" (res), "=r" (res2) : "m" (mseg), "0" (res));\ 1254 printf(op ": Z=%d %08x\n", res2, res & ~(mask));\ 1255 } 1256 1257 #define TEST_ARPL(op, size, op1, op2)\ 1258 {\ 1259 long a, b, c; \ 1260 a = (op1); \ 1261 b = (op2); \ 1262 asm volatile(op " %" size "3, %" size "0\n"\ 1263 "movl $0,%1\n"\ 1264 "jnz 1f\n"\ 1265 "movl $1,%1\n"\ 1266 "1:\n"\ 1267 : "=r" (a), "=r" (c) : "0" (a), "r" (b)); \ 1268 printf(op size " A=" FMTLX " B=" FMTLX " R=" FMTLX " z=%ld\n",\ 1269 (long)(op1), (long)(op2), a, c);\ 1270 } 1271 1272 /* NOTE: we use Linux modify_ldt syscall */ 1273 void test_segs(void) 1274 { 1275 struct modify_ldt_ldt_s ldt; 1276 long long ldt_table[3]; 1277 int res, res2; 1278 char tmp; 1279 struct { 1280 uint32_t offset; 1281 uint16_t seg; 1282 } __attribute__((__packed__)) segoff; 1283 1284 ldt.entry_number = 1; 1285 ldt.base_addr = (unsigned long)&seg_data1; 1286 ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12; 1287 ldt.seg_32bit = 1; 1288 ldt.contents = MODIFY_LDT_CONTENTS_DATA; 1289 ldt.read_exec_only = 0; 1290 ldt.limit_in_pages = 1; 1291 ldt.seg_not_present = 0; 1292 ldt.useable = 1; 1293 modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ 1294 1295 ldt.entry_number = 2; 1296 ldt.base_addr = (unsigned long)&seg_data2; 1297 ldt.limit = (sizeof(seg_data2) + 0xfff) >> 12; 1298 ldt.seg_32bit = 1; 1299 ldt.contents = MODIFY_LDT_CONTENTS_DATA; 1300 ldt.read_exec_only = 0; 1301 ldt.limit_in_pages = 1; 1302 ldt.seg_not_present = 0; 1303 ldt.useable = 1; 1304 modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ 1305 1306 modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */ 1307 #if 0 1308 { 1309 int i; 1310 for(i=0;i<3;i++) 1311 printf("%d: %016Lx\n", i, ldt_table[i]); 1312 } 1313 #endif 1314 /* do some tests with fs or gs */ 1315 asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1))); 1316 1317 seg_data1[1] = 0xaa; 1318 seg_data2[1] = 0x55; 1319 1320 asm volatile ("fs movzbl 0x1, %0" : "=r" (res)); 1321 printf("FS[1] = %02x\n", res); 1322 1323 asm volatile ("pushl %%gs\n" 1324 "movl %1, %%gs\n" 1325 "gs movzbl 0x1, %0\n" 1326 "popl %%gs\n" 1327 : "=r" (res) 1328 : "r" (MK_SEL(2))); 1329 printf("GS[1] = %02x\n", res); 1330 1331 /* tests with ds/ss (implicit segment case) */ 1332 tmp = 0xa5; 1333 asm volatile ("pushl %%ebp\n\t" 1334 "pushl %%ds\n\t" 1335 "movl %2, %%ds\n\t" 1336 "movl %3, %%ebp\n\t" 1337 "movzbl 0x1, %0\n\t" 1338 "movzbl (%%ebp), %1\n\t" 1339 "popl %%ds\n\t" 
1340 "popl %%ebp\n\t" 1341 : "=r" (res), "=r" (res2) 1342 : "r" (MK_SEL(1)), "r" (&tmp)); 1343 printf("DS[1] = %02x\n", res); 1344 printf("SS[tmp] = %02x\n", res2); 1345 1346 segoff.seg = MK_SEL(2); 1347 segoff.offset = 0xabcdef12; 1348 asm volatile("lfs %2, %0\n\t" 1349 "movl %%fs, %1\n\t" 1350 : "=r" (res), "=g" (res2) 1351 : "m" (segoff)); 1352 printf("FS:reg = %04x:%08x\n", res2, res); 1353 1354 TEST_LR("larw", "w", MK_SEL(2), 0x0100); 1355 TEST_LR("larl", "", MK_SEL(2), 0x0100); 1356 TEST_LR("lslw", "w", MK_SEL(2), 0); 1357 TEST_LR("lsll", "", MK_SEL(2), 0); 1358 1359 TEST_LR("larw", "w", 0xfff8, 0); 1360 TEST_LR("larl", "", 0xfff8, 0); 1361 TEST_LR("lslw", "w", 0xfff8, 0); 1362 TEST_LR("lsll", "", 0xfff8, 0); 1363 1364 TEST_ARPL("arpl", "w", 0x12345678 | 3, 0x762123c | 1); 1365 TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 3); 1366 TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 1); 1367 } 1368 1369 /* 16 bit code test */ 1370 extern char code16_start, code16_end; 1371 extern char code16_func1; 1372 extern char code16_func2; 1373 extern char code16_func3; 1374 1375 void test_code16(void) 1376 { 1377 struct modify_ldt_ldt_s ldt; 1378 int res, res2; 1379 1380 /* build a code segment */ 1381 ldt.entry_number = 1; 1382 ldt.base_addr = (unsigned long)&code16_start; 1383 ldt.limit = &code16_end - &code16_start; 1384 ldt.seg_32bit = 0; 1385 ldt.contents = MODIFY_LDT_CONTENTS_CODE; 1386 ldt.read_exec_only = 0; 1387 ldt.limit_in_pages = 0; 1388 ldt.seg_not_present = 0; 1389 ldt.useable = 1; 1390 modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ 1391 1392 /* call the first function */ 1393 asm volatile ("lcall %1, %2" 1394 : "=a" (res) 1395 : "i" (MK_SEL(1)), "i" (&code16_func1): "memory", "cc"); 1396 printf("func1() = 0x%08x\n", res); 1397 asm volatile ("lcall %2, %3" 1398 : "=a" (res), "=c" (res2) 1399 : "i" (MK_SEL(1)), "i" (&code16_func2): "memory", "cc"); 1400 printf("func2() = 0x%08x spdec=%d\n", res, res2); 1401 asm volatile ("lcall %1, %2" 1402 : "=a" (res) 1403 : "i" (MK_SEL(1)), "i" (&code16_func3): "memory", "cc"); 1404 printf("func3() = 0x%08x\n", res); 1405 } 1406 #endif 1407 1408 #if defined(__x86_64__) 1409 asm(".globl func_lret\n" 1410 "func_lret:\n" 1411 "movl $0x87654641, %eax\n" 1412 "lretq\n"); 1413 #else 1414 asm(".globl func_lret\n" 1415 "func_lret:\n" 1416 "movl $0x87654321, %eax\n" 1417 "lret\n" 1418 1419 ".globl func_iret\n" 1420 "func_iret:\n" 1421 "movl $0xabcd4321, %eax\n" 1422 "iret\n"); 1423 #endif 1424 1425 extern char func_lret; 1426 extern char func_iret; 1427 1428 void test_misc(void) 1429 { 1430 char table[256]; 1431 long res, i; 1432 1433 for(i=0;i<256;i++) table[i] = 256 - i; 1434 res = 0x12345678; 1435 asm ("xlat" : "=a" (res) : "b" (table), "0" (res)); 1436 printf("xlat: EAX=" FMTLX "\n", res); 1437 1438 #if defined(__x86_64__) 1439 #if 0 1440 { 1441 /* XXX: see if Intel Core2 and AMD64 behavior really 1442 differ. Here we implemented the Intel way which is not 1443 compatible yet with QEMU. 
*/ 1444 static struct QEMU_PACKED { 1445 uint64_t offset; 1446 uint16_t seg; 1447 } desc; 1448 long cs_sel; 1449 1450 asm volatile ("mov %%cs, %0" : "=r" (cs_sel)); 1451 1452 asm volatile ("push %1\n" 1453 "call func_lret\n" 1454 : "=a" (res) 1455 : "r" (cs_sel) : "memory", "cc"); 1456 printf("func_lret=" FMTLX "\n", res); 1457 1458 desc.offset = (long)&func_lret; 1459 desc.seg = cs_sel; 1460 1461 asm volatile ("xor %%rax, %%rax\n" 1462 "rex64 lcall *(%%rcx)\n" 1463 : "=a" (res) 1464 : "c" (&desc) 1465 : "memory", "cc"); 1466 printf("func_lret2=" FMTLX "\n", res); 1467 1468 asm volatile ("push %2\n" 1469 "mov $ 1f, %%rax\n" 1470 "push %%rax\n" 1471 "rex64 ljmp *(%%rcx)\n" 1472 "1:\n" 1473 : "=a" (res) 1474 : "c" (&desc), "b" (cs_sel) 1475 : "memory", "cc"); 1476 printf("func_lret3=" FMTLX "\n", res); 1477 } 1478 #endif 1479 #else 1480 asm volatile ("push %%cs ; call %1" 1481 : "=a" (res) 1482 : "m" (func_lret): "memory", "cc"); 1483 printf("func_lret=" FMTLX "\n", res); 1484 1485 asm volatile ("pushf ; push %%cs ; call %1" 1486 : "=a" (res) 1487 : "m" (func_iret): "memory", "cc"); 1488 printf("func_iret=" FMTLX "\n", res); 1489 #endif 1490 1491 #if defined(__x86_64__) 1492 /* specific popl test */ 1493 asm volatile ("push $12345432 ; push $0x9abcdef ; pop (%%rsp) ; pop %0" 1494 : "=g" (res)); 1495 printf("popl esp=" FMTLX "\n", res); 1496 #else 1497 /* specific popl test */ 1498 asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popl (%%esp) ; popl %0" 1499 : "=g" (res)); 1500 printf("popl esp=" FMTLX "\n", res); 1501 1502 /* specific popw test */ 1503 asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popw (%%esp) ; addl $2, %%esp ; popl %0" 1504 : "=g" (res)); 1505 printf("popw esp=" FMTLX "\n", res); 1506 #endif 1507 } 1508 1509 uint8_t str_buffer[4096]; 1510 1511 #define TEST_STRING1(OP, size, DF, REP)\ 1512 {\ 1513 long esi, edi, eax, ecx, eflags;\ 1514 \ 1515 esi = (long)(str_buffer + sizeof(str_buffer) / 2);\ 1516 edi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\ 1517 eax = i2l(0x12345678);\ 1518 ecx = 17;\ 1519 \ 1520 asm volatile ("push $0\n\t"\ 1521 "popf\n\t"\ 1522 DF "\n\t"\ 1523 REP #OP size "\n\t"\ 1524 "cld\n\t"\ 1525 "pushf\n\t"\ 1526 "pop %4\n\t"\ 1527 : "=S" (esi), "=D" (edi), "=a" (eax), "=c" (ecx), "=g" (eflags)\ 1528 : "0" (esi), "1" (edi), "2" (eax), "3" (ecx));\ 1529 printf("%-10s ESI=" FMTLX " EDI=" FMTLX " EAX=" FMTLX " ECX=" FMTLX " EFL=%04x\n",\ 1530 REP #OP size, esi, edi, eax, ecx,\ 1531 (int)(eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)));\ 1532 } 1533 1534 #define TEST_STRING(OP, REP)\ 1535 TEST_STRING1(OP, "b", "", REP);\ 1536 TEST_STRING1(OP, "w", "", REP);\ 1537 TEST_STRING1(OP, "l", "", REP);\ 1538 X86_64_ONLY(TEST_STRING1(OP, "q", "", REP));\ 1539 TEST_STRING1(OP, "b", "std", REP);\ 1540 TEST_STRING1(OP, "w", "std", REP);\ 1541 TEST_STRING1(OP, "l", "std", REP);\ 1542 X86_64_ONLY(TEST_STRING1(OP, "q", "std", REP)) 1543 1544 void test_string(void) 1545 { 1546 int i; 1547 for(i = 0;i < sizeof(str_buffer); i++) 1548 str_buffer[i] = i + 0x56; 1549 TEST_STRING(stos, ""); 1550 TEST_STRING(stos, "rep "); 1551 TEST_STRING(lods, ""); /* to verify stos */ 1552 TEST_STRING(lods, "rep "); 1553 TEST_STRING(movs, ""); 1554 TEST_STRING(movs, "rep "); 1555 TEST_STRING(lods, ""); /* to verify stos */ 1556 1557 /* XXX: better tests */ 1558 TEST_STRING(scas, ""); 1559 TEST_STRING(scas, "repz "); 1560 TEST_STRING(scas, "repnz "); 1561 TEST_STRING(cmps, ""); 1562 TEST_STRING(cmps, "repz "); 1563 TEST_STRING(cmps, "repnz "); 1564 } 1565 1566 #ifdef TEST_VM86 
1567 /* VM86 test */ 1568 1569 static inline void set_bit(uint8_t *a, unsigned int bit) 1570 { 1571 a[bit / 8] |= (1 << (bit % 8)); 1572 } 1573 1574 static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg) 1575 { 1576 return (uint8_t *)((seg << 4) + (reg & 0xffff)); 1577 } 1578 1579 static inline void pushw(struct vm86_regs *r, int val) 1580 { 1581 r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff); 1582 *(uint16_t *)seg_to_linear(r->ss, r->esp) = val; 1583 } 1584 1585 static inline int vm86(int func, struct vm86plus_struct *v86) 1586 { 1587 return syscall(__NR_vm86, func, v86); 1588 } 1589 1590 extern char vm86_code_start; 1591 extern char vm86_code_end; 1592 1593 #define VM86_CODE_CS 0x100 1594 #define VM86_CODE_IP 0x100 1595 1596 void test_vm86(void) 1597 { 1598 struct vm86plus_struct ctx; 1599 struct vm86_regs *r; 1600 uint8_t *vm86_mem; 1601 int seg, ret; 1602 1603 vm86_mem = mmap((void *)0x00000000, 0x110000, 1604 PROT_WRITE | PROT_READ | PROT_EXEC, 1605 MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); 1606 if (vm86_mem == MAP_FAILED) { 1607 printf("ERROR: could not map vm86 memory"); 1608 return; 1609 } 1610 memset(&ctx, 0, sizeof(ctx)); 1611 1612 /* init basic registers */ 1613 r = &ctx.regs; 1614 r->eip = VM86_CODE_IP; 1615 r->esp = 0xfffe; 1616 seg = VM86_CODE_CS; 1617 r->cs = seg; 1618 r->ss = seg; 1619 r->ds = seg; 1620 r->es = seg; 1621 r->fs = seg; 1622 r->gs = seg; 1623 r->eflags = VIF_MASK; 1624 1625 /* move code to proper address. We use the same layout as a .com 1626 dos program. */ 1627 memcpy(vm86_mem + (VM86_CODE_CS << 4) + VM86_CODE_IP, 1628 &vm86_code_start, &vm86_code_end - &vm86_code_start); 1629 1630 /* mark int 0x21 as being emulated */ 1631 set_bit((uint8_t *)&ctx.int_revectored, 0x21); 1632 1633 for(;;) { 1634 ret = vm86(VM86_ENTER, &ctx); 1635 switch(VM86_TYPE(ret)) { 1636 case VM86_INTx: 1637 { 1638 int int_num, ah, v; 1639 1640 int_num = VM86_ARG(ret); 1641 if (int_num != 0x21) 1642 goto unknown_int; 1643 ah = (r->eax >> 8) & 0xff; 1644 switch(ah) { 1645 case 0x00: /* exit */ 1646 goto the_end; 1647 case 0x02: /* write char */ 1648 { 1649 uint8_t c = r->edx; 1650 putchar(c); 1651 } 1652 break; 1653 case 0x09: /* write string */ 1654 { 1655 uint8_t c, *ptr; 1656 ptr = seg_to_linear(r->ds, r->edx); 1657 for(;;) { 1658 c = *ptr++; 1659 if (c == '$') 1660 break; 1661 putchar(c); 1662 } 1663 r->eax = (r->eax & ~0xff) | '$'; 1664 } 1665 break; 1666 case 0xff: /* extension: write eflags number in edx */ 1667 v = (int)r->edx; 1668 #ifndef LINUX_VM86_IOPL_FIX 1669 v &= ~0x3000; 1670 #endif 1671 printf("%08x\n", v); 1672 break; 1673 default: 1674 unknown_int: 1675 printf("unsupported int 0x%02x\n", int_num); 1676 goto the_end; 1677 } 1678 } 1679 break; 1680 case VM86_SIGNAL: 1681 /* a signal came, we just ignore that */ 1682 break; 1683 case VM86_STI: 1684 break; 1685 default: 1686 printf("ERROR: unhandled vm86 return code (0x%x)\n", ret); 1687 goto the_end; 1688 } 1689 } 1690 the_end: 1691 printf("VM86 end\n"); 1692 munmap(vm86_mem, 0x110000); 1693 } 1694 #endif 1695 1696 /* exception tests */ 1697 #if defined(__i386__) && !defined(REG_EAX) 1698 #define REG_EAX EAX 1699 #define REG_EBX EBX 1700 #define REG_ECX ECX 1701 #define REG_EDX EDX 1702 #define REG_ESI ESI 1703 #define REG_EDI EDI 1704 #define REG_EBP EBP 1705 #define REG_ESP ESP 1706 #define REG_EIP EIP 1707 #define REG_EFL EFL 1708 #define REG_TRAPNO TRAPNO 1709 #define REG_ERR ERR 1710 #endif 1711 1712 #if defined(__x86_64__) 1713 #define REG_EIP REG_RIP 1714 #endif 1715 1716 jmp_buf jmp_env; 
1717 int v1; 1718 int tab[2]; 1719 1720 void sig_handler(int sig, siginfo_t *info, void *puc) 1721 { 1722 ucontext_t *uc = puc; 1723 1724 printf("si_signo=%d si_errno=%d si_code=%d", 1725 info->si_signo, info->si_errno, info->si_code); 1726 printf(" si_addr=0x%08lx", 1727 (unsigned long)info->si_addr); 1728 printf("\n"); 1729 1730 printf("trapno=" FMTLX " err=" FMTLX, 1731 (long)uc->uc_mcontext.gregs[REG_TRAPNO], 1732 (long)uc->uc_mcontext.gregs[REG_ERR]); 1733 printf(" EIP=" FMTLX, (long)uc->uc_mcontext.gregs[REG_EIP]); 1734 printf("\n"); 1735 longjmp(jmp_env, 1); 1736 } 1737 1738 void test_exceptions(void) 1739 { 1740 struct sigaction act; 1741 volatile int val; 1742 1743 act.sa_sigaction = sig_handler; 1744 sigemptyset(&act.sa_mask); 1745 act.sa_flags = SA_SIGINFO | SA_NODEFER; 1746 sigaction(SIGFPE, &act, NULL); 1747 sigaction(SIGILL, &act, NULL); 1748 sigaction(SIGSEGV, &act, NULL); 1749 sigaction(SIGBUS, &act, NULL); 1750 sigaction(SIGTRAP, &act, NULL); 1751 1752 /* test division by zero reporting */ 1753 printf("DIVZ exception:\n"); 1754 if (setjmp(jmp_env) == 0) { 1755 /* now divide by zero */ 1756 v1 = 0; 1757 v1 = 2 / v1; 1758 } 1759 1760 #if !defined(__x86_64__) 1761 printf("BOUND exception:\n"); 1762 if (setjmp(jmp_env) == 0) { 1763 /* bound exception */ 1764 tab[0] = 1; 1765 tab[1] = 10; 1766 asm volatile ("bound %0, %1" : : "r" (11), "m" (tab[0])); 1767 } 1768 #endif 1769 1770 #ifdef TEST_SEGS 1771 printf("segment exceptions:\n"); 1772 if (setjmp(jmp_env) == 0) { 1773 /* load an invalid segment */ 1774 asm volatile ("movl %0, %%fs" : : "r" ((0x1234 << 3) | 1)); 1775 } 1776 if (setjmp(jmp_env) == 0) { 1777 /* null data segment is valid */ 1778 asm volatile ("movl %0, %%fs" : : "r" (3)); 1779 /* null stack segment */ 1780 asm volatile ("movl %0, %%ss" : : "r" (3)); 1781 } 1782 1783 { 1784 struct modify_ldt_ldt_s ldt; 1785 ldt.entry_number = 1; 1786 ldt.base_addr = (unsigned long)&seg_data1; 1787 ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12; 1788 ldt.seg_32bit = 1; 1789 ldt.contents = MODIFY_LDT_CONTENTS_DATA; 1790 ldt.read_exec_only = 0; 1791 ldt.limit_in_pages = 1; 1792 ldt.seg_not_present = 1; 1793 ldt.useable = 1; 1794 modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ 1795 1796 if (setjmp(jmp_env) == 0) { 1797 /* segment not present */ 1798 asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1))); 1799 } 1800 } 1801 #endif 1802 1803 /* test SEGV reporting */ 1804 printf("PF exception:\n"); 1805 if (setjmp(jmp_env) == 0) { 1806 val = 1; 1807 /* we add a nop to test a weird PC retrieval case */ 1808 asm volatile ("nop"); 1809 /* now store in an invalid address */ 1810 *(char *)0x1234 = 1; 1811 } 1812 1813 /* test SEGV reporting */ 1814 printf("PF exception:\n"); 1815 if (setjmp(jmp_env) == 0) { 1816 val = 1; 1817 /* read from an invalid address */ 1818 v1 = *(char *)0x1234; 1819 } 1820 1821 /* test illegal instruction reporting */ 1822 printf("UD2 exception:\n"); 1823 if (setjmp(jmp_env) == 0) { 1824 /* now execute an invalid instruction */ 1825 asm volatile("ud2"); 1826 } 1827 printf("lock nop exception:\n"); 1828 if (setjmp(jmp_env) == 0) { 1829 /* now execute an invalid instruction */ 1830 asm volatile(".byte 0xf0, 0x90"); 1831 } 1832 1833 printf("INT exception:\n"); 1834 if (setjmp(jmp_env) == 0) { 1835 asm volatile ("int $0xfd"); 1836 } 1837 if (setjmp(jmp_env) == 0) { 1838 asm volatile ("int $0x01"); 1839 } 1840 if (setjmp(jmp_env) == 0) { 1841 asm volatile (".byte 0xcd, 0x03"); 1842 } 1843 if (setjmp(jmp_env) == 0) { 1844 asm volatile ("int $0x04"); 1845 } 1846 if 
(setjmp(jmp_env) == 0) { 1847 asm volatile ("int $0x05"); 1848 } 1849 1850 printf("INT3 exception:\n"); 1851 if (setjmp(jmp_env) == 0) { 1852 asm volatile ("int3"); 1853 } 1854 1855 printf("CLI exception:\n"); 1856 if (setjmp(jmp_env) == 0) { 1857 asm volatile ("cli"); 1858 } 1859 1860 printf("STI exception:\n"); 1861 if (setjmp(jmp_env) == 0) { 1862 asm volatile ("cli"); 1863 } 1864 1865 #if !defined(__x86_64__) 1866 printf("INTO exception:\n"); 1867 if (setjmp(jmp_env) == 0) { 1868 /* overflow exception */ 1869 asm volatile ("addl $1, %0 ; into" : : "r" (0x7fffffff)); 1870 } 1871 #endif 1872 1873 printf("OUTB exception:\n"); 1874 if (setjmp(jmp_env) == 0) { 1875 asm volatile ("outb %%al, %%dx" : : "d" (0x4321), "a" (0)); 1876 } 1877 1878 printf("INB exception:\n"); 1879 if (setjmp(jmp_env) == 0) { 1880 asm volatile ("inb %%dx, %%al" : "=a" (val) : "d" (0x4321)); 1881 } 1882 1883 printf("REP OUTSB exception:\n"); 1884 if (setjmp(jmp_env) == 0) { 1885 asm volatile ("rep outsb" : : "d" (0x4321), "S" (tab), "c" (1)); 1886 } 1887 1888 printf("REP INSB exception:\n"); 1889 if (setjmp(jmp_env) == 0) { 1890 asm volatile ("rep insb" : : "d" (0x4321), "D" (tab), "c" (1)); 1891 } 1892 1893 printf("HLT exception:\n"); 1894 if (setjmp(jmp_env) == 0) { 1895 asm volatile ("hlt"); 1896 } 1897 1898 printf("single step exception:\n"); 1899 val = 0; 1900 if (setjmp(jmp_env) == 0) { 1901 asm volatile ("pushf\n" 1902 "orl $0x00100, (%%esp)\n" 1903 "popf\n" 1904 "movl $0xabcd, %0\n" 1905 "movl $0x0, %0\n" : "=m" (val) : : "cc", "memory"); 1906 } 1907 printf("val=0x%x\n", val); 1908 } 1909 1910 #if !defined(__x86_64__) 1911 /* specific precise single step test */ 1912 void sig_trap_handler(int sig, siginfo_t *info, void *puc) 1913 { 1914 ucontext_t *uc = puc; 1915 printf("EIP=" FMTLX "\n", (long)uc->uc_mcontext.gregs[REG_EIP]); 1916 } 1917 1918 const uint8_t sstep_buf1[4] = { 1, 2, 3, 4}; 1919 uint8_t sstep_buf2[4]; 1920 1921 void test_single_step(void) 1922 { 1923 struct sigaction act; 1924 volatile int val; 1925 int i; 1926 1927 val = 0; 1928 act.sa_sigaction = sig_trap_handler; 1929 sigemptyset(&act.sa_mask); 1930 act.sa_flags = SA_SIGINFO; 1931 sigaction(SIGTRAP, &act, NULL); 1932 asm volatile ("pushf\n" 1933 "orl $0x00100, (%%esp)\n" 1934 "popf\n" 1935 "movl $0xabcd, %0\n" 1936 1937 /* jmp test */ 1938 "movl $3, %%ecx\n" 1939 "1:\n" 1940 "addl $1, %0\n" 1941 "decl %%ecx\n" 1942 "jnz 1b\n" 1943 1944 /* movsb: the single step should stop at each movsb iteration */ 1945 "movl $sstep_buf1, %%esi\n" 1946 "movl $sstep_buf2, %%edi\n" 1947 "movl $0, %%ecx\n" 1948 "rep movsb\n" 1949 "movl $3, %%ecx\n" 1950 "rep movsb\n" 1951 "movl $1, %%ecx\n" 1952 "rep movsb\n" 1953 1954 /* cmpsb: the single step should stop at each cmpsb iteration */ 1955 "movl $sstep_buf1, %%esi\n" 1956 "movl $sstep_buf2, %%edi\n" 1957 "movl $0, %%ecx\n" 1958 "rep cmpsb\n" 1959 "movl $4, %%ecx\n" 1960 "rep cmpsb\n" 1961 1962 /* getpid() syscall: single step should skip one 1963 instruction */ 1964 "movl $20, %%eax\n" 1965 "int $0x80\n" 1966 "movl $0, %%eax\n" 1967 1968 /* when modifying SS, trace is not done on the next 1969 instruction */ 1970 "movl %%ss, %%ecx\n" 1971 "movl %%ecx, %%ss\n" 1972 "addl $1, %0\n" 1973 "movl $1, %%eax\n" 1974 "movl %%ecx, %%ss\n" 1975 "jmp 1f\n" 1976 "addl $1, %0\n" 1977 "1:\n" 1978 "movl $1, %%eax\n" 1979 "pushl %%ecx\n" 1980 "popl %%ss\n" 1981 "addl $1, %0\n" 1982 "movl $1, %%eax\n" 1983 1984 "pushf\n" 1985 "andl $~0x00100, (%%esp)\n" 1986 "popf\n" 1987 : "=m" (val) 1988 : 1989 : "cc", "memory", "eax", "ecx", 
"esi", "edi"); 1990 printf("val=%d\n", val); 1991 for(i = 0; i < 4; i++) 1992 printf("sstep_buf2[%d] = %d\n", i, sstep_buf2[i]); 1993 } 1994 1995 /* self modifying code test */ 1996 uint8_t code[] = { 1997 0xb8, 0x1, 0x00, 0x00, 0x00, /* movl $1, %eax */ 1998 0xc3, /* ret */ 1999 }; 2000 2001 asm(".section \".data\"\n" 2002 "smc_code2:\n" 2003 "movl 4(%esp), %eax\n" 2004 "movl %eax, smc_patch_addr2 + 1\n" 2005 "nop\n" 2006 "nop\n" 2007 "nop\n" 2008 "nop\n" 2009 "nop\n" 2010 "nop\n" 2011 "nop\n" 2012 "nop\n" 2013 "smc_patch_addr2:\n" 2014 "movl $1, %eax\n" 2015 "ret\n" 2016 ".previous\n" 2017 ); 2018 2019 typedef int FuncType(void); 2020 extern int smc_code2(int); 2021 void test_self_modifying_code(void) 2022 { 2023 int i; 2024 printf("self modifying code:\n"); 2025 printf("func1 = 0x%x\n", ((FuncType *)code)()); 2026 for(i = 2; i <= 4; i++) { 2027 code[1] = i; 2028 printf("func%d = 0x%x\n", i, ((FuncType *)code)()); 2029 } 2030 2031 /* more difficult test : the modified code is just after the 2032 modifying instruction. It is forbidden in Intel specs, but it 2033 is used by old DOS programs */ 2034 for(i = 2; i <= 4; i++) { 2035 printf("smc_code2(%d) = %d\n", i, smc_code2(i)); 2036 } 2037 } 2038 #endif 2039 2040 long enter_stack[4096]; 2041 2042 #if defined(__x86_64__) 2043 #define RSP "%%rsp" 2044 #define RBP "%%rbp" 2045 #else 2046 #define RSP "%%esp" 2047 #define RBP "%%ebp" 2048 #endif 2049 2050 #if !defined(__x86_64__) 2051 /* causes an infinite loop, disable it for now. */ 2052 #define TEST_ENTER(size, stack_type, level) 2053 #else 2054 #define TEST_ENTER(size, stack_type, level)\ 2055 {\ 2056 long esp_save, esp_val, ebp_val, ebp_save, i;\ 2057 stack_type *ptr, *stack_end, *stack_ptr;\ 2058 memset(enter_stack, 0, sizeof(enter_stack));\ 2059 stack_end = stack_ptr = (stack_type *)(enter_stack + 4096);\ 2060 ebp_val = (long)stack_ptr;\ 2061 for(i=1;i<=32;i++)\ 2062 *--stack_ptr = i;\ 2063 esp_val = (long)stack_ptr;\ 2064 asm("mov " RSP ", %[esp_save]\n"\ 2065 "mov " RBP ", %[ebp_save]\n"\ 2066 "mov %[esp_val], " RSP "\n"\ 2067 "mov %[ebp_val], " RBP "\n"\ 2068 "enter" size " $8, $" #level "\n"\ 2069 "mov " RSP ", %[esp_val]\n"\ 2070 "mov " RBP ", %[ebp_val]\n"\ 2071 "mov %[esp_save], " RSP "\n"\ 2072 "mov %[ebp_save], " RBP "\n"\ 2073 : [esp_save] "=r" (esp_save),\ 2074 [ebp_save] "=r" (ebp_save),\ 2075 [esp_val] "=r" (esp_val),\ 2076 [ebp_val] "=r" (ebp_val)\ 2077 : "[esp_val]" (esp_val),\ 2078 "[ebp_val]" (ebp_val));\ 2079 printf("level=%d:\n", level);\ 2080 printf("esp_val=" FMTLX "\n", esp_val - (long)stack_end);\ 2081 printf("ebp_val=" FMTLX "\n", ebp_val - (long)stack_end);\ 2082 for(ptr = (stack_type *)esp_val; ptr < stack_end; ptr++)\ 2083 printf(FMTLX "\n", (long)ptr[0]);\ 2084 } 2085 #endif 2086 2087 static void test_enter(void) 2088 { 2089 #if defined(__x86_64__) 2090 TEST_ENTER("q", uint64_t, 0); 2091 TEST_ENTER("q", uint64_t, 1); 2092 TEST_ENTER("q", uint64_t, 2); 2093 TEST_ENTER("q", uint64_t, 31); 2094 #else 2095 TEST_ENTER("l", uint32_t, 0); 2096 TEST_ENTER("l", uint32_t, 1); 2097 TEST_ENTER("l", uint32_t, 2); 2098 TEST_ENTER("l", uint32_t, 31); 2099 #endif 2100 2101 TEST_ENTER("w", uint16_t, 0); 2102 TEST_ENTER("w", uint16_t, 1); 2103 TEST_ENTER("w", uint16_t, 2); 2104 TEST_ENTER("w", uint16_t, 31); 2105 } 2106 2107 #ifdef TEST_SSE 2108 2109 typedef int __m64 __attribute__ ((vector_size(8))); 2110 typedef float __m128 __attribute__ ((vector_size(16))); 2111 2112 typedef union { 2113 double d[2]; 2114 float s[4]; 2115 uint32_t l[4]; 2116 uint64_t q[2]; 2117 __m128 dq; 
2118 } XMMReg; 2119 2120 static uint64_t __attribute__((aligned(16))) test_values[4][2] = { 2121 { 0x456723c698694873, 0xdc515cff944a58ec }, 2122 { 0x1f297ccd58bad7ab, 0x41f21efba9e3e146 }, 2123 { 0x007c62c2085427f8, 0x231be9e8cde7438d }, 2124 { 0x0f76255a085427f8, 0xc233e9e8c4c9439a }, 2125 }; 2126 2127 #define SSE_OP(op)\ 2128 {\ 2129 asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\ 2130 printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\ 2131 #op,\ 2132 a.q[1], a.q[0],\ 2133 b.q[1], b.q[0],\ 2134 r.q[1], r.q[0]);\ 2135 } 2136 2137 #define SSE_OP2(op)\ 2138 {\ 2139 int i;\ 2140 for(i=0;i<2;i++) {\ 2141 a.q[0] = test_values[2*i][0];\ 2142 a.q[1] = test_values[2*i][1];\ 2143 b.q[0] = test_values[2*i+1][0];\ 2144 b.q[1] = test_values[2*i+1][1];\ 2145 SSE_OP(op);\ 2146 }\ 2147 } 2148 2149 #define MMX_OP2(op)\ 2150 {\ 2151 int i;\ 2152 for(i=0;i<2;i++) {\ 2153 a.q[0] = test_values[2*i][0];\ 2154 b.q[0] = test_values[2*i+1][0];\ 2155 asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\ 2156 printf("%-9s: a=" FMT64X " b=" FMT64X " r=" FMT64X "\n",\ 2157 #op,\ 2158 a.q[0],\ 2159 b.q[0],\ 2160 r.q[0]);\ 2161 }\ 2162 SSE_OP2(op);\ 2163 } 2164 2165 #define SHUF_OP(op, ib)\ 2166 {\ 2167 a.q[0] = test_values[0][0];\ 2168 a.q[1] = test_values[0][1];\ 2169 b.q[0] = test_values[1][0];\ 2170 b.q[1] = test_values[1][1];\ 2171 asm volatile (#op " $" #ib ", %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\ 2172 printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\ 2173 #op,\ 2174 a.q[1], a.q[0],\ 2175 b.q[1], b.q[0],\ 2176 ib,\ 2177 r.q[1], r.q[0]);\ 2178 } 2179 2180 #define PSHUF_OP(op, ib)\ 2181 {\ 2182 int i;\ 2183 for(i=0;i<2;i++) {\ 2184 a.q[0] = test_values[2*i][0];\ 2185 a.q[1] = test_values[2*i][1];\ 2186 asm volatile (#op " $" #ib ", %1, %0" : "=x" (r.dq) : "x" (a.dq));\ 2187 printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\ 2188 #op,\ 2189 a.q[1], a.q[0],\ 2190 ib,\ 2191 r.q[1], r.q[0]);\ 2192 }\ 2193 } 2194 2195 #define SHIFT_IM(op, ib)\ 2196 {\ 2197 int i;\ 2198 for(i=0;i<2;i++) {\ 2199 a.q[0] = test_values[2*i][0];\ 2200 a.q[1] = test_values[2*i][1];\ 2201 asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\ 2202 printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\ 2203 #op,\ 2204 a.q[1], a.q[0],\ 2205 ib,\ 2206 r.q[1], r.q[0]);\ 2207 }\ 2208 } 2209 2210 #define SHIFT_OP(op, ib)\ 2211 {\ 2212 int i;\ 2213 SHIFT_IM(op, ib);\ 2214 for(i=0;i<2;i++) {\ 2215 a.q[0] = test_values[2*i][0];\ 2216 a.q[1] = test_values[2*i][1];\ 2217 b.q[0] = ib;\ 2218 b.q[1] = 0;\ 2219 asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\ 2220 printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\ 2221 #op,\ 2222 a.q[1], a.q[0],\ 2223 b.q[1], b.q[0],\ 2224 r.q[1], r.q[0]);\ 2225 }\ 2226 } 2227 2228 #define MOVMSK(op)\ 2229 {\ 2230 int i, reg;\ 2231 for(i=0;i<2;i++) {\ 2232 a.q[0] = test_values[2*i][0];\ 2233 a.q[1] = test_values[2*i][1];\ 2234 asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\ 2235 printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\ 2236 #op,\ 2237 a.q[1], a.q[0],\ 2238 reg);\ 2239 }\ 2240 } 2241 2242 #define SSE_OPS(a) \ 2243 SSE_OP(a ## ps);\ 2244 SSE_OP(a ## ss); 2245 2246 #define SSE_OPD(a) \ 2247 SSE_OP(a ## pd);\ 2248 SSE_OP(a ## sd); 2249 2250 #define SSE_COMI(op, field)\ 2251 {\ 2252 unsigned long eflags;\ 2253 XMMReg a, b;\ 2254 a.field[0] = a1;\ 2255 b.field[0] = b1;\ 
#define MMX_OP2(op)\
{\
    int i;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        b.q[0] = test_values[2*i+1][0];\
        asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\
        printf("%-9s: a=" FMT64X " b=" FMT64X " r=" FMT64X "\n",\
               #op,\
               a.q[0],\
               b.q[0],\
               r.q[0]);\
    }\
    SSE_OP2(op);\
}

#define SHUF_OP(op, ib)\
{\
    a.q[0] = test_values[0][0];\
    a.q[1] = test_values[0][1];\
    b.q[0] = test_values[1][0];\
    b.q[1] = test_values[1][1];\
    asm volatile (#op " $" #ib ", %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
    printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
           #op,\
           a.q[1], a.q[0],\
           b.q[1], b.q[0],\
           ib,\
           r.q[1], r.q[0]);\
}

#define PSHUF_OP(op, ib)\
{\
    int i;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        asm volatile (#op " $" #ib ", %1, %0" : "=x" (r.dq) : "x" (a.dq));\
        printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
               #op,\
               a.q[1], a.q[0],\
               ib,\
               r.q[1], r.q[0]);\
    }\
}

#define SHIFT_IM(op, ib)\
{\
    int i;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\
        printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
               #op,\
               a.q[1], a.q[0],\
               ib,\
               r.q[1], r.q[0]);\
    }\
}

#define SHIFT_OP(op, ib)\
{\
    int i;\
    SHIFT_IM(op, ib);\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        b.q[0] = ib;\
        b.q[1] = 0;\
        asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
        printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
               #op,\
               a.q[1], a.q[0],\
               b.q[1], b.q[0],\
               r.q[1], r.q[0]);\
    }\
}

#define MOVMSK(op)\
{\
    int i, reg;\
    for(i=0;i<2;i++) {\
        a.q[0] = test_values[2*i][0];\
        a.q[1] = test_values[2*i][1];\
        asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\
        printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
               #op,\
               a.q[1], a.q[0],\
               reg);\
    }\
}

#define SSE_OPS(a) \
SSE_OP(a ## ps);\
SSE_OP(a ## ss);

#define SSE_OPD(a) \
SSE_OP(a ## pd);\
SSE_OP(a ## sd);

#define SSE_COMI(op, field)\
{\
    unsigned long eflags;\
    XMMReg a, b;\
    a.field[0] = a1;\
    b.field[0] = b1;\
    asm volatile (#op " %2, %1\n"\
                  "pushf\n"\
                  "pop %0\n"\
                  : "=rm" (eflags)\
                  : "x" (a.dq), "x" (b.dq));\
    printf("%-9s: a=%f b=%f cc=%04lx\n",\
           #op, a1, b1,\
           eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
}

void test_sse_comi(double a1, double b1)
{
    SSE_COMI(ucomiss, s);
    SSE_COMI(ucomisd, d);
    SSE_COMI(comiss, s);
    SSE_COMI(comisd, d);
}

#define CVT_OP_XMM(op)\
{\
    asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\
    printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
           #op,\
           a.q[1], a.q[0],\
           r.q[1], r.q[0]);\
}

/* Force %xmm0 usage to avoid the case where both register indices are 0,
   to test instruction decoding more extensively */
#define CVT_OP_XMM2MMX(op)\
{\
    asm volatile (#op " %1, %0" : "=y" (r.q[0]) : "x" (a.dq) \
                  : "%xmm0"); \
    asm volatile("emms\n"); \
    printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "\n",\
           #op,\
           a.q[1], a.q[0],\
           r.q[0]);\
}

#define CVT_OP_MMX2XMM(op)\
{\
    asm volatile (#op " %1, %0" : "=x" (r.dq) : "y" (a.q[0]));\
    asm volatile("emms\n"); \
    printf("%-9s: a=" FMT64X " r=" FMT64X "" FMT64X "\n",\
           #op,\
           a.q[0],\
           r.q[1], r.q[0]);\
}

#define CVT_OP_REG2XMM(op)\
{\
    asm volatile (#op " %1, %0" : "=x" (r.dq) : "r" (a.l[0]));\
    printf("%-9s: a=%08x r=" FMT64X "" FMT64X "\n",\
           #op,\
           a.l[0],\
           r.q[1], r.q[0]);\
}

#define CVT_OP_XMM2REG(op)\
{\
    asm volatile (#op " %1, %0" : "=r" (r.l[0]) : "x" (a.dq));\
    printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
           #op,\
           a.q[1], a.q[0],\
           r.l[0]);\
}

struct fpxstate {
    uint16_t fpuc;
    uint16_t fpus;
    uint16_t fptag;
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];
    uint8_t xmm_regs[8 * 16];
    uint8_t dummy2[224];
};

static struct fpxstate fpx_state __attribute__((aligned(16)));
static struct fpxstate fpx_state2 __attribute__((aligned(16)));
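/* Illustrative check (not part of the original test; the typedef name is
 * hypothetical): the FXSAVE image is 512 bytes, so struct fpxstate above is
 * expected to have exactly that size.  A layout mistake would give the array
 * a negative size and fail to compile. */
typedef char fpxstate_size_check[(sizeof(struct fpxstate) == 512) ? 1 : -1];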
2394 { 2395 XMMReg r, a, b; 2396 int i; 2397 2398 MMX_OP2(punpcklbw); 2399 MMX_OP2(punpcklwd); 2400 MMX_OP2(punpckldq); 2401 MMX_OP2(packsswb); 2402 MMX_OP2(pcmpgtb); 2403 MMX_OP2(pcmpgtw); 2404 MMX_OP2(pcmpgtd); 2405 MMX_OP2(packuswb); 2406 MMX_OP2(punpckhbw); 2407 MMX_OP2(punpckhwd); 2408 MMX_OP2(punpckhdq); 2409 MMX_OP2(packssdw); 2410 MMX_OP2(pcmpeqb); 2411 MMX_OP2(pcmpeqw); 2412 MMX_OP2(pcmpeqd); 2413 2414 MMX_OP2(paddq); 2415 MMX_OP2(pmullw); 2416 MMX_OP2(psubusb); 2417 MMX_OP2(psubusw); 2418 MMX_OP2(pminub); 2419 MMX_OP2(pand); 2420 MMX_OP2(paddusb); 2421 MMX_OP2(paddusw); 2422 MMX_OP2(pmaxub); 2423 MMX_OP2(pandn); 2424 2425 MMX_OP2(pmulhuw); 2426 MMX_OP2(pmulhw); 2427 2428 MMX_OP2(psubsb); 2429 MMX_OP2(psubsw); 2430 MMX_OP2(pminsw); 2431 MMX_OP2(por); 2432 MMX_OP2(paddsb); 2433 MMX_OP2(paddsw); 2434 MMX_OP2(pmaxsw); 2435 MMX_OP2(pxor); 2436 MMX_OP2(pmuludq); 2437 MMX_OP2(pmaddwd); 2438 MMX_OP2(psadbw); 2439 MMX_OP2(psubb); 2440 MMX_OP2(psubw); 2441 MMX_OP2(psubd); 2442 MMX_OP2(psubq); 2443 MMX_OP2(paddb); 2444 MMX_OP2(paddw); 2445 MMX_OP2(paddd); 2446 2447 MMX_OP2(pavgb); 2448 MMX_OP2(pavgw); 2449 2450 asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678)); 2451 printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]); 2452 2453 asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678)); 2454 printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]); 2455 2456 a.q[0] = test_values[0][0]; 2457 a.q[1] = test_values[0][1]; 2458 asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); 2459 printf("%-9s: r=%08x\n", "pextrw", r.l[0]); 2460 2461 asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); 2462 printf("%-9s: r=%08x\n", "pextrw", r.l[0]); 2463 2464 asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); 2465 printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); 2466 2467 asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); 2468 printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); 2469 2470 { 2471 r.q[0] = -1; 2472 r.q[1] = -1; 2473 2474 a.q[0] = test_values[0][0]; 2475 a.q[1] = test_values[0][1]; 2476 b.q[0] = test_values[1][0]; 2477 b.q[1] = test_values[1][1]; 2478 asm volatile("maskmovq %1, %0" : 2479 : "y" (a.q[0]), "y" (b.q[0]), "D" (&r) 2480 : "memory"); 2481 printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n", 2482 "maskmov", 2483 r.q[0], 2484 a.q[0], 2485 b.q[0]); 2486 asm volatile("maskmovdqu %1, %0" : 2487 : "x" (a.dq), "x" (b.dq), "D" (&r) 2488 : "memory"); 2489 printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n", 2490 "maskmov", 2491 r.q[1], r.q[0], 2492 a.q[1], a.q[0], 2493 b.q[1], b.q[0]); 2494 } 2495 2496 asm volatile ("emms"); 2497 2498 SSE_OP2(punpcklqdq); 2499 SSE_OP2(punpckhqdq); 2500 SSE_OP2(andps); 2501 SSE_OP2(andpd); 2502 SSE_OP2(andnps); 2503 SSE_OP2(andnpd); 2504 SSE_OP2(orps); 2505 SSE_OP2(orpd); 2506 SSE_OP2(xorps); 2507 SSE_OP2(xorpd); 2508 2509 SSE_OP2(unpcklps); 2510 SSE_OP2(unpcklpd); 2511 SSE_OP2(unpckhps); 2512 SSE_OP2(unpckhpd); 2513 2514 SHUF_OP(shufps, 0x78); 2515 SHUF_OP(shufpd, 0x02); 2516 2517 PSHUF_OP(pshufd, 0x78); 2518 PSHUF_OP(pshuflw, 0x78); 2519 PSHUF_OP(pshufhw, 0x78); 2520 2521 SHIFT_OP(psrlw, 7); 2522 SHIFT_OP(psrlw, 16); 2523 SHIFT_OP(psraw, 7); 2524 SHIFT_OP(psraw, 16); 2525 SHIFT_OP(psllw, 7); 2526 SHIFT_OP(psllw, 16); 2527 2528 SHIFT_OP(psrld, 7); 2529 SHIFT_OP(psrld, 32); 2530 SHIFT_OP(psrad, 7); 2531 SHIFT_OP(psrad, 32); 2532 SHIFT_OP(pslld, 7); 2533 SHIFT_OP(pslld, 32); 2534 2535 SHIFT_OP(psrlq, 7); 2536 SHIFT_OP(psrlq, 32); 2537 
#define TEST_CONV_RAX(op)\
{\
    unsigned long a, r;\
    a = i2l(0x8234a6f8);\
    r = a;\
    asm volatile(#op : "=a" (r) : "0" (r));\
    printf("%-10s A=" FMTLX " R=" FMTLX "\n", #op, a, r);\
}

#define TEST_CONV_RAX_RDX(op)\
{\
    unsigned long a, d, r, rh; \
    a = i2l(0x8234a6f8);\
    d = i2l(0x8345a1f2);\
    r = a;\
    rh = d;\
    asm volatile(#op : "=a" (r), "=d" (rh) : "0" (r), "1" (rh)); \
    printf("%-10s A=" FMTLX " R=" FMTLX ":" FMTLX "\n", #op, a, r, rh); \
}

void test_conv(void)
{
    TEST_CONV_RAX(cbw);
    TEST_CONV_RAX(cwde);
#if defined(__x86_64__)
    TEST_CONV_RAX(cdqe);
#endif

    TEST_CONV_RAX_RDX(cwd);
    TEST_CONV_RAX_RDX(cdq);
#if defined(__x86_64__)
    TEST_CONV_RAX_RDX(cqo);
#endif

    {
        unsigned long a, r;
        a = i2l(0x12345678);
        asm volatile("bswapl %k0" : "=r" (r) : "0" (a));
        printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapl", a, r);
    }
#if defined(__x86_64__)
    {
        unsigned long a, r;
        a = i2l(0x12345678);
        asm volatile("bswapq %0" : "=r" (r) : "0" (a));
        printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapq", a, r);
    }
#endif
}
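/* Illustrative sketch (not part of the original test; the function name is
 * hypothetical): plain-C equivalents of the sign extensions exercised by
 * test_conv() above (cbw: AL -> AX, cwde: AX -> EAX, cwd: AX -> DX:AX,
 * cdq: EAX -> EDX:EAX). */
static void __attribute__((unused)) conv_reference_model(void)
{
    int8_t al = (int8_t)0xf8;
    int16_t ax = al;                            /* cbw  */
    int32_t eax = ax;                           /* cwde */
    uint16_t dx = (ax < 0) ? 0xffff : 0;        /* cwd: DX = sign bits of AX */
    uint32_t edx = (eax < 0) ? 0xffffffff : 0;  /* cdq: EDX = sign bits of EAX */
    printf("cbw=%04x cwde=%08x cwd.dx=%04x cdq.edx=%08x\n",
           (uint16_t)ax, (uint32_t)eax, dx, edx);
}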
extern void *__start_initcall;
extern void *__stop_initcall;


int main(int argc, char **argv)
{
    void **ptr;
    void (*func)(void);

    /* run the generated tests registered in the "initcall" section
       (see the __init_call attribute) */
    ptr = &__start_initcall;
    while (ptr != &__stop_initcall) {
        func = *ptr++;
        func();
    }
    test_bsx();
    test_mul();
    test_jcc();
    test_loop();
    test_floats();
#if !defined(__x86_64__)
    test_bcd();
#endif
    test_xchg();
    test_string();
    test_misc();
    test_lea();
#ifdef TEST_SEGS
    test_segs();
    test_code16();
#endif
#ifdef TEST_VM86
    test_vm86();
#endif
#if !defined(__x86_64__)
    test_exceptions();
    test_self_modifying_code();
    test_single_step();
#endif
    test_enter();
    test_conv();
#ifdef TEST_SSE
    test_sse();
    test_fxsave();
#endif
    return 0;
}