/*
 * See if various BMI1/BMI2 instructions give expected results.
 *
 * Every instruction under test is wrapped in a tiny helper whose name is the
 * mnemonic plus an operand-size suffix ("q" = 64 bit, "l" = 32 bit).  main()
 * feeds the helpers fixed inputs and asserts on the architecturally defined
 * outputs; the program exits with status 0 when every check passes.
 *
 * The 64-bit helpers and checks are only built when __x86_64 is defined.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Wrapper generators.
 *
 * insn1{q,l}(name, arg0)
 *     Emits  name{q,l}(arg0)  executing "name src, dst" (AT&T order).
 *
 * insn2{q,l}(name, arg0, c0, arg1, c1)
 *     Emits  name{q,l}(arg0, arg1)  executing "name arg1, arg0, dst"
 *     (AT&T order).  c0/c1 are the asm constraints for arg0/arg1.  In this
 *     encoding only the FIRST AT&T operand (arg1) may live in memory for
 *     andn/pext/pdep, and only the SECOND (arg0) for bextr/bzhi/sarx/shlx/
 *     shrx, so the constraints must be chosen per instruction.
 *
 * __asm__ is used instead of asm so the file also builds in strict ISO
 * modes (-std=c11), where the plain keyword is not available.
 */
#define insn1q(name, arg0)                                                    \
static inline uint64_t name##q(uint64_t arg0)                                 \
{                                                                             \
    uint64_t result64;                                                        \
    __asm__ volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0));        \
    return result64;                                                          \
}

#define insn1l(name, arg0)                                                    \
static inline uint32_t name##l(uint32_t arg0)                                 \
{                                                                             \
    uint32_t result32;                                                        \
    __asm__ volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0));      \
    return result32;                                                          \
}

#define insn2q(name, arg0, c0, arg1, c1)                                      \
static inline uint64_t name##q(uint64_t arg0, uint64_t arg1)                  \
{                                                                             \
    uint64_t result64;                                                        \
    __asm__ volatile (#name "q %2, %1, %0"                                    \
                      : "=r"(result64) : c0(arg0), c1(arg1));                 \
    return result64;                                                          \
}

#define insn2l(name, arg0, c0, arg1, c1)                                      \
static inline uint32_t name##l(uint32_t arg0, uint32_t arg1)                  \
{                                                                             \
    uint32_t result32;                                                        \
    __asm__ volatile (#name "l %k2, %k1, %k0"                                 \
                      : "=r"(result32) : c0(arg0), c1(arg1));                 \
    return result32;                                                          \
}

/*
 * rorx takes its rotate count as an immediate.  An "i" constraint fed from a
 * function parameter only assembles if the compiler inlines the wrapper and
 * propagates the constant, which fails without optimisation.  Expanding the
 * asm at the call site keeps the count a literal at every optimisation level.
 * Call syntax is the same as for the function-style wrappers; n must be an
 * integer constant expression.
 */
#ifdef __x86_64
#define rorxq(val, n)                                                         \
    __extension__ ({                                                          \
        uint64_t rorx_src64_ = (val);                                         \
        uint64_t rorx_dst64_;                                                 \
        __asm__ volatile ("rorxq %2, %1, %0"                                  \
                          : "=r"(rorx_dst64_)                                 \
                          : "rm"(rorx_src64_), "i"(n));                       \
        rorx_dst64_;                                                          \
    })
#endif

#define rorxl(val, n)                                                         \
    __extension__ ({                                                          \
        uint32_t rorx_src32_ = (val);                                         \
        uint32_t rorx_dst32_;                                                 \
        __asm__ volatile ("rorxl %2, %k1, %k0"                                \
                          : "=r"(rorx_dst32_)                                 \
                          : "rm"(rorx_src32_), "i"(n));                       \
        rorx_dst32_;                                                          \
    })

/*
 * Wrapper instantiations.
 *
 * andn: result = ~clear & val.  "clear" is the middle (register-only)
 * operand, "val" is the one that may be a memory operand.
 *
 * bextr/bzhi: despite the parameter names, the FIRST argument is the source
 * value and the SECOND is the control word / bit index (see the calls in
 * main()).  The names are kept as they were; only the roles are noted here.
 */
#ifdef __x86_64
insn2q(pext, src, "r", mask, "rm")
insn2q(pdep, src, "r", mask, "rm")
insn2q(andn, clear, "r", val, "rm")
insn2q(bextr, range, "rm", val, "r")
insn2q(bzhi, pos, "rm", val, "r")
insn2q(sarx, val, "rm", n, "r")
insn2q(shlx, val, "rm", n, "r")
insn2q(shrx, val, "rm", n, "r")
insn1q(blsi, src)
insn1q(blsmsk, src)
insn1q(blsr, src)
#endif
insn2l(pext, src, "r", mask, "rm")
insn2l(pdep, src, "r", mask, "rm")
insn2l(andn, clear, "r", val, "rm")
insn2l(bextr, range, "rm", val, "r")
insn2l(bzhi, pos, "rm", val, "r")
insn2l(sarx, val, "rm", n, "r")
insn2l(shlx, val, "rm", n, "r")
insn2l(shrx, val, "rm", n, "r")
insn1l(blsi, src)
insn1l(blsmsk, src)
insn1l(blsr, src)

/*
 * Run every check; an assertion failure aborts the program, otherwise 0 is
 * returned.  The command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    uint64_t ehlo = 0x202020204f4c4845ull;
    uint64_t mask = 0xa080800302020001ull;
    uint32_t result32;

    (void)argc;
    (void)argv;

#ifdef __x86_64
    uint64_t result64;

    /* 64 bits */
    result64 = andnq(mask, ehlo);
    assert(result64 == 0x002020204d4c4844);

    result64 = pextq(ehlo, mask);
    assert(result64 == 133);

    /* pdep of the pext result scatters the bits back under the mask */
    result64 = pdepq(result64, mask);
    assert(result64 == (ehlo & mask));

    result64 = pextq(-1ull, mask);
    assert(result64 == 511); /* mask has 9 bits set */

    result64 = pdepq(-1ull, mask);
    assert(result64 == mask);

    /* bextr control word: low byte = start bit, next byte = length */
    result64 = bextrq(mask, 0x3f00);
    assert(result64 == (mask & ~INT64_MIN));

    /* the field runs past bit 63: only the 8 existing bits are returned */
    result64 = bextrq(mask, 0x1038);
    assert(result64 == 0xa0);

    /* start bit beyond the operand width yields zero */
    result64 = bextrq(mask, 0x10f8);
    assert(result64 == 0);

    result64 = blsiq(0x30);
    assert(result64 == 0x10);

    result64 = blsiq(0x30ull << 32);
    assert(result64 == 0x10ull << 32);

    result64 = blsmskq(0x30);
    assert(result64 == 0x1f);

    result64 = blsrq(0x30);
    assert(result64 == 0x20);

    result64 = blsrq(0x30ull << 32);
    assert(result64 == 0x20ull << 32);

    result64 = bzhiq(mask, 0x3f);
    assert(result64 == (mask & ~INT64_MIN));

    /* index 31 keeps bits 0..30; unsigned constant avoids shifting -1 */
    result64 = bzhiq(mask, 0x1f);
    assert(result64 == (mask & 0x7fffffffull));

    result64 = rorxq(0x2132435465768798, 8);
    assert(result64 == 0x9821324354657687);

    result64 = sarxq(0xffeeddccbbaa9988, 8);
    assert(result64 == 0xffffeeddccbbaa99);

    /* count bits above the low six must be ignored */
    result64 = sarxq(0x77eeddccbbaa9988, 8 | 64);
    assert(result64 == 0x0077eeddccbbaa99);

    result64 = shrxq(0xffeeddccbbaa9988, 8);
    assert(result64 == 0x00ffeeddccbbaa99);

    result64 = shrxq(0x77eeddccbbaa9988, 8 | 192);
    assert(result64 == 0x0077eeddccbbaa99);

    result64 = shlxq(0xffeeddccbbaa9988, 8);
    assert(result64 == 0xeeddccbbaa998800);
#endif

    /* 32 bits: the 64-bit inputs are truncated to their low halves */
    result32 = andnl(mask, ehlo);
    assert(result32 == 0x04d4c4844);

    result32 = pextl((uint32_t) ehlo, mask);
    assert(result32 == 5);

    result32 = pdepl(result32, mask);
    assert(result32 == (uint32_t)(ehlo & mask));

    result32 = pextl(-1u, mask);
    assert(result32 == 7); /* mask has 3 bits set */

    result32 = pdepl(-1u, mask);
    assert(result32 == (uint32_t)mask);

    result32 = bextrl(mask, 0x1f00);
    assert(result32 == (mask & ~INT32_MIN));

    result32 = bextrl(ehlo, 0x1018);
    assert(result32 == 0x4f);

    /* start bit beyond the 32-bit operand width yields zero */
    result32 = bextrl(mask, 0x1038);
    assert(result32 == 0);

    result32 = blsil(0xffff);
    assert(result32 == 1);

    result32 = blsmskl(0x300);
    assert(result32 == 0x1ff);

    result32 = blsrl(0xffc);
    assert(result32 == 0xff8);

    result32 = bzhil(mask, 0xf);
    assert(result32 == 1);

    result32 = rorxl(0x65768798, 8);
    assert(result32 == 0x98657687);

    result32 = sarxl(0xffeeddcc, 8);
    assert(result32 == 0xffffeedd);

    /* count bits above the low five must be ignored */
    result32 = sarxl(0x77eeddcc, 8 | 32);
    assert(result32 == 0x0077eedd);

    result32 = shrxl(0xffeeddcc, 8);
    assert(result32 == 0x00ffeedd);

    result32 = shrxl(0x77eeddcc, 8 | 128);
    assert(result32 == 0x0077eedd);

    result32 = shlxl(0xffeeddcc, 8);
    assert(result32 == 0xeeddcc00);

    return 0;
}