/*
 * See if various BMI1 and BMI2 instructions give expected results.
 *
 * ANDN, BEXTR, BLSI, BLSMSK and BLSR are BMI1 instructions; BZHI, PDEP,
 * PEXT, RORX, SARX, SHLX and SHRX are BMI2 instructions.  Every check is a
 * plain assert(), so the program exits with status 0 only if all of the
 * instructions behave as expected.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Integer type as wide as a general-purpose register of the build target. */
#ifdef __x86_64
typedef uint64_t reg_t;
#else
typedef uint32_t reg_t;
#endif

/*
 * The wrappers below use __asm__ rather than asm so that the file also
 * builds in strict ISO modes (e.g. -std=c11), where asm is not a keyword.
 */

/*
 * insn1q(name, arg0): define "reg_t <name>q(reg_t arg0)", which executes the
 * one-source instruction <name>q on arg0 (register or memory) and returns
 * the destination register.
 */
#define insn1q(name, arg0)                                              \
static inline reg_t name##q(reg_t arg0)                                 \
{                                                                       \
    reg_t result64;                                                     \
    __asm__ volatile (#name "q %1, %0"                                  \
                      : "=r"(result64)                                  \
                      : "rm"(arg0));                                    \
    return result64;                                                    \
}

/*
 * insn1l(name, arg0): same as insn1q but for the 32-bit form <name>l.  The
 * "k" operand modifier makes the compiler print the 32-bit register names.
 */
#define insn1l(name, arg0)                                              \
static inline reg_t name##l(reg_t arg0)                                 \
{                                                                       \
    reg_t result32;                                                     \
    __asm__ volatile (#name "l %k1, %k0"                                \
                      : "=r"(result32)                                  \
                      : "rm"(arg0));                                    \
    return result32;                                                    \
}

/*
 * insn2q(name, arg0, c0, arg1, c1): define
 * "reg_t <name>q(reg_t arg0, reg_t arg1)" for a two-source instruction.
 * c0 and c1 are the asm constraint strings for arg0 and arg1.  The operands
 * are emitted in AT&T order as "arg1, arg0, destination", so c0 must be a
 * register-only constraint whenever the instruction does not accept memory
 * in its middle operand.
 */
#define insn2q(name, arg0, c0, arg1, c1)                                \
static inline reg_t name##q(reg_t arg0, reg_t arg1)                     \
{                                                                       \
    reg_t result64;                                                     \
    __asm__ volatile (#name "q %2, %1, %0"                              \
                      : "=r"(result64)                                  \
                      : c0(arg0), c1(arg1));                            \
    return result64;                                                    \
}

/* insn2l(name, arg0, c0, arg1, c1): 32-bit counterpart of insn2q. */
#define insn2l(name, arg0, c0, arg1, c1)                                \
static inline reg_t name##l(reg_t arg0, reg_t arg1)                     \
{                                                                       \
    reg_t result32;                                                     \
    __asm__ volatile (#name "l %k2, %k1, %k0"                           \
                      : "=r"(result32)                                  \
                      : c0(arg0), c1(arg1));                            \
    return result32;                                                    \
}

/*
 * RORX takes its rotate count as an immediate, so the count has to reach the
 * asm statement as a compile-time constant ("i" constraint).  A function
 * parameter satisfies "i" only if the compiler happens to inline the wrapper
 * and propagate the constant, which is not guaranteed (for instance when
 * building without optimization).  The rorx wrappers are therefore macros:
 * the count written at the call site is itself the asm operand.
 */
#ifdef __x86_64
#define rorxq(val, n)                                                   \
__extension__ ({                                                        \
    reg_t result64;                                                     \
    __asm__ volatile ("rorxq %2, %1, %0"                                \
                      : "=r"(result64)                                  \
                      : "r"((reg_t)(val)), "i"(n));                     \
    result64;                                                           \
})
#endif

#define rorxl(val, n)                                                   \
__extension__ ({                                                        \
    reg_t result32;                                                     \
    __asm__ volatile ("rorxl %2, %k1, %k0"                              \
                      : "=r"(result32)                                  \
                      : "r"((reg_t)(val)), "i"(n));                     \
    result32;                                                           \
})

#ifdef __x86_64
insn2q(pext, src, "r", mask, "rm")
insn2q(pdep, src, "r", mask, "rm")
/*
 * ANDN computes ~clear & val.  The inverted operand is register-only; only
 * val may come from memory.
 */
insn2q(andn, clear, "r", val, "rm")
/* BEXTR: first argument is the source value, second is start | (len << 8). */
insn2q(bextr, val, "rm", range, "r")
/* BZHI: first argument is the source value, second is the bit index. */
insn2q(bzhi, val, "rm", pos, "r")
insn2q(sarx, val, "rm", n, "r")
insn2q(shlx, val, "rm", n, "r")
insn2q(shrx, val, "rm", n, "r")
insn1q(blsi, src)
insn1q(blsmsk, src)
insn1q(blsr, src)
#endif
insn2l(pext, src, "r", mask, "rm")
insn2l(pdep, src, "r", mask, "rm")
/* Same operand rules as the 64-bit wrappers above. */
insn2l(andn, clear, "r", val, "rm")
insn2l(bextr, val, "rm", range, "r")
insn2l(bzhi, val, "rm", pos, "r")
insn2l(sarx, val, "rm", n, "r")
insn2l(shlx, val, "rm", n, "r")
insn2l(shrx, val, "rm", n, "r")
insn1l(blsi, src)
insn1l(blsmsk, src)
insn1l(blsr, src)

/*
 * Run every instruction on fixed inputs and assert the expected result.
 * Returns 0 when all checks pass; a failing check aborts through assert().
 */
int main(int argc, char *argv[])
{
    uint64_t ehlo = 0x202020204f4c4845ull;
    uint64_t mask = 0xa080800302020001ull;
    reg_t result;

    /* The command line is not used. */
    (void)argc;
    (void)argv;

#ifdef __x86_64
    /* 64 bits */
    result = andnq(mask, ehlo);
    assert(result == 0x002020204d4c4844);

    result = pextq(ehlo, mask);
    assert(result == 133);

    /* Depositing the extracted bits again restores the masked source. */
    result = pdepq(result, mask);
    assert(result == (ehlo & mask));

    result = pextq(-1ull, mask);
    assert(result == 511); /* mask has 9 bits set */

    result = pdepq(-1ull, mask);
    assert(result == mask);

    /* start 0, length 63: everything except bit 63 */
    result = bextrq(mask, 0x3f00);
    assert(result == (mask & ~INT64_MIN));

    /* start 56, length 16: only bits 63:56 are left to extract */
    result = bextrq(mask, 0x1038);
    assert(result == 0xa0);

    /* start 248 lies beyond the register: nothing is extracted */
    result = bextrq(mask, 0x10f8);
    assert(result == 0);

    /* length 127 exceeds the register width: the whole value is returned */
    result = bextrq(0xfedcba9876543210ull, 0x7f00);
    assert(result == 0xfedcba9876543210ull);

    result = blsiq(0x30);
    assert(result == 0x10);

    result = blsiq(0x30ull << 32);
    assert(result == 0x10ull << 32);

    result = blsmskq(0x30);
    assert(result == 0x1f);

    result = blsrq(0x30);
    assert(result == 0x20);

    result = blsrq(0x30ull << 32);
    assert(result == 0x20ull << 32);

    /* index 63: bits 62:0 are kept */
    result = bzhiq(mask, 0x3f);
    assert(result == (mask & ~INT64_MIN));

    /*
     * index 31: bits 30:0 are kept.  The mask is built from an unsigned
     * constant; left-shifting a negative int would be undefined behaviour.
     */
    result = bzhiq(mask, 0x1f);
    assert(result == (mask & ((1ull << 31) - 1)));

    result = rorxq(0x2132435465768798, 8);
    assert(result == 0x9821324354657687);

    result = sarxq(0xffeeddccbbaa9988, 8);
    assert(result == 0xffffeeddccbbaa99);

    /* count bits above the low six are expected to be ignored */
    result = sarxq(0x77eeddccbbaa9988, 8 | 64);
    assert(result == 0x0077eeddccbbaa99);

    result = shrxq(0xffeeddccbbaa9988, 8);
    assert(result == 0x00ffeeddccbbaa99);

    /* count bits above the low six are expected to be ignored */
    result = shrxq(0x77eeddccbbaa9988, 8 | 192);
    assert(result == 0x0077eeddccbbaa99);

    result = shlxq(0xffeeddccbbaa9988, 8);
    assert(result == 0xeeddccbbaa998800);
#endif

    /* 32 bits */
    result = andnl(mask, ehlo);
    assert(result == 0x04d4c4844);

    result = pextl((uint32_t) ehlo, mask);
    assert(result == 5);

    result = pdepl(result, mask);
    assert(result == (uint32_t)(ehlo & mask));

    result = pextl(-1u, mask);
    assert(result == 7); /* mask has 3 bits set */

    result = pdepl(-1u, mask);
    assert(result == (uint32_t)mask);

    /* start 0, length 31: everything except bit 31 of the low word */
    result = bextrl(mask, 0x1f00);
    assert(result == (mask & ~INT32_MIN));

    /* start 24, length 16: only bits 31:24 are left to extract */
    result = bextrl(ehlo, 0x1018);
    assert(result == 0x4f);

    /* start 56 lies beyond the 32-bit operand: nothing is extracted */
    result = bextrl(mask, 0x1038);
    assert(result == 0);

    /* the upper half of a 64-bit register must not leak into the result */
    result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018);
    assert(result == 0x5a);

    result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00);
    assert(result == 0x76543210u);

    /* length 0 extracts nothing */
    result = bextrl(-1, 0);
    assert(result == 0);

    result = blsil(0xffff);
    assert(result == 1);

    result = blsmskl(0x300);
    assert(result == 0x1ff);

    result = blsrl(0xffc);
    assert(result == 0xff8);

    result = bzhil(mask, 0xf);
    assert(result == 1);

    result = rorxl(0x65768798, 8);
    assert(result == 0x98657687);

    result = sarxl(0xffeeddcc, 8);
    assert(result == 0xffffeedd);

    /* count bits above the low five are expected to be ignored */
    result = sarxl(0x77eeddcc, 8 | 32);
    assert(result == 0x0077eedd);

    result = shrxl(0xffeeddcc, 8);
    assert(result == 0x00ffeedd);

    /* count bits above the low five are expected to be ignored */
    result = shrxl(0x77eeddcc, 8 | 128);
    assert(result == 0x0077eedd);

    result = shlxl(0xffeeddcc, 8);
    assert(result == 0xeeddcc00);

    return 0;
}