/* See if various BMI1 and BMI2 instructions give expected results */
2 #include <assert.h>
3 #include <stdint.h>
4 #include <stdio.h>
5
/*
 * reg_t: unsigned integer type used for every operand and result of the
 * instruction wrappers below -- 64 bits wide when building for x86-64,
 * 32 bits wide otherwise.
 */
#if defined(__x86_64)
typedef uint64_t reg_t;
#else
typedef uint32_t reg_t;
#endif
11
/*
 * insn1q(name, arg0): define
 *
 *     static inline reg_t <name>q(reg_t arg0)
 *
 * which executes the 64-bit ("q"-suffixed) form of the one-source
 * instruction <name> and returns the value left in the destination
 * register.  The source operand may live in a register or in memory.
 *
 * __asm__/__volatile__ are spelled out instead of the plain `asm`
 * keyword so that the wrapper also compiles in strict ISO modes
 * (e.g. -std=c11), where `asm` is not recognised.
 */
#define insn1q(name, arg0)                                              \
static inline reg_t name##q(reg_t arg0)                                 \
{                                                                       \
    reg_t result64;                                                     \
    __asm__ __volatile__ (#name "q %1, %0"                              \
                          : "=r"(result64)                              \
                          : "rm"(arg0));                                \
    return result64;                                                    \
}
19
/*
 * insn1l(name, arg0): define
 *
 *     static inline reg_t <name>l(reg_t arg0)
 *
 * which executes the 32-bit ("l"-suffixed) form of the one-source
 * instruction <name>.  The %k operand modifier selects the 32-bit view
 * of each operand, so the same reg_t variables can be used on both
 * 32-bit and 64-bit builds.
 *
 * __asm__/__volatile__ are spelled out instead of the plain `asm`
 * keyword so that the wrapper also compiles in strict ISO modes
 * (e.g. -std=c11), where `asm` is not recognised.
 */
#define insn1l(name, arg0)                                              \
static inline reg_t name##l(reg_t arg0)                                 \
{                                                                       \
    reg_t result32;                                                     \
    __asm__ __volatile__ (#name "l %k1, %k0"                            \
                          : "=r"(result32)                              \
                          : "rm"(arg0));                                \
    return result32;                                                    \
}
27
/*
 * insn2q(name, arg0, c0, arg1, c1): define
 *
 *     static inline reg_t <name>q(reg_t arg0, reg_t arg1)
 *
 * which executes the 64-bit ("q"-suffixed) three-operand instruction
 * <name> as "<name>q arg1, arg0, result" (AT&T operand order) and
 * returns the result.  c0 and c1 are the inline-asm constraint strings
 * for arg0 and arg1; they must match what the instruction encoding
 * allows for that operand position.
 *
 * __asm__/__volatile__ are spelled out instead of the plain `asm`
 * keyword so that the wrapper also compiles in strict ISO modes
 * (e.g. -std=c11), where `asm` is not recognised.
 */
#define insn2q(name, arg0, c0, arg1, c1)                                \
static inline reg_t name##q(reg_t arg0, reg_t arg1)                     \
{                                                                       \
    reg_t result64;                                                     \
    __asm__ __volatile__ (#name "q %2, %1, %0"                          \
                          : "=r"(result64)                              \
                          : c0(arg0), c1(arg1));                        \
    return result64;                                                    \
}
35
/*
 * insn2l(name, arg0, c0, arg1, c1): define
 *
 *     static inline reg_t <name>l(reg_t arg0, reg_t arg1)
 *
 * which executes the 32-bit ("l"-suffixed) three-operand instruction
 * <name> as "<name>l arg1, arg0, result" (AT&T operand order) and
 * returns the result.  The %k operand modifier selects the 32-bit view
 * of each operand.  c0 and c1 are the inline-asm constraint strings for
 * arg0 and arg1; they must match what the instruction encoding allows
 * for that operand position.
 *
 * __asm__/__volatile__ are spelled out instead of the plain `asm`
 * keyword so that the wrapper also compiles in strict ISO modes
 * (e.g. -std=c11), where `asm` is not recognised.
 */
#define insn2l(name, arg0, c0, arg1, c1)                                \
static inline reg_t name##l(reg_t arg0, reg_t arg1)                     \
{                                                                       \
    reg_t result32;                                                     \
    __asm__ __volatile__ (#name "l %k2, %k1, %k0"                       \
                          : "=r"(result32)                              \
                          : c0(arg0), c1(arg1));                        \
    return result32;                                                    \
}
43
44 #ifdef __x86_64
45 insn2q(pext, src, "r", mask, "rm")
46 insn2q(pdep, src, "r", mask, "rm")
47 insn2q(andn, clear, "rm", val, "r")
48 insn2q(bextr, range, "rm", val, "r")
49 insn2q(bzhi, pos, "rm", val, "r")
50 insn2q(rorx, val, "r", n, "i")
51 insn2q(sarx, val, "rm", n, "r")
52 insn2q(shlx, val, "rm", n, "r")
53 insn2q(shrx, val, "rm", n, "r")
insn1q(blsi,src)54 insn1q(blsi, src)
55 insn1q(blsmsk, src)
56 insn1q(blsr, src)
57 #endif
58 insn2l(pext, src, "r", mask, "rm")
59 insn2l(pdep, src, "r", mask, "rm")
60 insn2l(andn, clear, "rm", val, "r")
61 insn2l(bextr, range, "rm", val, "r")
62 insn2l(bzhi, pos, "rm", val, "r")
63 insn2l(rorx, val, "r", n, "i")
64 insn2l(sarx, val, "rm", n, "r")
65 insn2l(shlx, val, "rm", n, "r")
66 insn2l(shrx, val, "rm", n, "r")
67 insn1l(blsi, src)
68 insn1l(blsmsk, src)
69 insn1l(blsr, src)
70
71 int main(int argc, char *argv[]) {
72 uint64_t ehlo = 0x202020204f4c4845ull;
73 uint64_t mask = 0xa080800302020001ull;
74 reg_t result;
75
76 #ifdef __x86_64
77 /* 64 bits */
78 result = andnq(mask, ehlo);
79 assert(result == 0x002020204d4c4844);
80
81 result = pextq(ehlo, mask);
82 assert(result == 133);
83
84 result = pdepq(result, mask);
85 assert(result == (ehlo & mask));
86
87 result = pextq(-1ull, mask);
88 assert(result == 511); /* mask has 9 bits set */
89
90 result = pdepq(-1ull, mask);
91 assert(result == mask);
92
93 result = bextrq(mask, 0x3f00);
94 assert(result == (mask & ~INT64_MIN));
95
96 result = bextrq(mask, 0x1038);
97 assert(result == 0xa0);
98
99 result = bextrq(mask, 0x10f8);
100 assert(result == 0);
101
102 result = bextrq(0xfedcba9876543210ull, 0x7f00);
103 assert(result == 0xfedcba9876543210ull);
104
105 result = blsiq(0x30);
106 assert(result == 0x10);
107
108 result = blsiq(0x30ull << 32);
109 assert(result == 0x10ull << 32);
110
111 result = blsmskq(0x30);
112 assert(result == 0x1f);
113
114 result = blsrq(0x30);
115 assert(result == 0x20);
116
117 result = blsrq(0x30ull << 32);
118 assert(result == 0x20ull << 32);
119
120 result = bzhiq(mask, 0x3f);
121 assert(result == (mask & ~INT64_MIN));
122
123 result = bzhiq(mask, 0x1f);
124 assert(result == (mask & ~(-1 << 30)));
125
126 result = bzhiq(mask, 0x40);
127 assert(result == mask);
128
129 result = rorxq(0x2132435465768798, 8);
130 assert(result == 0x9821324354657687);
131
132 result = sarxq(0xffeeddccbbaa9988, 8);
133 assert(result == 0xffffeeddccbbaa99);
134
135 result = sarxq(0x77eeddccbbaa9988, 8 | 64);
136 assert(result == 0x0077eeddccbbaa99);
137
138 result = shrxq(0xffeeddccbbaa9988, 8);
139 assert(result == 0x00ffeeddccbbaa99);
140
141 result = shrxq(0x77eeddccbbaa9988, 8 | 192);
142 assert(result == 0x0077eeddccbbaa99);
143
144 result = shlxq(0xffeeddccbbaa9988, 8);
145 assert(result == 0xeeddccbbaa998800);
146 #endif
147
148 /* 32 bits */
149 result = andnl(mask, ehlo);
150 assert(result == 0x04d4c4844);
151
152 result = pextl((uint32_t) ehlo, mask);
153 assert(result == 5);
154
155 result = pdepl(result, mask);
156 assert(result == (uint32_t)(ehlo & mask));
157
158 result = pextl(-1u, mask);
159 assert(result == 7); /* mask has 3 bits set */
160
161 result = pdepl(-1u, mask);
162 assert(result == (uint32_t)mask);
163
164 result = bextrl(mask, 0x1f00);
165 assert(result == (mask & ~INT32_MIN));
166
167 result = bextrl(ehlo, 0x1018);
168 assert(result == 0x4f);
169
170 result = bextrl(mask, 0x1038);
171 assert(result == 0);
172
173 result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018);
174 assert(result == 0x5a);
175
176 result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00);
177 assert(result == 0x76543210u);
178
179 result = bextrl(-1, 0);
180 assert(result == 0);
181
182 result = blsil(0xffff);
183 assert(result == 1);
184
185 result = blsmskl(0x300);
186 assert(result == 0x1ff);
187
188 result = blsrl(0xffc);
189 assert(result == 0xff8);
190
191 result = bzhil(mask, 0xf);
192 assert(result == 1);
193
194 result = rorxl(0x65768798, 8);
195 assert(result == 0x98657687);
196
197 result = sarxl(0xffeeddcc, 8);
198 assert(result == 0xffffeedd);
199
200 result = sarxl(0x77eeddcc, 8 | 32);
201 assert(result == 0x0077eedd);
202
203 result = shrxl(0xffeeddcc, 8);
204 assert(result == 0x00ffeedd);
205
206 result = shrxl(0x77eeddcc, 8 | 128);
207 assert(result == 0x0077eedd);
208
209 result = shlxl(0xffeeddcc, 8);
210 assert(result == 0xeeddcc00);
211
212 return 0;
213 }
214
215