xref: /openbmc/qemu/target/hexagon/imported/alu.idef (revision 5054ba1066f1131502ddcb770743eb85937a95c7)
1/*
2 *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3 *
4 *  This program is free software; you can redistribute it and/or modify
5 *  it under the terms of the GNU General Public License as published by
6 *  the Free Software Foundation; either version 2 of the License, or
7 *  (at your option) any later version.
8 *
9 *  This program is distributed in the hope that it will be useful,
10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 *  GNU General Public License for more details.
13 *
14 *  You should have received a copy of the GNU General Public License
15 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18/*
19 * ALU Instructions
20 */
21
22
23/**********************************************/
24/* Add/Sub instructions                       */
25/**********************************************/
26
/*
 * 32-bit register add and subtract.
 * A2_add writes RsV+RtV to RdV.  A2_sub takes its operands in the order
 * (Rt32,Rs32) and writes RtV-RsV, i.e. the first syntactic operand is the
 * minuend.
 */
27Q6INSN(A2_add,"Rd32=add(Rs32,Rt32)",ATTRIBS(),
28"Add 32-bit registers",
29{ RdV=RsV+RtV;})
30
31Q6INSN(A2_sub,"Rd32=sub(Rt32,Rs32)",ATTRIBS(),
32"Subtract 32-bit registers",
33{ RdV=RtV-RsV;})
34
/*
 * COND_ALU(TAG,OPER,DESCR,SEMANTICS) expands to four predicated Q6INSN
 * definitions that share OPER, DESCR and SEMANTICS:
 *   TAG##t     "if (Pu4) "      guarded by fLSBOLD(PuV)
 *   TAG##f     "if (!Pu4) "     guarded by fLSBOLDNOT(PuV)
 *   TAG##tnew  "if (Pu4.new) "  guarded by fLSBNEW(PuN)
 *   TAG##fnew  "if (!Pu4.new) " guarded by fLSBNEWNOT(PuN)
 * When the guard is false the semantics are skipped and CANCEL is issued
 * instead.  All four variants carry ATTRIBS(A_ARCHV2).
 * SEMANTICS is pasted as a statement list, so it may contain ';' but must
 * not contain a top-level ','.
 */
35#define COND_ALU(TAG,OPER,DESCR,SEMANTICS)\
36Q6INSN(TAG##t,"if (Pu4) "OPER,ATTRIBS(A_ARCHV2),DESCR,{if(fLSBOLD(PuV)){SEMANTICS;} else {CANCEL;}})\
37Q6INSN(TAG##f,"if (!Pu4) "OPER,ATTRIBS(A_ARCHV2),DESCR,{if(fLSBOLDNOT(PuV)){SEMANTICS;} else {CANCEL;}})\
38Q6INSN(TAG##tnew,"if (Pu4.new) " OPER,ATTRIBS(A_ARCHV2),DESCR,{if(fLSBNEW(PuN)){SEMANTICS;} else {CANCEL;}})\
39Q6INSN(TAG##fnew,"if (!Pu4.new) "OPER,ATTRIBS(A_ARCHV2),DESCR,{if(fLSBNEWNOT(PuN)){SEMANTICS;} else {CANCEL;}})
40
/*
 * Predicated forms of the basic 32-bit ALU operations.  Each line yields
 * the t/f/tnew/fnew variants generated by COND_ALU.
 * A2_paddi applies fIMMEXT to the #s8 immediate before adding it.
 */
41COND_ALU(A2_padd,"Rd32=add(Rs32,Rt32)","Conditionally Add 32-bit registers",RdV=RsV+RtV)
42COND_ALU(A2_psub,"Rd32=sub(Rt32,Rs32)","Conditionally Subtract 32-bit registers",RdV=RtV-RsV)
43COND_ALU(A2_paddi,"Rd32=add(Rs32,#s8)","Conditionally Add Register and immediate",fIMMEXT(siV); RdV=RsV+siV)
44COND_ALU(A2_pxor,"Rd32=xor(Rs32,Rt32)","Conditionally XOR registers",RdV=RsV^RtV)
45COND_ALU(A2_pand,"Rd32=and(Rs32,Rt32)","Conditionally AND registers",RdV=RsV&RtV)
46COND_ALU(A2_por,"Rd32=or(Rs32,Rt32)","Conditionally OR registers",RdV=RsV|RtV)
47
/*
 * Predicated sign/zero extension and halfword shifts (t/f/tnew/fnew
 * variants via COND_ALU).
 * sxtb/zxtb/sxth/zxth go through fSXTN/fZXTN with the source width (8 or
 * 16) and a destination width of 32.  aslh/asrh shift RsV left/right by
 * 16 bits; their descriptions name the shift rather than repeating the
 * zero-extend text of the previous entry.
 */
48COND_ALU(A4_psxtb,"Rd32=sxtb(Rs32)","Conditionally sign-extend byte", RdV=fSXTN(8,32,RsV))
49COND_ALU(A4_pzxtb,"Rd32=zxtb(Rs32)","Conditionally zero-extend byte", RdV=fZXTN(8,32,RsV))
50COND_ALU(A4_psxth,"Rd32=sxth(Rs32)","Conditionally sign-extend halfword", RdV=fSXTN(16,32,RsV))
51COND_ALU(A4_pzxth,"Rd32=zxth(Rs32)","Conditionally zero-extend halfword", RdV=fZXTN(16,32,RsV))
52COND_ALU(A4_paslh,"Rd32=aslh(Rs32)","Conditionally arithmetic shift left by halfword", RdV=RsV<<16)
53COND_ALU(A4_pasrh,"Rd32=asrh(Rs32)","Conditionally arithmetic shift right by halfword", RdV=RsV>>16)
54
55
/*
 * Saturating 32-bit add and subtract.
 * Both operands are passed through fSE32_64 before the arithmetic and the
 * result is passed through fSAT (helpers defined elsewhere; presumably
 * sign-extend to 64 bits and saturate to signed 32 bits).
 * A2_subsat computes RtV - RsV, matching its (Rt32,Rs32) operand order.
 */
56Q6INSN(A2_addsat,"Rd32=add(Rs32,Rt32):sat",ATTRIBS(),
57"Add 32-bit registers with saturation",
58{ RdV=fSAT(fSE32_64(RsV)+fSE32_64(RtV)); })
59
60Q6INSN(A2_subsat,"Rd32=sub(Rt32,Rs32):sat",ATTRIBS(),
61"Subtract 32-bit registers with saturation",
62{ RdV=fSAT(fSE32_64(RtV) - fSE32_64(RsV)); })
63
64
/*
 * Add-immediate forms.
 * A2_addi: applies fIMMEXT to the #s16 immediate, then RdV = RsV + siV.
 * C4_addipc: RdV = fREAD_PC() + fIMMEXT(uiV); here fIMMEXT is used as an
 * expression rather than as a separate statement.
 */
65Q6INSN(A2_addi,"Rd32=add(Rs32,#s16)",ATTRIBS(),
66"Add a signed immediate to a register",
67{ fIMMEXT(siV); RdV=RsV+siV;})
68
69
70Q6INSN(C4_addipc,"Rd32=add(pc,#u6)",ATTRIBS(),
71"Add immediate to PC",
72{ RdV=fREAD_PC()+fIMMEXT(uiV);})
73
74
75
76/**********************************************/
77/* Single-precision HL forms                  */
78/* These insns and the SP mpy are the ones    */
79/* that can do .HL stuff                      */
80/**********************************************/
/*
 * STD_HL_INSN(TAG,OPER,AOPER,ATR,SEM) emits four instructions
 * A2_<TAG>_{ll,lh,hl,hh}.  The syntax string is OPER, the operand pair
 * "(Rt.X32,Rs.Y32)" and the suffix AOPER.  SEM is a two-argument macro
 * that receives fGETHALF(n,RtV) and fGETHALF(n,RsV), with n = 0 for an
 * L operand and n = 1 for an H operand.  The description string is empty.
 */
81#define STD_HL_INSN(TAG,OPER,AOPER,ATR,SEM)\
82Q6INSN(A2_##TAG##_ll, OPER"(Rt.L32,Rs.L32)"AOPER,    ATR,"",{SEM(fGETHALF(0,RtV),fGETHALF(0,RsV));})\
83Q6INSN(A2_##TAG##_lh, OPER"(Rt.L32,Rs.H32)"AOPER,    ATR,"",{SEM(fGETHALF(0,RtV),fGETHALF(1,RsV));})\
84Q6INSN(A2_##TAG##_hl, OPER"(Rt.H32,Rs.L32)"AOPER,    ATR,"",{SEM(fGETHALF(1,RtV),fGETHALF(0,RsV));})\
85Q6INSN(A2_##TAG##_hh, OPER"(Rt.H32,Rs.H32)"AOPER,    ATR,"",{SEM(fGETHALF(1,RtV),fGETHALF(1,RsV));})
86
/*
 * SUBSTD_HL_INSN is the two-variant subset: Rt is always the low half and
 * only the Rs half varies.
 * NOTE(review): the second variant is tagged _hl although its operands are
 * (Rt.L32,Rs.H32), which STD_HL_INSN tags _lh.  The tags are referenced by
 * name elsewhere, so confirm against the encoding tables before renaming.
 */
87#define SUBSTD_HL_INSN(TAG,OPER,AOPER,ATR,SEM)\
88Q6INSN(A2_##TAG##_ll, OPER"(Rt.L32,Rs.L32)"AOPER,    ATR,"",{SEM(fGETHALF(0,RtV),fGETHALF(0,RsV));})\
89Q6INSN(A2_##TAG##_hl, OPER"(Rt.L32,Rs.H32)"AOPER,    ATR,"",{SEM(fGETHALF(0,RtV),fGETHALF(1,RsV));})
90
91
/*
 * Halfword add/sub instantiations.  HLSEM(A,B) is redefined before each
 * use; A is the selected half of Rt and B the selected half of Rs.
 *
 * The *_l16 forms use SUBSTD_HL_INSN (two variants) and leave the result
 * in the low bits: plain forms pass A+B or A-B through fSXTN(16,32,..),
 * :sat forms pass it through fSATH.
 * The *_h16 forms use STD_HL_INSN (four variants) and shift the result
 * left by 16 (":<<16" in the syntax); :sat forms apply fSATH before the
 * shift.
 */
91#undef HLSEM
92#define HLSEM(A,B) RdV=fSXTN(16,32,(A+B))
93SUBSTD_HL_INSN(addh_l16,"Rd32=add","",ATTRIBS(),HLSEM)
94
95#undef HLSEM
96#define HLSEM(A,B) RdV=fSATH(A+B)
97SUBSTD_HL_INSN(addh_l16_sat,"Rd32=add",":sat",ATTRIBS(),HLSEM)
98
99#undef HLSEM
100#define HLSEM(A,B) RdV=fSXTN(16,32,(A-B))
101SUBSTD_HL_INSN(subh_l16,"Rd32=sub","",ATTRIBS(),HLSEM)
102
103#undef HLSEM
104#define HLSEM(A,B) RdV=fSATH(A-B)
105SUBSTD_HL_INSN(subh_l16_sat,"Rd32=sub",":sat",ATTRIBS(),HLSEM)
106
107#undef HLSEM
108#define HLSEM(A,B) RdV=(A+B)<<16
109STD_HL_INSN(addh_h16,"Rd32=add",":<<16",ATTRIBS(),HLSEM)
110
111#undef HLSEM
112#define HLSEM(A,B) RdV=(fSATH(A+B))<<16
113STD_HL_INSN(addh_h16_sat,"Rd32=add",":sat:<<16",ATTRIBS(),HLSEM)
114
115#undef HLSEM
116#define HLSEM(A,B) RdV=(A-B)<<16
117STD_HL_INSN(subh_h16,"Rd32=sub",":<<16",ATTRIBS(),HLSEM)
118
119#undef HLSEM
120#define HLSEM(A,B) RdV=(fSATH(A-B))<<16
121STD_HL_INSN(subh_h16_sat,"Rd32=sub",":sat:<<16",ATTRIBS(),HLSEM)
123
124
125
126
/*
 * Unconditional halfword shifts: RdV is RsV shifted left (aslh) or right
 * (asrh) by 16 bits.
 * NOTE(review): whether the right shift is arithmetic depends on the
 * signedness of RsV, which is declared outside this file.
 */
127Q6INSN(A2_aslh,"Rd32=aslh(Rs32)",ATTRIBS(),
128"Arithmetic Shift Left by Halfword",{ RdV=RsV<<16; })
129
130Q6INSN(A2_asrh,"Rd32=asrh(Rs32)",ATTRIBS(),
131"Arithmetic Shift Right by Halfword",{ RdV=RsV>>16; })
132
133
134/* 64-bit versions */
135
/*
 * Register-pair (64-bit) add and subtract.
 *   A2_addp     RddV = RssV + RttV
 *   A2_addpsat  same sum via fADDSAT64(RddV,RssV,RttV)
 *   A2_addspl   RddV = RttV + word 0 of RssV extended with fSXTN(32,64,..)
 *   A2_addsph   RddV = RttV + word 1 of RssV extended with fSXTN(32,64,..)
 *   A2_subp     RddV = RttV - RssV (operands are written Rtt32,Rss32)
 * NOTE(review): the description strings of the three ARCHV3 add forms are
 * all just "Add"; the ":sat", ":raw:lo" and ":raw:hi" syntax suffixes are
 * what distinguish them.
 */
136Q6INSN(A2_addp,"Rdd32=add(Rss32,Rtt32)",ATTRIBS(),
137"Add",
138{ RddV=RssV+RttV;})
139
140Q6INSN(A2_addpsat,"Rdd32=add(Rss32,Rtt32):sat",ATTRIBS(A_ARCHV3),
141"Add",
142{ fADDSAT64(RddV,RssV,RttV);})
143
144Q6INSN(A2_addspl,"Rdd32=add(Rss32,Rtt32):raw:lo",ATTRIBS(A_ARCHV3),
145"Add",
146{ RddV=RttV+fSXTN(32,64,fGETWORD(0,RssV));})
147
148Q6INSN(A2_addsph,"Rdd32=add(Rss32,Rtt32):raw:hi",ATTRIBS(A_ARCHV3),
149"Add",
150{ RddV=RttV+fSXTN(32,64,fGETWORD(1,RssV));})
151
152Q6INSN(A2_subp,"Rdd32=sub(Rtt32,Rss32)",ATTRIBS(),
153"Sub",
154{ RddV=RttV-RssV;})
155
156/* NEG and ABS */
157
/*
 * A2_negsat: negates RsV after widening it with fCAST8s, then applies
 * fSAT to the result.
 * A2_abs: RdV = fABS(RsV) with no saturation step.
 */
158Q6INSN(A2_negsat,"Rd32=neg(Rs32):sat",ATTRIBS(),
159"Arithmetic negate register", { RdV = fSAT(-fCAST8s(RsV)); })
160
161Q6INSN(A2_abs,"Rd32=abs(Rs32)",ATTRIBS(),
162"Absolute Value register", { RdV = fABS(RsV); })
163
/*
 * A2_abssat: widens RsV with fCAST4_8s, takes fABS of the widened value
 * and applies fSAT to the result.  The description names the operation
 * actually performed (absolute value, not negate).
 */
164Q6INSN(A2_abssat,"Rd32=abs(Rs32):sat",ATTRIBS(),
165"Absolute Value register with saturation", { RdV = fSAT(fABS(fCAST4_8s(RsV))); })
166
/*
 * A2_vconj: treats Rss as four halfwords.  The odd halfwords (1 and 3)
 * are negated and passed through fSATN(16,..); the even halfwords (0 and
 * 2) are copied unchanged.
 */
167Q6INSN(A2_vconj,"Rdd32=vconj(Rss32):sat",ATTRIBS(A_ARCHV2),
168"Vector Complex conjugate of Rss",
169{  fSETHALF(1,RddV,fSATN(16,-fGETHALF(1,RssV)));
170   fSETHALF(0,RddV,fGETHALF(0,RssV));
171   fSETHALF(3,RddV,fSATN(16,-fGETHALF(3,RssV)));
172   fSETHALF(2,RddV,fGETHALF(2,RssV));
173})
174
175
176/* 64-bit versions */
177
/*
 * Register-pair negate and absolute value; neither form saturates.
 */
178Q6INSN(A2_negp,"Rdd32=neg(Rss32)",ATTRIBS(),
179"Arithmetic negate register", { RddV = -RssV; })
180
181Q6INSN(A2_absp,"Rdd32=abs(Rss32)",ATTRIBS(),
182"Absolute Value register", { RddV = fABS(RssV); })
183
184
185/* MIN and MAX  R */
186
/*
 * 32-bit min/max.  The unsigned forms wrap both operands in fCAST4u
 * before calling fMAX/fMIN.  The max forms take (Rs32,Rt32) while the min
 * forms take (Rt32,Rs32); the helper arguments follow the same order.
 */
187Q6INSN(A2_max,"Rd32=max(Rs32,Rt32)",ATTRIBS(),
188"Maximum of two registers",
189{ RdV = fMAX(RsV,RtV); })
190
191Q6INSN(A2_maxu,"Rd32=maxu(Rs32,Rt32)",ATTRIBS(),
192"Maximum of two registers (unsigned)",
193{ RdV = fMAX(fCAST4u(RsV),fCAST4u(RtV)); })
194
195Q6INSN(A2_min,"Rd32=min(Rt32,Rs32)",ATTRIBS(),
196"Minimum of two registers",
197{ RdV = fMIN(RtV,RsV); })
198
199Q6INSN(A2_minu,"Rd32=minu(Rt32,Rs32)",ATTRIBS(),
200"Minimum of two registers (unsigned)",
201{ RdV = fMIN(fCAST4u(RtV),fCAST4u(RsV)); })
202
203/* MIN and MAX Pairs */
/*
 * Register-pair min/max, all tagged A_ARCHV3.  The unsigned forms wrap
 * both operands in fCAST8u.  The "#if 1" guard is always true, so the
 * four definitions are unconditionally included.
 */
204#if 1
205Q6INSN(A2_maxp,"Rdd32=max(Rss32,Rtt32)",ATTRIBS(A_ARCHV3),
206"Maximum of two register pairs",
207{ RddV = fMAX(RssV,RttV); })
208
209Q6INSN(A2_maxup,"Rdd32=maxu(Rss32,Rtt32)",ATTRIBS(A_ARCHV3),
210"Maximum of two register pairs (unsigned)",
211{ RddV = fMAX(fCAST8u(RssV),fCAST8u(RttV)); })
212
213Q6INSN(A2_minp,"Rdd32=min(Rtt32,Rss32)",ATTRIBS(A_ARCHV3),
214"Minimum of two register pairs",
215{ RddV = fMIN(RttV,RssV); })
216
217Q6INSN(A2_minup,"Rdd32=minu(Rtt32,Rss32)",ATTRIBS(A_ARCHV3),
218"Minimum of two register pairs (unsigned)",
219{ RddV = fMIN(fCAST8u(RttV),fCAST8u(RssV)); })
220#endif
221
222/**********************************************/
223/* Register and Immediate Transfers           */
224/**********************************************/
225
/*
 * Instructions with no architectural semantics in this file.
 * A2_nop: the whole (empty) body is wrapped in fHIDE.
 * A4_ext: the immediate extender; its body only contains an empty fHIDE()
 * and the instruction is marked A_IT_EXTENDER.  Per its description it
 * supplies the upper 26 immediate bits for the following instruction.
 */
226Q6INSN(A2_nop,"nop",ATTRIBS(A_IT_NOP),
227"Nop (32-bit encoding)",
228 fHIDE( { }  ))
229
230
231Q6INSN(A4_ext,"immext(#u26:6)",ATTRIBS(A_IT_EXTENDER),
232"This instruction carries the 26 most-significant immediate bits for the next instruction",
233{ fHIDE(); })
234
235
/*
 * Register/immediate transfers and width extensions into a 32-bit
 * destination.
 *   A2_tfr    copies RsV
 *   A2_tfrsi  applies fIMMEXT to the #s16 immediate and copies it
 *   A2_sxtb   fSXTN(8,32,RsV)
 *   A2_zxth   fZXTN(16,32,RsV)
 *   A2_sxth   fSXTN(16,32,RsV)
 * No unconditional zxtb is defined in this part of the file.
 */
236Q6INSN(A2_tfr,"Rd32=Rs32",ATTRIBS(),
237"tfr register",{ RdV=RsV;})
238
239Q6INSN(A2_tfrsi,"Rd32=#s16",ATTRIBS(),
240"transfer signed immediate to register",{ fIMMEXT(siV); RdV=siV;})
241
242Q6INSN(A2_sxtb,"Rd32=sxtb(Rs32)",ATTRIBS(),
243"Sign extend byte", {RdV = fSXTN(8,32,RsV);})
244
245Q6INSN(A2_zxth,"Rd32=zxth(Rs32)",ATTRIBS(),
246"Zero extend half", {RdV = fZXTN(16,32,RsV);})
247
248Q6INSN(A2_sxth,"Rd32=sxth(Rs32)",ATTRIBS(),
249"Sign extend half", {RdV = fSXTN(16,32,RsV);})
250
/*
 * combine(): build a register pair from two 32-bit values.  In every form
 * the first syntactic operand lands in word 1 and the second in word 0.
 *
 * Immediate extension (fIMMEXT) is applied to exactly one immediate:
 *   A4_combineri / A4_combineir  the single #s8
 *   A2_combineii (#s8,#S8)       the first immediate, siV
 *   A4_combineii (#s8,#U6)       the second immediate, UiV
 */
251Q6INSN(A2_combinew,"Rdd32=combine(Rs32,Rt32)",ATTRIBS(),
252"Combine two words into a register pair",
253{ fSETWORD(0,RddV,RtV);
254  fSETWORD(1,RddV,RsV);
255})
256
257Q6INSN(A4_combineri,"Rdd32=combine(Rs32,#s8)",ATTRIBS(),
258"Combine a word and an immediate into a register pair",
259{ fIMMEXT(siV); fSETWORD(0,RddV,siV);
260  fSETWORD(1,RddV,RsV);
261})
262
263Q6INSN(A4_combineir,"Rdd32=combine(#s8,Rs32)",ATTRIBS(),
264"Combine a word and an immediate into a register pair",
265{ fIMMEXT(siV); fSETWORD(0,RddV,RsV);
266  fSETWORD(1,RddV,siV);
267})
268
269
270
271Q6INSN(A2_combineii,"Rdd32=combine(#s8,#S8)",ATTRIBS(A_ARCHV2),
272"Set two small immediates",
273{ fIMMEXT(siV); fSETWORD(0,RddV,SiV); fSETWORD(1,RddV,siV); })
274
275Q6INSN(A4_combineii,"Rdd32=combine(#s8,#U6)",ATTRIBS(),"Set two small immediates",
276{ fIMMEXT(UiV); fSETWORD(0,RddV,UiV); fSETWORD(1,RddV,siV); })
277
278
/*
 * combine() of two halfwords into one 32-bit register.  The selected half
 * of Rt is read with fGETUHALF and shifted into bits 31:16; the selected
 * half of Rs is read with fGETUHALF and ORed into the low bits.  The tag
 * suffix gives the Rt half first, then the Rs half (h = index 1,
 * l = index 0).
 */
279Q6INSN(A2_combine_hh,"Rd32=combine(Rt.H32,Rs.H32)",ATTRIBS(),
280"Combine two halfs into a register", {RdV = (fGETUHALF(1,RtV)<<16) | fGETUHALF(1,RsV);})
281
282Q6INSN(A2_combine_hl,"Rd32=combine(Rt.H32,Rs.L32)",ATTRIBS(),
283"Combine two halfs into a register", {RdV = (fGETUHALF(1,RtV)<<16) | fGETUHALF(0,RsV);})
284
285Q6INSN(A2_combine_lh,"Rd32=combine(Rt.L32,Rs.H32)",ATTRIBS(),
286"Combine two halfs into a register", {RdV = (fGETUHALF(0,RtV)<<16) | fGETUHALF(1,RsV);})
287
288Q6INSN(A2_combine_ll,"Rd32=combine(Rt.L32,Rs.L32)",ATTRIBS(),
289"Combine two halfs into a register", {RdV = (fGETUHALF(0,RtV)<<16) | fGETUHALF(0,RsV);})
290
/*
 * Partial immediate loads: Rx is read-modify-write.  tfril stores the
 * #u16 immediate into halfword 0 and tfrih into halfword 1; the other
 * half is left unchanged per the descriptions.  No fIMMEXT is applied.
 */
291Q6INSN(A2_tfril,"Rx.L32=#u16",ATTRIBS(),
292"Set low 16-bits, leave upper 16 unchanged",{ fSETHALF(0,RxV,uiV);})
293
294Q6INSN(A2_tfrih,"Rx.H32=#u16",ATTRIBS(),
295"Set high 16-bits, leave low 16 unchanged",{ fSETHALF(1,RxV,uiV);})
296
/*
 * Plain copies between control registers (C) and general registers (R),
 * in single (32-bit) and pair (64-bit) forms.
 * NOTE(review): the two pair forms reuse the description strings of the
 * single-register forms.
 */
297Q6INSN(A2_tfrcrr,"Rd32=Cs32",ATTRIBS(),
298"transfer control register to general register",{ RdV=CsV;})
299
300Q6INSN(A2_tfrrcr,"Cd32=Rs32",ATTRIBS(),
301"transfer general register to control register",{ CdV=RsV;})
302
303Q6INSN(A4_tfrcpp,"Rdd32=Css32",ATTRIBS(),
304"transfer control register to general register",{ RddV=CssV;})
305
306Q6INSN(A4_tfrpcp,"Cdd32=Rss32",ATTRIBS(),
307"transfer general register to control register",{ CddV=RssV;})
308
309
310/**********************************************/
311/* Logicals                                   */
312/**********************************************/
313
/*
 * 32-bit bitwise AND/OR/XOR of Rs and Rt.
 * M2_xor_xacc additionally XORs the result into the read-write register
 * Rx (RxV ^= RsV ^ RtV).
 */
314Q6INSN(A2_and,"Rd32=and(Rs32,Rt32)",ATTRIBS(),
315"logical AND",{ RdV=RsV&RtV;})
316
317Q6INSN(A2_or,"Rd32=or(Rs32,Rt32)",ATTRIBS(),
318"logical OR",{ RdV=RsV|RtV;})
319
320Q6INSN(A2_xor,"Rd32=xor(Rs32,Rt32)",ATTRIBS(),
321"logical XOR",{ RdV=RsV^RtV;})
322
323Q6INSN(M2_xor_xacc,"Rx32^=xor(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
324"logical XOR with XOR accumulation",{ RxV^=RsV^RtV;})
325
/*
 * Pair XOR-accumulate and the and-not / or-not forms.
 *   M4_xor_xacc  RxxV ^= RssV ^ RttV
 *   A4_andn      RdV  = RtV  & ~RsV
 *   A4_orn       RdV  = RtV  | ~RsV
 *   A4_andnp     RddV = RttV & ~RssV
 *   A4_ornp      RddV = RttV | ~RssV
 * In each not-form the complemented operand is the second one (Rs/Rss).
 *
 * The attribute argument is written as an explicit empty ATTRIBS(), like
 * every other attribute-less instruction in this file, instead of being
 * left as an empty macro argument.
 */
326Q6INSN(M4_xor_xacc,"Rxx32^=xor(Rss32,Rtt32)",ATTRIBS(),
327"logical XOR with XOR accumulation",{ RxxV^=RssV^RttV;})
328
329
330
331Q6INSN(A4_andn,"Rd32=and(Rt32,~Rs32)",ATTRIBS(),
332"And-Not", { RdV = (RtV & ~RsV); })
333
334Q6INSN(A4_orn,"Rd32=or(Rt32,~Rs32)",ATTRIBS(),
335"Or-Not", { RdV = (RtV | ~RsV); })
336
337
338Q6INSN(A4_andnp,"Rdd32=and(Rtt32,~Rss32)",ATTRIBS(),
339"And-Not", { RddV = (RttV & ~RssV); })
340
341Q6INSN(A4_ornp,"Rdd32=or(Rtt32,~Rss32)",ATTRIBS(),
342"Or-Not", { RddV = (RttV | ~RssV); })
343
344
345
346
347/********************/
348/* Compound add-add */
349/********************/
350
/*
 * Three-input add forms with an extendable #s6 immediate.
 *   S4_addaddi  RdV = RsV + RuV + imm
 *   S4_subaddi  RdV = RsV - RuV + imm   (syntax: add(Rs,sub(#s6,Ru)))
 * fIMMEXT(siV) is used as an expression inside the sum.
 */
351Q6INSN(S4_addaddi,"Rd32=add(Rs32,add(Ru32,#s6))",ATTRIBS(),
352        "3-input add",
353        { RdV = RsV + RuV + fIMMEXT(siV); })
354
355
356Q6INSN(S4_subaddi,"Rd32=add(Rs32,sub(#s6,Ru32))",ATTRIBS(),
357        "3-input sub",
358        { RdV = RsV - RuV + fIMMEXT(siV); })
359
360
361
362/****************************/
363/* Compound logical-logical */
364/****************************/
365
/*
 * Compound logical-logical, register forms.  The inner operation combines
 * Rs and Rt (and, and-not, or, xor; the and-not forms complement Rt) and
 * the outer operation folds that result into the read-write register Rx.
 * First group: outer AND (Rx &= ...).  Second group: outer OR (Rx |= ...).
 */
366Q6INSN(M4_and_and,"Rx32&=and(Rs32,Rt32)",ATTRIBS(),
367"Compound And-And", { RxV &= (RsV & RtV); })
368
369Q6INSN(M4_and_andn,"Rx32&=and(Rs32,~Rt32)",ATTRIBS(),
370"Compound And-Andn", { RxV &= (RsV & ~RtV); })
371
372Q6INSN(M4_and_or,"Rx32&=or(Rs32,Rt32)",ATTRIBS(),
373"Compound And-Or", { RxV &= (RsV | RtV); })
374
375Q6INSN(M4_and_xor,"Rx32&=xor(Rs32,Rt32)",ATTRIBS(),
376"Compound And-xor", { RxV &= (RsV ^ RtV); })
377
378
379
380Q6INSN(M4_or_and,"Rx32|=and(Rs32,Rt32)",ATTRIBS(),
381"Compound Or-And", { RxV |= (RsV & RtV); })
382
383Q6INSN(M4_or_andn,"Rx32|=and(Rs32,~Rt32)",ATTRIBS(),
384"Compound Or-AndN", { RxV |= (RsV & ~RtV); })
385
386Q6INSN(M4_or_or,"Rx32|=or(Rs32,Rt32)",ATTRIBS(),
387"Compound Or-Or", { RxV |= (RsV | RtV); })
388
389Q6INSN(M4_or_xor,"Rx32|=xor(Rs32,Rt32)",ATTRIBS(),
390"Compound Or-xor", { RxV |= (RsV ^ RtV); })
391
392
/*
 * Compound OR-of-AND with an extendable #s10 immediate.
 *   S4_or_andix  RxV = RuV | (RxV & imm)   Rx is the AND input
 *   S4_or_andi   RxV = RxV | (RsV & imm)   Rx is the OR accumulator
 */
393Q6INSN(S4_or_andix,"Rx32=or(Ru32,and(Rx32,#s10))",ATTRIBS(),
394"Compound Or-And", { RxV = RuV | (RxV & fIMMEXT(siV)); })
395
396Q6INSN(S4_or_andi,"Rx32|=and(Rs32,#s10)",ATTRIBS(),
397"Compound Or-And", { RxV = RxV | (RsV & fIMMEXT(siV)); })
398
/*
 * S4_or_ori: RxV = RxV | (RsV | imm) with an extendable #s10 immediate.
 * Both the inner and the outer operation are OR, which the description
 * now reflects.
 */
399Q6INSN(S4_or_ori,"Rx32|=or(Rs32,#s10)",ATTRIBS(),
400"Compound Or-Or", { RxV = RxV | (RsV | fIMMEXT(siV)); })
401
402
403
404
/*
 * Compound logical with an outer XOR: the AND or OR of Rs and Rt is
 * XORed into the read-write register Rx.
 */
405Q6INSN(M4_xor_and,"Rx32^=and(Rs32,Rt32)",ATTRIBS(),
406"Compound Xor-And", { RxV ^= (RsV & RtV); })
407
408Q6INSN(M4_xor_or,"Rx32^=or(Rs32,Rt32)",ATTRIBS(),
409"Compound Xor-Or", { RxV ^= (RsV | RtV); })
410
/*
 * M4_xor_andn: RxV ^= (RsV & ~RtV).  The description is spelled
 * "Xor-AndN" so it is distinguishable from M4_xor_and and matches the
 * "Or-AndN" naming used for M4_or_andn.
 */
411Q6INSN(M4_xor_andn,"Rx32^=and(Rs32,~Rt32)",ATTRIBS(),
412"Compound Xor-AndN", { RxV ^= (RsV & ~RtV); })
413
414
415
416
417
418
/*
 * Register-with-immediate ALU forms (#s10, extended via fIMMEXT before
 * use).  A2_subri subtracts the register from the immediate (siV - RsV).
 */
419Q6INSN(A2_subri,"Rd32=sub(#s10,Rs32)",ATTRIBS(A_ARCHV2),
420"Subtract register from immediate",{ fIMMEXT(siV); RdV=siV-RsV;})
421
422Q6INSN(A2_andir,"Rd32=and(Rs32,#s10)",ATTRIBS(A_ARCHV2),
423"logical AND with immediate",{ fIMMEXT(siV); RdV=RsV&siV;})
424
425Q6INSN(A2_orir,"Rd32=or(Rs32,#s10)",ATTRIBS(A_ARCHV2),
426"logical OR with immediate",{ fIMMEXT(siV); RdV=RsV|siV;})
427
428
429
430
/*
 * Register-pair (64-bit) bitwise AND, OR, XOR and NOT.
 */
431Q6INSN(A2_andp,"Rdd32=and(Rss32,Rtt32)",ATTRIBS(),
432"logical AND pair",{ RddV=RssV&RttV;})
433
434Q6INSN(A2_orp,"Rdd32=or(Rss32,Rtt32)",ATTRIBS(),
435"logical OR pair",{ RddV=RssV|RttV;})
436
437Q6INSN(A2_xorp,"Rdd32=xor(Rss32,Rtt32)",ATTRIBS(),
438"logical eXclusive OR pair",{ RddV=RssV^RttV;})
439
440Q6INSN(A2_notp,"Rdd32=not(Rss32)",ATTRIBS(),
441"logical NOT pair",{ RddV=~RssV;})
442
/*
 * Width conversion and saturation.
 *   A2_sxtw      RddV = fCAST4_8s(RsV)
 *   A2_sat       RdV  = fSAT(RssV), 64-bit source
 *   A2_roundsat  adds 0x80000000 to RssV through fADDSAT64 into a hidden
 *                64-bit temporary, then returns word 1 of that temporary
 *   A2_sath / A2_satuh / A2_satub / A2_satb
 *                apply fSATH / fSATUH / fSATUB / fSATB to the 32-bit
 *                source
 * The saturation helpers are defined elsewhere; the target ranges quoted
 * in the description strings are not visible in this file.
 */
443Q6INSN(A2_sxtw,"Rdd32=sxtw(Rs32)",ATTRIBS(),
444"Sign extend 32-bit word to 64-bit pair",
445{ RddV = fCAST4_8s(RsV); })
446
447Q6INSN(A2_sat,"Rd32=sat(Rss32)",ATTRIBS(),
448"Saturate to 32-bit Signed",
449{ RdV = fSAT(RssV); })
450
451Q6INSN(A2_roundsat,"Rd32=round(Rss32):sat",ATTRIBS(),
452"Round & Saturate to 32-bit Signed",
453{ fHIDE(size8s_t tmp;) fADDSAT64(tmp,RssV,0x080000000ULL); RdV = fGETWORD(1,tmp); })
454
455Q6INSN(A2_sath,"Rd32=sath(Rs32)",ATTRIBS(),
456"Saturate to 16-bit Signed",
457{ RdV = fSATH(RsV); })
458
459Q6INSN(A2_satuh,"Rd32=satuh(Rs32)",ATTRIBS(),
460"Saturate to 16-bit Unsigned",
461{ RdV = fSATUH(RsV); })
462
463Q6INSN(A2_satub,"Rd32=satub(Rs32)",ATTRIBS(),
464"Saturate to 8-bit Unsigned",
465{ RdV = fSATUB(RsV); })
466
467Q6INSN(A2_satb,"Rd32=satb(Rs32)",ATTRIBS(A_ARCHV2),
468"Saturate to 8-bit Signed",
469{ RdV = fSATB(RsV); })
470
471/**********************************************/
472/* Vector Add                                 */
473/**********************************************/
474
/*
 * Byte-wise vector add over the eight bytes of a register pair.  Both
 * operands are read as unsigned bytes (fGETUBYTE).
 * A2_vaddub stores the sum directly with fSETBYTE; A2_vaddubs first
 * passes it through fSATUN(8,..).
 * NOTE(review): both entries share the description "Add vector of bytes";
 * only the ":sat" syntax suffix marks the saturating form.
 */
475Q6INSN(A2_vaddub,"Rdd32=vaddub(Rss32,Rtt32)",ATTRIBS(),
476"Add vector of bytes",
477{
478        fHIDE(int i;)
479        for (i = 0; i < 8; i++) {
480            fSETBYTE(i,RddV,(fGETUBYTE(i,RssV)+fGETUBYTE(i,RttV)));
481        }
482})
483
484Q6INSN(A2_vaddubs,"Rdd32=vaddub(Rss32,Rtt32):sat",ATTRIBS(),
485"Add vector of bytes",
486{
487        fHIDE(int i;)
488        for (i = 0; i < 8; i++) {
489            fSETBYTE(i,RddV,fSATUN(8,fGETUBYTE(i,RssV)+fGETUBYTE(i,RttV)));
490        }
491})
492
/*
 * Halfword-wise vector add over the four halfwords of a register pair.
 *   A2_vaddh     signed reads (fGETHALF), sum stored as-is
 *   A2_vaddhs    signed reads, sum passed through fSATN(16,..)
 *   A2_vadduhs   unsigned reads (fGETUHALF), sum through fSATUN(16,..)
 *   A5_vaddhubs  signed reads, sum through fSATUB and packed as bytes
 *                0..3 of the 32-bit destination Rd
 */
493Q6INSN(A2_vaddh,"Rdd32=vaddh(Rss32,Rtt32)",ATTRIBS(),
494"Add vector of half integers",
495{
496        fHIDE(int i;)
497        for (i=0;i<4;i++) {
498            fSETHALF(i,RddV,fGETHALF(i,RssV)+fGETHALF(i,RttV));
499        }
500})
501
502Q6INSN(A2_vaddhs,"Rdd32=vaddh(Rss32,Rtt32):sat",ATTRIBS(),
503"Add vector of half integers with saturation",
504{
505        fHIDE(int i;)
506        for (i=0;i<4;i++) {
507            fSETHALF(i,RddV,fSATN(16,fGETHALF(i,RssV)+fGETHALF(i,RttV)));
508        }
509})
510
511Q6INSN(A2_vadduhs,"Rdd32=vadduh(Rss32,Rtt32):sat",ATTRIBS(),
512"Add vector of unsigned half integers with saturation",
513{
514        fHIDE(int i;)
515        for (i=0;i<4;i++) {
516            fSETHALF(i,RddV,fSATUN(16,fGETUHALF(i,RssV)+fGETUHALF(i,RttV)));
517        }
518})
519
520Q6INSN(A5_vaddhubs,"Rd32=vaddhub(Rss32,Rtt32):sat",ATTRIBS(),
521"Add vector of half integers with saturation and pack to unsigned bytes",
522{
523        fHIDE(int i;)
524        for (i=0;i<4;i++) {
525            fSETBYTE(i,RdV,fSATUB(fGETHALF(i,RssV)+fGETHALF(i,RttV)));
526        }
527})
528
/*
 * Word-wise vector add over the two words of a register pair.
 * A2_vaddws passes each sum through fSATN(32,..) before storing it.
 */
529Q6INSN(A2_vaddw,"Rdd32=vaddw(Rss32,Rtt32)",ATTRIBS(),
530"Add vector of words",
531{
532        fHIDE(int i;)
533        for (i=0;i<2;i++) {
534            fSETWORD(i,RddV,fGETWORD(i,RssV)+fGETWORD(i,RttV));
535        }
536})
537
538Q6INSN(A2_vaddws,"Rdd32=vaddw(Rss32,Rtt32):sat",ATTRIBS(),
539"Add vector of words with saturation",
540{
541        fHIDE(int i;)
542        for (i=0;i<2;i++) {
543            fSETWORD(i,RddV,fSATN(32,fGETWORD(i,RssV)+fGETWORD(i,RttV)));
544        }
545})
546
547
548
/*
 * Cross add/sub with saturation.  Each element of Rss is combined with
 * the neighbouring element of Rtt within the same pair:
 *   vxaddsub: even element = Rss[even] + Rtt[odd]
 *             odd  element = Rss[odd]  - Rtt[even]
 *   vxsubadd: even element = Rss[even] - Rtt[odd]
 *             odd  element = Rss[odd]  + Rtt[even]
 * The word forms handle one pair (words 0/1) and saturate with fSAT; the
 * halfword forms handle two pairs (halfwords 0/1 and 2/3) and saturate
 * with fSATH.
 */
549Q6INSN(S4_vxaddsubw,"Rdd32=vxaddsubw(Rss32,Rtt32):sat",ATTRIBS(),
550"Cross vector add-sub words with saturation",
551{
552        fSETWORD(0,RddV,fSAT(fGETWORD(0,RssV)+fGETWORD(1,RttV)));
553        fSETWORD(1,RddV,fSAT(fGETWORD(1,RssV)-fGETWORD(0,RttV)));
554})
555Q6INSN(S4_vxsubaddw,"Rdd32=vxsubaddw(Rss32,Rtt32):sat",ATTRIBS(),
556"Cross vector sub-add words with saturation",
557{
558        fSETWORD(0,RddV,fSAT(fGETWORD(0,RssV)-fGETWORD(1,RttV)));
559        fSETWORD(1,RddV,fSAT(fGETWORD(1,RssV)+fGETWORD(0,RttV)));
560})
561
562
563
564Q6INSN(S4_vxaddsubh,"Rdd32=vxaddsubh(Rss32,Rtt32):sat",ATTRIBS(),
565"Cross vector add-sub halfwords with saturation",
566{
567        fSETHALF(0,RddV,fSATH(fGETHALF(0,RssV)+fGETHALF(1,RttV)));
568        fSETHALF(1,RddV,fSATH(fGETHALF(1,RssV)-fGETHALF(0,RttV)));
569
570        fSETHALF(2,RddV,fSATH(fGETHALF(2,RssV)+fGETHALF(3,RttV)));
571        fSETHALF(3,RddV,fSATH(fGETHALF(3,RssV)-fGETHALF(2,RttV)));
572
573})
574Q6INSN(S4_vxsubaddh,"Rdd32=vxsubaddh(Rss32,Rtt32):sat",ATTRIBS(),
575"Cross vector sub-add halfwords with saturation",
576{
577        fSETHALF(0,RddV,fSATH(fGETHALF(0,RssV)-fGETHALF(1,RttV)));
578        fSETHALF(1,RddV,fSATH(fGETHALF(1,RssV)+fGETHALF(0,RttV)));
579
580        fSETHALF(2,RddV,fSATH(fGETHALF(2,RssV)-fGETHALF(3,RttV)));
581        fSETHALF(3,RddV,fSATH(fGETHALF(3,RssV)+fGETHALF(2,RttV)));
582})
583
584
585
586
/*
 * Rounding variants of the halfword cross add/sub.  The element pairing
 * is: even halfword of Rss with the following odd halfword of Rtt, and
 * odd halfword of Rss with the preceding even halfword of Rtt.  Each
 * sum or difference has 1 added and is shifted right by one
 * (":rnd:>>1") before fSATH is applied.
 */
587Q6INSN(S4_vxaddsubhr,"Rdd32=vxaddsubh(Rss32,Rtt32):rnd:>>1:sat",ATTRIBS(),
588"Cross vector add-sub halfwords with shift, round, and saturation",
589{
590        fSETHALF(0,RddV,fSATH((fGETHALF(0,RssV)+fGETHALF(1,RttV)+1)>>1));
591        fSETHALF(1,RddV,fSATH((fGETHALF(1,RssV)-fGETHALF(0,RttV)+1)>>1));
592
593        fSETHALF(2,RddV,fSATH((fGETHALF(2,RssV)+fGETHALF(3,RttV)+1)>>1));
594        fSETHALF(3,RddV,fSATH((fGETHALF(3,RssV)-fGETHALF(2,RttV)+1)>>1));
595
596})
597Q6INSN(S4_vxsubaddhr,"Rdd32=vxsubaddh(Rss32,Rtt32):rnd:>>1:sat",ATTRIBS(),
598"Cross vector sub-add halfwords with shift, round, and saturation",
599{
600        fSETHALF(0,RddV,fSATH((fGETHALF(0,RssV)-fGETHALF(1,RttV)+1)>>1));
601        fSETHALF(1,RddV,fSATH((fGETHALF(1,RssV)+fGETHALF(0,RttV)+1)>>1));
602
603        fSETHALF(2,RddV,fSATH((fGETHALF(2,RssV)-fGETHALF(3,RttV)+1)>>1));
604        fSETHALF(3,RddV,fSATH((fGETHALF(3,RssV)+fGETHALF(2,RttV)+1)>>1));
605})
606
607
608
609
610
611/**********************************************/
612/* 1/2 Vector operations                      */
613/**********************************************/
614
615
/*
 * "Short vector" halfword average: operates on the two halfwords of a
 * single 32-bit register.  The signed halves are added and shifted right
 * by one; the :rnd form adds 1 before the shift.
 */
616Q6INSN(A2_svavgh,"Rd32=vavgh(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
617"Avg vector of half integers",
618{
619        fHIDE(int i;)
620        for (i=0;i<2;i++) {
621            fSETHALF(i,RdV,((fGETHALF(i,RsV)+fGETHALF(i,RtV))>>1));
622        }
623})
624
625Q6INSN(A2_svavghs,"Rd32=vavgh(Rs32,Rt32):rnd",ATTRIBS(A_ARCHV2),
626"Avg vector of half integers with rounding",
627{
628        fHIDE(int i;)
629        for (i=0;i<2;i++) {
630            fSETHALF(i,RdV,((fGETHALF(i,RsV)+fGETHALF(i,RtV)+1)>>1));
631        }
632})
633
634
635
/*
 * A2_svnavgh: for each of the two halfwords of a 32-bit register, stores
 * (Rt half - Rs half) >> 1.  This is a difference, not a sum, so the
 * description says "Negative Avg", matching the wording used for the
 * register-pair vnavgh form.
 */
636Q6INSN(A2_svnavgh,"Rd32=vnavgh(Rt32,Rs32)",ATTRIBS(A_ARCHV2),
637"Negative Avg vector of half integers",
638{
639        fHIDE(int i;)
640        for (i=0;i<2;i++) {
641            fSETHALF(i,RdV,((fGETHALF(i,RtV)-fGETHALF(i,RsV))>>1));
642        }
643})
644
645
/*
 * Short-vector halfword add on the two halfwords of a 32-bit register.
 *   A2_svaddh    signed reads, sum stored as-is
 *   A2_svaddhs   signed reads, sum through fSATN(16,..)
 *   A2_svadduhs  unsigned reads (fGETUHALF), sum through fSATUN(16,..)
 */
646Q6INSN(A2_svaddh,"Rd32=vaddh(Rs32,Rt32)",ATTRIBS(),
647"Add vector of half integers",
648{
649        fHIDE(int i;)
650        for (i=0;i<2;i++) {
651            fSETHALF(i,RdV,fGETHALF(i,RsV)+fGETHALF(i,RtV));
652        }
653})
654
655Q6INSN(A2_svaddhs,"Rd32=vaddh(Rs32,Rt32):sat",ATTRIBS(),
656"Add vector of half integers with saturation",
657{
658        fHIDE(int i;)
659        for (i=0;i<2;i++) {
660            fSETHALF(i,RdV,fSATN(16,fGETHALF(i,RsV)+fGETHALF(i,RtV)));
661        }
662})
663
664Q6INSN(A2_svadduhs,"Rd32=vadduh(Rs32,Rt32):sat",ATTRIBS(),
665"Add vector of unsigned half integers with saturation",
666{
667        fHIDE(int i;)
668        for (i=0;i<2;i++) {
669            fSETHALF(i,RdV,fSATUN(16,fGETUHALF(i,RsV)+fGETUHALF(i,RtV)));
670        }
671})
672
673
/*
 * Short-vector halfword subtract on the two halfwords of a 32-bit
 * register.  Operands are written (Rt32,Rs32) and the difference is
 * Rt half - Rs half.
 *   A2_svsubh    signed reads, difference stored as-is
 *   A2_svsubhs   signed reads, difference through fSATN(16,..)
 *   A2_svsubuhs  unsigned reads, difference through fSATUN(16,..)
 */
674Q6INSN(A2_svsubh,"Rd32=vsubh(Rt32,Rs32)",ATTRIBS(),
675"Sub vector of half integers",
676{
677        fHIDE(int i;)
678        for (i=0;i<2;i++) {
679            fSETHALF(i,RdV,fGETHALF(i,RtV)-fGETHALF(i,RsV));
680        }
681})
682
683Q6INSN(A2_svsubhs,"Rd32=vsubh(Rt32,Rs32):sat",ATTRIBS(),
684"Sub vector of half integers with saturation",
685{
686        fHIDE(int i;)
687        for (i=0;i<2;i++) {
688            fSETHALF(i,RdV,fSATN(16,fGETHALF(i,RtV)-fGETHALF(i,RsV)));
689        }
690})
691
692Q6INSN(A2_svsubuhs,"Rd32=vsubuh(Rt32,Rs32):sat",ATTRIBS(),
693"Sub vector of unsigned half integers with saturation",
694{
695        fHIDE(int i;)
696        for (i=0;i<2;i++) {
697            fSETHALF(i,RdV,fSATUN(16,fGETUHALF(i,RtV)-fGETUHALF(i,RsV)));
698        }
699})
700
701
702
703
704/**********************************************/
705/* Vector Reduce Add                          */
706/**********************************************/
707
/*
 * Reducing byte add.  The unsigned bytes of Rss and Rtt are summed in two
 * groups: bytes 0..3 accumulate into word 0 of the destination and bytes
 * 4..7 into word 1.
 * A2_vraddub clears RddV first; A2_vraddub_acc adds on top of the
 * existing contents of the read-write pair Rxx.
 */
708Q6INSN(A2_vraddub,"Rdd32=vraddub(Rss32,Rtt32)",ATTRIBS(),
709"Sum: two vectors of unsigned bytes",
710{
711        fHIDE(int i;)
712        RddV = 0;
713        for (i=0;i<4;i++) {
714            fSETWORD(0,RddV,(fGETWORD(0,RddV) + (fGETUBYTE(i,RssV)+fGETUBYTE(i,RttV))));
715        }
716        for (i=4;i<8;i++) {
717            fSETWORD(1,RddV,(fGETWORD(1,RddV) + (fGETUBYTE(i,RssV)+fGETUBYTE(i,RttV))));
718        }
719})
720
721Q6INSN(A2_vraddub_acc,"Rxx32+=vraddub(Rss32,Rtt32)",ATTRIBS(),
722"Sum: two vectors of unsigned bytes",
723{
724        fHIDE(int i;)
725        for (i = 0; i < 4; i++) {
726            fSETWORD(0,RxxV,(fGETWORD(0,RxxV) + (fGETUBYTE(i,RssV)+fGETUBYTE(i,RttV))));
727        }
728        for (i = 4; i < 8; i++) {
729            fSETWORD(1,RxxV,(fGETWORD(1,RxxV) + (fGETUBYTE(i,RssV)+fGETUBYTE(i,RttV))));
730        }
731})
732
733
734
/*
 * Reducing halfword add into a single 32-bit result.  RdV is cleared and
 * then accumulates the sum of all four halfwords of Rss and all four
 * halfwords of Rtt.  M2_vraddh reads signed halves (fGETHALF),
 * M2_vradduh unsigned halves (fGETUHALF).
 */
735Q6INSN(M2_vraddh,"Rd32=vraddh(Rss32,Rtt32)",ATTRIBS(A_ARCHV3),
736"Sum: two vectors of halves",
737{
738        fHIDE(int i;)
739        RdV = 0;
740        for (i=0;i<4;i++) {
741            RdV += (fGETHALF(i,RssV)+fGETHALF(i,RttV));
742        }
743})
744
745Q6INSN(M2_vradduh,"Rd32=vradduh(Rss32,Rtt32)",ATTRIBS(A_ARCHV3),
746"Sum: two vectors of unsigned halves",
747{
748        fHIDE(int i;)
749        RdV = 0;
750        for (i=0;i<4;i++) {
751            RdV += (fGETUHALF(i,RssV)+fGETUHALF(i,RttV));
752        }
753})
754
755/**********************************************/
756/* Vector Sub                                 */
757/**********************************************/
758
/*
 * Byte-wise vector subtract over eight unsigned bytes; each result is
 * Rtt byte - Rss byte.  A2_vsububs passes the difference through
 * fSATUN(8,..) before storing it.
 * NOTE(review): both entries share the description "Sub vector of bytes";
 * only the ":sat" syntax suffix marks the saturating form.
 */
759Q6INSN(A2_vsubub,"Rdd32=vsubub(Rtt32,Rss32)",ATTRIBS(),
760"Sub vector of bytes",
761{
762        fHIDE(int i;)
763        for (i = 0; i < 8; i++) {
764            fSETBYTE(i,RddV,(fGETUBYTE(i,RttV)-fGETUBYTE(i,RssV)));
765        }
766})
767
768Q6INSN(A2_vsububs,"Rdd32=vsubub(Rtt32,Rss32):sat",ATTRIBS(),
769"Sub vector of bytes",
770{
771        fHIDE(int i;)
772        for (i = 0; i < 8; i++) {
773            fSETBYTE(i,RddV,fSATUN(8,fGETUBYTE(i,RttV)-fGETUBYTE(i,RssV)));
774        }
775})
776
/*
 * Halfword-wise vector subtract over four halfwords; each result is
 * Rtt half - Rss half.
 *   A2_vsubh    signed reads, difference stored as-is
 *   A2_vsubhs   signed reads, difference through fSATN(16,..)
 *   A2_vsubuhs  unsigned reads, difference through fSATUN(16,..)
 */
777Q6INSN(A2_vsubh,"Rdd32=vsubh(Rtt32,Rss32)",ATTRIBS(),
778"Sub vector of half integers",
779{
780        fHIDE(int i;)
781        for (i=0;i<4;i++) {
782            fSETHALF(i,RddV,fGETHALF(i,RttV)-fGETHALF(i,RssV));
783        }
784})
785
786Q6INSN(A2_vsubhs,"Rdd32=vsubh(Rtt32,Rss32):sat",ATTRIBS(),
787"Sub vector of half integers with saturation",
788{
789        fHIDE(int i;)
790        for (i=0;i<4;i++) {
791            fSETHALF(i,RddV,fSATN(16,fGETHALF(i,RttV)-fGETHALF(i,RssV)));
792        }
793})
794
795Q6INSN(A2_vsubuhs,"Rdd32=vsubuh(Rtt32,Rss32):sat",ATTRIBS(),
796"Sub vector of unsigned half integers with saturation",
797{
798        fHIDE(int i;)
799        for (i=0;i<4;i++) {
800            fSETHALF(i,RddV,fSATUN(16,fGETUHALF(i,RttV)-fGETUHALF(i,RssV)));
801        }
802})
803
/*
 * Word-wise vector subtract over the two words of a register pair; each
 * result is Rtt word - Rss word.  A2_vsubws passes the difference through
 * fSATN(32,..).
 */
804Q6INSN(A2_vsubw,"Rdd32=vsubw(Rtt32,Rss32)",ATTRIBS(),
805"Sub vector of words",
806{
807        fHIDE(int i;)
808        for (i=0;i<2;i++) {
809            fSETWORD(i,RddV,fGETWORD(i,RttV)-fGETWORD(i,RssV));
810        }
811})
812
813Q6INSN(A2_vsubws,"Rdd32=vsubw(Rtt32,Rss32):sat",ATTRIBS(),
814"Sub vector of words with saturation",
815{
816        fHIDE(int i;)
817        for (i=0;i<2;i++) {
818            fSETWORD(i,RddV,fSATN(32,fGETWORD(i,RttV)-fGETWORD(i,RssV)));
819        }
820})
821
822
823
824
825/**********************************************/
826/* Vector Abs                                 */
827/**********************************************/
828
/*
 * Halfword-wise absolute value over the four signed halfwords of a
 * register pair.  A2_vabsh stores fABS of each half directly;
 * A2_vabshsat passes the result through fSATH first.
 * The descriptions say "Absolute Value" (not "Negate") to match the
 * fABS-based semantics and the wording of the word forms.
 */
829Q6INSN(A2_vabsh,"Rdd32=vabsh(Rss32)",ATTRIBS(),
830"Absolute Value vector of half integers",
831{
832        fHIDE(int i;)
833        for (i=0;i<4;i++) {
834            fSETHALF(i,RddV,fABS(fGETHALF(i,RssV)));
835        }
836})
837
838Q6INSN(A2_vabshsat,"Rdd32=vabsh(Rss32):sat",ATTRIBS(),
839"Absolute Value vector of half integers with saturation",
840{
841        fHIDE(int i;)
842        for (i=0;i<4;i++) {
843            fSETHALF(i,RddV,fSATH(fABS(fGETHALF(i,RssV))));
844        }
845})
846
/*
 * Word-wise absolute value over the two words of a register pair.
 * A2_vabswsat passes fABS of each word through fSAT before storing it.
 * NOTE(review): both entries share one description; only the ":sat"
 * syntax suffix marks the saturating form.
 */
847Q6INSN(A2_vabsw,"Rdd32=vabsw(Rss32)",ATTRIBS(),
848"Absolute Value vector of words",
849{
850        fHIDE(int i;)
851        for (i=0;i<2;i++) {
852            fSETWORD(i,RddV,fABS(fGETWORD(i,RssV)));
853        }
854})
855
856Q6INSN(A2_vabswsat,"Rdd32=vabsw(Rss32):sat",ATTRIBS(),
857"Absolute Value vector of words",
858{
859        fHIDE(int i;)
860        for (i=0;i<2;i++) {
861            fSETWORD(i,RddV,fSAT(fABS(fGETWORD(i,RssV))));
862        }
863})
864
865/**********************************************/
866/* Vector SAD                                 */
867/**********************************************/
868
869
/*
 * Element-wise absolute difference, fABS(Rtt element - Rss element), for
 * two words (vabsdiffw) or four signed halfwords (vabsdiffh).
 */
870Q6INSN(M2_vabsdiffw,"Rdd32=vabsdiffw(Rtt32,Rss32)",ATTRIBS(A_ARCHV2),
871"Absolute Differences: vector of words",
872{
873        fHIDE(int i;)
874        for (i=0;i<2;i++) {
875            fSETWORD(i,RddV,fABS(fGETWORD(i,RttV) - fGETWORD(i,RssV)));
876        }
877})
878
879Q6INSN(M2_vabsdiffh,"Rdd32=vabsdiffh(Rtt32,Rss32)",ATTRIBS(A_ARCHV2),
880"Absolute Differences: vector of halfwords",
881{
882        fHIDE(int i;)
883        for (i=0;i<4;i++) {
884            fSETHALF(i,RddV,fABS(fGETHALF(i,RttV) - fGETHALF(i,RssV)));
885        }
886})
887
/*
 * Byte-wise absolute difference over eight bytes:
 * fABS(Rtt byte - Rss byte).  M6_vabsdiffb reads signed bytes (fGETBYTE),
 * M6_vabsdiffub unsigned bytes (fGETUBYTE).
 * The descriptions name bytes, matching the element size the loops
 * actually operate on.
 */
888Q6INSN(M6_vabsdiffb,"Rdd32=vabsdiffb(Rtt32,Rss32)",ATTRIBS(),
889"Absolute Differences: vector of bytes",
890{
891        fHIDE(int i;)
892        for (i=0;i<8;i++) {
893            fSETBYTE(i,RddV,fABS(fGETBYTE(i,RttV) - fGETBYTE(i,RssV)));
894        }
895})
896
897Q6INSN(M6_vabsdiffub,"Rdd32=vabsdiffub(Rtt32,Rss32)",ATTRIBS(),
898"Absolute Differences: vector of unsigned bytes",
899{
900        fHIDE(int i;)
901        for (i=0;i<8;i++) {
902            fSETBYTE(i,RddV,fABS(fGETUBYTE(i,RttV) - fGETUBYTE(i,RssV)));
903        }
904})
905
906
907
/*
 * Sum of absolute differences of unsigned bytes.  fABS(Rss byte - Rtt
 * byte) is accumulated in two groups: bytes 0..3 into word 0 of the
 * destination and bytes 4..7 into word 1.
 * A2_vrsadub clears RddV first; A2_vrsadub_acc adds on top of the
 * existing contents of the read-write pair Rxx.
 */
908Q6INSN(A2_vrsadub,"Rdd32=vrsadub(Rss32,Rtt32)",ATTRIBS(),
909"Sum of Absolute Differences: vector of unsigned bytes",
910{
911        fHIDE(int i;)
912        RddV = 0;
913        for (i = 0; i < 4; i++) {
914            fSETWORD(0,RddV,(fGETWORD(0,RddV) + fABS((fGETUBYTE(i,RssV) - fGETUBYTE(i,RttV)))));
915        }
916        for (i = 4; i < 8; i++) {
917            fSETWORD(1,RddV,(fGETWORD(1,RddV) + fABS((fGETUBYTE(i,RssV) - fGETUBYTE(i,RttV)))));
918        }
919})
920
921Q6INSN(A2_vrsadub_acc,"Rxx32+=vrsadub(Rss32,Rtt32)",ATTRIBS(),
922"Sum of Absolute Differences: vector of unsigned bytes",
923{
924        fHIDE(int i;)
925        for (i = 0; i < 4; i++) {
926            fSETWORD(0,RxxV,(fGETWORD(0,RxxV) + fABS((fGETUBYTE(i,RssV) - fGETUBYTE(i,RttV)))));
927        }
928        for (i = 4; i < 8; i++) {
929            fSETWORD(1,RxxV,(fGETWORD(1,RxxV) + fABS((fGETUBYTE(i,RssV) - fGETUBYTE(i,RttV)))));
930        }
931})
932
933
934/**********************************************/
935/* Vector Average                             */
936/**********************************************/
937
/*
 * Truncating vector averages on a register pair: each result is the sum
 * (or, for vnavgh, the difference Rtt - Rss) shifted right by one, with
 * no rounding term.
 *   A2_vavgub  eight unsigned bytes
 *   A2_vavguh  four unsigned halfwords
 *   A2_vavgh   four signed halfwords
 *   A2_vnavgh  four signed halfwords, Rtt half - Rss half
 */
938Q6INSN(A2_vavgub,"Rdd32=vavgub(Rss32,Rtt32)",ATTRIBS(),
939"Average vector of unsigned bytes",
940{
941        fHIDE(int i;)
942        for (i = 0; i < 8; i++) {
943            fSETBYTE(i,RddV,((fGETUBYTE(i,RssV) + fGETUBYTE(i,RttV))>>1));
944        }
945})
946
947Q6INSN(A2_vavguh,"Rdd32=vavguh(Rss32,Rtt32)",ATTRIBS(),
948"Average vector of unsigned halfwords",
949{
950        fHIDE(int i;)
951        for (i=0;i<4;i++) {
952            fSETHALF(i,RddV,(fGETUHALF(i,RssV)+fGETUHALF(i,RttV))>>1);
953        }
954})
955
956Q6INSN(A2_vavgh,"Rdd32=vavgh(Rss32,Rtt32)",ATTRIBS(),
957"Average vector of halfwords",
958{
959        fHIDE(int i;)
960        for (i=0;i<4;i++) {
961            fSETHALF(i,RddV,(fGETHALF(i,RssV)+fGETHALF(i,RttV))>>1);
962        }
963})
964
965Q6INSN(A2_vnavgh,"Rdd32=vnavgh(Rtt32,Rss32)",ATTRIBS(),
966"Negative Average vector of halfwords",
967{
968        fHIDE(int i;)
969        for (i=0;i<4;i++) {
970            fSETHALF(i,RddV,(fGETHALF(i,RttV)-fGETHALF(i,RssV))>>1);
971        }
972})
973
974Q6INSN(A2_vavgw,"Rdd32=vavgw(Rss32,Rtt32)",ATTRIBS(),
975"Average vector of words",
976{
977        fHIDE(int i;)
978        for (i=0;i<2;i++) {
979            fSETWORD(i,RddV,(fSXTN(32,33,fGETWORD(i,RssV))+fSXTN(32,33,fGETWORD(i,RttV)))>>1);
980        }
981})
982
/*
 * A2_vnavgw: for i = 0..1, word i of RddV is written with
 * (fSXTN(32,33,word i of RttV) - fSXTN(32,33,word i of RssV)) >> 1.
 * Rtt is the minuend and Rss the subtrahend, matching the
 * "vnavgw(Rtt32,Rss32)" syntax string.
 * The description string now says "Negative Average", in line with
 * A2_vnavgh; it previously repeated the A2_vavgw description.
 * NOTE(review): fSXTN(32,33,x) presumably sign-extends to 33 bits so the
 * difference cannot overflow before the shift - confirm.
 */
983Q6INSN(A2_vnavgw,"Rdd32=vnavgw(Rtt32,Rss32)",ATTRIBS(A_ARCHV2),
984"Negative Average vector of words",
985{
986        fHIDE(int i;)
987        for (i=0;i<2;i++) {
988            fSETWORD(i,RddV,(fSXTN(32,33,fGETWORD(i,RttV))-fSXTN(32,33,fGETWORD(i,RssV)))>>1);
989        }
990})
991
/*
 * A2_vavgwr (":rnd" form of vavgw): for i = 0..1, word i of RddV is
 * written with
 * (fSXTN(32,33,word i of RssV) + fSXTN(32,33,word i of RttV) + 1) >> 1.
 * The "+1" before the shift is the only difference from A2_vavgw.
 * NOTE(review): the description string does not mention rounding, unlike
 * A2_vavghr / A2_vavguhr.
 */
992Q6INSN(A2_vavgwr,"Rdd32=vavgw(Rss32,Rtt32):rnd",ATTRIBS(),
993"Average vector of words",
994{
995        fHIDE(int i;)
996        for (i=0;i<2;i++) {
997            fSETWORD(i,RddV,(fSXTN(32,33,fGETWORD(i,RssV))+fSXTN(32,33,fGETWORD(i,RttV))+1)>>1);
998        }
999})
1000
/*
 * A2_vnavgwr (":rnd:sat" form of vnavgw): for i = 0..1, word i of RddV is
 * written with
 * fSAT((fSXTN(32,33,word i of RttV) - fSXTN(32,33,word i of RssV) + 1) >> 1).
 * Rtt is the minuend and Rss the subtrahend; fSAT is applied after the
 * shift.
 * The description string now says "Negative Average ... with rounding",
 * in line with A2_vnavghr; it previously repeated the A2_vavgw description.
 * NOTE(review): fSAT presumably saturates to the signed 32-bit range -
 * confirm against the macro definition.
 */
1001Q6INSN(A2_vnavgwr,"Rdd32=vnavgw(Rtt32,Rss32):rnd:sat",ATTRIBS(A_ARCHV2),
1002"Negative Average vector of words with rounding",
1003{
1004        fHIDE(int i;)
1005        for (i=0;i<2;i++) {
1006            fSETWORD(i,RddV,fSAT((fSXTN(32,33,fGETWORD(i,RttV))-fSXTN(32,33,fGETWORD(i,RssV))+1)>>1));
1007        }
1008})
1009
/*
 * A2_vavgwcr (":crnd" form of vavgw): for i = 0..1, word i of RddV is
 * written with
 * fCRND(fSXTN(32,33,word i of RssV) + fSXTN(32,33,word i of RttV)) >> 1.
 * fCRND is applied to the full sum before the shift by one.
 */
1010Q6INSN(A2_vavgwcr,"Rdd32=vavgw(Rss32,Rtt32):crnd",ATTRIBS(A_ARCHV2),
1011"Average vector of words with convergent rounding",
1012{
1013        fHIDE(int i;)
1014        for (i=0;i<2;i++) {
1015            fSETWORD(i,RddV,(fCRND(fSXTN(32,33,fGETWORD(i,RssV))+fSXTN(32,33,fGETWORD(i,RttV)))>>1));
1016        }
1017})
1018
/*
 * A2_vnavgwcr (":crnd:sat" form of vnavgw): for i = 0..1, word i of RddV
 * is written with
 * fSAT(fCRND(fSXTN(32,33,word i of RttV) - fSXTN(32,33,word i of RssV)) >> 1).
 * Order of operations: subtract (Rtt - Rss), fCRND, shift right by one,
 * then fSAT.
 */
1019Q6INSN(A2_vnavgwcr,"Rdd32=vnavgw(Rtt32,Rss32):crnd:sat",ATTRIBS(A_ARCHV2),
1020"Average negative vector of words with convergent rounding",
1021{
1022        fHIDE(int i;)
1023        for (i=0;i<2;i++) {
1024            fSETWORD(i,RddV,fSAT(fCRND(fSXTN(32,33,fGETWORD(i,RttV))-fSXTN(32,33,fGETWORD(i,RssV)))>>1));
1025        }
1026})
1027
/*
 * A2_vavghcr (":crnd" form of vavgh): for i = 0..3, halfword i of RddV is
 * written with fCRND(fGETHALF(i,RssV) + fGETHALF(i,RttV)) >> 1.
 * fCRND is applied to the sum before the shift by one.
 */
1028Q6INSN(A2_vavghcr,"Rdd32=vavgh(Rss32,Rtt32):crnd",ATTRIBS(A_ARCHV2),
1029"Average vector of halfwords with conv rounding",
1030{
1031        fHIDE(int i;)
1032        for (i=0;i<4;i++) {
1033            fSETHALF(i,RddV,fCRND(fGETHALF(i,RssV)+fGETHALF(i,RttV))>>1);
1034        }
1035})
1036
/*
 * A2_vnavghcr (":crnd:sat" form of vnavgh): for i = 0..3, halfword i of
 * RddV is written with
 * fSATH(fCRND(fGETHALF(i,RttV) - fGETHALF(i,RssV)) >> 1).
 * Order of operations: subtract (Rtt - Rss), fCRND, shift right by one,
 * then fSATH.
 */
1037Q6INSN(A2_vnavghcr,"Rdd32=vnavgh(Rtt32,Rss32):crnd:sat",ATTRIBS(A_ARCHV2),
1038"Average negative vector of halfwords with conv rounding",
1039{
1040        fHIDE(int i;)
1041        for (i=0;i<4;i++) {
1042            fSETHALF(i,RddV,fSATH(fCRND(fGETHALF(i,RttV)-fGETHALF(i,RssV))>>1));
1043        }
1044})
1045
1046
/*
 * A2_vavguw: for i = 0..1, word i of RddV is written with
 * (fZXTN(32,33,fGETUWORD(i,RssV)) + fZXTN(32,33,fGETUWORD(i,RttV))) >> 1.
 * NOTE(review): fZXTN(32,33,x) presumably zero-extends the 32-bit word to
 * 33 bits so the carry out of the add is kept for the shift - confirm.
 */
1047Q6INSN(A2_vavguw,"Rdd32=vavguw(Rss32,Rtt32)",ATTRIBS(),
1048"Average vector of unsigned words",
1049{
1050        fHIDE(int i;)
1051        for (i=0;i<2;i++) {
1052            fSETWORD(i,RddV,(fZXTN(32,33,fGETUWORD(i,RssV))+fZXTN(32,33,fGETUWORD(i,RttV)))>>1);
1053        }
1054})
1055
/*
 * A2_vavguwr (":rnd" form of vavguw): for i = 0..1, word i of RddV is
 * written with
 * (fZXTN(32,33,fGETUWORD(i,RssV)) + fZXTN(32,33,fGETUWORD(i,RttV)) + 1) >> 1.
 * The "+1" before the shift is the only difference from A2_vavguw.
 * NOTE(review): the description string does not mention rounding.
 */
1056Q6INSN(A2_vavguwr,"Rdd32=vavguw(Rss32,Rtt32):rnd",ATTRIBS(),
1057"Average vector of unsigned words",
1058{
1059        fHIDE(int i;)
1060        for (i=0;i<2;i++) {
1061            fSETWORD(i,RddV,(fZXTN(32,33,fGETUWORD(i,RssV))+fZXTN(32,33,fGETUWORD(i,RttV))+1)>>1);
1062        }
1063})
1064
/*
 * A2_vavgubr (":rnd" form of vavgub): for i = 0..7, byte i of RddV is
 * written with (fGETUBYTE(i,RssV) + fGETUBYTE(i,RttV) + 1) >> 1.
 * The "+1" before the shift is the only difference from A2_vavgub.
 * NOTE(review): the description string does not mention rounding.
 */
1065Q6INSN(A2_vavgubr,"Rdd32=vavgub(Rss32,Rtt32):rnd",ATTRIBS(),
1066"Average vector of unsigned bytes",
1067{
1068        fHIDE(int i;)
1069        for (i = 0; i < 8; i++) {
1070            fSETBYTE(i,RddV,((fGETUBYTE(i,RssV)+fGETUBYTE(i,RttV)+1)>>1));
1071        }
1072})
1073
/*
 * A2_vavguhr (":rnd" form of vavguh): for i = 0..3, halfword i of RddV is
 * written with (fGETUHALF(i,RssV) + fGETUHALF(i,RttV) + 1) >> 1.
 */
1074Q6INSN(A2_vavguhr,"Rdd32=vavguh(Rss32,Rtt32):rnd",ATTRIBS(),
1075"Average vector of unsigned halfwords with rounding",
1076{
1077        fHIDE(int i;)
1078        for (i=0;i<4;i++) {
1079            fSETHALF(i,RddV,(fGETUHALF(i,RssV)+fGETUHALF(i,RttV)+1)>>1);
1080        }
1081})
1082
/*
 * A2_vavghr (":rnd" form of vavgh): for i = 0..3, halfword i of RddV is
 * written with (fGETHALF(i,RssV) + fGETHALF(i,RttV) + 1) >> 1.
 */
1083Q6INSN(A2_vavghr,"Rdd32=vavgh(Rss32,Rtt32):rnd",ATTRIBS(),
1084"Average vector of halfwords with rounding",
1085{
1086        fHIDE(int i;)
1087        for (i=0;i<4;i++) {
1088            fSETHALF(i,RddV,(fGETHALF(i,RssV)+fGETHALF(i,RttV)+1)>>1);
1089        }
1090})
1091
/*
 * A2_vnavghr (":rnd:sat" form of vnavgh): for i = 0..3, halfword i of
 * RddV is written with
 * fSATH((fGETHALF(i,RttV) - fGETHALF(i,RssV) + 1) >> 1).
 * Rtt is the minuend and Rss the subtrahend; fSATH is applied after the
 * shift.
 */
1092Q6INSN(A2_vnavghr,"Rdd32=vnavgh(Rtt32,Rss32):rnd:sat",ATTRIBS(A_ARCHV2),
1093"Negative Average vector of halfwords with rounding",
1094{
1095        fHIDE(int i;)
1096        for (i=0;i<4;i++) {
1097            fSETHALF(i,RddV,fSATH((fGETHALF(i,RttV)-fGETHALF(i,RssV)+1)>>1));
1098        }
1099})
1100
1101
1102/* Rounding Instruction */
1103
/*
 * A4_round_{ri,rr}[_sat]: RdV = fRNDN(RsV, n) >> n.
 *   - _ri forms take n from the #u5 immediate (uiV).
 *   - _rr forms take n from fZXTN(5,32,RtV), evaluated once for fRNDN and
 *     once for the shift amount.
 *   - _sat forms pass the fRNDN result through fSAT before the shift.
 */
1104Q6INSN(A4_round_ri,"Rd32=round(Rs32,#u5)",ATTRIBS(),"Round", {RdV = fRNDN(RsV,uiV)>>uiV; })
1105Q6INSN(A4_round_rr,"Rd32=round(Rs32,Rt32)",ATTRIBS(),"Round", {RdV = fRNDN(RsV,fZXTN(5,32,RtV))>>fZXTN(5,32,RtV); })
1106Q6INSN(A4_round_ri_sat,"Rd32=round(Rs32,#u5):sat",ATTRIBS(),"Round", {RdV = (fSAT(fRNDN(RsV,uiV)))>>uiV; })
1107Q6INSN(A4_round_rr_sat,"Rd32=round(Rs32,Rt32):sat",ATTRIBS(),"Round", {RdV = (fSAT(fRNDN(RsV,fZXTN(5,32,RtV))))>>fZXTN(5,32,RtV); })
1108
1109
/*
 * A4_cround_{ri,rr}: RdV = fCRNDN(RsV, n), with n taken from the #u5
 * immediate (_ri) or from fZXTN(5,32,RtV) (_rr).
 * Unlike the A4_round_* forms there is no explicit ">> n" here; the whole
 * operation is delegated to fCRNDN.
 */
1110Q6INSN(A4_cround_ri,"Rd32=cround(Rs32,#u5)",ATTRIBS(),"Convergent Round", {RdV = fCRNDN(RsV,uiV); })
1111Q6INSN(A4_cround_rr,"Rd32=cround(Rs32,Rt32)",ATTRIBS(),"Convergent Round", {RdV = fCRNDN(RsV,fZXTN(5,32,RtV)); })
1112
1113
/*
 * CROUND(DST,SRC,SHIFT): round SRC at bit position SHIFT and store the
 * shifted result in DST.  Three cases:
 *
 *   1. SHIFT == 0: DST is a plain copy of SRC.
 *   2. The low (SHIFT-1) bits of SRC are all zero: the rounding increment
 *      is (SRC & (1 << SHIFT)) >> 1, i.e. 1 << (SHIFT-1) only when bit
 *      SHIFT of SRC is set.  When bit (SHIFT-1) is also set (an exact
 *      tie) this rounds to the even result.
 *   3. Otherwise: the increment is always 1 << (SHIFT-1).
 *
 * In cases 2 and 3 the add and the right shift go through the
 * fADD128 / fSHIFTR128 helpers on size16s_t values, and the result is
 * narrowed with fCAST16S_8S.
 *
 * The else branch declares its own rndbit_128 / src_128 / tmp128, which
 * shadow the fHIDE() declarations at the top of the macro.
 * SRC and SHIFT are expanded several times, so arguments should be free
 * of side effects.
 */
1114#define CROUND(DST,SRC,SHIFT) \
1115        fHIDE(size16s_t rndbit_128;)\
1116        fHIDE(size16s_t tmp128;)\
1117        fHIDE(size16s_t src_128;)\
1118        if (SHIFT == 0) { \
1119            DST = SRC;\
1120        } else if ((SRC & (size8s_t)((1LL << (SHIFT - 1)) - 1LL)) == 0) { \
1121            src_128 = fCAST8S_16S(SRC);\
1122            rndbit_128 = fCAST8S_16S(1LL);\
1123            rndbit_128 = fSHIFTL128(rndbit_128, SHIFT);\
1124            rndbit_128 = fAND128(rndbit_128, src_128);\
1125            rndbit_128 = fSHIFTR128(rndbit_128, 1);\
1126            tmp128 = fADD128(src_128, rndbit_128);\
1127            tmp128 = fSHIFTR128(tmp128, SHIFT);\
1128            DST =  fCAST16S_8S(tmp128);\
1129        } else {\
1130            size16s_t rndbit_128 =  fCAST8S_16S((1LL << (SHIFT - 1))); \
1131            size16s_t src_128 =  fCAST8S_16S(SRC); \
1132            size16s_t tmp128 = fADD128(src_128, rndbit_128);\
1133            tmp128 = fSHIFTR128(tmp128, SHIFT);\
1134            DST =  fCAST16S_8S(tmp128);\
1135        }
1136
/*
 * A7_croundd_ri: applies CROUND to the register pair Rss with the shift
 * amount taken from the #u6 immediate (uiV); result goes to Rdd.
 */
1137Q6INSN(A7_croundd_ri,"Rdd32=cround(Rss32,#u6)",ATTRIBS(),"Convergent Round",
1138{
1139CROUND(RddV,RssV,uiV);
1140})
1141
/*
 * A7_croundd_rr: applies CROUND to the register pair Rss with the shift
 * amount fZXTN(6,32,RtV); result goes to Rdd.
 * NOTE(review): fZXTN(6,32,RtV) presumably keeps only the low 6 bits of
 * Rt - confirm against the macro definition.
 */
1142Q6INSN(A7_croundd_rr,"Rdd32=cround(Rss32,Rt32)",ATTRIBS(),"Convergent Round",
1143{
1144CROUND(RddV,RssV,fZXTN(6,32,RtV));
1145})
1146
1147
1148
1149
1150
1151
1152
1153
1154
/*
 * A7_clip: RdV receives the result of fCLIP(RdV,RsV,uiV); all clipping
 * logic lives in fCLIP.
 * NOTE(review): the description string says "#s5" while the syntax string
 * declares the immediate as #u5 - confirm which is intended.
 */
1155Q6INSN(A7_clip,"Rd32=clip(Rs32,#u5)",ATTRIBS(),"Clip to  #s5", {   fCLIP(RdV,RsV,uiV);})
/*
 * A7_vclip: applies fCLIP with the same immediate (uiV) to word 0 and
 * then word 1 of RssV, going through the temporary 'tmp' and storing each
 * result in the matching word of RddV.
 * NOTE(review): the description string says "#s5" while the syntax string
 * declares the immediate as #u5 - confirm which is intended.
 */
1156Q6INSN(A7_vclip,"Rdd32=vclip(Rss32,#u5)",ATTRIBS(),"Clip to  #s5",
1157{
1158fHIDE(size4s_t tmp;)
1159fCLIP(tmp, fGETWORD(0, RssV), uiV);
1160fSETWORD(0, RddV, tmp);
1161fCLIP(tmp,fGETWORD(1, RssV), uiV);
1162fSETWORD(1, RddV, tmp);
1163}
1164)
1165
1166
1167
1168/**********************************************/
1169/* V4: Cross Vector Min/Max                   */
1170/**********************************************/
1171
1172
/*
 * VRMINORMAX(TAG,STR,OP,SHORTTYPE,SETTYPE,GETTYPE,NEL,SHIFT):
 * generates the instruction A4_vr<TAG><SHORTTYPE>
 * ("Rxx32=vr<TAG><SHORTTYPE>(Rss32,Ru32)").
 *
 * Generated semantics:
 *   - A local named after TAG (e.g. 'min' or 'max') starts as element 0
 *     of RxxV, read with fGET<GETTYPE>; 'addr' starts as word 1 of RxxV.
 *   - For each i in [0, NEL): if "<TAG local> OP element i of RssV" is
 *     true, the local takes that element and addr becomes
 *     RuV | (i << SHIFT)  ('<<' binds tighter than '|').
 *   - Word 0 of RxxV is written with the local, word 1 with addr.
 *
 * SETTYPE is accepted but not used in the body; both stores use fSETWORD.
 */
1173#define VRMINORMAX(TAG,STR,OP,SHORTTYPE,SETTYPE,GETTYPE,NEL,SHIFT) \
1174Q6INSN(A4_vr##TAG##SHORTTYPE,"Rxx32=vr"#TAG#SHORTTYPE"(Rss32,Ru32)",ATTRIBS(), \
1175"Choose " STR " elements of a vector", \
1176{ \
1177        fHIDE(int i; size8s_t TAG; size4s_t addr;) \
1178        TAG = fGET##GETTYPE(0,RxxV); \
1179        addr = fGETWORD(1,RxxV); \
1180        for (i = 0; i < NEL; i++) { \
1181            if (TAG OP fGET##GETTYPE(i,RssV)) { \
1182                TAG = fGET##GETTYPE(i,RssV); \
1183                addr = RuV | i<<SHIFT; \
1184            } \
1185        } \
1186        fSETWORD(0,RxxV,TAG); \
1187        fSETWORD(1,RxxV,addr); \
1188})
1189
/*
 * RMINMAX(SHORTTYPE,SETTYPE,GETTYPE,NEL,SHIFT): instantiates VRMINORMAX
 * twice for one element type - once as "min" with OP '>' (replace the
 * running value when it is greater than the candidate) and once as "max"
 * with OP '<'.
 *
 * The four invocations below cover signed/unsigned halfwords (4 elements,
 * SHIFT 1) and signed/unsigned words (2 elements, SHIFT 2).  Both helper
 * macros are #undef'd afterwards.
 */
1190#define RMINMAX(SHORTTYPE,SETTYPE,GETTYPE,NEL,SHIFT) \
1191VRMINORMAX(min,"minimum",>,SHORTTYPE,SETTYPE,GETTYPE,NEL,SHIFT) \
1192VRMINORMAX(max,"maximum",<,SHORTTYPE,SETTYPE,GETTYPE,NEL,SHIFT)
1193
1194
1195RMINMAX(h,HALF,HALF,4,1)
1196RMINMAX(uh,HALF,UHALF,4,1)
1197RMINMAX(w,WORD,WORD,2,2)
1198RMINMAX(uw,WORD,UWORD,2,2)
1199
1200#undef RMINMAX
1201#undef VRMINORMAX
1202
1203/**********************************************/
1204/* Vector Min/Max                             */
1205/**********************************************/
1206
/*
 * VMINORMAX(TAG,STR,FUNC,SHORTTYPE,SETTYPE,GETTYPE,NEL): generates the
 * instruction A2_v<TAG><SHORTTYPE> ("Rdd32=v<TAG><SHORTTYPE>(Rtt32,Rss32)").
 * For each i in [0, NEL), element i of RddV is written (fSET<SETTYPE>)
 * with FUNC(element i of RttV, element i of RssV), the elements being
 * read with fGET<GETTYPE>.
 */
1207#define VMINORMAX(TAG,STR,FUNC,SHORTTYPE,SETTYPE,GETTYPE,NEL) \
1208Q6INSN(A2_v##TAG##SHORTTYPE,"Rdd32=v"#TAG#SHORTTYPE"(Rtt32,Rss32)",ATTRIBS(), \
1209"Choose " STR " elements of two vectors", \
1210{ \
1211        fHIDE(int i;) \
1212        for (i = 0; i < NEL; i++) { \
1213            fSET##SETTYPE(i,RddV,FUNC(fGET##GETTYPE(i,RttV),fGET##GETTYPE(i,RssV))); \
1214        } \
1215})
1216
/*
 * VMINORMAX3(TAG,STR,FUNC,SHORTTYPE,SETTYPE,GETTYPE,NEL): three-input
 * variant that would generate A6_v<TAG><SHORTTYPE>3
 * ("Rxx32=v<TAG><SHORTTYPE>3(Rtt32,Rss32)").
 * For each i in [0, NEL), element i of RxxV is replaced with
 * FUNC(element i of RxxV, FUNC(element i of RttV, element i of RssV)),
 * so Rxx is both an input and the destination.
 * The description string is the same "two vectors" text used by
 * VMINORMAX.
 */
1217#define VMINORMAX3(TAG,STR,FUNC,SHORTTYPE,SETTYPE,GETTYPE,NEL) \
1218Q6INSN(A6_v##TAG##SHORTTYPE##3,"Rxx32=v"#TAG#SHORTTYPE"3(Rtt32,Rss32)",ATTRIBS(), \
1219"Choose " STR " elements of two vectors", \
1220{ \
1221        fHIDE(int i;) \
1222        for (i = 0; i < NEL; i++) { \
1223            fSET##SETTYPE(i,RxxV,FUNC(fGET##GETTYPE(i,RxxV),FUNC(fGET##GETTYPE(i,RttV),fGET##GETTYPE(i,RssV)))); \
1224        } \
1225})
1226
/*
 * MINMAX(SHORTTYPE,SETTYPE,GETTYPE,NEL): instantiates VMINORMAX twice for
 * one element type - "min" with fMIN and "max" with fMAX.
 *
 * The six invocations below cover signed/unsigned bytes (8 elements),
 * halfwords (4 elements) and words (2 elements).
 *
 * MINMAX does not expand VMINORMAX3, so no A6_v*3 instruction is
 * generated by these invocations; all three helper macros are #undef'd
 * afterwards.
 */
1227#define MINMAX(SHORTTYPE,SETTYPE,GETTYPE,NEL) \
1228VMINORMAX(min,"minimum",fMIN,SHORTTYPE,SETTYPE,GETTYPE,NEL) \
1229VMINORMAX(max,"maximum",fMAX,SHORTTYPE,SETTYPE,GETTYPE,NEL)
1230
1231MINMAX(b,BYTE,BYTE,8)
1232MINMAX(ub,BYTE,UBYTE,8)
1233MINMAX(h,HALF,HALF,4)
1234MINMAX(uh,HALF,UHALF,4)
1235MINMAX(w,WORD,WORD,2)
1236MINMAX(uw,WORD,UWORD,2)
1237
1238#undef MINMAX
1239#undef VMINORMAX
1240#undef VMINORMAX3
1241
1242
1243/**********************************************/
1244/* Modulo Wrap                                */
1245/**********************************************/
1246
1247
/*
 * A4_modwrapu: a single wrap step of Rs against the modulus Rt (this is
 * not a full modulo operation):
 *   - Rs < 0 (signed compare):          Rd = Rs + Rt
 *   - else if Rs >= Rt (both cast with fCAST4u): Rd = Rs - Rt
 *   - otherwise:                         Rd = Rs
 * Rt is always used through fCAST4u; only one add or subtract of Rt is
 * ever applied.
 */
1248Q6INSN(A4_modwrapu,"Rd32=modwrap(Rs32,Rt32)",ATTRIBS(),
1249"Wrap to an unsigned modulo buffer",
1250{
1251        if (RsV < 0) {
1252            RdV = RsV + fCAST4u(RtV);
1253        } else if (fCAST4u(RsV) >= fCAST4u(RtV)) {
1254            RdV = RsV - fCAST4u(RtV);
1255        } else {
1256            RdV = RsV;
1257        }
1258})
1259