Lines Matching +full:1 +full:w
26 #define __SELF_DEF_EXTINSN 1
215 DSTM(1,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuV.SRCTYPE[i],shamt) >> shamt)))
347 unsigned shift = RtV & (fVBYTES()-1);
352 unsigned shift = fVBYTES() - (RtV & (fVBYTES()-1));
370 VdV.ub[k] = VuV.ub[(k+RtV)&(fVBYTES()-1)];
387 …"Vdd32=vunpackh(Vu32)", "Vdd32.w=vunpack(Vu32.h)", "Unpack halves with sign-extend", fVARRAY_…
390 ITERATOR_INSN2_PERMUTE_SLOT_DOUBLE_VEC(16,vunpackoh, "Vxx32|=vunpackoh(Vu32)", "Vxx32.w|=vunpacko(V…
402 …ITERATOR_INSN2_PERMUTE_SLOT(32, vpackeh, "Vd32=vpackeh(Vu32,Vv32)", "Vd32.h=vpacke(Vu32.w,Vv32.w)…
409 VdV.ub[i] = fGETUBYTE(1, VvV.uh[i]);
410 VdV.ub[i+fVELEM(16)] = fGETUBYTE(1, VuV.uh[i]))
412 …ITERATOR_INSN2_PERMUTE_SLOT(32, vpackoh, "Vd32=vpackoh(Vu32,Vv32)", "Vd32.h=vpacko(Vu32.w,Vv32.w)…
414 VdV.uh[i] = fGETUHALF(1, VvV.uw[i]);
415 VdV.uh[i+fVELEM(32)] = fGETUHALF(1, VuV.uw[i]))
431 …PERMUTE_SLOT(32, vpackwuh_sat, "Vd32=vpackwuh(Vu32,Vv32):sat", "Vd32.uh=vpack(Vu32.w,Vv32.w):sat",
433 VdV.uh[i] = fVSATUH(VvV.w[i]);
434 VdV.uh[i+fVELEM(32)] = fVSATUH(VuV.w[i]))
436 …N2_PERMUTE_SLOT(32, vpackwh_sat, "Vd32=vpackwh(Vu32,Vv32):sat", "Vd32.h=vpack(Vu32.w,Vv32.w):sat",
438 VdV.h[i] = fVSATH(VvV.w[i]);
439 VdV.h[i+fVELEM(32)] = fVSATH(VuV.w[i]))
452 VddV.v[1].uh[i] = fZE8_16(fGETUBYTE(1, VuV.uh[i])))
457 VddV.v[1].h[i] = fSE8_16(fGETBYTE(1, VuV.h[i])))
462 VddV.v[1].uw[i] = fZE16_32(fGETUHALF(1, VuV.uw[i])))
464 ITERATOR_INSN2_ANY_SLOT_DOUBLE_VEC(32,vsh,"Vdd32=vsxth(Vu32)","Vdd32.w=vsxt(Vu32.h)",
466 VddV.v[0].w[i] = fSE16_32(fGETHALF(0, VuV.w[i]));
467 VddV.v[1].w[i] = fSE16_32(fGETHALF(1, VuV.w[i])))
488 VdV.h[i] += fMPY8US( fGETUBYTE(1, VuV.uh[i]), fGETBYTE((2*i+1)%4, RtV)))
493 VxV.h[i] += fMPY8US( fGETUBYTE(1, VuV.uh[i]), fGETBYTE((2*i+1)%4, RtV)))
500 VddV.v[0].h[i] += fMPY8US(fGETUBYTE(1, VuuV.v[0].uh[i]),fGETBYTE((2*i+1)%4, RtV));
502 VddV.v[1].h[i] = fMPY8US(fGETUBYTE(1, VuuV.v[0].uh[i]),fGETBYTE((2*i) % 4, RtV));
503 VddV.v[1].h[i] += fMPY8US(fGETUBYTE(0, VuuV.v[1].uh[i]),fGETBYTE((2*i+1)%4, RtV)))
508 VxxV.v[0].h[i] += fMPY8US(fGETUBYTE(1, VuuV.v[0].uh[i]),fGETBYTE((2*i+1)%4, RtV));
510 VxxV.v[1].h[i] += fMPY8US(fGETUBYTE(1, VuuV.v[0].uh[i]),fGETBYTE((2*i) % 4, RtV));
511 VxxV.v[1].h[i] += fMPY8US(fGETUBYTE(0, VuuV.v[1].uh[i]),fGETBYTE((2*i+1)%4, RtV)))
518 ITERATOR_INSN2_MPY_SLOT(32,vdmpyhb,"Vd32=vdmpyhb(Vu32,Rt32)","Vd32.w=vdmpy(Vu32.h,Rt32.b)",
520 VdV.w[i] = fMPY16SS(fGETHALF(0, VuV.w[i]),fGETBYTE((2*i+0)%4, RtV));
521 VdV.w[i] += fMPY16SS(fGETHALF(1, VuV.w[i]),fGETBYTE((2*i+1)%4, RtV)))
523 ITERATOR_INSN2_MPY_SLOT(32,vdmpyhb_acc,"Vx32+=vdmpyhb(Vu32,Rt32)","Vx32.w+=vdmpy(Vu32.h,Rt32.b)",
525 VxV.w[i] += fMPY16SS(fGETHALF(0, VuV.w[i]),fGETBYTE((2*i+0)%4, RtV));
526 VxV.w[i] += fMPY16SS(fGETHALF(1, VuV.w[i]),fGETBYTE((2*i+1)%4, RtV)))
530 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhb_dv,"Vdd32=vdmpyhb(Vuu32,Rt32)","Vdd32.w=vdmpy(Vuu32.h…
532 VddV.v[0].w[i] = fMPY16SS(fGETHALF(0, VuuV.v[0].w[i]),fGETBYTE((2*i+0)%4, RtV));
533 VddV.v[0].w[i] += fMPY16SS(fGETHALF(1, VuuV.v[0].w[i]),fGETBYTE((2*i+1)%4, RtV));
535 VddV.v[1].w[i] = fMPY16SS(fGETHALF(1, VuuV.v[0].w[i]),fGETBYTE((2*i+0)%4, RtV));
536 VddV.v[1].w[i] += fMPY16SS(fGETHALF(0, VuuV.v[1].w[i]),fGETBYTE((2*i+1)%4, RtV)))
539 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhb_dv_acc,"Vxx32+=vdmpyhb(Vuu32,Rt32)","Vxx32.w+=vdmpy(V…
541 VxxV.v[0].w[i] += fMPY16SS(fGETHALF(0, VuuV.v[0].w[i]),fGETBYTE((2*i+0)%4, RtV));
542 VxxV.v[0].w[i] += fMPY16SS(fGETHALF(1, VuuV.v[0].w[i]),fGETBYTE((2*i+1)%4, RtV));
544 VxxV.v[1].w[i] += fMPY16SS(fGETHALF(1, VuuV.v[0].w[i]),fGETBYTE((2*i+0)%4, RtV));
545 VxxV.v[1].w[i] += fMPY16SS(fGETHALF(0, VuuV.v[1].w[i]),fGETBYTE((2*i+1)%4, RtV)))
555 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhvsat,"Vd32=vdmpyh(Vu32,Vv32):sat","Vd32.w=vdmpy(Vu32.h,…
558 accum = fMPY16SS(fGETHALF(0,VuV.w[i]),fGETHALF(0, VvV.w[i]));
559 accum += fMPY16SS(fGETHALF(1,VuV.w[i]),fGETHALF(1, VvV.w[i]));
560 VdV.w[i] = fVSATW(accum))
562 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhvsat_acc,"Vx32+=vdmpyh(Vu32,Vv32):sat","Vx32.w+=vdmpy(V…
565 accum = fMPY16SS(fGETHALF(0,VuV.w[i]),fGETHALF(0, VvV.w[i]));
566 accum += fMPY16SS(fGETHALF(1,VuV.w[i]),fGETHALF(1, VvV.w[i]));
567 VxV.w[i] = fVSATW(VxV.w[i]+accum))
572 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhsat,"Vd32=vdmpyh(Vu32,Rt32):sat","Vd32.w=vdmpy(Vu32.h,R…
575 accum = fMPY16SS(fGETHALF(0, VuV.w[i]),fGETHALF(0, RtV));
576 accum += fMPY16SS(fGETHALF(1, VuV.w[i]),fGETHALF(1, RtV));
577 VdV.w[i] = fVSATW(accum))
579 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhsat_acc,"Vx32+=vdmpyh(Vu32,Rt32):sat","Vx32.w+=vdmpy(Vu…
581 fHIDE(size8s_t) accum = VxV.w[i];
582 accum += fMPY16SS(fGETHALF(0, VuV.w[i]),fGETHALF(0, RtV));
583 accum += fMPY16SS(fGETHALF(1, VuV.w[i]),fGETHALF(1, RtV));
584 VxV.w[i] = fVSATW(accum))
589 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhisat,"Vd32=vdmpyh(Vuu32,Rt32):sat","Vd32.w=vdmpy(Vuu32.…
592 accum = fMPY16SS(fGETHALF(1,VuuV.v[0].w[i]),fGETHALF(0,RtV));
593 accum += fMPY16SS(fGETHALF(0,VuuV.v[1].w[i]),fGETHALF(1,RtV));
594 VdV.w[i] = fVSATW(accum))
596 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhisat_acc,"Vx32+=vdmpyh(Vuu32,Rt32):sat","Vx32.w+=vdmpy(…
598 fHIDE(size8s_t) accum = VxV.w[i];
599 accum += fMPY16SS(fGETHALF(1,VuuV.v[0].w[i]),fGETHALF(0,RtV));
600 accum += fMPY16SS(fGETHALF(0,VuuV.v[1].w[i]),fGETHALF(1,RtV));
601 VxV.w[i] = fVSATW(accum))
610 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhsusat,"Vd32=vdmpyhsu(Vu32,Rt32):sat","Vd32.w=vdmpy(Vu32…
613 accum = fMPY16SU(fGETHALF(0, VuV.w[i]),fGETUHALF(0, RtV));
614 accum += fMPY16SU(fGETHALF(1, VuV.w[i]),fGETUHALF(1, RtV));
615 VdV.w[i] = fVSATW(accum))
617 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vdmpyhsusat_acc,"Vx32+=vdmpyhsu(Vu32,Rt32):sat","Vx32.w+=vdmp…
619 fHIDE(size8s_t) accum=VxV.w[i];
620 accum += fMPY16SU(fGETHALF(0, VuV.w[i]),fGETUHALF(0, RtV));
621 accum += fMPY16SU(fGETHALF(1, VuV.w[i]),fGETUHALF(1, RtV));
622 VxV.w[i] = fVSATW(accum))
626 …T_DOUBLE_VEC(32,vdmpyhsuisat,"Vd32=vdmpyhsu(Vuu32,Rt32,#1):sat","Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):…
629 accum = fMPY16SU(fGETHALF(1,VuuV.v[0].w[i]),fGETUHALF(0,RtV));
630 accum += fMPY16SU(fGETHALF(0,VuuV.v[1].w[i]),fGETUHALF(1,RtV));
631 VdV.w[i] = fVSATW(accum))
633 …BLE_VEC(32,vdmpyhsuisat_acc,"Vx32+=vdmpyhsu(Vuu32,Rt32,#1):sat","Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1)…
635 fHIDE(size8s_t) accum=VxV.w[i];
636 accum += fMPY16SU(fGETHALF(1, VuuV.v[0].w[i]),fGETUHALF(0,RtV));
637 accum += fMPY16SU(fGETHALF(0, VuuV.v[1].w[i]),fGETUHALF(1,RtV));
638 VxV.w[i] = fVSATW(accum))
649 VddV.v[0].h[i] += fMPY8SS(fGETBYTE(1,VuuV.v[0].h[i]), fGETBYTE((2*i+1)%4, RtV));
650 VddV.v[0].h[i] += fGETBYTE(0,VuuV.v[1].h[i]);
652 VddV.v[1].h[i] = fMPY8SS(fGETBYTE(1,VuuV.v[0].h[i]), fGETBYTE((2*i )%4, RtV));
653 VddV.v[1].h[i] += fMPY8SS(fGETBYTE(0,VuuV.v[1].h[i]), fGETBYTE((2*i+1)%4, RtV));
654 VddV.v[1].h[i] += fGETBYTE(1,VuuV.v[1].h[i]))
660 VxxV.v[0].h[i] += fMPY8SS(fGETBYTE(1,VuuV.v[0].h[i]), fGETBYTE((2*i+1)%4, RtV));
661 VxxV.v[0].h[i] += fGETBYTE(0,VuuV.v[1].h[i]);
663 VxxV.v[1].h[i] += fMPY8SS(fGETBYTE(1,VuuV.v[0].h[i]), fGETBYTE((2*i )%4, RtV));
664 VxxV.v[1].h[i] += fMPY8SS(fGETBYTE(0,VuuV.v[1].h[i]), fGETBYTE((2*i+1)%4, RtV));
665 VxxV.v[1].h[i] += fGETBYTE(1,VuuV.v[1].h[i]))
672 VddV.v[0].h[i] += fMPY8US(fGETUBYTE(1,VuuV.v[0].uh[i]), fGETBYTE((2*i+1)%4, RtV));
673 VddV.v[0].h[i] += fGETUBYTE(0,VuuV.v[1].uh[i]);
675 VddV.v[1].h[i] = fMPY8US(fGETUBYTE(1,VuuV.v[0].uh[i]), fGETBYTE((2*i )%4, RtV));
676 VddV.v[1].h[i] += fMPY8US(fGETUBYTE(0,VuuV.v[1].uh[i]), fGETBYTE((2*i+1)%4, RtV));
677 VddV.v[1].h[i] += fGETUBYTE(1,VuuV.v[1].uh[i]))
682 VxxV.v[0].h[i] += fMPY8US(fGETUBYTE(1,VuuV.v[0].uh[i]), fGETBYTE((2*i+1)%4, RtV));
683 VxxV.v[0].h[i] += fGETUBYTE(0,VuuV.v[1].uh[i]);
685 VxxV.v[1].h[i] += fMPY8US(fGETUBYTE(1,VuuV.v[0].uh[i]), fGETBYTE((2*i )%4, RtV));
686 VxxV.v[1].h[i] += fMPY8US(fGETUBYTE(0,VuuV.v[1].uh[i]), fGETBYTE((2*i+1)%4, RtV));
687 VxxV.v[1].h[i] += fGETUBYTE(1,VuuV.v[1].uh[i]))
690 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vtmpyhb, "Vdd32=vtmpyhb(Vuu32,Rt32)", "Vdd32.w=vtmpy(Vuu32.h,…
692 VddV.v[0].w[i] = fMPY16SS(fGETHALF(0,VuuV.v[0].w[i]), fSE8_16(fGETBYTE((2*i+0)%4, RtV)));
693 VddV.v[0].w[i]+= fMPY16SS(fGETHALF(1,VuuV.v[0].w[i]), fSE8_16(fGETBYTE((2*i+1)%4, RtV)));
694 VddV.v[0].w[i]+= fGETHALF(0,VuuV.v[1].w[i]);
696 VddV.v[1].w[i] = fMPY16SS(fGETHALF(1,VuuV.v[0].w[i]), fSE8_16(fGETBYTE((2*i+0)%4, RtV)));
697 VddV.v[1].w[i]+= fMPY16SS(fGETHALF(0,VuuV.v[1].w[i]), fSE8_16(fGETBYTE((2*i+1)%4, RtV)));
698 VddV.v[1].w[i]+= fGETHALF(1,VuuV.v[1].w[i]))
700 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vtmpyhb_acc, "Vxx32+=vtmpyhb(Vuu32,Rt32)", "Vxx32.w+=vtmpy(Vu…
702 VxxV.v[0].w[i]+= fMPY16SS(fGETHALF(0,VuuV.v[0].w[i]), fSE8_16(fGETBYTE((2*i+0)%4, RtV)));
703 VxxV.v[0].w[i]+= fMPY16SS(fGETHALF(1,VuuV.v[0].w[i]), fSE8_16(fGETBYTE((2*i+1)%4, RtV)));
704 VxxV.v[0].w[i]+= fGETHALF(0,VuuV.v[1].w[i]);
706 VxxV.v[1].w[i]+= fMPY16SS(fGETHALF(1,VuuV.v[0].w[i]), fSE8_16(fGETBYTE((2*i+0)%4, RtV)));
707 VxxV.v[1].w[i]+= fMPY16SS(fGETHALF(0,VuuV.v[1].w[i]), fSE8_16(fGETBYTE((2*i+1)%4, RtV)));
708 VxxV.v[1].w[i]+= fGETHALF(1,VuuV.v[1].w[i]))
720 VdV.uw[i] += fMPY8UU(fGETUBYTE(1,VuV.uw[i]), fGETUBYTE(1,RtV));
727 VxV.uw[i] += fMPY8UU(fGETUBYTE(1,VuV.uw[i]), fGETUBYTE(1,RtV));
735 VdV.uw[i] += fMPY8UU(fGETUBYTE(1,VuV.uw[i]), fGETUBYTE(1,VvV.uw[i]));
742 VxV.uw[i] += fMPY8UU(fGETUBYTE(1,VuV.uw[i]), fGETUBYTE(1,VvV.uw[i]));
746 ITERATOR_INSN2_MPY_SLOT(32,vrmpybv,"Vd32=vrmpyb(Vu32,Vv32)","Vd32.w=vrmpy(Vu32.b,Vv32.b)",
748 VdV.w[i] = fMPY8SS(fGETBYTE(0, VuV.w[i]), fGETBYTE(0, VvV.w[i]));
749 VdV.w[i] += fMPY8SS(fGETBYTE(1, VuV.w[i]), fGETBYTE(1, VvV.w[i]));
750 VdV.w[i] += fMPY8SS(fGETBYTE(2, VuV.w[i]), fGETBYTE(2, VvV.w[i]));
751 VdV.w[i] += fMPY8SS(fGETBYTE(3, VuV.w[i]), fGETBYTE(3, VvV.w[i])))
754 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vrmpybv_acc,"Vx32+=vrmpyb(Vu32,Vv32)","Vx32.w+=vrmpy(Vu32.b,V…
756 VxV.w[i] += fMPY8SS(fGETBYTE(0, VuV.w[i]), fGETBYTE(0, VvV.w[i]));
757 VxV.w[i] += fMPY8SS(fGETBYTE(1, VuV.w[i]), fGETBYTE(1, VvV.w[i]));
758 VxV.w[i] += fMPY8SS(fGETBYTE(2, VuV.w[i]), fGETBYTE(2, VvV.w[i]));
759 VxV.w[i] += fMPY8SS(fGETBYTE(3, VuV.w[i]), fGETBYTE(3, VvV.w[i])))
764 VddV.v[0].uw[i] = fMPY8UU(fGETUBYTE(0, VuuV.v[uiV ? 1:0].uw[i]),fGETUBYTE((0-uiV) & 0x3,RtV));
765 VddV.v[0].uw[i] += fMPY8UU(fGETUBYTE(1, VuuV.v[0 ].uw[i]),fGETUBYTE((1-uiV) & 0x3,RtV));
769 VddV.v[1].uw[i] = fMPY8UU(fGETUBYTE(0, VuuV.v[1 ].uw[i]),fGETUBYTE((2-uiV) & 0x3,RtV));
770 VddV.v[1].uw[i] += fMPY8UU(fGETUBYTE(1, VuuV.v[1 ].uw[i]),fGETUBYTE((3-uiV) & 0x3,RtV));
771 VddV.v[1].uw[i] += fMPY8UU(fGETUBYTE(2, VuuV.v[uiV ? 1:0].uw[i]),fGETUBYTE((0-uiV) & 0x3,RtV));
772 VddV.v[1].uw[i] += fMPY8UU(fGETUBYTE(3, VuuV.v[0 ].uw[i]),fGETUBYTE((1-uiV) & 0x3,RtV)))
777 VxxV.v[0].uw[i] += fMPY8UU(fGETUBYTE(0, VuuV.v[uiV ? 1:0].uw[i]),fGETUBYTE((0-uiV) & 0x3,RtV));
778 VxxV.v[0].uw[i] += fMPY8UU(fGETUBYTE(1, VuuV.v[0 ].uw[i]),fGETUBYTE((1-uiV) & 0x3,RtV));
782 VxxV.v[1].uw[i] += fMPY8UU(fGETUBYTE(0, VuuV.v[1 ].uw[i]),fGETUBYTE((2-uiV) & 0x3,RtV));
783 VxxV.v[1].uw[i] += fMPY8UU(fGETUBYTE(1, VuuV.v[1 ].uw[i]),fGETUBYTE((3-uiV) & 0x3,RtV));
784 VxxV.v[1].uw[i] += fMPY8UU(fGETUBYTE(2, VuuV.v[uiV ? 1:0].uw[i]),fGETUBYTE((0-uiV) & 0x3,RtV));
785 VxxV.v[1].uw[i] += fMPY8UU(fGETUBYTE(3, VuuV.v[0 ].uw[i]),fGETUBYTE((1-uiV) & 0x3,RtV)))
794 ITERATOR_INSN2_MPY_SLOT(32,vrmpybus,"Vd32=vrmpybus(Vu32,Rt32)","Vd32.w=vrmpy(Vu32.ub,Rt32.b)",
796 VdV.w[i] = fMPY8US(fGETUBYTE(0,VuV.uw[i]), fGETBYTE(0,RtV));
797 VdV.w[i] += fMPY8US(fGETUBYTE(1,VuV.uw[i]), fGETBYTE(1,RtV));
798 VdV.w[i] += fMPY8US(fGETUBYTE(2,VuV.uw[i]), fGETBYTE(2,RtV));
799 VdV.w[i] += fMPY8US(fGETUBYTE(3,VuV.uw[i]), fGETBYTE(3,RtV)))
802 ITERATOR_INSN2_MPY_SLOT(32,vrmpybus_acc,"Vx32+=vrmpybus(Vu32,Rt32)","Vx32.w+=vrmpy(Vu32.ub,Rt32.b)",
804 VxV.w[i] += fMPY8US(fGETUBYTE(0,VuV.uw[i]), fGETBYTE(0,RtV));
805 VxV.w[i] += fMPY8US(fGETUBYTE(1,VuV.uw[i]), fGETBYTE(1,RtV));
806 VxV.w[i] += fMPY8US(fGETUBYTE(2,VuV.uw[i]), fGETBYTE(2,RtV));
807 VxV.w[i] += fMPY8US(fGETUBYTE(3,VuV.uw[i]), fGETBYTE(3,RtV)))
810 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vrmpybusi,"Vdd32=vrmpybus(Vuu32,Rt32,#u1)","Vdd32.w=vrmpy(Vuu…
812 VddV.v[0].w[i] = fMPY8US(fGETUBYTE(0, VuuV.v[uiV ? 1:0].uw[i]),fGETBYTE((0-uiV) & 0x3,RtV));
813 VddV.v[0].w[i] += fMPY8US(fGETUBYTE(1, VuuV.v[0 ].uw[i]),fGETBYTE((1-uiV) & 0x3,RtV));
814 VddV.v[0].w[i] += fMPY8US(fGETUBYTE(2, VuuV.v[0 ].uw[i]),fGETBYTE((2-uiV) & 0x3,RtV));
815 VddV.v[0].w[i] += fMPY8US(fGETUBYTE(3, VuuV.v[0 ].uw[i]),fGETBYTE((3-uiV) & 0x3,RtV));
817 VddV.v[1].w[i] = fMPY8US(fGETUBYTE(0, VuuV.v[1 ].uw[i]),fGETBYTE((2-uiV) & 0x3,RtV));
818 VddV.v[1].w[i] += fMPY8US(fGETUBYTE(1, VuuV.v[1 ].uw[i]),fGETBYTE((3-uiV) & 0x3,RtV));
819 VddV.v[1].w[i] += fMPY8US(fGETUBYTE(2, VuuV.v[uiV ? 1:0].uw[i]),fGETBYTE((0-uiV) & 0x3,RtV));
820 VddV.v[1].w[i] += fMPY8US(fGETUBYTE(3, VuuV.v[0 ].uw[i]),fGETBYTE((1-uiV) & 0x3,RtV)))
823 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vrmpybusi_acc,"Vxx32+=vrmpybus(Vuu32,Rt32,#u1)","Vxx32.w+=vrm…
825 VxxV.v[0].w[i] += fMPY8US(fGETUBYTE(0, VuuV.v[uiV ? 1:0].uw[i]),fGETBYTE((0-uiV) & 0x3,RtV));
826 VxxV.v[0].w[i] += fMPY8US(fGETUBYTE(1, VuuV.v[0 ].uw[i]),fGETBYTE((1-uiV) & 0x3,RtV));
827 VxxV.v[0].w[i] += fMPY8US(fGETUBYTE(2, VuuV.v[0 ].uw[i]),fGETBYTE((2-uiV) & 0x3,RtV));
828 VxxV.v[0].w[i] += fMPY8US(fGETUBYTE(3, VuuV.v[0 ].uw[i]),fGETBYTE((3-uiV) & 0x3,RtV));
830 VxxV.v[1].w[i] += fMPY8US(fGETUBYTE(0, VuuV.v[1 ].uw[i]),fGETBYTE((2-uiV) & 0x3,RtV));
831 VxxV.v[1].w[i] += fMPY8US(fGETUBYTE(1, VuuV.v[1 ].uw[i]),fGETBYTE((3-uiV) & 0x3,RtV));
832 VxxV.v[1].w[i] += fMPY8US(fGETUBYTE(2, VuuV.v[uiV ? 1:0].uw[i]),fGETBYTE((0-uiV) & 0x3,RtV));
833 VxxV.v[1].w[i] += fMPY8US(fGETUBYTE(3, VuuV.v[0 ].uw[i]),fGETBYTE((1-uiV) & 0x3,RtV)))
838 ITERATOR_INSN2_MPY_SLOT(32,vrmpybusv,"Vd32=vrmpybus(Vu32,Vv32)","Vd32.w=vrmpy(Vu32.ub,Vv32.b)",
840 VdV.w[i] = fMPY8US(fGETUBYTE(0,VuV.uw[i]), fGETBYTE(0,VvV.w[i]));
841 VdV.w[i] += fMPY8US(fGETUBYTE(1,VuV.uw[i]), fGETBYTE(1,VvV.w[i]));
842 VdV.w[i] += fMPY8US(fGETUBYTE(2,VuV.uw[i]), fGETBYTE(2,VvV.w[i]));
843 VdV.w[i] += fMPY8US(fGETUBYTE(3,VuV.uw[i]), fGETBYTE(3,VvV.w[i])))
846 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vrmpybusv_acc,"Vx32+=vrmpybus(Vu32,Vv32)","Vx32.w+=vrmpy(Vu32…
848 VxV.w[i] += fMPY8US(fGETUBYTE(0,VuV.uw[i]), fGETBYTE(0,VvV.w[i]));
849 VxV.w[i] += fMPY8US(fGETUBYTE(1,VuV.uw[i]), fGETBYTE(1,VvV.w[i]));
850 VxV.w[i] += fMPY8US(fGETUBYTE(2,VuV.uw[i]), fGETBYTE(2,VvV.w[i]));
851 VxV.w[i] += fMPY8US(fGETUBYTE(3,VuV.uw[i]), fGETBYTE(3,VvV.w[i])))
870 VddV.v[0].uw[i] += fABS(fGETUHALF(1, VuuV.v[0].uw[i]) - fGETUHALF(1,RtV));
871 VddV.v[1].uw[i] = fABS(fGETUHALF(1, VuuV.v[0].uw[i]) - fGETUHALF(0,RtV));
872 VddV.v[1].uw[i] += fABS(fGETUHALF(0, VuuV.v[1].uw[i]) - fGETUHALF(1,RtV)))
877 VxxV.v[0].uw[i] += fABS(fGETUHALF(1, VuuV.v[0].uw[i]) - fGETUHALF(1,RtV));
878 VxxV.v[1].uw[i] += fABS(fGETUHALF(1, VuuV.v[0].uw[i]) - fGETUHALF(0,RtV));
879 VxxV.v[1].uw[i] += fABS(fGETUHALF(0, VuuV.v[1].uw[i]) - fGETUHALF(1,RtV)))
892 …VddV.v[0].uw[i] = fABS(fZE8_16(fGETUBYTE(0, VuuV.v[uiV?1:0].uw[i])) - fZE8_16(fGETUBYTE((0-uiV)&3…
893 …VddV.v[0].uw[i] += fABS(fZE8_16(fGETUBYTE(1, VuuV.v[0 ].uw[i])) - fZE8_16(fGETUBYTE((1-uiV)&3…
897 …VddV.v[1].uw[i] = fABS(fZE8_16(fGETUBYTE(0, VuuV.v[1 ].uw[i])) - fZE8_16(fGETUBYTE((2-uiV)&3…
898 …VddV.v[1].uw[i] += fABS(fZE8_16(fGETUBYTE(1, VuuV.v[1 ].uw[i])) - fZE8_16(fGETUBYTE((3-uiV)&3…
899 …VddV.v[1].uw[i] += fABS(fZE8_16(fGETUBYTE(2, VuuV.v[uiV?1:0].uw[i])) - fZE8_16(fGETUBYTE((0-uiV)&3…
900 …VddV.v[1].uw[i] += fABS(fZE8_16(fGETUBYTE(3, VuuV.v[0 ].uw[i])) - fZE8_16(fGETUBYTE((1-uiV)&3…
904 …VxxV.v[0].uw[i] += fABS(fZE8_16(fGETUBYTE(0, VuuV.v[uiV?1:0].uw[i])) - fZE8_16(fGETUBYTE((0-uiV)&3…
905 …VxxV.v[0].uw[i] += fABS(fZE8_16(fGETUBYTE(1, VuuV.v[0 ].uw[i])) - fZE8_16(fGETUBYTE((1-uiV)&3…
909 …VxxV.v[1].uw[i] += fABS(fZE8_16(fGETUBYTE(0, VuuV.v[1 ].uw[i])) - fZE8_16(fGETUBYTE((2-uiV)&3…
910 …VxxV.v[1].uw[i] += fABS(fZE8_16(fGETUBYTE(1, VuuV.v[1 ].uw[i])) - fZE8_16(fGETUBYTE((3-uiV)&3…
911 …VxxV.v[1].uw[i] += fABS(fZE8_16(fGETUBYTE(2, VuuV.v[uiV?1:0].uw[i])) - fZE8_16(fGETUBYTE((0-uiV)&3…
912 …VxxV.v[1].uw[i] += fABS(fZE8_16(fGETUBYTE(3, VuuV.v[0 ].uw[i])) - fZE8_16(fGETUBYTE((1-uiV)&3…
929 …"Vector arithmetic shift right " DESC, VdV.TYPE[i] = (VuV.TYPE[i] >> (RtV & (SIZE-1)))) \
930 …"Vector arithmetic shift left " DESC, VdV.TYPE[i] = (VuV.TYPE[i] << (RtV & (SIZE-1)))) \
931 …"Vector logical shift right " DESC, VdV.u##TYPE[i] = (VuV.u##TYPE[i] >> (RtV & (SIZE-1)))) \
932 …ght " DESC, VdV.TYPE[i] = fBIDIR_ASHIFTR(VuV.TYPE[i], fSXTN((LOGSIZE+1),SIZE,VvV.TYPE[i]),C…
933 …t " DESC, VdV.TYPE[i] = fBIDIR_ASHIFTL(VuV.TYPE[i], fSXTN((LOGSIZE+1),SIZE,VvV.TYPE[i]),C…
934 … DESC, VdV.u##TYPE[i] = fBIDIR_LSHIFTR(VuV.u##TYPE[i], fSXTN((LOGSIZE+1),SIZE,VvV.TYPE[i]),C…
936 V_SHIFT(w, "word", 32,5,4_4)
947 …BLE_VEC(32,vasr_into,"Vxx32=vasrinto(Vu32,Vv32)","Vxx32.w=vasrinto(Vu32.w,Vv32.w)","ASR vector 1 e…
948 fHIDE(int64_t ) shift = (fSE32_64(VuV.w[i]) << 32);
949 fHIDE(int64_t ) mask = (((fSE32_64(VxxV.v[0].w[i])) << 32) | fZE32_64(VxxV.v[0].w[i]));
950 fHIDE(int64_t) lomask = (((fSE32_64(1)) << 32) - 1);
951 fHIDE(int ) count = -(0x40 & VvV.w[i]) + (VvV.w[i] & 0x3f);
953 VxxV.v[1].w[i] = ((result >> 32) & 0xffffffff);
954 VxxV.v[0].w[i] = (result & 0xffffffff))
956 #define NEW_NARROWING_SHIFT 1
965 DSTM(1,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuV.SRCTYPE[i],shamt) >> shamt)))
973 NARROWING_SHIFT(32,vasrwh,fSETHALF,h,w,,fECHO,fVNOROUND,0xF)
974 NARROWING_SHIFT(32,vasrwhsat,fSETHALF,h,w,:sat,fVSATH,fVNOROUND,0xF)
975 NARROWING_SHIFT(32,vasrwhrndsat,fSETHALF,h,w,:rnd:sat,fVSATH,fVROUND,0xF)
976 NARROWING_SHIFT(32,vasrwuhrndsat,fSETHALF,uh,w,:rnd:sat,fVSATUH,fVROUND,0xF)
977 NARROWING_SHIFT(32,vasrwuhsat,fSETHALF,uh,w,:sat,fVSATUH,fVNOROUND,0xF)
992 shamt = VvV.SRCTYPE2[2*i+1] & SHAMTMASK; \
993 DSTM(1,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuuV.v[1].SRCTYPE[i],shamt) >> shamt)))
996 NARROWING_VECTOR_SHIFT(32,vasrvwuhsat,fSETHALF,uh,w,uh,:sat,fVSATUH,fVNOROUND,0xF)
997 NARROWING_VECTOR_SHIFT(32,vasrvwuhrndsat,fSETHALF,uh,w,uh,:rnd:sat,fVSATUH,fVROUND,0xF)
1006 ITERATOR_INSN2_SHIFT_SLOT(32,vasrwh,"Vd32=vasrwh(Vu32,Vv32,Rt8)","Vd32.h=vasr(Vu32.w,Vv32.w,Rt8)",
1008 fSETHALF(0,VdV.w[i], (VvV.w[i] >> (RtV & 0xF)));
1009 fSETHALF(1,VdV.w[i], (VuV.w[i] >> (RtV & 0xF))))
1012 ITERATOR_INSN2_SHIFT_SLOT(32,vasrwhsat,"Vd32=vasrwh(Vu32,Vv32,Rt8):sat","Vd32.h=vasr(Vu32.w,Vv32.w,…
1014 fSETHALF(0,VdV.w[i], fVSATH(VvV.w[i] >> (RtV & 0xF)));
1015 fSETHALF(1,VdV.w[i], fVSATH(VuV.w[i] >> (RtV & 0xF))))
1017 …SLOT(32,vasrwhrndsat,"Vd32=vasrwh(Vu32,Vv32,Rt8):rnd:sat","Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat",
1020 fSETHALF(0,VdV.w[i], fVSATH( (VvV.w[i] + fBIDIR_ASHIFTL(1,(shamt-1),4_8) ) >> shamt));
1021 fSETHALF(1,VdV.w[i], fVSATH( (VuV.w[i] + fBIDIR_ASHIFTL(1,(shamt-1),4_8) ) >> shamt)))
1023 …T(32,vasrwuhrndsat,"Vd32=vasrwuh(Vu32,Vv32,Rt8):rnd:sat","Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat",
1026 fSETHALF(0,VdV.w[i], fVSATUH( (VvV.w[i] + fBIDIR_ASHIFTL(1,(shamt-1),4_8) ) >> shamt));
1027 fSETHALF(1,VdV.w[i], fVSATUH( (VuV.w[i] + fBIDIR_ASHIFTL(1,(shamt-1),4_8) ) >> shamt)))
1029 …2_SHIFT_SLOT(32,vasrwuhsat,"Vd32=vasrwuh(Vu32,Vv32,Rt8):sat","Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat",
1031 fSETHALF(0, VdV.uw[i], fVSATUH(VvV.w[i] >> (RtV & 0xF)));
1032 fSETHALF(1, VdV.uw[i], fVSATUH(VuV.w[i] >> (RtV & 0xF))))
1037 fSETHALF(0,VdV.w[i], fVSATUH( (VvV.uw[i] + fBIDIR_ASHIFTL(1,(shamt-1),4_8) ) >> shamt));
1038 fSETHALF(1,VdV.w[i], fVSATUH( (VuV.uw[i] + fBIDIR_ASHIFTL(1,(shamt-1),4_8) ) >> shamt)))
1043 ITERATOR_INSN2_SHIFT_SLOT(32,vroundwh,"Vd32=vroundwh(Vu32,Vv32):sat","Vd32.h=vround(Vu32.w,Vv32.w):…
1045 fSETHALF(0, VdV.uw[i], fVSATH((VvV.w[i] + fCONSTLL(0x8000)) >> 16));
1046 fSETHALF(1, VdV.uw[i], fVSATH((VuV.w[i] + fCONSTLL(0x8000)) >> 16)))
1048 …_INSN2_SHIFT_SLOT(32,vroundwuh,"Vd32=vroundwuh(Vu32,Vv32):sat","Vd32.uh=vround(Vu32.w,Vv32.w):sat",
1050 fSETHALF(0, VdV.uw[i], fVSATUH((VvV.w[i] + fCONSTLL(0x8000)) >> 16));
1051 fSETHALF(1, VdV.uw[i], fVSATUH((VuV.w[i] + fCONSTLL(0x8000)) >> 16)))
1056 fSETHALF(1, VdV.uw[i], fVSATUH((VuV.uw[i] + fCONSTLL(0x8000)) >> 16)))
1067 fSETBYTE(1, VdV.uh[i], fVSATB((VuV.h[i] + 0x80) >> 8)))
1072 fSETBYTE(1, VdV.uh[i], fVSATUB((VuV.h[i] + 0x80) >> 8)))
1077 fSETBYTE(1, VdV.uh[i], fVSATUB((VuV.uh[i] + 0x80) >> 8)))
1080 ITERATOR_INSN2_SHIFT_SLOT(32,vaslw_acc,"Vx32+=vaslw(Vu32,Rt32)","Vx32.w+=vasl(Vu32.w,Rt32)",
1082 VxV.w[i] += (VuV.w[i] << (RtV & (32-1))))
1084 ITERATOR_INSN2_SHIFT_SLOT(32,vasrw_acc,"Vx32+=vasrw(Vu32,Rt32)","Vx32.w+=vasr(Vu32.w,Rt32)",
1086 VxV.w[i] += (VuV.w[i] >> (RtV & (32-1))))
1090 VxV.h[i] += (VuV.h[i] << (RtV & (16-1))))
1094 VxV.h[i] += (VuV.h[i] >> (RtV & (16-1))))
1112 …IDTH, VuuV.v[0].SRC[i],VvvV.v[0].SRC[i]); VddV.v[1].DEST[i] = fVUADDSAT(WIDTH, VuuV.v[1].SRC[i],Vv…
1114 …IDTH, VuuV.v[0].SRC[i],VvvV.v[0].SRC[i]); VddV.v[1].DEST[i] = fVUSUBSAT(WIDTH, VuuV.v[1].SRC[i],Vv…
1118 …TH, VuuV.v[0].SRC[i], VvvV.v[0].SRC[i]); VddV.v[1].DEST[i] = fVSADDSAT(WIDTH, VuuV.v[1].SRC[i], Vv…
1120 …TH, VuuV.v[0].SRC[i], VvvV.v[0].SRC[i]); VddV.v[1].DEST[i] = fVSSUBSAT(WIDTH, VuuV.v[1].SRC[i], Vv…
1142 …].DEST[i] = VuuV.v[0].SRC[i] + VvvV.v[0].SRC[i]; VddV.v[1].DEST[i] = VuuV.v[1].SRC[i] + VvvV.v[1].…
1143 …].DEST[i] = VuuV.v[0].SRC[i] - VvvV.v[0].SRC[i]; VddV.v[1].DEST[i] = VuuV.v[1].SRC[i] - VvvV.v[1].…
1152 MMVEC_ADDWRAP(w, "w", "Word", 32, w, w)
1160 MMVEC_ADDS_SAT(w, "w", "Word", 32, w, w)
1169 MMVEC_AVGS(w, "w", "Word", 32, w, w)
1176 MMVEC_ABSDIFF(w,"w", "Word", 32, uw, w)
1181 ITERATOR_INSN_ANY_SLOT(32,vaddcarrysat,"Vd32.w=vadd(Vu32.w,Vv32.w,Qs4):carry:sat","add w/carry and …
1182 VdV.w[i] = fVSATW(VuV.w[i]+VvV.w[i]+fGETQBIT(QsV,i*4)))
1184 ITERATOR_INSN_ANY_SLOT(32,vaddcarry,"Vd32.w=vadd(Vu32.w,Vv32.w,Qx4):carry","add w/carry",
1185 VdV.w[i] = VuV.w[i]+VvV.w[i]+fGETQBIT(QxV,i*4);
1186 fSETQBITS(QxV,4,0xF,4*i,-fCARRY_FROM_ADD32(VuV.w[i],VvV.w[i],fGETQBIT(QxV,i*4))))
1188 ITERATOR_INSN_ANY_SLOT(32,vsubcarry,"Vd32.w=vsub(Vu32.w,Vv32.w,Qx4):carry","add w/carry",
1189 VdV.w[i] = VuV.w[i]+~VvV.w[i]+fGETQBIT(QxV,i*4);
1190 fSETQBITS(QxV,4,0xF,4*i,-fCARRY_FROM_ADD32(VuV.w[i],~VvV.w[i],fGETQBIT(QxV,i*4))))
1192 ITERATOR_INSN_ANY_SLOT(32,vaddcarryo,"Vd32.w,Qe4=vadd(Vu32.w,Vv32.w):carry","add w/carry out-only",
1193 VdV.w[i] = VuV.w[i]+VvV.w[i];
1194 fSETQBITS(QeV,4,0xF,4*i,-fCARRY_FROM_ADD32(VuV.w[i],VvV.w[i],0)))
1196 ITERATOR_INSN_ANY_SLOT(32,vsubcarryo,"Vd32.w,Qe4=vsub(Vu32.w,Vv32.w):carry","subtract w/carry out-o…
1197 VdV.w[i] = VuV.w[i]+~VvV.w[i]+1;
1198 fSETQBITS(QeV,4,0xF,4*i,-fCARRY_FROM_ADD32(VuV.w[i],~VvV.w[i],1)))
1201 …tdw,"Vd32.w=vsatdw(Vu32.w,Vv32.w)","Saturate from 64-bits (higher 32-bits come from first vector) …
1220 VddV.v[1].h[i] = fZE8_16(fGETUBYTE(1, VuV.uh[i])) + fZE8_16(fGETUBYTE(1, VvV.uh[i])))
1225 VddV.v[1].h[i] = fZE8_16(fGETUBYTE(1, VuV.uh[i])) - fZE8_16(fGETUBYTE(1, VvV.uh[i])))
1229 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vaddhw,"Vdd32=vaddh(Vu32,Vv32)","Vdd32.w=vadd(Vu32.h,Vv32.h)",
1231 VddV.v[0].w[i] = fGETHALF(0, VuV.w[i]) + fGETHALF(0, VvV.w[i]);
1232 VddV.v[1].w[i] = fGETHALF(1, VuV.w[i]) + fGETHALF(1, VvV.w[i]))
1234 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vsubhw,"Vdd32=vsubh(Vu32,Vv32)","Vdd32.w=vsub(Vu32.h,Vv32.h)",
1236 VddV.v[0].w[i] = fGETHALF(0, VuV.w[i]) - fGETHALF(0, VvV.w[i]);
1237 VddV.v[1].w[i] = fGETHALF(1, VuV.w[i]) - fGETHALF(1, VvV.w[i]))
1240 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vadduhw,"Vdd32=vadduh(Vu32,Vv32)","Vdd32.w=vadd(Vu32.uh,Vv32.…
1242 VddV.v[0].w[i] = fZE16_32(fGETUHALF(0, VuV.uw[i])) + fZE16_32(fGETUHALF(0, VvV.uw[i]));
1243 VddV.v[1].w[i] = fZE16_32(fGETUHALF(1, VuV.uw[i])) + fZE16_32(fGETUHALF(1, VvV.uw[i])))
1245 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vsubuhw,"Vdd32=vsubuh(Vu32,Vv32)","Vdd32.w=vsub(Vu32.uh,Vv32.…
1247 VddV.v[0].w[i] = fZE16_32(fGETUHALF(0, VuV.uw[i])) - fZE16_32(fGETUHALF(0, VvV.uw[i]));
1248 VddV.v[1].w[i] = fZE16_32(fGETUHALF(1, VuV.uw[i])) - fZE16_32(fGETUHALF(1, VvV.uw[i])))
1252 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vaddhw_acc,"Vxx32+=vaddh(Vu32,Vv32)","Vxx32.w+=vadd(Vu32.h,Vv…
1254 VxxV.v[0].w[i] += fGETHALF(0, VuV.w[i]) + fGETHALF(0, VvV.w[i]);
1255 VxxV.v[1].w[i] += fGETHALF(1, VuV.w[i]) + fGETHALF(1, VvV.w[i]))
1257 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vadduhw_acc,"Vxx32+=vadduh(Vu32,Vv32)","Vxx32.w+=vadd(Vu32.uh…
1259 VxxV.v[0].w[i] += fGETUHALF(0, VuV.w[i]) + fGETUHALF(0, VvV.w[i]);
1260 VxxV.v[1].w[i] += fGETUHALF(1, VuV.w[i]) + fGETUHALF(1, VvV.w[i]))
1265 VxxV.v[1].h[i] += fGETUBYTE(1, VuV.h[i]) + fGETUBYTE(1, VvV.h[i]))
1280 CONDADDSUB(32,w,"Vx32.w","Vu32.w","Conditional add/sub Word",VxV.w[i],VuV.w[i])
1292 … "Vd32=vabsw(Vu32)", "Vd32.w=vabs(Vu32.w)", "Vector absolute value of words", Vd…
1293 …2=vabsw(Vu32):sat", "Vd32.w=vabs(Vu32.w):sat", "Vector absolute value of words", VdV.w[i] …
1305 VddV.v[1].h[i] = fMPY8SS(fGETBYTE(1, VuV.h[i]), fGETBYTE(1, VvV.h[i])))
1310 VxxV.v[1].h[i] += fMPY8SS(fGETBYTE(1, VuV.h[i]), fGETBYTE(1, VvV.h[i])))
1316 VddV.v[1].uh[i] = fMPY8UU(fGETUBYTE(1, VuV.uh[i]), fGETUBYTE(1, VvV.uh[i]) ))
1321 VxxV.v[1].uh[i] += fMPY8UU(fGETUBYTE(1, VuV.uh[i]), fGETUBYTE(1, VvV.uh[i]) ))
1331 VddV.v[1].h[i] = fMPY8US(fGETUBYTE(1, VuV.uh[i]), fGETBYTE(1, VvV.h[i])))
1336 VxxV.v[1].h[i] += fMPY8US(fGETUBYTE(1, VuV.uh[i]), fGETBYTE(1, VvV.h[i])))
1343 … fGETBYTE(0, VvvV.v[0].uh[i])) + fMPY8US(fGETUBYTE(0, VuuV.v[1].uh[i]), fGETBYTE(0, VvvV.v[1].uh[i…
1344 …ddV.v[1].h[i] = fMPY8US(fGETUBYTE(1, VuuV.v[0].uh[i]), fGETBYTE(1, VvvV.v[0].uh[i])) + fMPY8US(fGE…
1348 …GETUBYTE(0, VvvV.v[0].uh[i])) + fMPY8UU(fGETUBYTE(0, VuuV.v[1].uh[i]), fGETUBYTE(0, VvvV.v[1].uh[i…
1349 …dV.v[1].h[i] = fMPY8UU(fGETUBYTE(1, VuuV.v[0].uh[i]), fGETUBYTE(1, VvvV.v[0].uh[i])) + fMPY8UU(fGE…
1357 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhv,"Vdd32=vmpyh(Vu32,Vv32)","Vdd32.w=vmpy(Vu32.h,Vv32.h)",
1359 VddV.v[0].w[i] = fMPY16SS(fGETHALF(0, VuV.w[i]), fGETHALF(0, VvV.w[i]));
1360 VddV.v[1].w[i] = fMPY16SS(fGETHALF(1, VuV.w[i]), fGETHALF(1, VvV.w[i])))
1362 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhv_acc,"Vxx32+=vmpyh(Vu32,Vv32)","Vxx32.w+=vmpy(Vu32.h,Vv…
1364 VxxV.v[0].w[i] += fMPY16SS(fGETHALF(0, VuV.w[i]), fGETHALF(0, VvV.w[i]));
1365 VxxV.v[1].w[i] += fMPY16SS(fGETHALF(1, VuV.w[i]), fGETHALF(1, VvV.w[i])))
1370 VddV.v[1].uw[i] = fMPY16UU(fGETUHALF(1, VuV.uw[i]), fGETUHALF(1, VvV.uw[i])))
1375 VxxV.v[1].uw[i] += fMPY16UU(fGETUHALF(1, VuV.uw[i]), fGETUHALF(1, VvV.uw[i])))
1380 …_SLOT_DOUBLE_VEC(16,vmpyhvsrs,"Vd32=vmpyh(Vu32,Vv32):<<1:rnd:sat","Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:…
1382 VdV.h[i] = fVSATH(fGETHALF(1,fVSAT(fROUND((fMPY16SS(VuV.h[i],VvV.h[i] )<<1))))))
1388 VdV.uh[i] = fGETUHALF(1,fMPY16UU(VuV.uh[i],VvV.uh[i])))
1391 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus, "Vdd32=vmpyhus(Vu32,Vv32)","Vdd32.w=vmpy(Vu32.h,Vv32…
1393 VddV.v[0].w[i] = fMPY16SU(fGETHALF(0, VuV.w[i]), fGETUHALF(0, VvV.uw[i]));
1394 VddV.v[1].w[i] = fMPY16SU(fGETHALF(1, VuV.w[i]), fGETUHALF(1, VvV.uw[i])))
1397 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus_acc, "Vxx32+=vmpyhus(Vu32,Vv32)","Vxx32.w+=vmpy(Vu32.…
1399 VxxV.v[0].w[i] += fMPY16SU(fGETHALF(0, VuV.w[i]), fGETUHALF(0, VvV.uw[i]));
1400 VxxV.v[1].w[i] += fMPY16SU(fGETHALF(1, VuV.w[i]), fGETUHALF(1, VvV.uw[i])))
1418 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyewuh,"Vd32=vmpyewuh(Vu32,Vv32)","Vd32.w=vmpye(Vu32.w,Vv32…
1420 VdV.w[i] = fMPY3216SU(VuV.w[i], fGETUHALF(0, VvV.w[i])) >> 16)
1422 …PY_SLOT_DOUBLE_VEC(32,vmpyowh,"Vd32=vmpyowh(Vu32,Vv32):<<1:sat","Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:s…
1424 VdV.w[i] = fVSATW((((fMPY3216SS(VuV.w[i], fGETHALF(1, VvV.w[i])) >> 14) + 0) >> 1)))
1426 …T_DOUBLE_VEC(32,vmpyowh_rnd,"Vd32=vmpyowh(Vu32,Vv32):<<1:rnd:sat","Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1…
1428 VdV.w[i] = fVSATW((((fMPY3216SS(VuV.w[i], fGETHALF(1, VvV.w[i])) >> 14) + 1) >> 1)))
1430 ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpyewuh_64,"Vdd32=vmpye(Vu32.w,Vv32.uh)",
1433 prod = fMPY32SU(VuV.w[i],fGETUHALF(0,VvV.w[i]));
1434 VddV.v[1].w[i] = prod >> 16;
1435 VddV.v[0].w[i] = prod << 16)
1437 ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpyowh_64_acc,"Vxx32+=vmpyo(Vu32.w,Vv32.h)",
1440 prod = fMPY32SS(VuV.w[i],fGETHALF(1,VvV.w[i])) + fSE32_64(VxxV.v[1].w[i]);
1441 VxxV.v[1].w[i] = prod >> 16;
1442 fSETHALF(0, VxxV.v[0].w[i], VxxV.v[0].w[i] >> 16);
1443 fSETHALF(1, VxxV.v[0].w[i], prod & 0x0000ffff))
1445 …UBLE_VEC(32,vmpyowh_sacc,"Vx32+=vmpyowh(Vu32,Vv32):<<1:sat:shift","Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<…
1447 IV1DEAD() VxV.w[i] = fVSATW(((((VxV.w[i] + fMPY3216SS(VuV.w[i], fGETHALF(1, VvV.w[i]))) >> 14) + 0)…
1449 …EC(32,vmpyowh_rnd_sacc,"Vx32+=vmpyowh(Vu32,Vv32):<<1:rnd:sat:shift","Vx32.w+=vmpyo(Vu32.w,Vv32.h):…
1451 IV1DEAD() VxV.w[i] = fVSATW(((((VxV.w[i] + fMPY3216SS(VuV.w[i], fGETHALF(1, VvV.w[i]))) >> 14) + 1)…
1455 ITERATOR_INSN_MPY_SLOT(32,vmpyieoh,"Vd32.w=vmpyieo(Vu32.h,Vv32.h)","Odd/Even multiply for 32x32 low…
1456 VdV.w[i] = (fGETHALF(0,VuV.w[i])*fGETHALF(1,VvV.w[i])) << 16)
1458 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyiewuh,"Vd32=vmpyiewuh(Vu32,Vv32)","Vd32.w=vmpyie(Vu32.w,V…
1460 IV1DEAD() VdV.w[i] = fMPY3216SU(VuV.w[i], fGETUHALF(0, VvV.w[i])) )
1462 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyiowh,"Vd32=vmpyiowh(Vu32,Vv32)","Vd32.w=vmpyio(Vu32.w,Vv3…
1464 IV1DEAD() VdV.w[i] = fMPY3216SS(VuV.w[i], fGETHALF(1, VvV.w[i])) )
1468 …N2_MPY_SLOT_DOUBLE_VEC(32,vmpyiewh_acc,"Vx32+=vmpyiewh(Vu32,Vv32)","Vx32.w+=vmpyie(Vu32.w,Vv32.h)",
1470 VxV.w[i] = VxV.w[i] + fMPY3216SS(VuV.w[i], fGETHALF(0, VvV.w[i])) )
1472 …MPY_SLOT_DOUBLE_VEC(32,vmpyiewuh_acc,"Vx32+=vmpyiewuh(Vu32,Vv32)","Vx32.w+=vmpyie(Vu32.w,Vv32.uh)",
1474 VxV.w[i] = VxV.w[i] + fMPY3216SU(VuV.w[i], fGETUHALF(0, VvV.w[i])) )
1486 VddV.v[1].uh[i] = fMPY8UU(fGETUBYTE(1, VuV.uh[i]), fGETUBYTE((2*i+1)%4, RtV)))
1491 VxxV.v[1].uh[i] += fMPY8UU(fGETUBYTE(1, VuV.uh[i]), fGETUBYTE((2*i+1)%4, RtV)))
1497 VddV.v[1].h[i] = fMPY8US(fGETUBYTE(1, VuV.uh[i]), fGETBYTE((2*i+1)%4, RtV)))
1502 VxxV.v[1].h[i] += fMPY8US(fGETUBYTE(1, VuV.uh[i]), fGETBYTE((2*i+1)%4, RtV)))
1507 …0, VuuV.v[0].uh[i]), fGETBYTE(0, RtV)) + fMPY16SS(fGETUBYTE(0, VuuV.v[1].uh[i]), fGETBYTE(1, RtV));
1508 …VddV.v[1].h[i] = fMPY8US(fGETUBYTE(1, VuuV.v[0].uh[i]), fGETBYTE(2, RtV)) + fMPY16SS(fGETUBYTE(1, …
1512 …0, VuuV.v[0].uh[i]), fGETBYTE(0, RtV)) + fMPY16SS(fGETUBYTE(0, VuuV.v[1].uh[i]), fGETBYTE(1, RtV));
1513 …VxxV.v[1].h[i] += fMPY8US(fGETUBYTE(1, VuuV.v[0].uh[i]), fGETBYTE(2, RtV)) + fMPY16SS(fGETUBYTE(1,…
1519 …, VuuV.v[0].uh[i]), fGETUBYTE(0, RtV)) + fMPY8UU(fGETUBYTE(0, VuuV.v[1].uh[i]), fGETUBYTE(1, RtV));
1520 …VddV.v[1].uh[i] = fMPY8UU(fGETUBYTE(1, VuuV.v[0].uh[i]), fGETUBYTE(2, RtV)) + fMPY8UU(fGETUBYTE(1,…
1524 …, VuuV.v[0].uh[i]), fGETUBYTE(0, RtV)) + fMPY8UU(fGETUBYTE(0, VuuV.v[1].uh[i]), fGETUBYTE(1, RtV));
1525 …VxxV.v[1].uh[i] += fMPY8UU(fGETUBYTE(1, VuuV.v[0].uh[i]), fGETUBYTE(2, RtV)) + fMPY8UU(fGETUBYTE(1…
1531 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpahb,"Vdd32=vmpahb(Vuu32,Rt32)","Vdd32.w=vmpa(Vuu32.h,Rt32.…
1533 …VddV.v[0].w[i] = fMPY16SS(fGETHALF(0, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(0, RtV))) + fMPY16SS(fGETH…
1534 …VddV.v[1].w[i] = fMPY16SS(fGETHALF(1, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(2, RtV))) + fMPY16SS(fGETH…
1536 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpahb_acc,"Vxx32+=vmpahb(Vuu32,Rt32)","Vxx32.w+=vmpa(Vuu32.h…
1538 …VxxV.v[0].w[i] += fMPY16SS(fGETHALF(0, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(0, RtV))) + fMPY16SS(fGET…
1539 …VxxV.v[1].w[i] += fMPY16SS(fGETHALF(1, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(2, RtV))) + fMPY16SS(fGET…
1542 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpauhb,"Vdd32=vmpauhb(Vuu32,Rt32)","Vdd32.w=vmpa(Vuu32.uh,Rt…
1544 …VddV.v[0].w[i] = fMPY16US(fGETUHALF(0, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(0, RtV))) + fMPY16US(fGET…
1545 …VddV.v[1].w[i] = fMPY16US(fGETUHALF(1, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(2, RtV))) + fMPY16US(fGET…
1547 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpauhb_acc,"Vxx32+=vmpauhb(Vuu32,Rt32)","Vxx32.w+=vmpa(Vuu32…
1549 …VxxV.v[0].w[i] += fMPY16US(fGETUHALF(0, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(0, RtV))) + fMPY16US(fGE…
1550 …VxxV.v[1].w[i] += fMPY16US(fGETUHALF(1, VuuV.v[0].w[i]), fSE8_16(fGETBYTE(2, RtV))) + fMPY16US(fGE…
1559 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyh,"Vdd32=vmpyh(Vu32,Rt32)","Vdd32.w=vmpy(Vu32.h,Rt32.h)",
1561 VddV.v[0].w[i] = fMPY16SS(fGETHALF(0, VuV.w[i]), fGETHALF(0, RtV));
1562 VddV.v[1].w[i] = fMPY16SS(fGETHALF(1, VuV.w[i]), fGETHALF(1, RtV)))
1564 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC_NOV1(32,vmpyh_acc,"Vxx32+=vmpyh(Vu32,Rt32)","Vxx32.w+=vmpy(Vu32.…
1566 VxxV.v[0].w[i] = fCAST8s(VxxV.v[0].w[i]) + fMPY16SS(fGETHALF(0, VuV.w[i]), fGETHALF(0, RtV));
1567 VxxV.v[1].w[i] = fCAST8s(VxxV.v[1].w[i]) + fMPY16SS(fGETHALF(1, VuV.w[i]), fGETHALF(1, RtV)))
1570 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhsat_acc,"Vxx32+=vmpyh(Vu32,Rt32):sat","Vxx32.w+=vmpy(Vu3…
1572 …VxxV.v[0].w[i] = fVSATW(fCAST8s(VxxV.v[0].w[i]) + fMPY16SS(fGETHALF(0, VuV.w[i]), fGETHALF(0, RtV…
1573 …VxxV.v[1].w[i] = fVSATW(fCAST8s(VxxV.v[1].w[i]) + fMPY16SS(fGETHALF(1, VuV.w[i]), fGETHALF(1, RtV…
1577 …_MPY_SLOT_DOUBLE_VEC(32,vmpyhss,"Vd32=vmpyh(Vu32,Rt32):<<1:sat","Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sa…
1578 "Vector halfword by halfword multiply, shift by 1, and take upper 16 msb",
1579 …fSETHALF(0,VdV.w[i],fVSATH(fGETHALF(1,fVSAT((fMPY16SS(fGETHALF(0,VuV.w[i]),fGETHALF(0,RtV))<<1))))…
1580 …fSETHALF(1,VdV.w[i],fVSATH(fGETHALF(1,fVSAT((fMPY16SS(fGETHALF(1,VuV.w[i]),fGETHALF(1,RtV))<<1))))…
1583 …Y_SLOT_DOUBLE_VEC(32,vmpyhsrs,"Vd32=vmpyh(Vu32,Rt32):<<1:rnd:sat","Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:…
1585 …fSETHALF(0,VdV.w[i],fVSATH(fGETHALF(1,fVSAT(fROUND((fMPY16SS(fGETHALF(0,VuV.w[i]),fGETHALF(0,RtV))…
1586 …fSETHALF(1,VdV.w[i],fVSATH(fGETHALF(1,fVSAT(fROUND((fMPY16SS(fGETHALF(1,VuV.w[i]),fGETHALF(1,RtV))…
1593 VddV.v[1].uw[i] = fMPY16UU(fGETUHALF(1, VuV.uw[i]),fGETUHALF(1,RtV)))
1599 VxxV.v[1].uw[i] += fMPY16UU(fGETUHALF(1, VuV.uw[i]),fGETUHALF(1,RtV)))
1619 ITERATOR_INSN2_MPY_SLOT(32,vmpyiwb,"Vd32=vmpyiwb(Vu32,Rt32)","Vd32.w=vmpyi(Vu32.w,Rt32.b)",
1621 VdV.w[i] = fMPY32SS(VuV.w[i], fGETBYTE(i % 4, RtV) ))
1623 ITERATOR_INSN2_MPY_SLOT(32,vmpyiwb_acc,"Vx32+=vmpyiwb(Vu32,Rt32)","Vx32.w+=vmpyi(Vu32.w,Rt32.b)",
1625 VxV.w[i] += fMPY32SS(VuV.w[i], fGETBYTE(i % 4, RtV) ))
1627 ITERATOR_INSN2_MPY_SLOT(32,vmpyiwub,"Vd32=vmpyiwub(Vu32,Rt32)","Vd32.w=vmpyi(Vu32.w,Rt32.ub)",
1629 VdV.w[i] = fMPY32SS(VuV.w[i], fGETUBYTE(i % 4, RtV) ))
1631 ITERATOR_INSN2_MPY_SLOT(32,vmpyiwub_acc,"Vx32+=vmpyiwub(Vu32,Rt32)","Vx32.w+=vmpyi(Vu32.w,Rt32.ub)",
1633 VxV.w[i] += fMPY32SS(VuV.w[i], fGETUBYTE(i % 4, RtV) ))
1639 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyiwh,"Vd32=vmpyiwh(Vu32,Rt32)","Vd32.w=vmpyi(Vu32.w,Rt32.h…
1641 VdV.w[i] = fMPY32SS(VuV.w[i], fGETHALF(i % 2, RtV)))
1643 ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyiwh_acc,"Vx32+=vmpyiwh(Vu32,Rt32)","Vx32.w+=vmpyi(Vu32.w,…
1645 VxV.w[i] += fMPY32SS(VuV.w[i], fGETHALF(i % 2, RtV)))
1696 fSETQBIT(QdV,i,((VuV.ub[i] & fGETUBYTE(i % 4, RtV)) != 0) ? 1 : 0))
1700 fSETQBIT(QxV,i,fGETQBIT(QxV,i)|(((VuV.ub[i] & fGETUBYTE(i % 4, RtV)) != 0) ? 1 : 0)))
1741 MMVEC_CMP(w,"w","","Vector Word Compare ", fVELEM(32), 0xF, 4, w)
1743 MMVEC_CMP(b,"b","","Vector Half Compare ", fVELEM(8), 0x1, 1, b)
1746 MMVEC_CMPGT(ub,"ub","","Vector Unsigned Byte Compare ", fVELEM(8), 0x1, 1,ub)
1755 for(i = 0; i < fVBYTES(); i++) fSETQBIT(QdV,i,(i < (RtV & (fVBYTES()-1))) ? 1 : 0);
1761 for(i = 0; i < fVBYTES(); i++) fSETQBIT(QdV,i,(i <= ((RtV-1) & (fVBYTES()-1))) ? 1 : 0);
1765 ITERATOR_INSN_ANY_SLOT_DOUBLE_VEC(8, shuffeqw, "Qd4.h=vshuffe(Qs4.w,Qt4.w)","Shrink Predicate", fSE…
1766 ….b=vshuffe(Qs4.h,Qt4.h)","Shrink Predicate", fSETQBIT(QdV,i, (i & 1) ? fGETQBIT(QsV,i-1) : fGETQBI…
1802 VddV.v[1].ub[i] = VuV.ub[i];
1813 VddV.v[1].ub[i] = VuV.ub[i];
1827 VddV.v[1].ub[i] = !fGETQBIT(QtV,i) ? VuV.ub[i] : VvV.ub[i])
1844 MMVEC_SORT(w, "w", "word", 32, w)
1861 fSETBYTE(1, VdV.uh[i], fVSATUB(VuV.h[i])))
1863 ITERATOR_INSN2_ANY_SLOT(32,vsatwh,"Vd32=vsatwh(Vu32,Vv32)","Vd32.h=vsat(Vu32.w,Vv32.w)",
1865 fSETHALF(0, VdV.w[i], fVSATH(VvV.w[i]));
1866 fSETHALF(1, VdV.w[i], fVSATH(VuV.w[i])))
1870 fSETHALF(0, VdV.w[i], fVSATUH(VvV.uw[i]));
1871 fSETHALF(1, VdV.w[i], fVSATUH(VuV.uw[i])))
1876 fSETBYTE(1, VdV.uh[i], fGETUBYTE(0, VuV.uh[i])))
1880 fSETBYTE(0, VdV.uh[i], fGETUBYTE(1, VvV.uh[i]));
1881 fSETBYTE(1, VdV.uh[i], fGETUBYTE(1, VuV.uh[i])))
1886 fSETHALF(1, VdV.uw[i], fGETUHALF(0, VuV.uw[i])))
1890 fSETHALF(0, VdV.uw[i], fGETUHALF(1, VvV.uw[i]));
1891 fSETHALF(1, VdV.uw[i], fGETUHALF(1, VuV.uw[i])))
1905 for (offset=1; offset<fVBYTES(); offset<<=1) {
1923 VddV.v[1] = VuV;
1924 for (offset=1; offset<fVBYTES(); offset<<=1) {
1929 fSWAPB(VddV.v[1].ub[k], VddV.v[0].ub[k+offset]);
1941 for (offset=fVBYTES()>>1; offset>0; offset>>=1) {
1959 VddV.v[1] = VuV;
1960 for (offset=fVBYTES()>>1; offset>0; offset>>=1) {
1965 fSWAPB(VddV.v[1].ub[k], VddV.v[0].ub[k+offset]);
1979 fSETHALF(1, VddV.v[0].uw[i], fGETUHALF(0, VuV.uw[i]));
1980 fSETHALF(0, VddV.v[1].uw[i], fGETUHALF(1, VvV.uw[i]));
1981 fSETHALF(1, VddV.v[1].uw[i], fGETUHALF(1, VuV.uw[i])))
1986 fSETBYTE(1, VddV.v[0].uh[i], fGETUBYTE(0, VuV.uh[i]));
1987 fSETBYTE(0, VddV.v[1].uh[i], fGETUBYTE(1, VvV.uh[i]));
1988 fSETBYTE(1, VddV.v[1].uh[i], fGETUBYTE(1, VuV.uh[i])))
1998 VdV.uh[i+fVELEM(32)] = fGETUHALF(1, VuV.uw[i]))
2003 VdV.ub[i+fVELEM(16)] = fGETUBYTE(1, VuV.uh[i]))
2019 fSETHALF(1, VdV.uw[i], VuV.uh[i+fVELEM(32)]))
2024 fSETBYTE(1, VdV.uh[i], VuV.ub[i+fVELEM(16)]))
2036 fHIDE(warn("RdN=%d VuN=%d RsN=%d RsV=0x%08x widx=%d",RdN,VuN,RsN,RsV,((RsV & (fVBYTES()-1)) >> 2));)
2037 RdV = VuV.uw[ (RsV & (fVBYTES()-1)) >> 2];
2040 EXTINSN(V6_vinsertwr, "Vx32.w=vinsert(Rt32)",
2055 ITERATOR_INSN_ANY_SLOT(32,vassign,"Vd32=Vu32","Copy a vector",VdV.w[i]=VuV.w[i])
2061 VddV.v[1].ub[i] = VuV.ub[i])
2073 VddV.v[1].ub[i] = VuV.ub[i];
2082 VdV.w[i]=VuV.w[i];
2098 for (offset=fVBYTES(); (offset>>=1)>0; ) {
2116 for (offset=1; offset<fVBYTES(); offset<<=1){
2133 ITERATOR_INSN2_SHIFT_SLOT(32,vnormamtw,"Vd32=vnormamtw(Vu32)","Vd32.w=vnormamt(Vu32.w)","Norm Amoun…
2134 VdV.w[i]=fMAX(fCL1_4(~VuV.w[i]),fCL1_4(VuV.w[i]))-1; fHIDE(IV1DEAD();))
2136 VdV.h[i]=fMAX(fCL1_2(~VuV.h[i]),fCL1_2(VuV.h[i]))-1; fHIDE(IV1DEAD();))
2138 ITERATOR_INSN_SHIFT_SLOT_VV_LATE(32,vaddclbw,"Vd32.w=vadd(vclb(Vu32.w),Vv32.w)",
2140 VdV.w[i] = fMAX(fCL1_4(~VuV.w[i]),fCL1_4(VuV.w[i])) + VvV.w[i])
2192 #if 1
2195 fHIDE(unsigned int) weight = fGETUBYTE(1,input.h[i]); \
2208 …INSN_VHISTLIKE(16,vwhist128,"vwhist128","vector weighted histogram word counters", WHIST(uw,3,1,,))
2209 …128q,"vwhist128(Qv4)","vector weighted histogram word counters", WHIST(uw,3,1,if (fGETQBIT(QvV,2*i…
2210 …vwhist128(#u1)","vector weighted histogram word counters", WHIST(uw,3,1,if ((bucket & 1) == uiV),))
2211 …28(Qv4,#u1)","vector weighted histogram word counters", WHIST(uw,3,1,if (((bucket & 1) == uiV) && …
2242 VddV.v[0].h[i] = ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0;
2243 idx = fGETUBYTE(1,VuV.uh[i]);
2244 VddV.v[1].h[i] = ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0)
2251 VxxV.v[0].h[i] |= ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0;
2252 idx = fGETUBYTE(1,VuV.uh[i]);
2253 VxxV.v[1].h[i] |= ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0)
2275 VddV.v[0].h[i] = ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0;
2276 idx = fGETUBYTE(1,VuV.uh[i]);
2277 VddV.v[1].h[i] = ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0)
2284 VxxV.v[0].h[i] |= ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0;
2285 idx = fGETUBYTE(1,VuV.uh[i]);
2286 VxxV.v[1].h[i] |= ((idx & 0xF0) == (matchval << 4)) ? fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]) : 0)
2302 VddV.v[0].h[i] = fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]);
2303 idx = fGETUBYTE(1,VuV.uh[i]);
2305 VddV.v[1].h[i] = fGETHALF(oddhalf,VvV.w[idx % fVELEM(32)]))
2315 …VxV.h[i]= fVSATH( ( ( fMPY16SS(VxV.h[i],VuV.h[i])<<1) + (fGETHALF(( (VuV.h[i]>>14)&0x3), RttV )<<1…
2346 EXTINSN(V6_vgathermw, "vtmp.w=vgather(Rt32,Mu2,Vv32.w).w", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_GATHER,…
2375 EXTINSN(V6_vgathermhw, "vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_GATHE…
2393 EXTINSN(V6_vgathermwq, "if (Qs4) vtmp.w=vgather(Rt32,Mu2,Vv32.w).w", ATTRIBS(A_EXTENSION,A_CVI,A_C…
2422 EXTINSN(V6_vgathermhwq, "if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h", ATTRIBS(A_EXTENSION,A_CVI,A…
2441 EXTINSN(V6_vscattermw , "vscatter(Rt32,Mu2,Vv32.w).w=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SCATTER…
2472 EXTINSN(V6_vscattermw_add, "vscatter(Rt32,Mu2,Vv32.w).w+=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SC…
2485 fSCATTER_FINISH(1)
2501 fSCATTER_FINISH(1)
2505 EXTINSN(V6_vscattermwq, "if (Qs4) vscatter(Rt32,Mu2,Vv32.w).w=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A_C…
2536 EXTINSN(V6_vscattermhw , "vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SCATT…
2554 ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyvubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2)…
2558 fGET10BIT(c01, VvvV.v[0].uw[i], 1)
2563 fGET10BIT(c10, VvvV.v[1].uw[i], 0)
2565 fGET10BIT(c11, VvvV.v[1].uw[i], 1)
2567 fGET10BIT(c12, VvvV.v[1].uw[i], 2)
2570 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
2571 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
2572 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
2574 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
2575 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2576 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
2578 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10);
2579 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
2580 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12);
2582 } else if (uiV == 1) {
2583 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00);
2584 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01);
2585 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02);
2587 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
2588 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
2589 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
2591 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
2592 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2593 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
2596 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
2597 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2598 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
2600 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
2601 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
2602 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
2604 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10);
2605 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11);
2606 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12);
2609 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00);
2610 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
2611 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02);
2613 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
2614 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2615 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
2617 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
2618 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
2619 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
2622 ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyhubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2)…
2626 fGET10BIT(c01, VvvV.v[0].uw[i], 1)
2630 fGET10BIT(c10, VvvV.v[1].uw[i], 0)
2632 fGET10BIT(c11, VvvV.v[1].uw[i], 1)
2634 fGET10BIT(c12, VvvV.v[1].uw[i], 2)
2637 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
2638 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
2639 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
2641 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
2642 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2643 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
2645 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10);
2646 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
2647 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12);
2649 } else if (uiV == 1) {
2650 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00);
2651 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01);
2652 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02);
2654 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
2655 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
2656 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
2658 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
2659 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2660 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
2663 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
2664 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2665 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
2667 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
2668 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
2669 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
2671 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10);
2672 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11);
2673 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12);
2676 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00);
2677 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
2678 VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02);
2680 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
2681 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2682 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
2684 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
2685 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
2686 VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
2691 ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyvubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "",
2695 fGET10BIT(c01, VvvV.v[0].uw[i], 1)
2699 fGET10BIT(c10, VvvV.v[1].uw[i], 0)
2701 fGET10BIT(c11, VvvV.v[1].uw[i], 1)
2703 fGET10BIT(c12, VvvV.v[1].uw[i], 2)
2708 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
2709 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
2710 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
2712 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
2713 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2714 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
2716 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10);
2717 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
2718 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12);
2720 } else if (uiV == 1) {
2721 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00);
2722 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01);
2723 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02);
2725 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10);
2726 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11);
2727 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12);
2729 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00);
2730 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2731 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02);
2734 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
2735 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2736 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
2738 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
2739 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
2740 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
2742 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10);
2743 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11);
2744 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12);
2747 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00);
2748 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
2749 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02);
2751 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10);
2752 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2753 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12);
2755 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00);
2756 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01);
2757 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02);
2761 ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyhubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "",
2765 fGET10BIT(c01, VvvV.v[0].uw[i], 1)
2769 fGET10BIT(c10, VvvV.v[1].uw[i], 0)
2771 fGET10BIT(c11, VvvV.v[1].uw[i], 1)
2773 fGET10BIT(c12, VvvV.v[1].uw[i], 2)
2776 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
2777 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
2778 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
2780 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
2781 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2782 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
2784 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10);
2785 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11);
2786 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12);
2788 } else if (uiV == 1) {
2789 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00);
2790 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01);
2791 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02);
2793 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10);
2794 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11);
2795 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12);
2797 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00);
2798 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01);
2799 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02);
2802 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
2803 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2804 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
2806 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
2807 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
2808 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
2810 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10);
2811 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11);
2812 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12);
2815 VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00);
2816 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01);
2817 VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02);
2819 VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10);
2820 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11);
2821 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12);
2823 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00);
2824 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01);
2825 VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02);
2830 EXTINSN(V6_vscattermhwq, "if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A…
2847 EXTINSN(V6_vscattermhw_add, "vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_…
2863 fSCATTER_FINISH(1)
2881 acc += fGETQBIT(QvV,i*2+1);
2885 EXTINSN(V6_vprefixqw,"Vd32.w=prefixsum(Qv4)", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS), "parallel pre…
2891 acc += fGETQBIT(QvV,i*4+1);