1 .file "wm_shrx.S" 2/*---------------------------------------------------------------------------+ 3 | wm_shrx.S | 4 | | 5 | 64 bit right shift functions | 6 | | 7 | Copyright (C) 1992,1995 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 9 | Australia. E-mail billm@jacobi.maths.monash.edu.au | 10 | | 11 | Call from C as: | 12 | unsigned FPU_shrx(void *arg1, unsigned arg2) | 13 | and | 14 | unsigned FPU_shrxs(void *arg1, unsigned arg2) | 15 | | 16 +---------------------------------------------------------------------------*/ 17 18#include "fpu_emu.h" 19 20.text 21/*---------------------------------------------------------------------------+ 22 | unsigned FPU_shrx(void *arg1, unsigned arg2) | 23 | | 24 | Extended shift right function. | 25 | Fastest for small shifts. | 26 | Shifts the 64 bit quantity pointed to by the first arg (arg1) | 27 | right by the number of bits specified by the second arg (arg2). | 28 | Forms a 96 bit quantity from the 64 bit arg and eax: | 29 | [ 64 bit arg ][ eax ] | 30 | shift right ---------> | 31 | The eax register is initialized to 0 before the shifting. | 32 | Results returned in the 64 bit arg and eax. 
|
 +---------------------------------------------------------------------------*/

/*
 * Register use in FPU_shrx:
 *   %esi = arg1, address of the 64 bit quantity (lsl at offset 0, msl at 4)
 *   %ecx = arg2, the shift count; the shift instructions take it from %cl
 *   %edx = most significant dword while shifting
 *   %ebx = least significant dword (shifts of 0..31 bits only)
 *   %eax = extension dword, left in %eax for the caller
 * %esi, and %ebx on the path that uses it, are pushed and popped around
 * the body.  ENTRY/ENDPROC and PARAM1/PARAM2 come from outside this file
 * (presumably fpu_emu.h or a header it pulls in).
 */
ENTRY(FPU_shrx)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM1,%esi
	movl	PARAM2,%ecx
	cmpl	$32,%ecx		/* shrd only works for 0..31 bits */
	jae	L_shrx_32_or_more

/* Shift by [0..31] bits: each dword feeds the one below it */
	pushl	%ebx
	xorl	%eax,%eax		/* extension */
	movl	4(%esi),%edx		/* msl */
	movl	(%esi),%ebx		/* lsl */
	shrdl	%cl,%ebx,%eax		/* lsl bits drop into the extension */
	shrdl	%cl,%edx,%ebx		/* msl bits drop into the lsl */
	shrl	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

L_shrx_32_or_more:
	cmpl	$64,%ecx
	jae	L_shrx_64_or_more

/* Shift by [32..63] bits: move down one dword, then shift by (count - 32) */
	subl	$32,%ecx		/* %ecx < 64 here, so %cl = count - 32 */
	movl	4(%esi),%edx		/* msl */
	movl	(%esi),%eax		/* lsl, which becomes the extension */
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	movl	%edx,(%esi)		/* shifted msl is the new lsl */
	movl	$0,4(%esi)		/* new msl is zero */
	popl	%esi
	leave
	ret

L_shrx_64_or_more:
	cmpl	$96,%ecx
	jae	L_shrx_96_or_more

/* Shift by [64..95] bits: only the msl can still reach the extension */
	subl	$64,%ecx		/* %ecx < 96 here, so %cl = count - 64 */
	movl	4(%esi),%eax		/* msl */
	shrl	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)
	movl	%edx,4(%esi)
	popl	%esi
	leave
	ret

/* Shift by [96..inf) bits: the arg and the extension both become zero */
L_shrx_96_or_more:
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	ret
ENDPROC(FPU_shrx)


/*---------------------------------------------------------------------------+
 |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
 |                                                                           |
 |   Extended shift right function (optimized for small floating point       |
 |   integers).                                                              |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
 |   right by the number of bits specified by the second arg (arg2).         |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
 |                [ 64 bit arg ][ eax ]                                      |
 |                       shift right  --------->                             |
 |   The eax register is initialized to 0 before the shifting.               |
 |   The lower 8 bits of eax are lost and replaced by a flag which is        |
 |   set (to 0x01) if any bit, apart from the first one, is set in the       |
 |   part which has been shifted out of the arg.                             |
 |   Results returned in the 64 bit arg and eax.
|
 +---------------------------------------------------------------------------*/

/*
 * Register use in FPU_shrxs:
 *   %esi = arg1, address of the 64 bit quantity (lsl at offset 0, msl at 4)
 *   %ecx = arg2, the shift count (taken from %cl by the shift instructions);
 *          reused as scratch for the flag test once the shifts are done
 *   %eax = value left for the caller; %al is overwritten with the 0/1 flag
 *   %ebx, %edx = working dwords, roles differ per shift range (see below)
 * %esi and %ebx are pushed on entry and popped on every exit path.
 * ENTRY/ENDPROC and PARAM1/PARAM2 come from outside this file (presumably
 * fpu_emu.h or a header it pulls in).
 */
ENTRY(FPU_shrxs)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM1,%esi
	movl	PARAM2,%ecx
	cmpl	$64,%ecx		/* 64 bits or more is handled further down */
	jae	Ls_shrxs_64_or_more

	cmpl	$32,%ecx		/* shrd only works for 0..31 bits */
	jb	Ls_shrxs_below_32

/* We got here without jumps by assuming that the most common requirement
   is for small integers */
/* Shift by [32..63] bits */
	subl	$32,%ecx		/* %ecx < 64 here, so %cl = count - 32 */
	movl	4(%esi),%edx		/* msl */
	movl	(%esi),%eax		/* lsl */
	xorl	%ebx,%ebx		/* collects the bits falling out below eax */
	shrdl	%cl,%eax,%ebx
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	movl	%eax,%ecx		/* shift count is no longer needed */
	andl	$0x7fffffff,%ecx	/* 31 bits here: skip the first one */
	orl	%ebx,%ecx		/* plus those 32: any of the 63 bits set ? */
	setne	%al
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* Shift by [0..31] bits */
Ls_shrxs_below_32:
	xorl	%eax,%eax		/* extension */
	movl	4(%esi),%edx		/* msl */
	movl	(%esi),%ebx		/* lsl */
	shrdl	%cl,%ebx,%eax
	shrdl	%cl,%edx,%ebx
	shrl	%cl,%edx
	testl	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%al
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* Shift by [64..95] bits */
Ls_shrxs_64_or_more:
	cmpl	$96,%ecx
	jae	Ls_shrxs_96_or_more

	subl	$64,%ecx		/* %ecx < 96 here, so %cl = count - 64 */
	movl	4(%esi),%eax		/* msl */
	movl	(%esi),%ebx		/* lsl */
	xorl	%edx,%edx		/* extension */
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	orl	%ebx,%edx		/* everything that fell out below eax */
	movl	%eax,%ecx		/* shift count is no longer needed */
	andl	$0x7fffffff,%ecx	/* eax without its first bit */
	orl	%edx,%ecx
	setne	%al
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* set to zero */
	movl	%edx,4(%esi)		/* set to zero */
	popl	%ebx
	popl	%esi
	leave
	ret

/* Shift by [96..inf) bits */
Ls_shrxs_96_or_more:
	movl	(%esi),%ebx
	xorl	%eax,%eax		/* must precede the orl: xorl rewrites the flags */
	orl	4(%esi),%ebx		/* was any bit of the arg set ? */
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret
ENDPROC(FPU_shrxs)