/*
 * Copyright 2015, Cyril Bur, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include "../basic_asm.h"

# PUSH_FPU(pos): store f14-f31 as consecutive doublewords starting at
# offset pos from sp (18 registers, 144 bytes in total).
#define PUSH_FPU(pos) \
	stfd	f14,pos(sp); \
	stfd	f15,pos+8(sp); \
	stfd	f16,pos+16(sp); \
	stfd	f17,pos+24(sp); \
	stfd	f18,pos+32(sp); \
	stfd	f19,pos+40(sp); \
	stfd	f20,pos+48(sp); \
	stfd	f21,pos+56(sp); \
	stfd	f22,pos+64(sp); \
	stfd	f23,pos+72(sp); \
	stfd	f24,pos+80(sp); \
	stfd	f25,pos+88(sp); \
	stfd	f26,pos+96(sp); \
	stfd	f27,pos+104(sp); \
	stfd	f28,pos+112(sp); \
	stfd	f29,pos+120(sp); \
	stfd	f30,pos+128(sp); \
	stfd	f31,pos+136(sp);

# POP_FPU(pos): reload f14-f31 from the slots written by PUSH_FPU(pos).
#define POP_FPU(pos) \
	lfd	f14,pos(sp); \
	lfd	f15,pos+8(sp); \
	lfd	f16,pos+16(sp); \
	lfd	f17,pos+24(sp); \
	lfd	f18,pos+32(sp); \
	lfd	f19,pos+40(sp); \
	lfd	f20,pos+48(sp); \
	lfd	f21,pos+56(sp); \
	lfd	f22,pos+64(sp); \
	lfd	f23,pos+72(sp); \
	lfd	f24,pos+80(sp); \
	lfd	f25,pos+88(sp); \
	lfd	f26,pos+96(sp); \
	lfd	f27,pos+104(sp); \
	lfd	f28,pos+112(sp); \
	lfd	f29,pos+120(sp); \
	lfd	f30,pos+128(sp); \
	lfd	f31,pos+136(sp);

# load_fpu: r3 points at 18 consecutive doubles, which are loaded into
# f14-f31 in order. Nothing is saved first; callers in this file wrap the
# call in PUSH_FPU/POP_FPU.
# Careful calling this, it will 'clobber' fpu (by design)
# Don't call this from C
FUNC_START(load_fpu)
	lfd	f14,0(r3)
	lfd	f15,8(r3)
	lfd	f16,16(r3)
	lfd	f17,24(r3)
	lfd	f18,32(r3)
	lfd	f19,40(r3)
	lfd	f20,48(r3)
	lfd	f21,56(r3)
	lfd	f22,64(r3)
	lfd	f23,72(r3)
	lfd	f24,80(r3)
	lfd	f25,88(r3)
	lfd	f26,96(r3)
	lfd	f27,104(r3)
	lfd	f28,112(r3)
	lfd	f29,120(r3)
	lfd	f30,128(r3)
	lfd	f31,136(r3)
	blr
FUNC_END(load_fpu)

# check_fpu: r3 points at 18 consecutive doubles (same layout as load_fpu).
# Compares each one against f14-f31 in order and stops at the first
# mismatch.
# Returns r3 = 0 when all 18 compare equal, r3 = 1 otherwise.
# Overwrites r4, f0 and cr1. The comparison is fcmpu, i.e. a floating
# point compare and not a bitwise one.
FUNC_START(check_fpu)
	mr	r4,r3
	li	r3,1 # assume a bad result
	lfd	f0,0(r4)
	fcmpu	cr1,f0,f14
	bne	cr1,1f
	lfd	f0,8(r4)
	fcmpu	cr1,f0,f15
	bne	cr1,1f
	lfd	f0,16(r4)
	fcmpu	cr1,f0,f16
	bne	cr1,1f
	lfd	f0,24(r4)
	fcmpu	cr1,f0,f17
	bne	cr1,1f
	lfd	f0,32(r4)
	fcmpu	cr1,f0,f18
	bne	cr1,1f
	lfd	f0,40(r4)
	fcmpu	cr1,f0,f19
	bne	cr1,1f
	lfd	f0,48(r4)
	fcmpu	cr1,f0,f20
	bne	cr1,1f
	lfd	f0,56(r4)
	fcmpu	cr1,f0,f21
	bne	cr1,1f
	lfd	f0,64(r4)
	fcmpu	cr1,f0,f22
	bne	cr1,1f
	lfd	f0,72(r4)
	fcmpu	cr1,f0,f23
	bne	cr1,1f
	lfd	f0,80(r4)
	fcmpu	cr1,f0,f24
	bne	cr1,1f
	lfd	f0,88(r4)
	fcmpu	cr1,f0,f25
	bne	cr1,1f
	lfd	f0,96(r4)
	fcmpu	cr1,f0,f26
	bne	cr1,1f
	lfd	f0,104(r4)
	fcmpu	cr1,f0,f27
	bne	cr1,1f
	lfd	f0,112(r4)
	fcmpu	cr1,f0,f28
	bne	cr1,1f
	lfd	f0,120(r4)
	fcmpu	cr1,f0,f29
	bne	cr1,1f
	lfd	f0,128(r4)
	fcmpu	cr1,f0,f30
	bne	cr1,1f
	lfd	f0,136(r4)
	fcmpu	cr1,f0,f31
	bne	cr1,1f
	li	r3,0 # Success!!!
1:	blr
FUNC_END(check_fpu)

# test_fpu: load f14-f31 from darray, issue a raw fork system call, then
# check that f14-f31 still hold the darray values.
# The result of check_fpu is left in r3 for the caller (0 = registers
# intact), assuming POP_BASIC_STACK preserves r3 - TODO confirm in
# basic_asm.h.
FUNC_START(test_fpu)
	# r3 holds pointer to darray
	# r4 holds pointer to where to put the result of fork
	# f14-f31 are non volatiles
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
	std	r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
	# Save the caller values of f14-f31 before load_fpu overwrites them
	PUSH_FPU(STACK_FRAME_LOCAL(2,0))

	bl	load_fpu
	nop
	li	r0,__NR_fork
	sc

	# pass the result of the fork to the caller
	# NOTE(review): std writes 8 bytes; this presumes the object behind
	# the pid pointer is a doubleword - confirm against the C prototype.
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	std	r3,0(r9)

	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu
	nop

	POP_FPU(STACK_FRAME_LOCAL(2,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(test_fpu)

# int preempt_fpu(double *darray, int *threads_starting, int *running)
# On starting will (atomically) decrement *threads_starting as a signal that
# the FPU has been loaded with darray. Will proceed to check the validity of
# the FPU registers while *running is not zero, and stops early as soon as a
# check fails.
FUNC_START(preempt_fpu)
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp) # double *darray
	std	r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
	std	r5,STACK_FRAME_PARAM(2)(sp) # int *running
	# Save the caller values of f14-f31 before load_fpu overwrites them
	PUSH_FPU(STACK_FRAME_LOCAL(3,0))

	# r3 still holds darray here
	bl	load_fpu
	nop

	sync
	# Atomic DEC of the 32-bit counter *threads_starting; the
	# lwarx/stwcx. pair is retried until the store succeeds.
	ld	r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx	r4,0,r3
	addi	r4,r4,-1
	stwcx.	r4,0,r3
	bne-	1b

	# Keep checking f14-f31 against darray. Leave with r3 != 0 on the
	# first mismatch, or with r3 == 0 once *running reads as zero.
2:	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu
	nop
	cmpdi	r3,0
	bne	3f
	ld	r4,STACK_FRAME_PARAM(2)(sp)
	# running points at an int, so read exactly one word. A doubleword
	# load would read past the int, and on big-endian the cmpwi below
	# would test the neighbouring 4 bytes rather than *running.
	lwz	r5,0(r4)
	cmpwi	r5,0
	bne	2b

3:	POP_FPU(STACK_FRAME_LOCAL(3,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(preempt_fpu)