1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
/*
* Copyright 2015, Cyril Bur, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include "../basic_asm.h"
#define PUSH_FPU(pos) \
stfd f14,pos(sp); \
stfd f15,pos+8(sp); \
stfd f16,pos+16(sp); \
stfd f17,pos+24(sp); \
stfd f18,pos+32(sp); \
stfd f19,pos+40(sp); \
stfd f20,pos+48(sp); \
stfd f21,pos+56(sp); \
stfd f22,pos+64(sp); \
stfd f23,pos+72(sp); \
stfd f24,pos+80(sp); \
stfd f25,pos+88(sp); \
stfd f26,pos+96(sp); \
stfd f27,pos+104(sp); \
stfd f28,pos+112(sp); \
stfd f29,pos+120(sp); \
stfd f30,pos+128(sp); \
stfd f31,pos+136(sp);
#define POP_FPU(pos) \
lfd f14,pos(sp); \
lfd f15,pos+8(sp); \
lfd f16,pos+16(sp); \
lfd f17,pos+24(sp); \
lfd f18,pos+32(sp); \
lfd f19,pos+40(sp); \
lfd f20,pos+48(sp); \
lfd f21,pos+56(sp); \
lfd f22,pos+64(sp); \
lfd f23,pos+72(sp); \
lfd f24,pos+80(sp); \
lfd f25,pos+88(sp); \
lfd f26,pos+96(sp); \
lfd f27,pos+104(sp); \
lfd f28,pos+112(sp); \
lfd f29,pos+120(sp); \
lfd f30,pos+128(sp); \
lfd f31,pos+136(sp);
# Careful calling this, it will 'clobber' fpu (by design)
# Don't call this from C
FUNC_START(load_fpu)
lfd f14,0(r3)
lfd f15,8(r3)
lfd f16,16(r3)
lfd f17,24(r3)
lfd f18,32(r3)
lfd f19,40(r3)
lfd f20,48(r3)
lfd f21,56(r3)
lfd f22,64(r3)
lfd f23,72(r3)
lfd f24,80(r3)
lfd f25,88(r3)
lfd f26,96(r3)
lfd f27,104(r3)
lfd f28,112(r3)
lfd f29,120(r3)
lfd f30,128(r3)
lfd f31,136(r3)
blr
FUNC_END(load_fpu)
FUNC_START(check_fpu)
mr r4,r3
li r3,1 # assume a bad result
lfd f0,0(r4)
fcmpu cr1,f0,f14
bne cr1,1f
lfd f0,8(r4)
fcmpu cr1,f0,f15
bne cr1,1f
lfd f0,16(r4)
fcmpu cr1,f0,f16
bne cr1,1f
lfd f0,24(r4)
fcmpu cr1,f0,f17
bne cr1,1f
lfd f0,32(r4)
fcmpu cr1,f0,f18
bne cr1,1f
lfd f0,40(r4)
fcmpu cr1,f0,f19
bne cr1,1f
lfd f0,48(r4)
fcmpu cr1,f0,f20
bne cr1,1f
lfd f0,56(r4)
fcmpu cr1,f0,f21
bne cr1,1f
lfd f0,64(r4)
fcmpu cr1,f0,f22
bne cr1,1f
lfd f0,72(r4)
fcmpu cr1,f0,f23
bne cr1,1f
lfd f0,80(r4)
fcmpu cr1,f0,f24
bne cr1,1f
lfd f0,88(r4)
fcmpu cr1,f0,f25
bne cr1,1f
lfd f0,96(r4)
fcmpu cr1,f0,f26
bne cr1,1f
lfd f0,104(r4)
fcmpu cr1,f0,f27
bne cr1,1f
lfd f0,112(r4)
fcmpu cr1,f0,f28
bne cr1,1f
lfd f0,120(r4)
fcmpu cr1,f0,f29
bne cr1,1f
lfd f0,128(r4)
fcmpu cr1,f0,f30
bne cr1,1f
lfd f0,136(r4)
fcmpu cr1,f0,f31
bne cr1,1f
li r3,0 # Success!!!
1: blr
FUNC_START(test_fpu)
# r3 holds pointer to where to put the result of fork
# r4 holds pointer to the pid
# f14-f31 are non volatiles
PUSH_BASIC_STACK(256)
std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
PUSH_FPU(STACK_FRAME_LOCAL(2,0))
bl load_fpu
nop
li r0,__NR_fork
sc
# pass the result of the fork to the caller
ld r9,STACK_FRAME_PARAM(1)(sp)
std r3,0(r9)
ld r3,STACK_FRAME_PARAM(0)(sp)
bl check_fpu
nop
POP_FPU(STACK_FRAME_LOCAL(2,0))
POP_BASIC_STACK(256)
blr
FUNC_END(test_fpu)
# int preempt_fpu(double *darray, int *threads_running, int *running)
# On starting will (atomically) decrement not_ready as a signal that the FPU
# has been loaded with darray. Will proceed to check the validity of the FPU
# registers while running is not zero.
FUNC_START(preempt_fpu)
PUSH_BASIC_STACK(256)
std r3,STACK_FRAME_PARAM(0)(sp) # double *darray
std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
std r5,STACK_FRAME_PARAM(2)(sp) # int *running
PUSH_FPU(STACK_FRAME_LOCAL(3,0))
bl load_fpu
nop
sync
# Atomic DEC
ld r3,STACK_FRAME_PARAM(1)(sp)
1: lwarx r4,0,r3
addi r4,r4,-1
stwcx. r4,0,r3
bne- 1b
2: ld r3,STACK_FRAME_PARAM(0)(sp)
bl check_fpu
nop
cmpdi r3,0
bne 3f
ld r4,STACK_FRAME_PARAM(2)(sp)
ld r5,0(r4)
cmpwi r5,0
bne 2b
3: POP_FPU(STACK_FRAME_LOCAL(3,0))
POP_BASIC_STACK(256)
blr
FUNC_END(preempt_fpu)
|