1 #if defined(__SUNPRO_C) && defined(__sparcv9)
2 # define ABI64 /* Sun compiler invoked with -xarch=v9: 64-bit ABI */
3 #elif defined(__GNUC__) && defined(__arch64__)
4 # define ABI64 /* GNU compiler invoked with -m64: 64-bit ABI */
19 .global OPENSSL_wipe_cpu
20 .type OPENSSL_wipe_cpu,#function
21 ! Keep in mind that this does not excuse us from wiping the stack!
22 ! This routine wipes registers, but not the backing store [which
23 ! resides on the stack, toward lower addresses]. To facilitate
24 ! stack wiping, it returns a pointer to the top of stack of the *caller*.
41 ! The following is the V9 "rd %ccr,%o0" instruction. However, the V8
42 ! specification says that it ("rd %asr2,%o0" in V8 terms) does
43 ! not cause an illegal_instruction trap. It can therefore be used
44 ! to determine if the CPU the code is executing on is V8- or
45 ! V9-compliant, as V9 returns a distinct value of 0x99, i.e. the
46 ! "negative" and "borrow" bits set in both %icc and %xcc.
47 .word 0x91408000 !rd %ccr,%o0
51 ! Even though we do not use the floating-point register bank,
52 ! we wipe it, as memcpy might have used it...
53 .word 0xbfa00040 !fmovd %f0,%f62
68 .word 0x83a00040 !fmovd %f0,%f32
126 add %fp,BIAS,%i0 ! return pointer to top of stack of the caller
136 .global walk_reg_wins
137 .type walk_reg_wins,#function
145 cmp %o7,0 ! compiler never cleans %o7...
146 be 1f ! could have been a leaf function...
163 add %o0,1,%i0 ! used for debugging
166 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
168 .global OPENSSL_atomic_add
169 .type OPENSSL_atomic_add,#function
174 .word 0x95408000 !rd %ccr,%o2, see the V8/V9 detection comment in OPENSSL_wipe_cpu
182 ! Note that you do not have to link with libthread to call thr_yield,
183 ! as libc provides a stub, which is overridden the moment you link
184 ! with *either* libpthread or libthread...
185 #define YIELD_CPU thr_yield
187 ! applies at least to Linux and FreeBSD... Feedback expected...
188 #define YIELD_CPU sched_yield
190 .spin: call YIELD_CPU
209 .word 0xd7e2100a !cas [%o0],%o2,%o3: if [%o0]==%o2 store %o3 to [%o0]; %o3 receives old [%o0]
212 mov %o3,%o2 ! cas always loads the old memory word into its dest. register
213 add %o1,%o2,%o0 ! OpenSSL expects the new value
215 sra %o0,%g0,%o0 ! sign-extend from 32 bits, as we return signed int
216 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
218 .global _sparcv9_vis1_probe
222 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 (16-bit short FP load, ASI 0xd2)
224 .word 0x81b00d80 !fxor %f0,%f0,%f0 (VIS1 logical op; leaves %f0 zeroed)
225 .type _sparcv9_vis1_probe,#function
226 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
228 ! Probe and instrument a VIS1 instruction. Output is the number of cycles
229 ! it takes to execute rdtick and a pair of VIS1 instructions. As the US-Tx
230 ! VIS unit is slow (documented to be 6 cycles on T2) and the core is in-order
231 ! single-issue, it should be possible to distinguish Tx reliably...
232 ! Observed return values are:
238 ! Numbers for T2 and SPARC64 V-VII are more than welcome.
240 ! It would be possible to detect US-T1 specifically by instrumenting
241 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
242 ! a lot of %tick-s, a couple of thousand on Linux...
243 .global _sparcv9_vis1_instrument
245 _sparcv9_vis1_instrument: ! takes five %tick samples with a pair of VIS1 fxor ops between each
246 .word 0x91410000 !rd %tick,%o0 (sample 1 of 5)
247 .word 0x81b00d80 !fxor %f0,%f0,%f0 (timed VIS1 pair, first op)
248 .word 0x85b08d82 !fxor %f2,%f2,%f2 (timed VIS1 pair, second op)
249 .word 0x93410000 !rd %tick,%o1 (sample 2 of 5)
250 .word 0x81b00d80 !fxor %f0,%f0,%f0
251 .word 0x85b08d82 !fxor %f2,%f2,%f2
252 .word 0x95410000 !rd %tick,%o2 (sample 3 of 5)
253 .word 0x81b00d80 !fxor %f0,%f0,%f0
254 .word 0x85b08d82 !fxor %f2,%f2,%f2
255 .word 0x97410000 !rd %tick,%o3 (sample 4 of 5)
256 .word 0x81b00d80 !fxor %f0,%f0,%f0
257 .word 0x85b08d82 !fxor %f2,%f2,%f2
258 .word 0x99410000 !rd %tick,%o4 (sample 5 of 5)
260 ! calculate intervals
268 .word 0x38680002 !bgu,a %xcc,.+8 (annulled: delay slot executes only if branch is taken)
271 .word 0x38680002 !bgu,a %xcc,.+8 (annulled: delay slot executes only if branch is taken)
274 .word 0x38680002 !bgu,a %xcc,.+8 (annulled: delay slot executes only if branch is taken)
279 .type _sparcv9_vis1_instrument,#function
280 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
282 .global _sparcv9_vis2_probe
286 .word 0x81b00980 !bshuffle %f0,%f0,%f0 (VIS2 instruction being probed)
287 .type _sparcv9_vis2_probe,#function
288 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
290 .global _sparcv9_fmadd_probe
292 _sparcv9_fmadd_probe: ! issues one fmaddd (fused multiply-add) instruction
293 .word 0x81b00d80 !fxor %f0,%f0,%f0 (zero %f0)
294 .word 0x85b08d82 !fxor %f2,%f2,%f2 (zero %f2)
296 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0 (%f0 = %f0*%f0 + %f2)
297 .type _sparcv9_fmadd_probe,#function
298 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
300 .section ".init",#alloc,#execinstr
301 call OPENSSL_cpuid_setup ! placed in .init, so the call is made as part of the init code