(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[glibc.git] / sysdeps / ia64 / fpu / e_sinhf.S
blobd5aa2dca16963f5dc2fd5c8319c397ccfd47f9e0
1 .file "sinhf.s"
3 // Copyright (C) 2000, 2001, Intel Corporation
4 // All rights reserved.
5 // 
6 // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 // 
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at 
38 // http://developer.intel.com/opensource.
40 // History
41 //==============================================================
42 // 2/02/00  Initial version
43 // 4/04/00  Unwind support added
44 // 8/15/00  Bundle added after call to __libm_error_support to properly
45 //          set [the previously overwritten] GR_Parameter_RESULT.
46 // 10/12/00 Update to set denormal operand and underflow flags
47 // 1/22/01  Fixed to set inexact flag for small args.
49 // API
50 //==============================================================
51 // float = sinhf(float)
52 // input  floating point f8
53 // output floating point f8
55 // Registers used
56 //==============================================================
57 // general registers: 
58 // r32 -> r47
59 // predicate registers used:
60 // p6 p7 p8 p9 p10 p11
61 // floating-point registers used:
62 // f9 -> f15; f32 -> f45; 
63 // f8 has input, then output
65 // Overview of operation
66 //==============================================================
67 // There are four paths
68 // 1. |x| < 0.25        SINH_BY_POLY
69 // 2. |x| < 32          SINH_BY_TBL
70 // 3. |x| < 2^14        SINH_BY_EXP
70 // 4. |x| >= 2^14       SINH_HUGE
73 // For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
74 //                                           >= 1.0110001.... x 2^13
75 //                                           >= 11357.2166
77 // But for double we get infinity for x >= 408633ce8fb9f87e
78 //                                      >= 1.0110...x 2^9
79 //                                      >= +7.10476e+002
81 // And for single we get infinity for x >= 42b3a496
82 //                                      >= 1.0110... 2^6
83 //                                      >= 89.8215
85 // SAFE: If there is danger of overflow set SAFE to 0
86 //       NOT implemented: if there is danger of underflow, set SAFE to 0
87 // SAFE for all paths listed below
89 // 1. SINH_BY_POLY
90 // ===============
91 // If |x| is less than the tiny threshold, then clear SAFE 
92 // For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
93 //             register-biased, this is fc01
94 // For single, the tiny threshold is -126  = -7e    => -7e  + ffff = ff81
95 // If |x| < tiny threshold, set SAFE = 0
97 // 2. SINH_BY_TBL
98 // =============
99 // SAFE: SAFE is always 1 for TBL; 
101 // 3. SINH_BY_EXP
102 // ==============
103 // There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
104 // r34 has N-1; 16382 is in register biased form, 0x13ffd
105 // There is danger of double overflow if N-1 > 0x3fe
106 //                       in register biased form, 0x103fd
107 // Analogously, there is danger of single overflow if N-1 > 0x7e
108 //                       in register biased form, 0x1007d
109 // SAFE: If there is danger of overflow set SAFE to 0
111 // 4. SINH_HUGE
112 // ============
113 // SAFE: SAFE is always 0 for HUGE
116 #include "libm_support.h"
118 // Assembly macros
119 //==============================================================
// Symbolic names for the floating-point and general registers used by sinhf.
//
// NOTE: many of the names below deliberately alias the same physical
// register (for example f9 is sinh_FR_X2, sinh_FR_Inv_log2by64, sinh_FR_A1,
// sinh_FR_J_temp, sinh_FR_S_hi, sinh_FR_SINH and sinh_FR_TINY_THRESH).
// Before adding a new use of any name, check that no other alias of the
// same register is still live at that point in the code.
//
// Values that are set once on entry:
//   sinh_FR_X        = |x|   (fmerge.s of f0 and f8)
//   sinh_FR_SGNX     = 1.0 carrying the sign of x (fmerge.s of f8 and f1)
//   sinh_FR_all_ones = significand of all ones; it is squared by a dummy
//                      fmpy.s0 into sinh_FR_tmp to raise the inexact flag
120 sinh_FR_X            = f44
121 sinh_FR_X2           = f9
122 sinh_FR_X4           = f10
123 sinh_FR_SGNX         = f40
124 sinh_FR_all_ones     = f45
125 sinh_FR_tmp          = f42
// Argument-reduction constants, loaded from double_sinh_arg_reduction.
127 sinh_FR_Inv_log2by64 = f9
128 sinh_FR_log2by64_lo  = f11
129 sinh_FR_log2by64_hi  = f10
// A (odd-polynomial) coefficients, loaded from double_sinh_ab_table.
// They reuse f9-f11 after the argument-reduction constants are loaded.
131 sinh_FR_A1           = f9
132 sinh_FR_A2           = f10
133 sinh_FR_A3           = f11
// Reduced argument R and its powers.  M_temp, R_temp and Rsq all share f13.
135 sinh_FR_Rcub         = f12
136 sinh_FR_M_temp       = f13
137 sinh_FR_R_temp       = f13
138 sinh_FR_Rsq          = f13
139 sinh_FR_R            = f14
141 sinh_FR_M            = f38
// B (even-polynomial) coefficients, loaded from double_sinh_ab_table.
143 sinh_FR_B1           = f15
144 sinh_FR_B2           = f32
145 sinh_FR_B3           = f33
// p_even / p_odd accumulators.  The peven and podd temporaries share f34/f35.
147 sinh_FR_peven_temp1  = f34
148 sinh_FR_peven_temp2  = f35
149 sinh_FR_peven        = f36
151 sinh_FR_podd_temp1   = f34
152 sinh_FR_podd_temp2   = f35
153 sinh_FR_podd         = f37
// Temporaries used only by the SINH_BY_POLY path.
155 sinh_FR_poly_podd_temp1    =  f11 
156 sinh_FR_poly_podd_temp2    =  f13
157 sinh_FR_poly_peven_temp1   =  f11
158 sinh_FR_poly_peven_temp2   =  f13
160 sinh_FR_J_temp       = f9
161 sinh_FR_J            = f10
163 sinh_FR_Mmj          = f39
// Scale factors: sinh_FR_spos = 2^(N-1), sinh_FR_sneg = 2^(-N-1); they are
// built with setf.exp into N_temp1/N_temp2 followed by fmerge.se.
165 sinh_FR_N_temp1      = f11
166 sinh_FR_N_temp2      = f12
167 sinh_FR_N            = f13
169 sinh_FR_spos         = f14
170 sinh_FR_sneg         = f15
// Table values T(j) and T(-j) (hi and lo parts) read from double_sinh_j_table.
172 sinh_FR_Tjhi         = f32
173 sinh_FR_Tjlo         = f33
174 sinh_FR_Tmjhi        = f34
175 sinh_FR_Tmjlo        = f35
// Byte offsets (GR_*) and addresses (AD_*) of the -j and +j table entries.
177 sinh_GR_mJ           = r35
178 sinh_GR_J            = r36
// r38 also holds the 0xfffe exponent bias constant earlier in the table path,
// and r40 is reused as a scratch register there after the entry setf.sig.
180 sinh_AD_mJ           = r38
181 sinh_AD_J            = r39
182 sinh_GR_all_ones     = r40
// sinh(B) = S_hi + S_lo and cosh(B) = C_hi, with their temporaries.
184 sinh_FR_S_hi         = f9
185 sinh_FR_S_hi_temp    = f10
186 sinh_FR_S_lo_temp1   = f11 
187 sinh_FR_S_lo_temp2   = f12 
188 sinh_FR_S_lo_temp3   = f13 
190 sinh_FR_S_lo         = f38
191 sinh_FR_C_hi         = f39
// Result assembly: sinh_FR_SINH = Y_hi + Y_lo (it shares f9 with sinh_FR_S_hi).
193 sinh_FR_C_hi_temp1   = f10
194 sinh_FR_Y_hi         = f11 
195 sinh_FR_Y_lo_temp    = f12 
196 sinh_FR_Y_lo         = f13 
197 sinh_FR_SINH         = f9
// SINH_BY_POLY coefficients, loaded in this order from double_sinh_p_table.
199 sinh_FR_P1           = f14
200 sinh_FR_P2           = f15
201 sinh_FR_P3           = f32
202 sinh_FR_P4           = f33
203 sinh_FR_P5           = f34
204 sinh_FR_P6           = f35
206 sinh_FR_TINY_THRESH  = f9
208 sinh_FR_SINH_temp    = f10
209 sinh_FR_SCALE        = f11 
211 sinh_FR_signed_hi_lo = f10
// The entry "alloc r32 = ar.pfs,0,12,4,0" makes r32-r43 locals and r44-r47
// outputs, so the GR_SAVE_* names are locals and the GR_Parameter_* names
// are outgoing-argument registers.
// NOTE(review): presumably these are used around the call to
// __libm_error_support mentioned in the History notes; that code is not
// in the part of the file reviewed here -- confirm.
214 GR_SAVE_PFS          = r41
215 GR_SAVE_B0           = r42
216 GR_SAVE_GP           = r43
218 GR_Parameter_X       = r44
219 GR_Parameter_Y       = r45
220 GR_Parameter_RESULT  = r46
222 // Data tables
223 //==============================================================
// The constant tables go in .rodata when built as part of libc (_LIBC
// defined) and in .data otherwise.
225 #ifdef _LIBC
226 .rodata
227 #else
228 .data
229 #endif
231 .align 16
// Argument-reduction constants for the TBL/EXP paths.  Each entry is
// 16 bytes (two data8 words) and is read with "ldfe ... = [r34],16", in
// this order:
//   1. sinh_FR_Inv_log2by64   (64/log(2))
//   2. sinh_FR_log2by64_hi    (high part of log(2)/64)
//   3. sinh_FR_log2by64_lo    (low-order correction to log(2)/64)
// The order must match the sequence of loads in the code.
232 double_sinh_arg_reduction:
233 ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
234    data8 0xB8AA3B295C17F0BC, 0x00004005
235    data8 0xB17217F7D1000000, 0x00003FF8
236    data8 0xCF79ABC9E3B39804, 0x00003FD0
237 ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
// Polynomial coefficients for the SINH_BY_POLY path (|x| < 0.25).
// Six 16-byte entries, read in order with "ldfe ... = [r34],16" into
// sinh_FR_P1 .. sinh_FR_P6.  They are close to, but (as the SINH_BY_POLY
// comments note) not exactly, 1/3!, 1/5!, 1/7!, 1/9!, 1/11! and 1/13!.
// P1, P3, P5 feed p_odd and P2, P4, P6 feed p_even.
239 double_sinh_p_table:
240 ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
241    data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
242    data8 0x8888888888888412, 0x00003FF8
243    data8 0xD00D00D00D4D39F2, 0x00003FF2
244    data8 0xB8EF1D28926D8891, 0x00003FEC
245    data8 0xD732377688025BE9, 0x00003FE5
246    data8 0xB08AF9AE78C1239F, 0x00003FDE
247 ASM_SIZE_DIRECTIVE(double_sinh_p_table)
// Polynomial coefficients for the reduced argument R in the TBL/EXP paths.
// Six 16-byte entries, read in order with "ldfe ... = [r34],16":
//   sinh_FR_A1, sinh_FR_A2, sinh_FR_A3   (approximately 1/3!, 1/5!, 1/7!;
//                                         used for p_odd)
//   sinh_FR_B1, sinh_FR_B2, sinh_FR_B3   (approximately 1/2!, 1/4!, 1/6!;
//                                         used for p_even)
// The A entries must stay ahead of the B entries to match the load order.
249 double_sinh_ab_table:
250 ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
251    data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
252    data8 0x88888888884ECDD5, 0x00003FF8
253    data8 0xD00D0C6DCC26A86B, 0x00003FF2
254    data8 0x8000000000000002, 0x00003FFE
255    data8 0xAAAAAAAAAA402C77, 0x00003FFA
256    data8 0xB60B6CC96BDB144D, 0x00003FF5
257 ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
// Table of T(j) ~ 2^(j/64) for j = -32 .. 32 (65 entries); the first,
// middle and last entries are sqrt(2)/2, 1.0 and sqrt(2).
//
// Each entry is 32 bytes (four data8 words):
//   words 0-1 : T(j)_hi, read with ldfe from offset 0
//   word  2   : T(j)_lo as a single-precision bit pattern, read with ldfs
//               from offset 16
//   word  3   : zero in every entry; pads the entry to 32 bytes
// The code indexes the table as (32 + j) << 5 for T(j) and (32 - j) << 5
// for T(-j), so the entry size and the ordering must not change.
259 double_sinh_j_table:
260 ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
261    data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
262    data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
263    data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
264    data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
265    data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
266    data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
267    data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
268    data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
269    data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
270    data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
271    data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
272    data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
273    data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
274    data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
275    data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
276    data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
277    data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
278    data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
279    data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
280    data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
281    data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
282    data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
283    data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
284    data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
285    data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
286    data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
287    data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
288    data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
289    data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
290    data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
291    data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
292    data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
293    data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
294    data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
295    data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
296    data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
297    data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
298    data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
299    data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
300    data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
301    data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
302    data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
303    data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
304    data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
305    data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
306    data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
307    data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
308    data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
309    data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
310    data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
311    data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
312    data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
313    data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
314    data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
315    data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
316    data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
317    data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
318    data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
319    data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
320    data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
321    data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
322    data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
323    data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
324    data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
325    data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
326 ASM_SIZE_DIRECTIVE(double_sinh_j_table)
328 .align 32
329 .global sinhf#
331 .section .text
332 .proc  sinhf#
333 .align 32
335 sinhf: 
336 #ifdef _LIBC
337 .global __ieee754_sinhf
338 .type __ieee754_sinhf,@function
339 __ieee754_sinhf:
340 #endif
342 // X infinity or NAN?
343 // Take invalid fault if enabled
346 { .mfi
347       alloc r32 = ar.pfs,0,12,4,0                  
348 (p0)     fclass.m.unc  p6,p0 = f8, 0xe3 //@qnan | @snan | @inf 
349          mov sinh_GR_all_ones = -1
354 { .mfb
355          nop.m 999
356 (p6)     fma.s.s0   f8 = f8,f1,f8               
357 (p6)     br.ret.spnt     b0 ;;                          
360 // Put 0.25 in f9; p7 true if |x| >= 0.25 (then branch to SINH_BY_TBL)
361 // Make constant that will generate inexact when squared
362 { .mlx
363          setf.sig sinh_FR_all_ones = sinh_GR_all_ones 
364 (p0)     movl            r32 = 0x000000000000fffd ;;         
367 { .mfi
368 (p0)     setf.exp        f9 = r32                         
369 (p0)     fclass.m.unc  p7,p0 = f8, 0x07 //@zero
370          nop.i 999 ;;
373 { .mfb
374          nop.m 999
375 (p0)     fmerge.s      sinh_FR_X    = f0,f8             
376 (p7)     br.ret.spnt     b0 ;;                          
379 // Identify denormal operands.
380 { .mfi
381          nop.m 999
382          fclass.m.unc  p10,p0 = f8, 0x09        //  + denorm
383          nop.i 999
385 { .mfi
386          nop.m 999
387          fclass.m.unc  p11,p0 = f8, 0x0a        //  - denorm
388          nop.i 999 
391 { .mfi
392          nop.m 999
393 (p0)     fmerge.s      sinh_FR_SGNX = f8,f1             
394          nop.i 999 ;;
397 { .mfi
398          nop.m 999
399 (p0)     fcmp.lt.unc.s1  p0,p7 = sinh_FR_X,f9             
400          nop.i 999 ;;
403 { .mib
404          nop.m 999
405          nop.i 999
406 (p7)     br.cond.sptk    L(SINH_BY_TBL) ;;                      
410 L(SINH_BY_POLY): 
412 // POLY cannot overflow so there is no need to call __libm_error_support
413 // Set tiny_SAFE (p7) to 1(0) if answer is not tiny 
414 // Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
415 // commented out.
416 //(p0)     movl            r32            = 0x000000000000fc01           
417 //(p0)     setf.exp        f10            = r32                         
418 //(p0)     fcmp.lt.unc.s1  p6,p7          = f8,f10                     
419 // Here is essentially the algorithm for SINH_BY_POLY. Care is taken with the order 
420 // of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
421 // Note that ax = |x|
422 // sinh(x) = sign * (series(e^x) - series(e^-x))/2
423 //         = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
424 //         = sign * (ax   + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
425 //                        + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
426 //         = sign * (ax   + ax*p_odd + (ax*p_even))
427 //         = sign * (ax   + Y_lo)
428 // sinh(x) = sign * (Y_hi + Y_lo)
429 // Get the values of P_x from the table
430 { .mfb
431 (p0)  addl           r34   = @ltoff(double_sinh_p_table), gp
432 (p10) fma.s.s0       f8 =  f8,f8,f8
433 (p10) br.ret.spnt    b0
437 { .mfb
438       ld8 r34 = [r34]
439 (p11) fnma.s.s0      f8 =  f8,f8,f8
440 (p11) br.ret.spnt    b0
444 // Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
445 { .mmf
446          nop.m 999
447 (p0)     ldfe            sinh_FR_P1 = [r34],16                 
448 (p0)     fma.s1        sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;           
451 { .mmi
452 (p0)     ldfe            sinh_FR_P2 = [r34],16 ;;                 
453 (p0)     ldfe            sinh_FR_P3 = [r34],16                 
454          nop.i 999 ;;
457 { .mmi
458 (p0)     ldfe            sinh_FR_P4 = [r34],16 ;;                 
459 (p0)     ldfe            sinh_FR_P5 = [r34],16                 
460          nop.i 999 ;;
463 { .mfi
464 (p0)     ldfe            sinh_FR_P6 = [r34],16                 
465 (p0)     fma.s1        sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0         
466          nop.i 999 ;;
469 // Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even 
470 { .mfi
471          nop.m 999
472 (p0)     fma.s1      sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3                
473          nop.i 999 ;;
476 { .mfi
477          nop.m 999
478 (p0)     fma.s1      sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1   
479          nop.i 999
482 { .mfi
483          nop.m 999
484 (p0)     fma.s1      sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4               
485          nop.i 999 ;;
488 { .mfi
489          nop.m 999
490 (p0)     fma.s1      sinh_FR_podd       = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0           
491          nop.i 999
494 { .mfi
495          nop.m 999
496 (p0)     fma.s1      sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2 
497          nop.i 999 ;;
500 { .mfi
501          nop.m 999
502 (p0)     fma.s1      sinh_FR_peven       = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0         
503          nop.i 999 ;;
506 // Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
507 { .mfi
508          nop.m 999
509 (p0)     fma.s1      sinh_FR_Y_lo_temp    = sinh_FR_X, sinh_FR_peven, f0                    
510          nop.i 999 ;;
513 { .mfi
514          nop.m 999
515 (p0)     fma.s1      sinh_FR_Y_lo         = sinh_FR_X, sinh_FR_podd,  sinh_FR_Y_lo_temp          
516          nop.i 999 ;;
519 // Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
520 { .mfi
521          nop.m 999
522 (p0)     fma.s1      sinh_FR_SINH        = sinh_FR_X, f1, sinh_FR_Y_lo                      
523          nop.i 999 ;;
525 // Dummy multiply to generate inexact
526 { .mfi
527          nop.m 999
528 (p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
529          nop.i 999
532 // Calculate f8 = sign * (Y_hi + Y_lo)
533 // Go to return
534 { .mfb
535          nop.m 999
536 (p0)     fma.s.s0        f8 = sinh_FR_SGNX,sinh_FR_SINH,f0                       
537 (p0)     br.ret.sptk     b0 ;;                          
541 L(SINH_BY_TBL): 
543 // Now that we are at TBL; so far all we know is that |x| >= 0.25.
544 // The first two steps are the same for TBL and EXP, but if we are HUGE
545 // we want to leave now. 
546 // Double-extended:
547 // Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
548 // Double
549 // Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
550 // Single
551 // Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
553 { .mlx
554          nop.m 999
555 (p0)     movl            r32 = 0x0000000000010006 ;;         
558 { .mfi
559 (p0)     setf.exp        f9 = r32                         
560          nop.f 999
561          nop.i 999 ;;
564 { .mfi
565          nop.m 999
566 (p0)     fcmp.ge.unc.s1  p6,p7 = sinh_FR_X,f9             
567          nop.i 999 ;;
570 { .mib
571          nop.m 999
572          nop.i 999
573 (p6)     br.cond.spnt    L(SINH_HUGE) ;;                        
576 // r32 = 1
577 // r34 = N-1 
578 // r35 = N
579 // r36 = j
580 // r37 = N+1
582 // TBL can never overflow
583 // sinh(x) = sinh(B+R)
584 //         = sinh(B)cosh(R) + cosh(B)sinh(R)
585 // 
586 // ax = |x| = M*log2/64 + R
587 // B = M*log2/64
588 // M = 64*N + j 
589 //   We will calculate M and get N as (M-j)/64
590 //   The division is a shift.
591 // exp(B)  = exp(N*log2 + j*log2/64)
592 //         = 2^N * 2^(j*log2/64)
593 // sinh(B) = 1/2(e^B -e^-B)
594 //         = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64)) 
595 // sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) 
596 // cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) 
597 // 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
598 // Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
599 // R = ax - M*log2/64
600 // R = ax - M*log2_by_64_hi - M*log2_by_64_lo
601 // exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
602 //        = 1 + p_odd + p_even
603 //        where p_odd uses the A coefficients and p_even uses the B coefficients
604 // So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
605 //    cosh(R) = 1 + p_even
606 //    sinh(B) = S_hi + S_lo
607 //    cosh(B) = C_hi
608 // sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
609 // ******************************************************
610 // STEP 1 (TBL and EXP)
611 // ******************************************************
612 // Get the following constants. 
613 // f9  = Inv_log2by64
614 // f10 = log2by64_hi
615 // f11 = log2by64_lo
617 { .mmi
618 (p0)  adds                 r32 = 0x1,r0      
619 (p0)  addl           r34   = @ltoff(double_sinh_arg_reduction), gp
620       nop.i 999
624 { .mmi
625       ld8 r34 = [r34]
626       nop.m 999
627       nop.i 999
632 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
633 // put them in an exponent.
634 // sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
635 // r39 = 0xffff + (N-1)  = 0xffff +N -1
636 // r40 = 0xffff - (N +1) = 0xffff -N -1
638 { .mlx
639          nop.m 999
640 (p0)     movl                r38 = 0x000000000000fffe ;; 
643 { .mmi
644 (p0)     ldfe            sinh_FR_Inv_log2by64 = [r34],16 ;;       
645 (p0)     ldfe            sinh_FR_log2by64_hi  = [r34],16       
646          nop.i 999 ;;
649 { .mbb
650 (p0)     ldfe            sinh_FR_log2by64_lo  = [r34],16       
651          nop.b 999
652          nop.b 999 ;;
655 // Get the A coefficients
656 // f9  = A_1
657 // f10 = A_2
658 // f11 = A_3
660 { .mmi
661       nop.m 999
662 (p0)  addl           r34   = @ltoff(double_sinh_ab_table), gp
663       nop.i 999
667 { .mmi
668       ld8 r34 = [r34]
669       nop.m 999
670       nop.i 999
675 // Calculate M and keep it as integer and floating point.
676 // f38 = M = round-to-integer(ax*Inv_log2by64)
677 // sinh_FR_M = M = round-to-integer(ax/(log2/64))  (fcvt.fx without .trunc rounds)
678 // Put the significand of M in r35
679 //    and the floating point representation of M in sinh_FR_M
681 { .mfi
682          nop.m 999
683 (p0)     fma.s1          sinh_FR_M      = sinh_FR_X, sinh_FR_Inv_log2by64, f0 
684          nop.i 999
687 { .mfi
688 (p0)     ldfe            sinh_FR_A1 = [r34],16            
689          nop.f 999
690          nop.i 999 ;;
693 { .mfi
694          nop.m 999
695 (p0)     fcvt.fx.s1      sinh_FR_M_temp = sinh_FR_M                      
696          nop.i 999 ;;
699 { .mfi
700          nop.m 999
701 (p0)     fnorm.s1        sinh_FR_M      = sinh_FR_M_temp                 
702          nop.i 999 ;;
705 { .mfi
706 (p0)     getf.sig        r35       = sinh_FR_M_temp                 
707          nop.f 999
708          nop.i 999 ;;
711 // M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It 
712 // has a range of -32 thru 31.
713 // r35 = M
714 // r36 = j 
716 { .mii
717          nop.m 999
718          nop.i 999 ;;
719 (p0)     and            r36 = 0x3f, r35 ;;   
722 // Calculate R
723 // f13 = f44 - f38*f10 = ax - M*log2by64_hi
724 // f14 = f13 - f38*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
726 { .mfi
727          nop.m 999
728 (p0)     fnma.s1           sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X      
729          nop.i 999
732 { .mfi
733 (p0)     ldfe            sinh_FR_A2 = [r34],16            
734          nop.f 999
735          nop.i 999 ;;
738 { .mfi
739          nop.m 999
740 (p0)     fnma.s1           sinh_FR_R      = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp 
741          nop.i 999
744 // Get the B coefficients
745 // f15 = B_1
746 // f32 = B_2
747 // f33 = B_3
749 { .mmi
750 (p0)     ldfe            sinh_FR_A3 = [r34],16 ;;            
751 (p0)     ldfe            sinh_FR_B1 = [r34],16            
752          nop.i 999 ;;
755 { .mmi
756 (p0)     ldfe            sinh_FR_B2 = [r34],16 ;;            
757 (p0)     ldfe            sinh_FR_B3 = [r34],16            
758          nop.i 999 ;;
761 { .mii
762          nop.m 999
763 (p0)     shl            r34 = r36,  0x2 ;;   
764 (p0)     sxt1           r37 = r34 ;;         
767 // ******************************************************
768 // STEP 2 (TBL and EXP)
769 // ******************************************************
770 // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
771 // f12 = R*R*R
772 // f13 = R*R
773 // f14 = R <== from above
775 { .mfi
776          nop.m 999
777 (p0)     fma.s1             sinh_FR_Rsq  = sinh_FR_R,   sinh_FR_R, f0  
778 (p0)     shr            r36 = r37,  0x2 ;;   
781 // r34 = M-j = r35 - r36
782 // r35 = N = (M-j)/64
784 { .mii
785 (p0)     sub                  r34 = r35, r36    
786          nop.i 999 ;;
787 (p0)     shr                  r35 = r34, 0x6 ;;    
790 { .mii
791 (p0)     sub                 r40 = r38, r35           
792 (p0)     adds                 r37 = 0x1, r35    
793 (p0)     add                 r39 = r38, r35 ;;           
796 // Get the address of the J table, add the offset, 
797 // addresses are sinh_AD_mJ and sinh_AD_J, get the T value
798 // f32 = T(j)_hi
799 // f33 = T(j)_lo
800 // f34 = T(-j)_hi
801 // f35 = T(-j)_lo
803 { .mmi
804 (p0)  sub                  r34 = r35, r32    
805 (p0)  addl           r37   = @ltoff(double_sinh_j_table), gp
806       nop.i 999
810 { .mmi
811       ld8 r37 = [r37]
812       nop.m 999
813       nop.i 999
818 { .mfi
819          nop.m 999
820 (p0)     fma.s1             sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0  
821          nop.i 999
824 // ******************************************************
825 // STEP 3 Now decide if we need to branch to EXP
826 // ******************************************************
827 // Put 32 in f9; p7 true if |x| >= 32
828 // Go to EXP if |x| >= 32 
830 { .mlx
831          nop.m 999
832 (p0)     movl                r32 = 0x0000000000010004 ;;               
835 // Calculate p_even
836 // f34 = B_2 + Rsq *B_3
837 // f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
838 // f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
840 { .mfi
841          nop.m 999
842 (p0)     fma.s1          sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3,          sinh_FR_B2  
843          nop.i 999 ;;
846 { .mfi
847          nop.m 999
848 (p0)     fma.s1          sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1  
849          nop.i 999
852 // Calculate p_odd
853 // f34 = A_2 + Rsq *A_3
854 // f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
855 // f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
857 { .mfi
858          nop.m 999
859 (p0)     fma.s1          sinh_FR_podd_temp1 = sinh_FR_Rsq,        sinh_FR_A3,         sinh_FR_A2  
860          nop.i 999 ;;
863 { .mfi
864 (p0)     setf.exp            sinh_FR_N_temp1 = r39            
865          nop.f 999
866          nop.i 999 ;;
869 { .mfi
870          nop.m 999
871 (p0)     fma.s1          sinh_FR_peven       = sinh_FR_Rsq, sinh_FR_peven_temp2, f0     
872          nop.i 999
875 { .mfi
876          nop.m 999
877 (p0)     fma.s1          sinh_FR_podd_temp2 = sinh_FR_Rsq,        sinh_FR_podd_temp1, sinh_FR_A1  
878          nop.i 999 ;;
881 { .mfi
882 (p0)     setf.exp            f9  = r32                              
883          nop.f 999
884          nop.i 999 ;;
887 { .mfi
888          nop.m 999
889 (p0)     fma.s1          sinh_FR_podd       = sinh_FR_podd_temp2, sinh_FR_Rcub,       sinh_FR_R   
890          nop.i 999
893 // sinh_GR_mj contains the table offset for -j
894 // sinh_GR_j  contains the table offset for +j
895 // p6 is true when j <= 0
897 { .mlx
898 (p0)     setf.exp            sinh_FR_N_temp2 = r40            
899 (p0)     movl                r40 = 0x0000000000000020 ;;    
902 { .mfi
903 (p0)     sub                 sinh_GR_mJ = r40,  r36           
904 (p0)     fmerge.se           sinh_FR_spos    = sinh_FR_N_temp1, f1 
905 (p0)     adds                sinh_GR_J  = 0x20, r36 ;;           
908 { .mii
909          nop.m 999
910 (p0)     shl                  sinh_GR_mJ = sinh_GR_mJ, 5 ;;   
911 (p0)     add                  sinh_AD_mJ = r37, sinh_GR_mJ ;; 
914 { .mmi
915          nop.m 999
916 (p0)     ldfe                 sinh_FR_Tmjhi = [sinh_AD_mJ],16                 
917 (p0)     shl                  sinh_GR_J  = sinh_GR_J, 5 ;;    
920 { .mfi
921 (p0)     ldfs                 sinh_FR_Tmjlo = [sinh_AD_mJ],16                 
922 (p0)     fcmp.lt.unc.s1      p0,p7 = sinh_FR_X,f9                          
923 (p0)     add                  sinh_AD_J  = r37, sinh_GR_J ;;  
926 { .mmi
927 (p0)     ldfe                 sinh_FR_Tjhi  = [sinh_AD_J],16 ;;                  
928 (p0)     ldfs                 sinh_FR_Tjlo  = [sinh_AD_J],16                  
929          nop.i 999 ;;
932 { .mfb
933          nop.m 999
934 (p0)     fmerge.se           sinh_FR_sneg    = sinh_FR_N_temp2, f1 
935 (p7)     br.cond.spnt        L(SINH_BY_EXP) ;;                            
938 { .mfi
939          nop.m 999
940          nop.f 999
941          nop.i 999 ;;
944 // ******************************************************
945 // If NOT branch to EXP
946 // ******************************************************
947 // Calculate S_hi and S_lo
948 // sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
949 // sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
950 // sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
952 { .mfi
953          nop.m 999
954 (p0)    fma.s1         sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0   
955          nop.i 999 ;;
958 { .mfi
959          nop.m 999
960 (p0)    fms.s1         sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi_temp              
961          nop.i 999
964 // Calculate C_hi
965 // sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
966 // sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
968 { .mfi
969          nop.m 999
970 (p0)    fma.s1         sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0                   
971          nop.i 999 ;;
974 // sinh_FR_S_lo_temp1 =  sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
975 // sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
976 // sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1              )
978 { .mfi
979          nop.m 999
980 (p0)    fms.s1         sinh_FR_S_lo_temp1 =  sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi            
981          nop.i 999
984 { .mfi
985          nop.m 999
986 (p0)    fma.s1         sinh_FR_C_hi       = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1    
987          nop.i 999 ;;
990 { .mfi
991          nop.m 999
992 (p0)    fnma.s1        sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1       
993          nop.i 999
996 // sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
997 // sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
998 // sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
999 // sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
1001 { .mfi
1002          nop.m 999
1003 (p0)    fma.s1         sinh_FR_S_lo_temp1 =  sinh_FR_sneg, sinh_FR_Tmjlo, f0                  
1004          nop.i 999 ;;
1007 /////////// BUG FIX fma to fms -TK
1008 { .mfi
1009          nop.m 999
1010 (p0)    fms.s1         sinh_FR_S_lo_temp3 =  sinh_FR_spos, sinh_FR_Tjlo,  sinh_FR_S_lo_temp1  
1011          nop.i 999 ;;
1014 { .mfi
1015          nop.m 999
1016 (p0)    fma.s1         sinh_FR_S_lo       =  sinh_FR_S_lo_temp3, f1,   sinh_FR_S_lo_temp2     
1017          nop.i 999 ;;
1020 // Y_hi = S_hi 
1021 // Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
1022 // sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
1023 // sinh_FR_Y_lo      = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
1025 { .mfi
1026          nop.m 999
1027 (p0)    fma.s1         sinh_FR_Y_lo_temp  = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo           
1028          nop.i 999 ;;
1031 { .mfi
1032          nop.m 999
1033 (p0)    fma.s1         sinh_FR_Y_lo       =  sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp      
1034          nop.i 999 ;;
1037 // sinh_FR_SINH = Y_hi + Y_lo
1038 // f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
1040 // Dummy multiply to generate inexact
1041 { .mfi
1042          nop.m 999
1043 (p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
1044          nop.i 999
1046 { .mfi
1047          nop.m 999
1048 (p0)    fma.s1         sinh_FR_SINH       =  sinh_FR_S_hi, f1, sinh_FR_Y_lo    
1049          nop.i 999 ;;
1052 { .mfb
1053          nop.m 999
1054 (p0)    fma.s.s0       f8 = sinh_FR_SGNX, sinh_FR_SINH,f0                      
1055 (p0)    br.ret.sptk     b0 ;;                          
1059 L(SINH_BY_EXP): 
// EXP path of sinhf.  The result is built from the Tj terms only:
//   f44 = SCALE * (Tjhi + (Tjhi * (p_even + p_odd) + Tjlo)), rounded to single.
// If p7 (over_SAFE) is set the value in f44 is returned directly.  Otherwise
// the product is recomputed in status field s2 into f43 and compared against
// +/- the threshold in f41/f42; on overflow r47 is set to 128 and control
// branches to L(SINH_ERROR_SUPPORT) with the candidate result still in f44.
1061 // When p7 is true,  we know that an overflow is not going to happen
1062 // When p7 is false, we must check for possible overflow
1063 // p7 is the over_SAFE flag
1064 // Y_hi = Tjhi
1065 // Y_lo = Tjhi * (p_odd + p_even) +Tjlo
1066 // Scale = sign * 2^(N-1)
1067 // sinh_FR_Y_lo_temp =  sinh_FR_peven + sinh_FR_podd
1068 // sinh_FR_Y_lo      =  sinh_FR_Tjhi * (sinh_FR_Y_lo_temp      ) + sinh_FR_Tjlo
1070 { .mfi
1071          nop.m 999
1072 (p0)   fma.s1            sinh_FR_Y_lo_temp =  sinh_FR_peven, f1, sinh_FR_podd                   
1073          nop.i 999
1076 // Now we are in EXP. This is the only path where an overflow is possible
1077 // but not for certain. So this is the only path where over_SAFE has any use.
1078 // r34 still has N-1
1079 // There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
1080 // There is a danger of double overflow            if N-1 > 0x3fe = 1022
1081 // There is a danger of single overflow            if N-1 > 0x7e = 126
1082 { .mlx
1083          nop.m 999
1084 (p0)   movl                r32          = 0x000000000000007e ;;                       
// The compare below writes its "greater than" result to p0 (discarded) and the
// complement to p7, so p7 = (r34 <= 0x7e) = over_SAFE.
// fmerge.s builds SCALE from the sign of sinh_FR_SGNX and the exponent and
// significand of sinh_FR_spos.
1087 { .mfi
1088 (p0)   cmp.gt.unc          p0,p7        = r34, r32                                 
1089 (p0)   fmerge.s          sinh_FR_SCALE     = sinh_FR_SGNX, sinh_FR_spos                         
1090          nop.i 999 ;;
1093 { .mfi
1094          nop.m 999
1095 (p0)   fma.s1            sinh_FR_Y_lo      =  sinh_FR_Tjhi,  sinh_FR_Y_lo_temp, sinh_FR_Tjlo    
1096          nop.i 999 ;;
1099 // f8 = answer = scale * (Y_hi + Y_lo)
1100 { .mfi
1101          nop.m 999
1102 (p0)   fma.s1            sinh_FR_SINH_temp = sinh_FR_Y_lo,  f1, sinh_FR_Tjhi       
1103          nop.i 999 ;;
// The candidate result is kept in f44 (not f8) so that f8 still holds its
// previous value if the error path below has to store it as Parameter 1.
1106 { .mfi
1107          nop.m 999
1108 (p0)   fma.s.s0          f44          = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0      
1109          nop.i 999 ;;
1112 // Dummy multiply to generate inexact
1113 { .mfi
1114          nop.m 999
1115 (p7)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
1116          nop.i 999 ;;
1119 // If over_SAFE is set, return
1120 { .mfb
1121        nop.m 999
1122 (p7)   fmerge.s            f8 = f44,f44                                            
1123 (p7)   br.ret.sptk     b0 ;;                          
1126 // Else see if we overflowed
1127 // S0 user supplied status
1128 // S2 user supplied status + WRE + TD  (Overflows)
1129 // If WRE is set then an overflow will not occur in EXP.
1130 // The input value that would cause a register (WRE) value to overflow is about 2^15
1131 // and this input would go into the HUGE path.
1132 // Answer with WRE is in f43.
1134 { .mfi
1135          nop.m 999
1136 (p0)   fsetc.s2            0x7F,0x42                                               
1137          nop.i 999;;
1140 { .mfi
1141          nop.m 999
1142 (p0)   fma.s.s2            f43  = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0                      
1143          nop.i 999 ;;
1146 // 1007F => 1007F -FFFF = 80(true)
1147 // 80 + 7F = FF, which is 1 more than the exponent of the largest
1148 // single (FE). So 0 1007F 8000000000000000  is one ulp more than
1149 // largest single in register bias
1150 // Now  set p8 if the answer with WRE is greater than or equal to this value
1151 // Also set p9 if the answer with WRE is less than or equal to negative this value
1153 { .mlx
1154          nop.m 999
1155 (p0)   movl                r32     = 0x0000000001007F ;;                              
// f41 = positive threshold built from the exponent in r32.  The second fsetc
// rewrites the s2 controls with 0x40 instead of 0x42, i.e. without the extra
// bit that was set for the f43 computation above (presumably WRE - confirm
// against the fsetc/FPSR description).
1158 { .mmf
1159          nop.m 999
1160 (p0)   setf.exp            f41 = r32                                               
1161 (p0)   fsetc.s2            0x7F,0x40 ;;                                               
1164 { .mfi
1165          nop.m 999
1166 (p0)   fcmp.ge.unc.s1 p8, p0 =  f43, f41                                           
1167          nop.i 999
// f42 = -f41, the negative threshold used for the p9 test.
1170 { .mfi
1171          nop.m 999
1172 (p0)   fmerge.ns           f42 = f41, f41                                          
1173          nop.i 999 ;;
1176 // The error tag for overflow is 128
1177 { .mii
1178          nop.m 999
1179          nop.i 999 ;;
1180 (p8)   mov                 r47 = 128 ;;                                               
1183 { .mfb
1184          nop.m 999
1185 (p0)   fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
1186 (p8)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
1189 { .mii
1190          nop.m 999
1191          nop.i 999 ;;
1192 (p9)   mov                 r47 = 128                                               
1195 { .mib
1196          nop.m 999
1197          nop.i 999
1198 (p9)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
// Neither threshold was reached: no overflow, return the s0 result from f44.
1201 // Dummy multiply to generate inexact
1202 { .mfi
1203          nop.m 999
1204 (p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
1205          nop.i 999 ;;
1208 { .mfb
1209          nop.m 999
1210 (p0)   fmerge.s            f8 = f44,f44                                            
1211 (p0)   br.ret.sptk     b0 ;;                          
1214 L(SINH_HUGE): 
// HUGE path of sinhf: builds a value far beyond the single-precision range
// in f44, sets the overflow error tag in r47, and then falls through
// (there is no branch at the end of this section) into
// __libm_error_region / L(SINH_ERROR_SUPPORT), which follows immediately.
1216 // for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
1217 // SAFE: SAFE is always 0 for HUGE
// 0x15dbf - 0xffff (register exponent bias) = 0x5dc0 = 24000.
1219 { .mlx
1220          nop.m 999
1221 (p0)   movl                r32 = 0x0000000000015dbf ;;                                
1224 { .mfi
1225 (p0)   setf.exp            f9  = r32                                               
1226          nop.f 999
1227          nop.i 999 ;;
// signed_hi_lo = SGNX * f9 + 1  (sinh_FR_SGNX presumably carries the sign of
// the input - confirm against where it is set earlier in the file).
1230 { .mfi
1231          nop.m 999
1232 (p0)   fma.s1              sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1                       
1233          nop.i 999 ;;
// f44 = signed_hi_lo * f9, rounded to single under the user status field s0.
// r47 = 128 is the overflow error tag consumed by the error-support code.
1236 { .mfi
1237          nop.m 999
1238 (p0)   fma.s.s0            f44 = sinh_FR_signed_hi_lo,  f9, f0                          
1239 (p0)   mov                 r47 = 128                                               
1241 .endp sinhf
1242 ASM_SIZE_DIRECTIVE(sinhf)
1243 #ifdef _LIBC
1244 ASM_SIZE_DIRECTIVE(__ieee754_sinhf)
1245 #endif
1247 // Stack operations when calling error support.
1248 //       (1)               (2)                          (3) (call)              (4)
1249 //   sp   -> +          psp -> +                     psp -> +                   sp -> +
1250 //           |                 |                            |                         |
1251 //           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
1252 //           |                 |                            |                         |
1253 //           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
1254 //           |                 |                            |                         |
1255 //           |                 | <- GR_X               X1 ->|                         |
1256 //           |                 |                            |                         |
1257 //  sp-64 -> +          sp ->  +                     sp ->  +                         +
1258 //    save ar.pfs          save b0                                               restore gp
1259 //    save gp                                                                    restore ar.pfs
1261 .proc __libm_error_region
1262 __libm_error_region:
1263 L(SINH_ERROR_SUPPORT):
// Error-support stub for sinhf.  Reached by branch from the overflow checks
// in the EXP path and by fall-through from the HUGE path, with the candidate
// result in f44 and the error tag already placed in r47 by that code
// (presumably r47 is the tag argument of __libm_error_support - confirm
// against the register definitions earlier in the file).
// It allocates a 64-byte frame, stores f8 (Parameter 1), f0 (Parameter 2) and
// f44 (result) into it, calls __libm_error_support, reloads the result slot
// into f8 and returns to the original caller of sinhf.
// Frame layout relative to the new sp: X at sp+16, Y at sp+32, RESULT at sp+48.
1264 .prologue
1266 // (1)
1267 { .mfi
1268         add   GR_Parameter_Y=-32,sp             // Parameter 2 address (old sp - 32)
1269         nop.f 0
1270 .save   ar.pfs,GR_SAVE_PFS
1271         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
1273 { .mfi
1274 .fframe 64
1275         add sp=-64,sp                          // Create new 64-byte stack frame
1276         nop.f 0
1277         mov GR_SAVE_GP=gp                      // Save gp
1281 // (2)
1282 { .mmi
1283         stfs [GR_Parameter_Y] = f0,16         // STORE Parameter 2 (f0) on stack, then advance pointer by 16
1284         add GR_Parameter_X = 16,sp            // Parameter 1 address
1285 .save   b0, GR_SAVE_B0
1286         mov GR_SAVE_B0=b0                     // Save b0
1289 .body
1290 // (3)
1291 { .mib
1292         stfs [GR_Parameter_X] = f8                     // STORE Parameter 1 on stack
1293         add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address (slot 16 bytes above Parameter 2)
1294         nop.b 0                            
1296 { .mib
1297         stfs [GR_Parameter_Y] = f44                    // STORE Parameter 3 (candidate result) on stack
1298         add   GR_Parameter_Y = -16,GR_Parameter_Y      // Point back at the Parameter 2 slot
1299         br.call.sptk b0=__libm_error_support#          // Call error handling function
1301 { .mmi
1302         nop.m 0
1303         nop.m 0
1304         add   GR_Parameter_RESULT = 48,sp              // Recompute result slot address (same slot as before the call)
1307 // (4)
1308 { .mmi
1309         ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1310 .restore sp
1311         add   sp = 64,sp                       // Restore stack pointer
1312         mov   b0 = GR_SAVE_B0                  // Restore return address
1314 { .mib
1315         mov   gp = GR_SAVE_GP                  // Restore gp
1316         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1317         br.ret.sptk     b0                     // Return
1320 .endp __libm_error_region
1321 ASM_SIZE_DIRECTIVE(__libm_error_region)
1323 .type   __libm_error_support#,@function
1324 .global __libm_error_support#