(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[glibc.git] / sysdeps / ia64 / fpu / e_sinhl.S
blobb880b95b64cb619b0b2a48e3d0af2daa5bc4cab4
1 .file "sinhl.s"
3 // Copyright (C) 2000, 2001, Intel Corporation
4 // All rights reserved.
5 // 
6 // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 // 
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at 
38 // http://developer.intel.com/opensource.
40 // History
41 //==============================================================
42 // 2/02/00  Initial version
43 // 4/04/00  Unwind support added
44 // 8/15/00  Bundle added after call to __libm_error_support to properly
45 //          set [the previously overwritten] GR_Parameter_RESULT.
46 // 10/12/00 Update to set denormal operand and underflow flags
47 // 1/22/01  Fixed to set inexact flag for small args.  Fixed incorrect 
48 //          call to __libm_error_support for 710.476 < x < 11357.2166.
50 // API
51 //==============================================================
52 // long double = sinhl(long double)
53 // input  floating point f8
54 // output floating point f8
56 // Registers used
57 //==============================================================
58 // general registers: 
59 // r32 -> r47
60 // predicate registers used:
61 // p6 p7 p8 p9 p10 p11
62 // floating-point registers used:
63 // f9 -> f15; f32 -> f45; 
64 // f8 has input, then output
66 // Overview of operation
67 //==============================================================
68 // There are four paths
69 // 1. |x| < 0.25        SINH_BY_POLY
70 // 2. |x| < 32          SINH_BY_TBL
71 // 3. |x| < 2^14        SINH_BY_EXP
72 // 4. |x| >= 2^14       SINH_HUGE
74 // For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
75 //                                           >= 1.0110001.... x 2^13
76 //                                           >= 11357.2166
78 // But for double we get infinity for x >= 408633ce8fb9f87e
79 //                                      >= 1.0110...x 2^9
80 //                                      >= +7.10476e+002
82 // And for single we get infinity for x >= 42b3a496
83 //                                      >= 1.0110... 2^6
84 //                                      >= 89.8215
86 // SAFE: If there is danger of overflow set SAFE to 0
87 //       NOT implemented: if there is danger of underflow, set SAFE to 0
88 // SAFE for all paths listed below
90 // 1. SINH_BY_POLY
91 // ===============
92 // If |x| is less than the tiny threshold, then clear SAFE 
93 // For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
94 //             register-biased, this is fc01
95 // For single, the tiny threshold is -126  = -7e    => -7e  + ffff = ff81
96 // If |x| < tiny threshold, set SAFE = 0
98 // 2. SINH_BY_TBL
99 // =============
100 // SAFE: SAFE is always 1 for TBL; 
102 // 3. SINH_BY_EXP
103 // ==============
104 // There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
105 // r34 has N-1; 16382 is in register biased form, 0x13ffd
106 // There is danger of double overflow if N-1 > 0x3fe
107 //                       in register biased form, 0x103fd
108 // Analogously, there is danger of single overflow if N-1 > 0x7e
109 //                       in register biased form, 0x1007d
110 // SAFE: If there is danger of overflow set SAFE to 0
112 // 4. SINH_HUGE
113 // ============
114 // SAFE: SAFE is always 0 for HUGE
117 #include "libm_support.h"
119 // Assembly macros
120 //==============================================================
// Symbolic names for the registers used by sinhl.
// Several names deliberately alias the same physical register: for example
// f9 is sinh_FR_X2, sinh_FR_Inv_log2by64, sinh_FR_A1, sinh_FR_J_temp,
// sinh_FR_S_hi, sinh_FR_SINH and sinh_FR_TINY_THRESH.  Two aliases of one
// register must therefore never be live at the same time; check this list
// before reordering code or adding a new temporary.
//
// Values derived directly from the argument: |x|, its powers, and its sign.
121 sinh_FR_X            = f44
122 sinh_FR_X2           = f9
123 sinh_FR_X4           = f10
124 sinh_FR_SGNX         = f40
125 sinh_FR_all_ones     = f45
126 sinh_FR_tmp          = f42
// Argument-reduction constants (loaded from double_sinh_arg_reduction).
128 sinh_FR_Inv_log2by64 = f9
129 sinh_FR_log2by64_lo  = f11
130 sinh_FR_log2by64_hi  = f10
// A coefficients for p_odd (loaded from double_sinh_ab_table).
132 sinh_FR_A1           = f9
133 sinh_FR_A2           = f10
134 sinh_FR_A3           = f11
// Reduced argument R and its powers; M_temp, R_temp and Rsq share f13.
136 sinh_FR_Rcub         = f12
137 sinh_FR_M_temp       = f13
138 sinh_FR_R_temp       = f13
139 sinh_FR_Rsq          = f13
140 sinh_FR_R            = f14
142 sinh_FR_M            = f38
// B coefficients for p_even (loaded from double_sinh_ab_table).
144 sinh_FR_B1           = f15
145 sinh_FR_B2           = f32
146 sinh_FR_B3           = f33
// Polynomial accumulators; the peven and podd temporaries share f34/f35.
148 sinh_FR_peven_temp1  = f34
149 sinh_FR_peven_temp2  = f35
150 sinh_FR_peven        = f36
152 sinh_FR_podd_temp1   = f34
153 sinh_FR_podd_temp2   = f35
154 sinh_FR_podd         = f37
// Temporaries for the SINH_BY_POLY path.
156 sinh_FR_poly_podd_temp1    =  f11 
157 sinh_FR_poly_podd_temp2    =  f13
158 sinh_FR_poly_peven_temp1   =  f11
159 sinh_FR_poly_peven_temp2   =  f13
161 sinh_FR_J_temp       = f9
162 sinh_FR_J            = f10
164 sinh_FR_Mmj          = f39
// Scale factors: sinh_FR_spos = 2^(N-1), sinh_FR_sneg = 2^(-N-1).
166 sinh_FR_N_temp1      = f11
167 sinh_FR_N_temp2      = f12
168 sinh_FR_N            = f13
170 sinh_FR_spos         = f14
171 sinh_FR_sneg         = f15
// Table values T(j) and T(-j), hi and lo parts (from double_sinh_j_table).
173 sinh_FR_Tjhi         = f32
174 sinh_FR_Tjlo         = f33
175 sinh_FR_Tmjhi        = f34
176 sinh_FR_Tmjlo        = f35
// Integer registers: byte offsets and addresses for the T(j)/T(-j) lookups.
178 sinh_GR_mJ           = r35
179 sinh_GR_J            = r36
181 sinh_AD_mJ           = r38
182 sinh_AD_J            = r39
183 sinh_GR_all_ones     = r40
// sinh(B) = S_hi + S_lo, cosh(B) = C_hi, and the Y_hi + Y_lo result pieces.
185 sinh_FR_S_hi         = f9
186 sinh_FR_S_hi_temp    = f10
187 sinh_FR_S_lo_temp1   = f11 
188 sinh_FR_S_lo_temp2   = f12 
189 sinh_FR_S_lo_temp3   = f13 
191 sinh_FR_S_lo         = f38
192 sinh_FR_C_hi         = f39
194 sinh_FR_C_hi_temp1   = f10
195 sinh_FR_Y_hi         = f11 
196 sinh_FR_Y_lo_temp    = f12 
197 sinh_FR_Y_lo         = f13 
198 sinh_FR_SINH         = f9
// SINH_BY_POLY coefficients (loaded from double_sinh_p_table).
200 sinh_FR_P1           = f14
201 sinh_FR_P2           = f15
202 sinh_FR_P3           = f32
203 sinh_FR_P4           = f33
204 sinh_FR_P5           = f34
205 sinh_FR_P6           = f35
207 sinh_FR_TINY_THRESH  = f9
209 sinh_FR_SINH_temp    = f10
210 sinh_FR_SCALE        = f11 
212 sinh_FR_signed_hi_lo = f10
// NOTE(review): the GR_SAVE_* and GR_Parameter_* names are not referenced in
// the part of the file reviewed here; presumably they are used around the
// call to __libm_error_support mentioned in the history -- confirm.
215 GR_SAVE_PFS          = r41
216 GR_SAVE_B0           = r42
217 GR_SAVE_GP           = r43
219 GR_Parameter_X       = r44
220 GR_Parameter_Y       = r45
221 GR_Parameter_RESULT  = r46
223 // Data tables
224 //==============================================================
226 #ifdef _LIBC
227 .rodata
228 #else
229 .data
230 #endif
232 .align 16
// Argument-reduction constants.  The code reads them in order with ldfe and
// a 16-byte post-increment, so the entry order must not change:
//   +0   sinh_FR_Inv_log2by64
//   +16  sinh_FR_log2by64_hi
//   +32  sinh_FR_log2by64_lo
// Each entry is written as two data8 words: the 64-bit significand, then a
// word whose low bits hold the sign/exponent field read by ldfe.
233 double_sinh_arg_reduction:
234 ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
235    data8 0xB8AA3B295C17F0BC, 0x00004005
236    data8 0xB17217F7D1000000, 0x00003FF8
237    data8 0xCF79ABC9E3B39804, 0x00003FD0
238 ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
// Coefficients for the SINH_BY_POLY path (|x| < 0.25).  They are read in
// order with ldfe and a 16-byte post-increment into sinh_FR_P1 ..
// sinh_FR_P6, so the entry order must not change.  The values approximate
// 1/3!, 1/5!, 1/7!, 1/9!, 1/11! and 1/13! but are not exactly those
// fractions.
// Each entry is a 64-bit significand followed by the sign/exponent word.
240 double_sinh_p_table:
241 ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
242    data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
243    data8 0x8888888888888412, 0x00003FF8
244    data8 0xD00D00D00D4D39F2, 0x00003FF2
245    data8 0xB8EF1D28926D8891, 0x00003FEC
246    data8 0xD732377688025BE9, 0x00003FE5
247    data8 0xB08AF9AE78C1239F, 0x00003FDE
248 ASM_SIZE_DIRECTIVE(double_sinh_p_table)
// Coefficients for the TBL/EXP polynomial in the reduced argument R.  They
// are read in order with ldfe and a 16-byte post-increment, so the entry
// order must not change:
//   entries 0-2 -> sinh_FR_A1, sinh_FR_A2, sinh_FR_A3  (used for p_odd)
//   entries 3-5 -> sinh_FR_B1, sinh_FR_B2, sinh_FR_B3  (used for p_even)
// Each entry is a 64-bit significand followed by the sign/exponent word.
250 double_sinh_ab_table:
251 ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
252    data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
253    data8 0x88888888884ECDD5, 0x00003FF8
254    data8 0xD00D0C6DCC26A86B, 0x00003FF2
255    data8 0x8000000000000002, 0x00003FFE
256    data8 0xAAAAAAAAAA402C77, 0x00003FFA
257    data8 0xB60B6CC96BDB144D, 0x00003FF5
258 ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
// Table of T(j) = Tjhi + Tjlo for j = -32 .. 32 (65 rows).
// Row layout (32 bytes, four data8 words):
//   +0   Tjhi significand             \ read together by ldfe
//   +8   Tjhi sign/exponent word      /
//   +16  Tjlo, read by ldfs (single precision bit pattern in the low 32 bits)
//   +24  padding
// The lookup code forms the byte offsets (32 + j) << 5 for T(j) and
// (32 - j) << 5 for T(-j), so row 0 is j = -32 and the row size must stay
// 32 bytes.
260 double_sinh_j_table:
261 ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
262    data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
263    data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
264    data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
265    data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
266    data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
267    data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
268    data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
269    data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
270    data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
271    data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
272    data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
273    data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
274    data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
275    data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
276    data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
277    data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
278    data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
279    data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
280    data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
281    data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
282    data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
283    data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
284    data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
285    data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
286    data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
287    data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
288    data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
289    data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
290    data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
291    data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
292    data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
293    data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
294    data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
295    data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
296    data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
297    data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
298    data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
299    data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
300    data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
301    data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
302    data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
303    data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
304    data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
305    data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
306    data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
307    data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
308    data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
309    data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
310    data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
311    data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
312    data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
313    data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
314    data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
315    data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
316    data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
317    data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
318    data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
319    data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
320    data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
321    data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
322    data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
323    data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
324    data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
325    data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
326    data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
327 ASM_SIZE_DIRECTIVE(double_sinh_j_table)
329 .align 32
330 .global sinhl#
332 .section .text
333 .proc  sinhl#
334 .align 32
336 sinhl:
337 #ifdef _LIBC
338 .global __ieee754_sinhl
339 .type __ieee754_sinhl,@function
340 __ieee754_sinhl:
341 #endif
343 // X infinity or NAN?
344 // Take invalid fault if enabled
347 { .mfi
348       alloc r32 = ar.pfs,0,12,4,0                  
349 (p0)     fclass.m.unc  p6,p0 = f8, 0xe3 //@qnan | @snan | @inf 
350          mov sinh_GR_all_ones = -1
355 { .mfb
356          nop.m 999
357 (p6)     fma.s0   f8 = f8,f1,f8               
358 (p6)     br.ret.spnt     b0 ;;                          
361 // Put 0.25 in f9; the compare below sets p7 when |x| >= 0.25
362 // Make constant that will generate inexact when squared
363 { .mlx
364          setf.sig sinh_FR_all_ones = sinh_GR_all_ones 
365 (p0)     movl            r32 = 0x000000000000fffd ;;         
368 { .mfi
369 (p0)     setf.exp        f9 = r32                         
370 (p0)     fclass.m.unc  p7,p0 = f8, 0x07 //@zero
371          nop.i 999 ;;
374 { .mfb
375          nop.m 999
376 (p0)     fmerge.s      sinh_FR_X    = f0,f8             
377 (p7)     br.ret.spnt     b0 ;;                          
380 // Identify denormal operands.
381 { .mfi
382          nop.m 999
383          fclass.m.unc  p10,p0 = f8, 0x09        //  + denorm
384          nop.i 999
386 { .mfi
387          nop.m 999
388          fclass.m.unc  p11,p0 = f8, 0x0a        //  - denorm
389          nop.i 999 
392 { .mfi
393          nop.m 999
394 (p0)     fmerge.s      sinh_FR_SGNX = f8,f1             
395          nop.i 999 ;;
398 { .mfi
399          nop.m 999
400 (p0)     fcmp.lt.unc.s1  p0,p7 = sinh_FR_X,f9             
401          nop.i 999 ;;
404 { .mib
405          nop.m 999
406          nop.i 999
407 (p7)     br.cond.sptk    L(SINH_BY_TBL) ;;                      
411 L(SINH_BY_POLY): 
413 // POLY cannot overflow so there is no need to call __libm_error_support
414 // Set tiny_SAFE (p7) to 1(0) if answer is not tiny 
415 // Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
416 // commented out.
417 //(p0)     movl            r32            = 0x000000000000fc01           
418 //(p0)     setf.exp        f10            = r32                         
419 //(p0)     fcmp.lt.unc.s1  p6,p7          = f8,f10                     
420 // Here is essentially the algorithm for SINH_BY_POLY. Care is taken with the order 
421 // of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
422 // Note that ax = |x|
423 // sinh(x) = sign * (series(e^x) - series(e^-x))/2
424 //         = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
425 //         = sign * (ax   + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
426 //                        + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
427 //         = sign * (ax   + ax*p_odd + (ax*p_even))
428 //         = sign * (ax   + Y_lo)
429 // sinh(x) = sign * (Y_hi + Y_lo)
430 // Get the values of P_x from the table
431 { .mfb
432 (p0)  addl           r34   = @ltoff(double_sinh_p_table), gp
433 (p10) fma.s0       f8 =  f8,f8,f8
434 (p10) br.ret.spnt    b0
438 { .mfb
439       ld8 r34 = [r34]
440 (p11) fnma.s0      f8 =  f8,f8,f8
441 (p11) br.ret.spnt    b0
445 // Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
446 { .mmf
447          nop.m 999
448 (p0)     ldfe            sinh_FR_P1 = [r34],16                 
449 (p0)     fma.s1        sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;           
452 { .mmi
453 (p0)     ldfe            sinh_FR_P2 = [r34],16 ;;                 
454 (p0)     ldfe            sinh_FR_P3 = [r34],16                 
455          nop.i 999 ;;
458 { .mmi
459 (p0)     ldfe            sinh_FR_P4 = [r34],16 ;;                 
460 (p0)     ldfe            sinh_FR_P5 = [r34],16                 
461          nop.i 999 ;;
464 { .mfi
465 (p0)     ldfe            sinh_FR_P6 = [r34],16                 
466 (p0)     fma.s1        sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0         
467          nop.i 999 ;;
470 // Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even 
471 { .mfi
472          nop.m 999
473 (p0)     fma.s1      sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3                
474          nop.i 999 ;;
477 { .mfi
478          nop.m 999
479 (p0)     fma.s1      sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1   
480          nop.i 999
483 { .mfi
484          nop.m 999
485 (p0)     fma.s1      sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4               
486          nop.i 999 ;;
489 { .mfi
490          nop.m 999
491 (p0)     fma.s1      sinh_FR_podd       = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0           
492          nop.i 999
495 { .mfi
496          nop.m 999
497 (p0)     fma.s1      sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2 
498          nop.i 999 ;;
501 { .mfi
502          nop.m 999
503 (p0)     fma.s1      sinh_FR_peven       = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0         
504          nop.i 999 ;;
507 // Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
508 { .mfi
509          nop.m 999
510 (p0)     fma.s1      sinh_FR_Y_lo_temp    = sinh_FR_X, sinh_FR_peven, f0                    
511          nop.i 999 ;;
514 { .mfi
515          nop.m 999
516 (p0)     fma.s1      sinh_FR_Y_lo         = sinh_FR_X, sinh_FR_podd,  sinh_FR_Y_lo_temp          
517          nop.i 999 ;;
520 // Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
521 { .mfi
522          nop.m 999
523 (p0)     fma.s1      sinh_FR_SINH        = sinh_FR_X, f1, sinh_FR_Y_lo                      
524          nop.i 999 ;;
526 // Dummy multiply to generate inexact
527 { .mfi
528          nop.m 999
529 (p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
530          nop.i 999
533 // Calculate f8 = sign * (Y_hi + Y_lo)
534 // Go to return
535 { .mfb
536          nop.m 999
537 (p0)     fma.s0        f8 = sinh_FR_SGNX,sinh_FR_SINH,f0                       
538 (p0)     br.ret.sptk     b0 ;;                          
542 L(SINH_BY_TBL): 
544 // Now that we are at TBL; so far all we know is that |x| >= 0.25.
545 // The first two steps are the same for TBL and EXP, but if we are HUGE
546 // we want to leave now. 
547 // Double-extended:
548 // Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
549 // Double
550 // Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
551 // Single
552 // Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
554 { .mlx
555          nop.m 999
556 (p0)     movl            r32 = 0x000000000001000d ;;         
559 { .mfi
560 (p0)     setf.exp        f9 = r32                         
561          nop.f 999
562          nop.i 999 ;;
565 { .mfi
566          nop.m 999
567 (p0)     fcmp.ge.unc.s1  p6,p7 = sinh_FR_X,f9             
568          nop.i 999 ;;
571 { .mib
572          nop.m 999
573          nop.i 999
574 (p6)     br.cond.spnt    L(SINH_HUGE) ;;                        
577 // r32 = 1
578 // r34 = N-1 
579 // r35 = N
580 // r36 = j
581 // r37 = N+1
583 // TBL can never overflow
584 // sinh(x) = sinh(B+R)
585 //         = sinh(B)cosh(R) + cosh(B)sinh(R)
586 // 
587 // ax = |x| = M*log2/64 + R
588 // B = M*log2/64
589 // M = 64*N + j 
590 //   We will calculate M and get N as (M-j)/64
591 //   The division is a shift.
592 // exp(B)  = exp(N*log2 + j*log2/64)
593 //         = 2^N * 2^(j*log2/64)
594 // sinh(B) = 1/2(e^B -e^-B)
595 //         = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64)) 
596 // sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) 
597 // cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) 
598 // 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
599 // Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
600 // R = ax - M*log2/64
601 // R = ax - M*log2_by_64_hi - M*log2_by_64_lo
602 // exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
603 //        = 1 + p_odd + p_even
604 //        where p_odd uses the A coefficients and p_even uses the B coefficients
605 // So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
606 //    cosh(R) = 1 + p_even
607 //    sinh(B) = S_hi + S_lo
608 //    cosh(B) = C_hi
609 // sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
610 // ******************************************************
611 // STEP 1 (TBL and EXP)
612 // ******************************************************
613 // Get the following constants. 
614 // f9  = Inv_log2by64
615 // f10 = log2by64_hi
616 // f11 = log2by64_lo
618 { .mmi
619 (p0)  adds                 r32 = 0x1,r0      
620 (p0)  addl           r34   = @ltoff(double_sinh_arg_reduction), gp
621       nop.i 999
625 { .mmi
626       ld8 r34 = [r34]
627       nop.m 999
628       nop.i 999
633 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
634 // put them in an exponent.
635 // sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
636 // r39 = 0xffff + (N-1)  = 0xffff +N -1
637 // r40 = 0xffff - (N +1) = 0xffff -N -1
639 { .mlx
640          nop.m 999
641 (p0)     movl                r38 = 0x000000000000fffe ;; 
644 { .mmi
645 (p0)     ldfe            sinh_FR_Inv_log2by64 = [r34],16 ;;       
646 (p0)     ldfe            sinh_FR_log2by64_hi  = [r34],16       
647          nop.i 999 ;;
650 { .mbb
651 (p0)     ldfe            sinh_FR_log2by64_lo  = [r34],16       
652          nop.b 999
653          nop.b 999 ;;
656 // Get the A coefficients
657 // f9  = A_1
658 // f10 = A_2
659 // f11 = A_3
661 { .mmi
662       nop.m 999
663 (p0)  addl           r34   = @ltoff(double_sinh_ab_table), gp
664       nop.i 999
668 { .mmi
669       ld8 r34 = [r34]
670       nop.m 999
671       nop.i 999
676 // Calculate M and keep it as integer and floating point.
677 // f38 = M = round-to-integer(ax*Inv_log2by64)
678 // sinh_FR_M = M = ax/(log2/64) converted to an integer by fcvt.fx (rounded, not truncated)
679 // Put the significand of M in r35
680 //    and the floating point representation of M in sinh_FR_M
682 { .mfi
683          nop.m 999
684 (p0)     fma.s1          sinh_FR_M      = sinh_FR_X, sinh_FR_Inv_log2by64, f0 
685          nop.i 999
688 { .mfi
689 (p0)     ldfe            sinh_FR_A1 = [r34],16            
690          nop.f 999
691          nop.i 999 ;;
694 { .mfi
695          nop.m 999
696 (p0)     fcvt.fx.s1      sinh_FR_M_temp = sinh_FR_M                      
697          nop.i 999 ;;
700 { .mfi
701          nop.m 999
702 (p0)     fnorm.s1        sinh_FR_M      = sinh_FR_M_temp                 
703          nop.i 999 ;;
706 { .mfi
707 (p0)     getf.sig        r35       = sinh_FR_M_temp                 
708          nop.f 999
709          nop.i 999 ;;
712 // M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It 
713 // has a range of -32 thru 31.
714 // r35 = M
715 // r36 = j 
717 { .mii
718          nop.m 999
719          nop.i 999 ;;
720 (p0)     and            r36 = 0x3f, r35 ;;   
723 // Calculate R
724 // f13 = f44 - f38*f10 = ax - M*log2by64_hi
725 // f14 = f13 - f38*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
727 { .mfi
728          nop.m 999
729 (p0)     fnma.s1           sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X      
730          nop.i 999
733 { .mfi
734 (p0)     ldfe            sinh_FR_A2 = [r34],16            
735          nop.f 999
736          nop.i 999 ;;
739 { .mfi
740          nop.m 999
741 (p0)     fnma.s1           sinh_FR_R      = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp 
742          nop.i 999
745 // Get the B coefficients
746 // f15 = B_1
747 // f32 = B_2
748 // f33 = B_3
750 { .mmi
751 (p0)     ldfe            sinh_FR_A3 = [r34],16 ;;            
752 (p0)     ldfe            sinh_FR_B1 = [r34],16            
753          nop.i 999 ;;
756 { .mmi
757 (p0)     ldfe            sinh_FR_B2 = [r34],16 ;;            
758 (p0)     ldfe            sinh_FR_B3 = [r34],16            
759          nop.i 999 ;;
762 { .mii
763          nop.m 999
764 (p0)     shl            r34 = r36,  0x2 ;;   
765 (p0)     sxt1           r37 = r34 ;;         
768 // ******************************************************
769 // STEP 2 (TBL and EXP)
770 // ******************************************************
771 // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
772 // f12 = R*R*R
773 // f13 = R*R
774 // f14 = R <== from above
776 { .mfi
777          nop.m 999
778 (p0)     fma.s1             sinh_FR_Rsq  = sinh_FR_R,   sinh_FR_R, f0  
779 (p0)     shr            r36 = r37,  0x2 ;;   
782 // r34 = M-j = r35 - r36
783 // r35 = N = (M-j)/64
785 { .mii
786 (p0)     sub                  r34 = r35, r36    
787          nop.i 999 ;;
788 (p0)     shr                  r35 = r34, 0x6 ;;    
791 { .mii
792 (p0)     sub                 r40 = r38, r35           
793 (p0)     adds                 r37 = 0x1, r35    
794 (p0)     add                 r39 = r38, r35 ;;           
797 // Get the address of the J table, add the offset, 
798 // addresses are sinh_AD_mJ and sinh_AD_J, get the T value
799 // f32 = T(j)_hi
800 // f33 = T(j)_lo
801 // f34 = T(-j)_hi
802 // f35 = T(-j)_lo
804 { .mmi
805 (p0)  sub                  r34 = r35, r32    
806 (p0)  addl           r37   = @ltoff(double_sinh_j_table), gp
807       nop.i 999
811 { .mmi
812       ld8 r37 = [r37]
813       nop.m 999
814       nop.i 999
819 { .mfi
820          nop.m 999
821 (p0)     fma.s1             sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0  
822          nop.i 999
825 // ******************************************************
826 // STEP 3 Now decide if we need to branch to EXP
827 // ******************************************************
828 // Put 32 in f9; the compare below sets p7 when |x| >= 32
829 // Go to EXP if |x| >= 32 
831 { .mlx
832          nop.m 999
833 (p0)     movl                r32 = 0x0000000000010004 ;;               
836 // Calculate p_even
837 // f34 = B_2 + Rsq *B_3
838 // f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
839 // f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
841 { .mfi
842          nop.m 999
843 (p0)     fma.s1          sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3,          sinh_FR_B2  
844          nop.i 999 ;;
847 { .mfi
848          nop.m 999
849 (p0)     fma.s1          sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1  
850          nop.i 999
853 // Calculate p_odd
854 // f34 = A_2 + Rsq *A_3
855 // f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
856 // f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
858 { .mfi
859          nop.m 999
860 (p0)     fma.s1          sinh_FR_podd_temp1 = sinh_FR_Rsq,        sinh_FR_A3,         sinh_FR_A2  
861          nop.i 999 ;;
864 { .mfi
865 (p0)     setf.exp            sinh_FR_N_temp1 = r39            
866          nop.f 999
867          nop.i 999 ;;
870 { .mfi
871          nop.m 999
872 (p0)     fma.s1          sinh_FR_peven       = sinh_FR_Rsq, sinh_FR_peven_temp2, f0     
873          nop.i 999
876 { .mfi
877          nop.m 999
878 (p0)     fma.s1          sinh_FR_podd_temp2 = sinh_FR_Rsq,        sinh_FR_podd_temp1, sinh_FR_A1  
879          nop.i 999 ;;
882 { .mfi
883 (p0)     setf.exp            f9  = r32                              
884          nop.f 999
885          nop.i 999 ;;
888 { .mfi
889          nop.m 999
890 (p0)     fma.s1          sinh_FR_podd       = sinh_FR_podd_temp2, sinh_FR_Rcub,       sinh_FR_R   
891          nop.i 999
894 // sinh_GR_mJ contains the table offset for -j
895 // sinh_GR_J  contains the table offset for +j
896 // p6 is true when j <= 0
898 { .mlx
899 (p0)     setf.exp            sinh_FR_N_temp2 = r40            
900 (p0)     movl                r40 = 0x0000000000000020 ;;    
903 { .mfi
904 (p0)     sub                 sinh_GR_mJ = r40,  r36           
905 (p0)     fmerge.se           sinh_FR_spos    = sinh_FR_N_temp1, f1 
906 (p0)     adds                sinh_GR_J  = 0x20, r36 ;;           
909 { .mii
910          nop.m 999
911 (p0)     shl                  sinh_GR_mJ = sinh_GR_mJ, 5 ;;   
912 (p0)     add                  sinh_AD_mJ = r37, sinh_GR_mJ ;; 
915 { .mmi
916          nop.m 999
917 (p0)     ldfe                 sinh_FR_Tmjhi = [sinh_AD_mJ],16                 
918 (p0)     shl                  sinh_GR_J  = sinh_GR_J, 5 ;;    
921 { .mfi
922 (p0)     ldfs                 sinh_FR_Tmjlo = [sinh_AD_mJ],16                 
923 (p0)     fcmp.lt.unc.s1      p0,p7 = sinh_FR_X,f9                          
924 (p0)     add                  sinh_AD_J  = r37, sinh_GR_J ;;  
927 { .mmi
928 (p0)     ldfe                 sinh_FR_Tjhi  = [sinh_AD_J],16 ;;                  
929 (p0)     ldfs                 sinh_FR_Tjlo  = [sinh_AD_J],16                  
930          nop.i 999 ;;
933 { .mfb
934          nop.m 999
935 (p0)     fmerge.se           sinh_FR_sneg    = sinh_FR_N_temp2, f1 
936 (p7)     br.cond.spnt        L(SINH_BY_EXP) ;;                            
939 { .mfi
940          nop.m 999
941          nop.f 999
942          nop.i 999 ;;
945 // ******************************************************
946 // If NOT branch to EXP
947 // ******************************************************
948 // Calculate S_hi and S_lo
949 // sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
950 // sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
951 // sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
953 { .mfi
954          nop.m 999
955 (p0)    fma.s1         sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0   
956          nop.i 999 ;;
959 { .mfi
960          nop.m 999
961 (p0)    fms.s1         sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi_temp              
962          nop.i 999
965 // Calculate C_hi
966 // sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
967 // sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
969 { .mfi
970          nop.m 999
971 (p0)    fma.s1         sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0                   
972          nop.i 999 ;;
975 // sinh_FR_S_lo_temp1 =  sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
976 // sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
977 // sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1              )
979 { .mfi
980          nop.m 999
981 (p0)    fms.s1         sinh_FR_S_lo_temp1 =  sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi            
982          nop.i 999
985 { .mfi
986          nop.m 999
987 (p0)    fma.s1         sinh_FR_C_hi       = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1    
988          nop.i 999 ;;
991 { .mfi
992          nop.m 999
993 (p0)    fnma.s1        sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1       
994          nop.i 999
997 // sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
998 // sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
999 // sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
1000 // sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
1002 { .mfi
1003          nop.m 999
1004 (p0)    fma.s1         sinh_FR_S_lo_temp1 =  sinh_FR_sneg, sinh_FR_Tmjlo, f0                  
1005          nop.i 999 ;;
1008 /////////// BUG FIX fma to fms -TK
1009 { .mfi
1010          nop.m 999
1011 (p0)    fms.s1         sinh_FR_S_lo_temp3 =  sinh_FR_spos, sinh_FR_Tjlo,  sinh_FR_S_lo_temp1  
1012          nop.i 999 ;;
1015 { .mfi
1016          nop.m 999
1017 (p0)    fma.s1         sinh_FR_S_lo       =  sinh_FR_S_lo_temp3, f1,   sinh_FR_S_lo_temp2     
1018          nop.i 999 ;;
1021 // Y_hi = S_hi 
1022 // Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
1023 // sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
1024 // sinh_FR_Y_lo      = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
1026 { .mfi
1027          nop.m 999
1028 (p0)    fma.s1         sinh_FR_Y_lo_temp  = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo           
1029          nop.i 999 ;;
1032 { .mfi
1033          nop.m 999
1034 (p0)    fma.s1         sinh_FR_Y_lo       =  sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp      
1035          nop.i 999 ;;
1038 // sinh_FR_SINH = Y_hi + Y_lo
1039 // f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
1041 // Dummy multiply to generate inexact
1042 { .mfi
1043          nop.m 999
1044 (p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
1045          nop.i 999
1047 { .mfi
1048          nop.m 999
1049 (p0)    fma.s1         sinh_FR_SINH       =  sinh_FR_S_hi, f1, sinh_FR_Y_lo    
1050          nop.i 999 ;;
1053 { .mfb
1054          nop.m 999
1055 (p0)    fma.s0       f8 = sinh_FR_SGNX, sinh_FR_SINH,f0                      
1056 (p0)    br.ret.sptk     b0 ;;                          
1060 L(SINH_BY_EXP): 
1062 // When p7 is true,  we know that an overflow is not going to happen
1063 // When p7 is false, we must check for possible overflow
1064 // p7 is the over_SAFE flag
1065 // Y_hi = Tjhi
1066 // Y_lo = Tjhi * (p_odd + p_even) +Tjlo
1067 // Scale = sign * 2^(N-1)
1068 // sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
1069 // sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_Y_lo_temp      )
1071 { .mfi
1072          nop.m 999
1073 (p0)   fma.s1            sinh_FR_Y_lo_temp =  sinh_FR_peven, f1, sinh_FR_podd                   
1074          nop.i 999
1077 // Now we are in EXP. This is the only path where an overflow is possible
1078 // but not for certain. So this is the only path where over_SAFE has any use.
1079 // r34 still has N-1
1080 // There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
1081 // There is a danger of double overflow            if N-1 > 0x3fe = 1022
1082 { .mlx
1083          nop.m 999
1084 (p0)   movl                r32          = 0x0000000000003ffe ;;                       
1087 { .mfi
1088 (p0)   cmp.gt.unc          p0,p7        = r34, r32                                 
1089 (p0)   fmerge.s          sinh_FR_SCALE     = sinh_FR_SGNX, sinh_FR_spos                         
1090          nop.i 999 ;;
1093 { .mfi
1094          nop.m 999
1095 (p0)   fma.s1            sinh_FR_Y_lo      =  sinh_FR_Tjhi,  sinh_FR_Y_lo_temp, sinh_FR_Tjlo    
1096          nop.i 999 ;;
1099 // f8 = answer = scale * (Y_hi + Y_lo)
1100 { .mfi
1101          nop.m 999
1102 (p0)   fma.s1            sinh_FR_SINH_temp = sinh_FR_Y_lo,  f1, sinh_FR_Tjhi       
1103          nop.i 999 ;;
1106 { .mfi
1107          nop.m 999
1108 (p0)   fma.s0          f44          = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0      
1109          nop.i 999 ;;
1112 // Dummy multiply to generate inexact
1113 { .mfi
1114          nop.m 999
1115 (p7)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
1116          nop.i 999 ;;
1119 // If over_SAFE is set, return
1120 { .mfb
1121        nop.m 999
1122 (p7)   fmerge.s            f8 = f44,f44                                            
1123 (p7)   br.ret.sptk     b0 ;;                          
1126 // Else see if we overflowed
1127 // S0 user supplied status
1128 // S2 user supplied status + WRE + TD  (Overflows)
1129 // If WRE is set then an overflow will not occur in EXP.
1130 // The input value that would cause a register (WRE) value to overflow is about 2^15
1131 // and this input would go into the HUGE path.
1132 // Answer with WRE is in f43.
1134 { .mfi
1135          nop.m 999
1136 (p0)   fsetc.s2            0x7F,0x42                                               
1137          nop.i 999;;
1140 { .mfi
1141          nop.m 999
1142 (p0)   fma.s2            f43  = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0                      
1143          nop.i 999 ;;
1146 // 13FFF => 13FFF -FFFF = 4000(true)
1147 // 4000 + 3FFF = 7FFF, which is 1 more than the exponent of the largest
1148 // long double (7FFE). So 0 13FFF 8000000000000000  is one ulp more than
1149 // largest long double in register bias
1150 // Now set p8 if the answer with WRE is greater than or equal to this value
1151 // Also set p9 if the answer with WRE is less than or equal to the negative of this value
1153 { .mlx
1154          nop.m 999
1155 (p0)   movl                r32     = 0x00000000013FFF ;;                              
1158 { .mmf
1159          nop.m 999
1160 (p0)   setf.exp            f41 = r32                                               
1161 (p0)   fsetc.s2            0x7F,0x40 ;;                                               
1164 { .mfi
1165          nop.m 999
1166 (p0)   fcmp.ge.unc.s1 p8, p0 =  f43, f41                                           
1167          nop.i 999
1170 { .mfi
1171          nop.m 999
1172 (p0)   fmerge.ns           f42 = f41, f41                                          
1173          nop.i 999 ;;
1176 // The error tag for overflow is 126
1177 { .mii
1178          nop.m 999
1179          nop.i 999 ;;
1180 (p8)   mov                 r47 = 126 ;;                                               
1183 { .mfb
1184          nop.m 999
1185 (p0)   fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
1186 (p8)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
1189 { .mii
1190          nop.m 999
1191          nop.i 999 ;;
1192 (p9)   mov                 r47 = 126                                               
1195 { .mib
1196          nop.m 999
1197          nop.i 999
1198 (p9)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
1201 // Dummy multiply to generate inexact
1202 { .mfi
1203          nop.m 999
1204 (p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
1205          nop.i 999 ;;
1208 { .mfb
1209          nop.m 999
1210 (p0)   fmerge.s            f8 = f44,f44                                            
1211 (p0)   br.ret.sptk     b0 ;;                          
1214 L(SINH_HUGE): 
1216 // for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
1217 // SAFE: SAFE is always 0 for HUGE
1219 { .mlx
1220          nop.m 999
1221 (p0)   movl                r32 = 0x0000000000015dbf ;;                                
1224 { .mfi
1225 (p0)   setf.exp            f9  = r32                                               
1226          nop.f 999
1227          nop.i 999 ;;
1230 { .mfi
1231          nop.m 999
1232 (p0)   fma.s1              sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1                       
1233          nop.i 999 ;;
1236 { .mfi
1237          nop.m 999
1238 (p0)   fma.s0            f44 = sinh_FR_signed_hi_lo,  f9, f0                          
1239 (p0)   mov                 r47 = 126                                               
1241 .endp sinhl
1242 ASM_SIZE_DIRECTIVE(sinhl)
1243 #ifdef _LIBC
1244 ASM_SIZE_DIRECTIVE(__ieee754_sinhl)
1245 #endif
1247 // Stack operations when calling error support.
1248 //       (1)               (2)                          (3) (call)              (4)
1249 //   sp   -> +          psp -> +                     psp -> +                   sp -> +
1250 //           |                 |                            |                         |
1251 //           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
1252 //           |                 |                            |                         |
1253 //           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
1254 //           |                 |                            |                         |
1255 //           |                 | <- GR_X               X1 ->|                         |
1256 //           |                 |                            |                         |
1257 //  sp-64 -> +          sp ->  +                     sp ->  +                         +
1258 //    save ar.pfs          save b0                                               restore gp
1259 //    save gp                                                                    restore ar.pfs
// __libm_error_region / L(SINH_ERROR_SUPPORT)
// Out-of-line error path: builds a 64-byte stack frame, stores three
// floating-point values into it, calls __libm_error_support, then reloads
// f8 from the result slot and returns to the original caller through b0.
// Frame layout relative to the new sp, as established by the code below:
//   sp+16 : Parameter 1 (f8)
//   sp+32 : Parameter 2 (f0)
//   sp+48 : Parameter 3 / result (f44 on entry, reloaded into f8 on exit)
// NOTE(review): the branch sites set r47 = 126 before jumping here; presumably
// r47 is the error-tag argument to __libm_error_support -- confirm against
// the register definitions earlier in the file.
// NOTE(review): no closing "}" for the "{ .mfi"/"{ .mib" bundles is visible
// in this listing and every line carries a leading number; this looks like
// extraction residue -- confirm against the upstream file.
1261 .proc __libm_error_region
1262 __libm_error_region:
1263 L(SINH_ERROR_SUPPORT):
1264 .prologue
1266 // (1) Save ar.pfs and gp, and allocate the 64-byte frame
1267 { .mfi
1268         add   GR_Parameter_Y=-32,sp             // Address of Parameter 2 slot: old sp-32 (= new sp+32)
1269         nop.f 0
1270 .save   ar.pfs,GR_SAVE_PFS
1271         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs (overwritten by the br.call below)
1273 { .mfi
1274 .fframe 64
1275         add sp=-64,sp                          // Create new 64-byte stack frame
1276         nop.f 0
1277         mov GR_SAVE_GP=gp                      // Save gp; restored after the call
1281 // (2) Store Parameter 2, form the Parameter 1 address, save b0
1282 { .mmi
1283         stfe [GR_Parameter_Y] = f0,16         // STORE Parameter 2 (f0) at sp+32; post-increment leaves GR_Parameter_Y = sp+48
1284         add GR_Parameter_X = 16,sp            // Parameter 1 address = sp+16
1285 .save   b0, GR_SAVE_B0
1286         mov GR_SAVE_B0=b0                     // Save b0 (return address; overwritten by the br.call below)
1289 .body
1290 // (3) Store Parameter 1 and the provisional result, then call the handler
1291 { .mib
1292         stfe [GR_Parameter_X] = f8                     // STORE Parameter 1 (f8) at sp+16
1293         add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 (result) address = sp+48
1294         nop.b 0                            
1296 { .mib
1297         stfe [GR_Parameter_Y] = f44                    // STORE Parameter 3 (f44) in the result slot at sp+48
1298         add   GR_Parameter_Y = -16,GR_Parameter_Y      // Point GR_Parameter_Y back at Parameter 2 (sp+32)
1299         br.call.sptk b0=__libm_error_support#          // Call error handling function
1301 { .mmi
1302         nop.m 0
1303         nop.m 0
1304         add   GR_Parameter_RESULT = 48,sp              // Recompute the result address (sp+48) after the call
1307 // (4) Reload the result, tear down the frame, restore saved state, return
1308 { .mmi
1309         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack into f8
1310 .restore sp
1311         add   sp = 64,sp                       // Restore stack pointer (release the 64-byte frame)
1312         mov   b0 = GR_SAVE_B0                  // Restore return address
1314 { .mib
1315         mov   gp = GR_SAVE_GP                  // Restore gp
1316         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1317         br.ret.sptk     b0                     // Return to the original caller
1320 .endp __libm_error_region
1321 ASM_SIZE_DIRECTIVE(__libm_error_region)
1323 .type   __libm_error_support#,@function
1324 .global __libm_error_support#