(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[glibc.git] / sysdeps / ia64 / fpu / e_cosh.S
blob205653d4bf2440a16d55a5aa776d8d7708e4dd41
1 .file "cosh.s"
3 // Copyright (C) 2000, 2001, Intel Corporation
4 // All rights reserved.
5 // 
6 // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 // 
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at 
38 // http://developer.intel.com/opensource.
40 // History
41 //==============================================================
42 // 2/02/00  Initial version 
43 // 4/04/00  Unwind support added
44 // 8/15/00  Bundle added after call to __libm_error_support to properly
45 //          set [the previously overwritten] GR_Parameter_RESULT.
47 // API
48 //==============================================================
49 // double = cosh(double)
50 // input  floating point f8
51 // output floating point f8
54 // Overview of operation
55 //==============================================================
56 // There are four paths
58 // 1. |x| < 0.25        COSH_BY_POLY
59 // 2. |x| < 32          COSH_BY_TBL
60 // 3. |x| < 2^10        COSH_BY_EXP   (double threshold; exponent 0x10009 below)
61 // 4. |x| >= 2^10       COSH_HUGE
63 // For paths 1, and 2 SAFE is always 1.
64 // For path  4, Safe is always 0.
65 // SAFE = 1 means we cannot overflow.
67 #include "libm_support.h"
// Symbolic names for the registers used by this routine.
// Several names map to the same physical register (for example f9 is
// cosh_FR_Inv_log2by64, cosh_FR_A1, cosh_FR_J_temp, cosh_FR_C_hi,
// cosh_FR_COSH, cosh_FR_X2 and cosh_FR_TINY_THRESH), so a name is only
// valid from the instruction that writes it until the same register is
// written under another name.
69 // Assembly macros
70 //==============================================================
71 cosh_FR_X            = f44
72 cosh_FR_SGNX         = f40
74 cosh_FR_Inv_log2by64 = f9
75 cosh_FR_log2by64_lo  = f11
76 cosh_FR_log2by64_hi  = f10
78 cosh_FR_A1           = f9
79 cosh_FR_A2           = f10
80 cosh_FR_A3           = f11
82 cosh_FR_Rcub         = f12
83 cosh_FR_M_temp       = f13
84 cosh_FR_R_temp       = f13
85 cosh_FR_Rsq          = f13
86 cosh_FR_R            = f14
88 cosh_FR_M            = f38
90 cosh_FR_B1           = f15
91 cosh_FR_B2           = f32
92 cosh_FR_B3           = f33
94 cosh_FR_peven_temp1  = f34
95 cosh_FR_peven_temp2  = f35
96 cosh_FR_peven        = f36
98 cosh_FR_podd_temp1   = f34
99 cosh_FR_podd_temp2   = f35
100 cosh_FR_podd         = f37
102 cosh_FR_J_temp       = f9
103 cosh_FR_J            = f10
105 cosh_FR_Mmj          = f39
107 cosh_FR_N_temp1      = f11
108 cosh_FR_N_temp2      = f12
109 cosh_FR_N            = f13
111 cosh_FR_spos         = f14
112 cosh_FR_sneg         = f15
114 cosh_FR_Tjhi         = f32
115 cosh_FR_Tjlo         = f33
116 cosh_FR_Tmjhi        = f34
117 cosh_FR_Tmjlo        = f35
// General registers: byte offsets (GR_*) and addresses (AD_*) of the
// T(-j) and T(j) entries in double_cosh_j_table.
119 GR_mJ           = r35
120 GR_J            = r36
122 AD_mJ           = r38
123 AD_J            = r39
125 cosh_FR_C_hi         = f9
126 cosh_FR_C_hi_temp    = f10
127 cosh_FR_C_lo_temp1   = f11 
128 cosh_FR_C_lo_temp2   = f12 
129 cosh_FR_C_lo_temp3   = f13 
131 cosh_FR_C_lo         = f38
132 cosh_FR_S_hi         = f39
134 cosh_FR_S_hi_temp1   = f10
135 cosh_FR_Y_hi         = f11 
136 cosh_FR_Y_lo_temp    = f12 
137 cosh_FR_Y_lo         = f13 
138 cosh_FR_COSH         = f9
// Names used by the small-argument polynomial path (COSH_BY_POLY).
140 cosh_FR_X2           = f9
141 cosh_FR_X4           = f10
143 cosh_FR_P1           = f14
144 cosh_FR_P2           = f15
145 cosh_FR_P3           = f32
146 cosh_FR_P4           = f33
147 cosh_FR_P5           = f34
148 cosh_FR_P6           = f35
150 cosh_FR_TINY_THRESH  = f9
152 cosh_FR_COSH_temp    = f10
153 cosh_FR_SCALE        = f11 
155 cosh_FR_hi_lo = f10
157 cosh_FR_poly_podd_temp1    =  f11 
158 cosh_FR_poly_podd_temp2    =  f13
159 cosh_FR_poly_peven_temp1   =  f11
160 cosh_FR_poly_peven_temp2   =  f13
// Registers used by __libm_error_region: the saved ar.pfs, b0 and gp, and
// the three pointer arguments handed to __libm_error_support.
162 GR_SAVE_PFS                    = r41
163 GR_SAVE_B0                     = r42
164 GR_SAVE_GP                     = r43
166 GR_Parameter_X                 = r44
167 GR_Parameter_Y                 = r45
168 GR_Parameter_RESULT            = r46
171 // Data tables
172 //==============================================================
// The tables are placed in .rodata when built as part of libc (_LIBC) and in
// .data otherwise.
174 #ifdef _LIBC
175 .rodata
176 #else
177 .data
178 #endif
180 .align 16
// Argument-reduction constants for the TBL and EXP paths.
// Each entry is a 16-byte pair (64-bit significand, then sign/exponent word)
// that the code reads with ldfe, stepping the pointer by 16.
// The three entries are loaded, in order, as cosh_FR_Inv_log2by64,
// cosh_FR_log2by64_hi and cosh_FR_log2by64_lo.
181 double_cosh_arg_reduction:
182 ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
183    data8 0xB8AA3B295C17F0BC, 0x00004005   // Inv_log2by64, about 64/ln(2)
184    data8 0xB17217F7D1000000, 0x00003FF8   // log2by64_hi, high part of ln(2)/64
185    data8 0xCF79ABC9E3B39804, 0x00003FD0   // log2by64_lo, low part of ln(2)/64
186 ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
// Coefficients P1..P6 for the COSH_BY_POLY path (|x| < 0.25), loaded in this
// order with ldfe.  The path computes
//   cosh(x) = 1 + x^2*(P1 + x^4*(P3 + x^4*P5)) + x^4*(P2 + x^4*(P4 + x^4*P6))
// so P1..P6 multiply x^2, x^4, x^6, x^8, x^10 and x^12 respectively.
// The first three decode to approximately 1/2, 1/24 and 1/720.
188 double_cosh_p_table:
189 ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
190    data8 0x8000000000000000, 0x00003FFE   // P1
191    data8 0xAAAAAAAAAAAAAB80, 0x00003FFA   // P2
192    data8 0xB60B60B60B4FE884, 0x00003FF5   // P3
193    data8 0xD00D00D1021D7370, 0x00003FEF   // P4
194    data8 0x93F27740C0C2F1CC, 0x00003FE9   // P5
195    data8 0x8FA02AC65BCBD5BC, 0x00003FE2   // P6
196 ASM_SIZE_DIRECTIVE(double_cosh_p_table)
// Coefficients for the reduced argument R in the TBL and EXP paths, loaded in
// this order with ldfe:
//   A1..A3:  p_odd  = R + R^3 * (A1 + R^2 * (A2 + R^2 * A3))   (sinh(R))
//   B1..B3:  p_even = R^2 * (B1 + R^2 * (B2 + R^2 * B3))       (cosh(R) - 1)
// A1..A3 decode to approximately 1/6, 1/120, 1/5040 and B1..B3 to
// approximately 1/2, 1/24, 1/720.
198 double_cosh_ab_table:
199 ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
200    data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC   // A1
201    data8 0x88888888884ECDD5, 0x00003FF8   // A2
202    data8 0xD00D0C6DCC26A86B, 0x00003FF2   // A3
203    data8 0x8000000000000002, 0x00003FFE   // B1
204    data8 0xAAAAAAAAAA402C77, 0x00003FFA   // B2
205    data8 0xB60B6CC96BDB144D, 0x00003FF5   // B3
206 ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
// Table of T(j) for j = -32 .. 32 (65 entries of 32 bytes each).
// The code indexes it with byte offset (32 + j) * 32 for T(j) and
// (32 - j) * 32 for T(-j).
// Entry layout:
//   bytes  0-15: significand and sign/exponent word, read with ldfe (T_hi)
//   bytes 16-19: single-precision bit pattern, read with ldfs (T_lo)
//                (assumes little-endian data8 storage -- TODO confirm)
//   remaining bytes: zero padding
// The first, middle and last entries decode to 2^(-1/2), 1.0 and 2^(1/2),
// which is consistent with T(j) = 2^(j/64).
208 double_cosh_j_table:
209 ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
210    data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
211    data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
212    data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
213    data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
214    data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
215    data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
216    data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
217    data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
218    data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
219    data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
220    data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
221    data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
222    data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
223    data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
224    data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
225    data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
226    data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
227    data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
228    data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
229    data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
230    data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
231    data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
232    data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
233    data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
234    data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
235    data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
236    data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
237    data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
238    data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
239    data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
240    data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
241    data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
242    data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
243    data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
244    data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
245    data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
246    data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
247    data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
248    data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
249    data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
250    data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
251    data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
252    data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
253    data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
254    data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
255    data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
256    data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
257    data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
258    data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
259    data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
260    data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
261    data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
262    data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
263    data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
264    data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
265    data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
266    data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
267    data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
268    data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
269    data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
270    data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
271    data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
272    data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
273    data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
274    data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
275 ASM_SIZE_DIRECTIVE(double_cosh_j_table)
277 .align 32
278 .global cosh#
280 .section .text
281 .proc  cosh#
282 .align 32
// double cosh(double x)
//   In:  f8 = x
//   Out: f8 = cosh(x)
// This first part handles NaN and infinity, then either branches to
// L(COSH_BY_TBL) when |x| >= 0.25 or evaluates the small-argument
// polynomial (COSH_BY_POLY) and returns.
284 cosh: 
286 #ifdef _LIBC
287 .global __ieee754_cosh#
288 .proc __ieee754_cosh#
289 __ieee754_cosh:
290 #endif
292 // X NAN?
// alloc requests 0 inputs, 12 locals (r32-r43) and 4 outputs (r44-r47).
// p6 is set when f8 matches class mask 0xc3; in that case f8*1 + f8 is
// returned.
294 { .mfi
295       alloc r32 = ar.pfs,0,12,4,0                  
296 (p0)  fclass.m.unc  p6,p7 = f8, 0xc3    //@snan | @qnan 
297       nop.i 999
302 { .mfb
303       nop.m 999
304 (p6)  fma.d.s0   f8 = f8,f1,f8                  
305 (p6)  br.ret.spnt     b0 ;;                          
309 // X infinity 
// For an infinite input the sign of f0 is merged onto f8, so the result is
// the infinity with f0's sign for either sign of x.
310 { .mfi
311       nop.m 999
312 (p0)  fclass.m.unc  p6,p0 = f8, 0x23    //@inf 
313       nop.i 999 ;;
316 { .mfb
317       nop.m 999
318 (p6)     fmerge.s      f8 = f0,f8                  
319 (p6)  br.ret.spnt     b0 ;;                          
324 // Put 0.25 in f9 (exponent 0xfffd); p7 true if |x| is not < 0.25
325 { .mlx
326          nop.m 999
327 (p0)     movl            r32 = 0x000000000000fffd ;;         
330 { .mfi
331 (p0)  setf.exp        f9 = r32                         
332       nop.f 999
333       nop.i 999 ;;
// cosh_FR_X = |x| (sign taken from f0)
336 { .mfi
337       nop.m 999
338 (p0)  fmerge.s      cosh_FR_X    = f0,f8                
339       nop.i 999
// cosh_FR_SGNX = 1.0 with the sign of x
342 { .mfi
343       nop.m 999
344 (p0)  fmerge.s      cosh_FR_SGNX = f8,f1                
345       nop.i 999 ;;
348 { .mfi
349       nop.m 999
350 (p0)  fcmp.lt.unc     p0,p7 = cosh_FR_X,f9                    
351       nop.i 999 ;;
354 { .mib
355       nop.m 999
356       nop.i 999
357 (p7)  br.cond.sptk    L(COSH_BY_TBL) 
362 // COSH_BY_POLY: 
363 // POLY cannot overflow so there is no need to call __libm_error_support
364 // Get the values of P_x from the table
// r34 = address of double_cosh_p_table, fetched through the linkage table
366 { .mmi
367       nop.m 999
368 (p0)  addl           r34   = @ltoff(double_cosh_p_table), gp
369       nop.i 999
373 { .mmi
374       ld8 r34 = [r34]
375       nop.m 999
376       nop.i 999
381 // Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
382 { .mmf
383          nop.m 999
384 (p0)     ldfe       cosh_FR_P1 = [r34],16                 
385 (p0)     fma.s1     cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;           
388 { .mmi
389 (p0)     ldfe       cosh_FR_P2 = [r34],16 ;;                 
390 (p0)     ldfe       cosh_FR_P3 = [r34],16                 
391          nop.i 999 ;;
394 { .mmi
395 (p0)     ldfe       cosh_FR_P4 = [r34],16 ;;                 
396 (p0)     ldfe       cosh_FR_P5 = [r34],16                 
397          nop.i 999 ;;
400 { .mfi
401 (p0)     ldfe       cosh_FR_P6 = [r34],16                 
402 (p0)     fma.s1     cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0         
403          nop.i 999 ;;
406 // Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
407 { .mfi
408          nop.m 999
409 (p0)     fma.s1     cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3                
410          nop.i 999 ;;
413 { .mfi
414          nop.m 999
415 (p0)     fma.s1     cosh_FR_podd            = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1   
416          nop.i 999
419 // Calculate cosh_FR_peven =  p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
420 { .mfi
421          nop.m 999
422 (p0)     fma.s1     cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4               
423          nop.i 999 ;;
426 { .mfi
427          nop.m 999
428 (p0)     fma.s1     cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2 
429          nop.i 999 ;;
432 { .mfi
433          nop.m 999
434 (p0)     fma.s1     cosh_FR_peven       = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0         
435          nop.i 999 ;;
438 // Y_lo = x2*p_odd + p_even
439 // Calculate f8 = Y_hi + Y_lo, where Y_hi is 1.0 (computed as f1*f1 + Y_lo)
440 { .mfi
441          nop.m 999
442 (p0)     fma.s1     cosh_FR_Y_lo         = cosh_FR_X2, cosh_FR_podd,  cosh_FR_peven    
443          nop.i 999 ;;
446 { .mfb
447          nop.m 999
448 (p0)     fma.d.s0   f8                   = f1, f1, cosh_FR_Y_lo                        
449 (p0)  br.ret.sptk     b0 ;;                          
// COSH_BY_TBL: entered with cosh_FR_X = |x| >= 0.25.
// Branches to L(COSH_HUGE) when |x| >= 2^10, performs the argument reduction
// shared with the EXP path, branches to L(COSH_BY_EXP) when |x| >= 32, and
// otherwise reconstructs cosh(x) from table values and returns it in f8.
453 L(COSH_BY_TBL): 
455 // Now that we are at TBL; so far all we know is that |x| >= 0.25.
456 // The first two steps are the same for TBL and EXP, but if we are HUGE
457 // we want to leave now.  The threshold depends on the precision:
458 // Double: go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
459 //   (this is the value loaded below)
460 // Single: go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
461 // Otherwise (presumably double-extended): go to HUGE if |x| >= 2^14,
462 // 1000d (register-biased) is e = 14 (true)
464 { .mlx
465       nop.m 999
466 (p0)     movl            r32 = 0x0000000000010009 ;;              
469 { .mfi
470 (p0)     setf.exp        f9 = r32                              
471       nop.f 999
472       nop.i 999 ;;
475 { .mfi
476       nop.m 999
477 (p0)     fcmp.ge.unc     p6,p7 = cosh_FR_X,f9                  
478       nop.i 999 ;;
481 { .mib
482       nop.m 999
483       nop.i 999
484 (p6)     br.cond.spnt    L(COSH_HUGE) ;;                             
487 // r32 = 1
488 // r34 = N-1 
489 // r35 = N
490 // r36 = j
491 // r37 = N+1
493 // TBL can never overflow
494 // cosh(x) = cosh(B+R)
495 //         = cosh(B) cosh(R) + sinh(B) sinh(R) 
496 // cosh(R) can be approximated by 1 + p_even
497 // sinh(R) can be approximated by p_odd
499 // ******************************************************
500 // STEP 1 (TBL and EXP)
501 // ******************************************************
502 // Get the following constants.
503 // f9  = Inv_log2by64
504 // f10 = log2by64_hi
505 // f11 = log2by64_lo
507 { .mmi
508 (p0)     adds                 r32 = 0x1,r0      
509 (p0)     addl           r34   = @ltoff(double_cosh_arg_reduction), gp
510          nop.i 999
514 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
515 // put them in an exponent.
516 // cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
517 // r39 = 0xffff + (N-1)  = 0xffff +N -1
518 // r40 = 0xffff - (N +1) = 0xffff -N -1
// r38 = 0xfffe is the biased exponent of 2^(-1); N is added to or
// subtracted from it further down.
520 { .mlx
521          ld8 r34 = [r34]
522 (p0)     movl                r38 = 0x000000000000fffe ;; 
525 { .mmi
526 (p0)     ldfe            cosh_FR_Inv_log2by64 = [r34],16 ;;            
527 (p0)     ldfe            cosh_FR_log2by64_hi  = [r34],16            
528          nop.i 999 ;;
531 { .mbb
532 (p0)     ldfe            cosh_FR_log2by64_lo  = [r34],16            
533          nop.b 999
534          nop.b 999 ;;
537 // Get the A coefficients
538 // f9  = A_1
539 // f10 = A_2
540 // f11 = A_3
542 { .mmi
543       nop.m 999
544 (p0)  addl           r34   = @ltoff(double_cosh_ab_table), gp
545       nop.i 999
549 { .mmi
550       ld8 r34 = [r34]
551       nop.m 999
552       nop.i 999
557 // Calculate M and keep it as integer and floating point.
558 // M = round-to-integer(x*Inv_log2by64)
559 // cosh_FR_M = M = ax/(log2/64) converted with fcvt.fx.s1 (not the .trunc form)
560 // Put the significand of M in r35
561 //    and the floating point representation of M in cosh_FR_M
563 { .mfi
564       nop.m 999
565 (p0)  fma.s1          cosh_FR_M      = cosh_FR_X, cosh_FR_Inv_log2by64, f0 
566       nop.i 999
// cosh_FR_A1 shares f9 with cosh_FR_Inv_log2by64.
569 { .mfi
570 (p0)  ldfe            cosh_FR_A1 = [r34],16            
571       nop.f 999
572       nop.i 999 ;;
575 { .mfi
576       nop.m 999
577 (p0)  fcvt.fx.s1      cosh_FR_M_temp = cosh_FR_M                      
578       nop.i 999 ;;
581 { .mfi
582       nop.m 999
583 (p0)  fnorm.s1        cosh_FR_M      = cosh_FR_M_temp                 
584       nop.i 999 ;;
587 { .mfi
588 (p0)  getf.sig        r35       = cosh_FR_M_temp                 
589       nop.f 999
590       nop.i 999 ;;
593 // M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
594 // has a range of -32 thru 31.
595 // r35 = M
596 // r36 = j 
// The sign extension is done in three steps: mask with 0x3f here, then shift
// left by 2, sxt1, and shift right by 2 further down.
597 { .mii
598       nop.m 999
599       nop.i 999 ;;
600 (p0)  and            r36 = 0x3f, r35 ;;   
603 // Calculate R
604 // f13 = f44 - f38*f10 = x - M*log2by64_hi
605 // f14 = f13 - f38*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
607 { .mfi
608       nop.m 999
609 (p0)  fnma.s1        cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X      
610       nop.i 999
// cosh_FR_A2 shares f10 with cosh_FR_log2by64_hi.
613 { .mfi
614 (p0)  ldfe            cosh_FR_A2 = [r34],16            
615       nop.f 999
616       nop.i 999 ;;
619 { .mfi
620       nop.m 999
621 (p0)  fnma.s1        cosh_FR_R      = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp 
622       nop.i 999
625 // Get the B coefficients
626 // f15 = B_1
627 // f32 = B_2
628 // f33 = B_3
// cosh_FR_A3 shares f11 with cosh_FR_log2by64_lo.
630 { .mmi
631 (p0)     ldfe            cosh_FR_A3 = [r34],16 ;;            
632 (p0)     ldfe            cosh_FR_B1 = [r34],16            
633          nop.i 999 ;;
636 { .mmi
637 (p0)     ldfe            cosh_FR_B2 = [r34],16 ;;            
638 (p0)     ldfe            cosh_FR_B3 = [r34],16            
639          nop.i 999 ;;
// Move the 6-bit field to bits 7..2 so that sxt1 sign-extends it.
642 { .mii
643          nop.m 999
644 (p0)     shl            r34 = r36,  0x2 ;;   
645 (p0)     sxt1           r37 = r34 ;;         
648 // ******************************************************
649 // STEP 2 (TBL and EXP)
650 // ******************************************************
651 // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
652 // f12 = R*R*R
653 // f13 = R*R
654 // f14 = R <== from above
// The shr below undoes the shift, leaving the sign-extended j in r36.
656 { .mfi
657       nop.m 999
658 (p0)     fma.s1          cosh_FR_Rsq  = cosh_FR_R,   cosh_FR_R, f0  
659 (p0)     shr            r36 = r37,  0x2 ;;   
662 // r34 = M-j = r35 - r36
663 // r35 = N = (M-j)/64
665 { .mii
666 (p0)     sub                  r34 = r35, r36    
667          nop.i 999 ;;
668 (p0)     shr                  r35 = r34, 0x6 ;;    
// r40 = 0xfffe - N, r37 = N + 1, r39 = 0xfffe + N
671 { .mii
672 (p0)     sub                 r40 = r38, r35           
673 (p0)     adds                 r37 = 0x1, r35    
674 (p0)     add                 r39 = r38, r35 ;;           
677 // Get the address of the J table, add the offset,
678 // addresses are AD_mJ and AD_J, get the T value
679 // f32 = T(j)_hi
680 // f33 = T(j)_lo
681 // f34 = T(-j)_hi
682 // f35 = T(-j)_lo
// r34 = N - 1 (r32 holds 1); r37 is overwritten with the table address.
684 { .mmi
685 (p0)     sub                  r34 = r35, r32    
686 (p0)     addl    r37   = @ltoff(double_cosh_j_table), gp
687          nop.i 999
691 { .mfi
692       ld8 r37 = [r37]
693 (p0)  fma.s1          cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0  
694       nop.i 999
697 // ******************************************************
698 // STEP 3 Now decide if we need to branch to EXP
699 // ******************************************************
700 // Put 32 in f9; p6 true if x < 32
702 { .mlx
703          nop.m 999
704 (p0)     movl                r32 = 0x0000000000010004 ;;               
707 // Calculate p_even
708 // f34 = B_2 + Rsq *B_3
709 // f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
710 // f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
712 { .mfi
713       nop.m 999
714 (p0)  fma.s1          cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3,          cosh_FR_B2  
715       nop.i 999 ;;
718 { .mfi
719       nop.m 999
720 (p0)  fma.s1          cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1  
721       nop.i 999
724 // Calculate p_odd
725 // f34 = A_2 + Rsq *A_3
726 // f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
727 // f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
729 { .mfi
730       nop.m 999
731 (p0)  fma.s1          cosh_FR_podd_temp1 = cosh_FR_Rsq,        cosh_FR_A3,         cosh_FR_A2  
732       nop.i 999 ;;
// cosh_FR_N_temp1 receives biased exponent r39 (0xfffe + N).
735 { .mfi
736 (p0)  setf.exp            cosh_FR_N_temp1 = r39            
737       nop.f 999
738       nop.i 999 ;;
741 { .mfi
742       nop.m 999
743 (p0)  fma.s1          cosh_FR_peven       = cosh_FR_Rsq, cosh_FR_peven_temp2, f0     
744       nop.i 999
747 { .mfi
748       nop.m 999
749 (p0)  fma.s1          cosh_FR_podd_temp2 = cosh_FR_Rsq,        cosh_FR_podd_temp1, cosh_FR_A1  
750       nop.i 999 ;;
// f9 = 32 (biased exponent 0x10004), used by the fcmp further down.
753 { .mfi
754 (p0)  setf.exp            f9  = r32                              
755       nop.f 999
756       nop.i 999 ;;
759 { .mfi
760       nop.m 999
761 (p0)  fma.s1          cosh_FR_podd       = cosh_FR_podd_temp2, cosh_FR_Rcub,       cosh_FR_R   
762       nop.i 999
765 // GR_mJ contains the table offset for -j: (32 - j) * 32 bytes
766 // GR_J  contains the table offset for +j: (32 + j) * 32 bytes
767 // AD_mJ and AD_J are the resulting addresses; both T(-j) and T(j) are loaded
// cosh_FR_N_temp2 receives biased exponent r40 (0xfffe - N); r40 is then
// reused to hold the constant 32.
769 { .mlx
770 (p0)     setf.exp            cosh_FR_N_temp2 = r40            
771 (p0)     movl                r40 = 0x0000000000000020 ;;    
774 { .mfi
775 (p0)     sub                 GR_mJ = r40,  r36           
776 (p0)     fmerge.se           cosh_FR_spos    = cosh_FR_N_temp1, f1 
777 (p0)     adds                GR_J  = 0x20, r36 ;;           
780 { .mii
781          nop.m 999
782 (p0)     shl                  GR_mJ = GR_mJ, 5 ;;   
783 (p0)     add                  AD_mJ = r37, GR_mJ ;; 
786 { .mmi
787          nop.m 999
788 (p0)     ldfe                 cosh_FR_Tmjhi = [AD_mJ],16                 
789 (p0)     shl                  GR_J  = GR_J, 5 ;;    
// p6: |x| < 32 (stay on the TBL path); p7: otherwise (go to EXP).
792 { .mfi
793 (p0)     ldfs                 cosh_FR_Tmjlo = [AD_mJ],16                 
794 (p0)     fcmp.lt.unc.s1      p6,p7 = cosh_FR_X,f9                          
795 (p0)     add                  AD_J  = r37, GR_J ;;  
798 { .mmi
799 (p0)     ldfe                 cosh_FR_Tjhi  = [AD_J],16 ;;                  
800 (p0)     ldfs                 cosh_FR_Tjlo  = [AD_J],16                  
801          nop.i 999 ;;
804 { .mfb
805          nop.m 999
806 (p0)     fmerge.se           cosh_FR_sneg    = cosh_FR_N_temp2, f1 
807 (p7)     br.cond.spnt        L(COSH_BY_EXP) ;;                            
810 // ******************************************************
811 // If NOT branch to EXP
812 // ******************************************************
813 // Calculate C_hi
814 // ******************************************************
815 // cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
816 // cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
818 { .mfi
819       nop.m 999
820 (p0)  fma.s1         cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0                   
821       nop.i 999 ;;
824 { .mfi
825       nop.m 999
826 (p0)  fma.s1         cosh_FR_C_hi      = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi_temp    
827       nop.i 999
830 // ******************************************************
831 // Calculate S_hi
832 // ******************************************************
833 // cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
834 // cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_S_hi_temp1
836 { .mfi
837       nop.m 999
838 (p0)  fma.s1        cosh_FR_S_hi_temp1 =  cosh_FR_sneg, cosh_FR_Tmjhi, f0                
839       nop.i 999 ;;
842 // ******************************************************
843 // Calculate C_lo
844 // ******************************************************
845 // cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
846 // cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjhi + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
847 // cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
848 // cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
849 // cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
851 { .mfi
852       nop.m 999
853 (p0)  fms.s1        cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi        
854       nop.i 999
857 { .mfi
858       nop.m 999
859 (p0)  fms.s1        cosh_FR_S_hi       =  cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1 
860       nop.i 999 ;;
863 { .mfi
864       nop.m 999
865 (p0)  fma.s1        cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1  
866       nop.i 999
869 { .mfi
870       nop.m 999
871 (p0)  fma.s1        cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0                  
872       nop.i 999 ;;
875 { .mfi
876       nop.m 999
877 (p0)  fma.s1        cosh_FR_C_lo_temp3 =  cosh_FR_spos, cosh_FR_Tjlo,  cosh_FR_C_lo_temp1 
878       nop.i 999 ;;
881 { .mfi
882       nop.m 999
883 (p0)  fma.s1        cosh_FR_C_lo       =  cosh_FR_C_lo_temp3, f1,   cosh_FR_C_lo_temp2    
884       nop.i 999 ;;
887 // ******************************************************
888 // cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
889 // cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
890 // cosh_FR_COSH = Y_hi + Y_lo
// The result is written directly to f8 as C_hi*1 + Y_lo.
892 { .mfi
893       nop.m 999
894 (p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo       
895       nop.i 999 ;;
898 { .mfi
899       nop.m 999
900 (p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp   
901       nop.i 999 ;;
904 { .mfb
905       nop.m 999
906 (p0)  fma.d.s0       f8 =  cosh_FR_C_hi, f1, cosh_FR_Y_lo                       
907 (p0)  br.ret.sptk     b0 ;;                          
// COSH_BY_EXP: reached from COSH_BY_TBL when |x| is not < 32 (and < 2^10).
// Uses only the T(j) term: f44 = cosh_FR_spos * (Tjhi + Y_lo).
// If N-1 <= 0x3fe the result is returned directly; otherwise it is recomputed
// in status field s2 and compared with the overflow threshold, and
// __libm_error_region is entered with error tag 64 in r47 on overflow.
910 L(COSH_BY_EXP): 
912 // When p7 is true,  we know that an overflow is not going to happen
913 // When p7 is false, we must check for possible overflow
914 // p7 is the over_SAFE flag
915 // f44 = Scale * (Y_hi + Y_lo)
916 //     =  cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
918 { .mfi
919       nop.m 999
920 (p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_peven, f1,       cosh_FR_podd           
921       nop.i 999
924 // Now we are in EXP. This is the only path where an overflow is possible
925 // but not for certain. So this is the only path where over_SAFE has any use.
926 // r34 still has N-1
927 // There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
928 // There is a danger of double overflow            if N-1 > 0x3fe = 1022
930 { .mlx
931        nop.m 999
932 (p0)   movl                r32          = 0x00000000000003fe ;;                       
// p7 is set when N-1 is NOT greater than 0x3fe.
935 { .mfi
936 (p0)  cmp.gt.unc          p0,p7        = r34, r32                                 
937       nop.f 999
938       nop.i 999 ;;
// Y_lo = Tjhi * (peven + podd) + Tjlo
941 { .mfi
942       nop.m 999
943 (p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_Tjhi,  cosh_FR_Y_lo_temp, cosh_FR_Tjlo       
944       nop.i 999 ;;
947 { .mfi
948       nop.m 999
949 (p0)  fma.s1         cosh_FR_COSH_temp =  cosh_FR_Y_lo,  f1, cosh_FR_Tjhi                 
950       nop.i 999 ;;
953 { .mfi
954       nop.m 999
955 (p0)  fma.d.s0       f44 = cosh_FR_spos,  cosh_FR_COSH_temp, f0                       
956       nop.i 999 ;;
959 // If over_SAFE is set, return
960 { .mfb
961        nop.m 999
962 (p7)   fmerge.s            f8 = f44,f44
963 (p7)  br.ret.sptk     b0 ;;                          
966 // Else see if we overflowed
967 // S0 user supplied status
968 // S2 user supplied status + WRE + TD  (Overflows)
969 // If WRE is set then an overflow will not occur in EXP.
970 // The input value that would cause a register (WRE) value to overflow is about 2^15
971 // and this input would go into the HUGE path.
972 // Answer with WRE is in f43.
974 { .mfi
975       nop.m 999
976 (p0)  fsetc.s2            0x7F,0x42                                               
977       nop.i 999;;
980 { .mfi
981       nop.m 999
982 (p0)  fma.d.s2            f43  = cosh_FR_spos,  cosh_FR_COSH_temp, f0                      
983       nop.i 999 ;;
986 // 103FF => 103FF -FFFF = 400(true)
987 // 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
988 // double (7FE). So 0 103FF 8000000000000000  is one ulp more than
989 // largest double in register bias
990 // Now  set p8 if the answer with WRE is greater than or equal this value
991 // Also set p9 if the answer with WRE is less than or equal to negative this value
993 { .mlx
994        nop.m 999
995 (p0)   movl                r32          = 0x00000000000103ff ;;                     
// f41 = threshold; the second fsetc rewrites the s2 controls (0x40 instead
// of the 0x42 used above).
998 { .mmf
999        nop.m 999
1000 (p0)   setf.exp            f41          = r32                                    
1001 (p0)   fsetc.s2            0x7F,0x40 ;;                                               
1004 { .mfi
1005       nop.m 999
1006 (p0)  fcmp.ge.unc.s1      p8, p0       = f43, f41                               
1007       nop.i 999
// f42 = -f41
1010 { .mfi
1011       nop.m 999
1012 (p0)  fmerge.ns           f42 = f41, f41                                          
1013       nop.i 999 ;;
1016 // The error tag for overflow is 64
1017 { .mii
1018       nop.m 999
1019       nop.i 999 ;;
1020 (p8)  mov                 r47 = 64 ;;                                               
1023 { .mfb
1024       nop.m 999
1025 (p0)  fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
1026 (p8)  br.cond.spnt __libm_error_region ;;
1029 { .mii
1030       nop.m 999
1031       nop.i 999 ;;
1032 (p9)  mov                 r47 = 64                                               
1035 { .mib
1036       nop.m 999
1037       nop.i 999
1038 (p9)  br.cond.spnt __libm_error_region ;;
// No overflow detected: return the s0 result computed earlier in f44.
1041 { .mfb
1042       nop.m 999
1043 (p0)  fmerge.s            f8 = f44,f44                                            
1044 (p0)  br.ret.sptk     b0 ;;                          
1048 // for COSH_HUGE, put 24000 in exponent (0x15dbf - 0xffff = 0x5dc0); add 1
1049 // SAFE: SAFE is always 0 for HUGE
// The sign of the input is not used here.  f44 = f9 * (f9 + 1) with
// f9 = 2^24000 is far above the double range, and r47 is set to the overflow
// error tag 64.  No branch follows the last bundle, so execution continues
// into the code placed next in .text (__libm_error_region below).
1051 L(COSH_HUGE): 
1053 { .mlx
1054       nop.m 999
1055 (p0)  movl                r32 = 0x0000000000015dbf ;;                                
1058 { .mfi
1059 (p0)  setf.exp            f9  = r32                                               
1060       nop.f 999
1061       nop.i 999 ;;
1064 { .mfi
1065       nop.m 999
1066 (p0)  fma.s1              cosh_FR_hi_lo = f1, f9, f1                              
1067       nop.i 999 ;;
1070 { .mfi
1071       nop.m 999
1072 (p0)  fma.d.s0            f44 = f9, cosh_FR_hi_lo, f0                             
1073 (p0)  mov                 r47 = 64                                               
1077 .endp cosh#
1078 ASM_SIZE_DIRECTIVE(cosh#)
1080 // Stack operations when calling error support.
1081 //       (1)               (2)                          (3) (call)              (4)
1082 //   sp   -> +          psp -> +                     psp -> +                   sp -> +
1083 //           |                 |                            |                         |
1084 //           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
1085 //           |                 |                            |                         |
1086 //           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
1087 //           |                 |                            |                         |
1088 //           |                 | <- GR_X               X1 ->|                         |
1089 //           |                 |                            |                         |
1090 //  sp-64 -> +          sp ->  +                     sp ->  +                         +
1091 //    save ar.pfs          save b0                                               restore gp
1092 //    save gp                                                                    restore ar.pfs
// __libm_error_region: common tail used when cosh overflows.
//   In:  f8  = original argument x
//        f44 = result computed by the caller path
//        r47 = error tag (64 for overflow)
//   Out: f8  = value read back from the result slot after
//              __libm_error_support returns
// A 64-byte frame is created; relative to the new sp the slots are
//   sp+16: parameter 1 (x), sp+32: parameter 2 (f0), sp+48: result (f44).
// Their addresses are passed in GR_Parameter_X, GR_Parameter_Y and
// GR_Parameter_RESULT (r44-r46).
1094 .proc __libm_error_region
1095 __libm_error_region:
1096 .prologue
1097 // (1)
1098 { .mfi
1099         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
1100         nop.f 0
1101 .save   ar.pfs,GR_SAVE_PFS
1102         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
1104 { .mfi
1105 .fframe 64
1106         add sp=-64,sp                          // Create new stack
1107         nop.f 0
1108         mov GR_SAVE_GP=gp                      // Save gp
1112 // (2)
// The post-increment leaves GR_Parameter_Y pointing at the result slot.
1113 { .mmi
1114         stfd [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
1115         add GR_Parameter_X = 16,sp            // Parameter 1 address
1116 .save   b0, GR_SAVE_B0
1117         mov GR_SAVE_B0=b0                     // Save b0
1120 .body
1121 // (3)
1122 { .mib
1123         stfd [GR_Parameter_X] = f8                    // STORE Parameter 1 on stack
1124         add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
1125         nop.b 0                                 
// f44 goes to the result slot; GR_Parameter_Y is then moved back by 16 to
// the parameter 2 slot.
1127 { .mib
1128         stfd [GR_Parameter_Y] = f44                   // STORE Parameter 3 on stack
1129         add   GR_Parameter_Y = -16,GR_Parameter_Y
1130         br.call.sptk b0=__libm_error_support#         // Call error handling function
// GR_Parameter_RESULT is recomputed from sp after the call (see the 8/15/00
// history entry: the register was previously overwritten).
1132 { .mmi
1133         nop.m 0
1134         nop.m 0
1135         add   GR_Parameter_RESULT = 48,sp
1138 // (4)
1139 { .mmi
1140         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1141 .restore sp
1142         add   sp = 64,sp                       // Restore stack pointer
1143         mov   b0 = GR_SAVE_B0                  // Restore return address
1145 { .mib
1146         mov   gp = GR_SAVE_GP                  // Restore gp
1147         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1148         br.ret.sptk     b0                     // Return
1151 .endp __libm_error_region
1152 ASM_SIZE_DIRECTIVE(__libm_error_region)
1154 .type   __libm_error_support#,@function
1155 .global __libm_error_support#