Update.
[glibc.git] / sysdeps / ia64 / fpu / e_coshl.S
blob97486f6d1d27f25e397da50953cf09ad2b714811
1 .file "coshl.s"
3 // Copyright (c) 2000, 2001, Intel Corporation
4 // All rights reserved.
5 // 
6 // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
8 // 
9 // WARRANTY DISCLAIMER
10 // 
11 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
12 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
13 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
14 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
15 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
16 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
17 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
18 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
19 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
20 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
21 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
22 // 
23 // Intel Corporation is the author of this code, and requests that all
24 // problem reports or change requests be submitted to it directly at 
25 // http://developer.intel.com/opensource.
27 // History
28 //==============================================================
29 // 2/02/00  Initial version 
30 // 4/04/00  Unwind support added
31 // 8/15/00  Bundle added after call to __libm_error_support to properly
32 //          set [the previously overwritten] GR_Parameter_RESULT.
33 // 1/23/01  Set inexact flag for large args.
35 // API
36 //==============================================================
37 // float       = cosh(float)
38 // double      = cosh(double)
39 // long double = coshl(long double)
40 // input  floating point f8
41 // output floating point f8
44 // Overview of operation
45 //==============================================================
46 // There are four paths
48 // 1. |x| < 0.25        COSH_BY_POLY
49 // 2. |x| < 32          COSH_BY_TBL
50 // 3. |x| < 2^14        COSH_BY_EXP
51 // 4. |x| >= 2^14       COSH_HUGE
53 // For paths 1, and 2 SAFE is always 1.
54 // For path  4, Safe is always 0.
55 // SAFE = 1 means we cannot overflow.
57 #include "libm_support.h"
59 // Assembly macros
60 //==============================================================
// Symbolic names for the floating-point (f*) and general (r*) registers
// used by coshl and by __libm_error_region.
//
// Many names share one physical register: for example f9 is
// cosh_FR_Inv_log2by64, cosh_FR_A1, cosh_FR_J_temp, cosh_FR_C_hi,
// cosh_FR_COSH, cosh_FR_X2 and cosh_FR_TINY_THRESH.  Two aliases of the
// same register must therefore never be live at the same time.
//
// cosh_FR_X and FR_RESULT are both f44: |x| on the normal paths, and the
// value stored as the result by __libm_error_region on the error path.
61 cosh_FR_X            = f44
62 FR_RESULT            = f44
63 cosh_FR_SGNX         = f40
64 cosh_FR_all_ones     = f45
// FR_X / FR_Y are the values __libm_error_region stores as parameters 1 and 2.
66 FR_X                 = f8
67 FR_Y                 = f0
68 cosh_FR_Inv_log2by64 = f9
69 cosh_FR_log2by64_lo  = f11
70 cosh_FR_log2by64_hi  = f10
72 cosh_FR_A1           = f9
73 cosh_FR_A2           = f10
74 cosh_FR_A3           = f11
76 cosh_FR_Rcub         = f12
77 cosh_FR_M_temp       = f13
78 cosh_FR_R_temp       = f13
79 cosh_FR_Rsq          = f13
80 cosh_FR_R            = f14
82 cosh_FR_M            = f38
84 cosh_FR_tmp          = f15
85 cosh_FR_B1           = f15
86 cosh_FR_B2           = f32
87 cosh_FR_B3           = f33
89 cosh_FR_peven_temp1  = f34
90 cosh_FR_peven_temp2  = f35
91 cosh_FR_peven        = f36
93 cosh_FR_podd_temp1   = f34
94 cosh_FR_podd_temp2   = f35
95 cosh_FR_podd         = f37
97 cosh_FR_J_temp       = f9
98 cosh_FR_J            = f10
100 cosh_FR_Mmj          = f39
102 cosh_FR_N_temp1      = f11
103 cosh_FR_N_temp2      = f12
104 cosh_FR_N            = f13
106 cosh_FR_spos         = f14
107 cosh_FR_sneg         = f15
109 cosh_FR_Tjhi         = f32
110 cosh_FR_Tjlo         = f33
111 cosh_FR_Tmjhi        = f34
112 cosh_FR_Tmjlo        = f35
// General registers.  Note that r35/r36 (GR_mJ/GR_J), r38/r39 (AD_mJ/AD_J)
// and r40 (cosh_GR_all_ones) are also used under their raw names as
// scratch registers in COSH_BY_TBL.
114 GR_mJ           = r35
115 GR_J            = r36
117 AD_mJ           = r38
118 AD_J            = r39
120 cosh_GR_all_ones     = r40
// Registers used only by __libm_error_region: saved state (r41-r43) and
// the four values handed to __libm_error_support (r44-r47).
122 GR_SAVE_PFS           = r41
123 GR_SAVE_B0            = r42
124 GR_SAVE_GP            = r43
125 GR_Parameter_X        = r44
126 GR_Parameter_Y        = r45
127 GR_Parameter_RESULT   = r46
128 GR_Parameter_TAG      = r47 
130 cosh_FR_C_hi         = f9
131 cosh_FR_C_hi_temp    = f10
132 cosh_FR_C_lo_temp1   = f11 
133 cosh_FR_C_lo_temp2   = f12 
134 cosh_FR_C_lo_temp3   = f13 
136 cosh_FR_C_lo         = f38
137 cosh_FR_S_hi         = f39
139 cosh_FR_S_hi_temp1   = f10
140 cosh_FR_Y_hi         = f11 
141 cosh_FR_Y_lo_temp    = f12 
142 cosh_FR_Y_lo         = f13 
143 cosh_FR_COSH         = f9
145 cosh_FR_X2           = f9
146 cosh_FR_X4           = f10
148 cosh_FR_P1           = f14
149 cosh_FR_P2           = f15
150 cosh_FR_P3           = f32
151 cosh_FR_P4           = f33
152 cosh_FR_P5           = f34
153 cosh_FR_P6           = f35
155 cosh_FR_TINY_THRESH  = f9
157 cosh_FR_COSH_temp    = f10
158 cosh_FR_SCALE        = f11 
160 cosh_FR_hi_lo = f10
162 cosh_FR_poly_podd_temp1    =  f11 
163 cosh_FR_poly_podd_temp2    =  f13
164 cosh_FR_poly_peven_temp1   =  f11
165 cosh_FR_poly_peven_temp2   =  f13
167 // Data tables
168 //==============================================================
// The tables live in .rodata when built as part of libc, otherwise in .data.
170 #ifdef _LIBC
171 .rodata
172 #else
173 .data
174 #endif
176 .align 16
// Argument-reduction constants for COSH_BY_TBL / COSH_BY_EXP.
// Three 16-byte entries (significand, then sign/exponent word), read in
// order with ldfe:
//   entry 0 -> cosh_FR_Inv_log2by64
//   entry 1 -> cosh_FR_log2by64_hi
//   entry 2 -> cosh_FR_log2by64_lo
177 double_cosh_arg_reduction:
178 ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
179    data8 0xB8AA3B295C17F0BC, 0x00004005
180    data8 0xB17217F7D1000000, 0x00003FF8
181    data8 0xCF79ABC9E3B39804, 0x00003FD0
182 ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
// Polynomial coefficients for COSH_BY_POLY (|x| < 0.25).
// Six 16-byte entries read in order with ldfe into
// cosh_FR_P1 ... cosh_FR_P6.
184 double_cosh_p_table:
185 ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
186    data8 0x8000000000000000, 0x00003FFE
187    data8 0xAAAAAAAAAAAAAB80, 0x00003FFA
188    data8 0xB60B60B60B4FE884, 0x00003FF5
189    data8 0xD00D00D1021D7370, 0x00003FEF
190    data8 0x93F27740C0C2F1CC, 0x00003FE9
191    data8 0x8FA02AC65BCBD5BC, 0x00003FE2
192 ASM_SIZE_DIRECTIVE(double_cosh_p_table)
// Polynomial coefficients for COSH_BY_TBL / COSH_BY_EXP.
// Six 16-byte entries read in order with ldfe:
//   A1, A2, A3  (used for p_odd,  the sinh(R) approximation)
//   B1, B2, B3  (used for p_even, the cosh(R)-1 approximation)
194 double_cosh_ab_table:
195 ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
196    data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
197    data8 0x88888888884ECDD5, 0x00003FF8
198    data8 0xD00D0C6DCC26A86B, 0x00003FF2
199    data8 0x8000000000000002, 0x00003FFE
200    data8 0xAAAAAAAAAA402C77, 0x00003FFA
201    data8 0xB60B6CC96BDB144D, 0x00003FF5
202 ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
// T(j) table for COSH_BY_TBL / COSH_BY_EXP.
// 65 entries of 32 bytes each (four data8 words per row).  The code
// addresses entry (32 + j) for T(j) and entry (32 - j) for T(-j), so the
// rows run from j = -32 (first) to j = +32 (last); row 32 is exactly 1.0.
// Within a row:
//   bytes  0..15  T_hi, loaded with ldfe (significand, sign/exponent word)
//   bytes 16..19  T_lo, loaded with ldfs (single-precision bit pattern)
//   remaining bytes are zero padding
// NOTE(review): the values look like 2^(j/64) split into hi + lo parts;
// confirm against the generator before relying on that.
204 double_cosh_j_table:
205 ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
206    data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
207    data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
208    data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
209    data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
210    data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
211    data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
212    data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
213    data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
214    data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
215    data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
216    data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
217    data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
218    data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
219    data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
220    data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
221    data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
222    data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
223    data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
224    data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
225    data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
226    data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
227    data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
228    data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
229    data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
230    data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
231    data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
232    data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
233    data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
234    data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
235    data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
236    data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
237    data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
238    data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
239    data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
240    data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
241    data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
242    data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
243    data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
244    data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
245    data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
246    data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
247    data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
248    data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
249    data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
250    data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
251    data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
252    data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
253    data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
254    data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
255    data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
256    data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
257    data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
258    data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
259    data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
260    data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
261    data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
262    data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
263    data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
264    data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
265    data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
266    data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
267    data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
268    data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
269    data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
270    data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
271 ASM_SIZE_DIRECTIVE(double_cosh_j_table)
273 .align 32
274 .global coshl#
276 .section .text
277 .proc  coshl#
278 .align 32
// coshl: long double hyperbolic cosine.
//   input  f8 = x
//   output f8 = cosh(x)
// This entry section returns early for NaN and infinity, forms |x| in
// cosh_FR_X, and then either falls through to COSH_BY_POLY (|x| < 0.25)
// or branches to COSH_BY_TBL.
280 coshl:
282 #ifdef _LIBC
283 .global __ieee754_coshl#
284 .proc __ieee754_coshl#
285 __ieee754_coshl:
286 #endif
288 // X NAN?
// p6 is set when f8 is a NaN; cosh_GR_all_ones = -1 is prepared here for
// the setf.sig below.
290 { .mfi
291       alloc r32 = ar.pfs,0,12,4,0                  
292 (p0)  fclass.m.unc  p6,p7 = f8, 0xc3               
293       mov cosh_GR_all_ones = -1
296 //   This is more than we need but it is in preparation
297 //   for the values we add for error support. We push three
298 //   addresses on the stack (3*8) = 24 bytes and one tag
// NaN input: return f8*1 + f8 (computed in s0) and leave.
300 { .mfb
301       nop.m 999
302 (p6)     fma.s0   f8 = f8,f1,f8                  
303 (p6)  br.ret.spnt     b0 ;;                          
307 // Make constant that will generate inexact when squared
308 // X infinity 
// p6 is now set when f8 is +/- infinity.
309 { .mfi
310       setf.sig cosh_FR_all_ones = cosh_GR_all_ones 
311 (p0)  fclass.m.unc  p6,p0 = f8, 0x23               
312       nop.i 999 ;;
// Infinite input: return f8 with the sign taken from f0 (i.e. +infinity).
315 { .mfb
316       nop.m 999
317 (p6)     fmerge.s      f8 = f0,f8                  
318 (p6)  br.ret.spnt     b0 ;;
323 // Put 0.25 in f9; p7 true if |x| >= 0.25 (then take the COSH_BY_TBL path)
// 0xfffd is the register-biased exponent for 2^-2.
324 { .mlx
325          nop.m 999
326 (p0)     movl            r32 = 0x000000000000fffd ;;         
329 { .mfi
330 (p0)  setf.exp        f9 = r32                         
331       nop.f 999
332       nop.i 999 ;;
// cosh_FR_X = |x| (magnitude of f8 with the sign of f0).
335 { .mfi
336       nop.m 999
337 (p0)  fmerge.s      cosh_FR_X    = f0,f8                
338       nop.i 999
// cosh_FR_SGNX = 1.0 carrying the sign of x.
341 { .mfi
342       nop.m 999
343 (p0)  fmerge.s      cosh_FR_SGNX = f8,f1                
344       nop.i 999 ;;
// Only the complement predicate p7 is kept: p7 = !(|x| < 0.25).
347 { .mfi
348       nop.m 999
349 (p0)  fcmp.lt.unc     p0,p7 = cosh_FR_X,f9                    
350       nop.i 999 ;;
353 { .mib
354       nop.m 999
355       nop.i 999
356 (p7)  br.cond.sptk    L(COSH_BY_TBL) 
361 // COSH_BY_POLY: 
362 // POLY cannot overflow so there is no need to call __libm_error_support
363 // Get the values of P_x from the table
// Reached by fall-through when |x| < 0.25.  With ax = |x|, x2 = ax*ax and
// x4 = x2*x2, the result is
//   f8 = 1 + ( x2*(P1 + x4*(P3 + x4*P5)) + x4*(P2 + x4*(P4 + x4*P6)) )
// r34 first receives the table address from the linkage table (@ltoff),
// then walks the six 16-byte coefficients.
365 { .mmi
366       nop.m 999
367 (p0)  addl           r34   = @ltoff(double_cosh_p_table), gp
368       nop.i 999
372 { .mmi
373       ld8 r34 = [r34]
374       nop.m 999
375       nop.i 999
380 // Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
381 { .mmf
382          nop.m 999
383 (p0)     ldfe       cosh_FR_P1 = [r34],16                 
384 (p0)     fma.s1     cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;           
387 { .mmi
388 (p0)     ldfe       cosh_FR_P2 = [r34],16 ;;                 
389 (p0)     ldfe       cosh_FR_P3 = [r34],16                 
390          nop.i 999 ;;
393 { .mmi
394 (p0)     ldfe       cosh_FR_P4 = [r34],16 ;;                 
395 (p0)     ldfe       cosh_FR_P5 = [r34],16                 
396          nop.i 999 ;;
399 { .mfi
400 (p0)     ldfe       cosh_FR_P6 = [r34],16                 
401 (p0)     fma.s1     cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0         
402          nop.i 999 ;;
405 // Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
406 { .mfi
407          nop.m 999
408 (p0)     fma.s1     cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3                
409          nop.i 999 ;;
412 { .mfi
413          nop.m 999
414 (p0)     fma.s1     cosh_FR_podd            = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1   
415          nop.i 999
418 // Calculate cosh_FR_peven =  p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
419 { .mfi
420          nop.m 999
421 (p0)     fma.s1     cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4               
422          nop.i 999 ;;
425 { .mfi
426          nop.m 999
427 (p0)     fma.s1     cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2 
428          nop.i 999 ;;
431 { .mfi
432          nop.m 999
433 (p0)     fma.s1     cosh_FR_peven       = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0         
434          nop.i 999 ;;
437 // Y_lo = x2*p_odd + p_even
438 // Calculate f8 = Y_hi + Y_lo 
// Y_hi is the constant 1.0 (f1*f1); the final add is done in s0 so the
// user-visible flags are set by this operation.  For x = 0, Y_lo is 0 and
// the sum is exactly 1.
439 { .mfi
440          nop.m 999
441 (p0)     fma.s1     cosh_FR_Y_lo         = cosh_FR_X2, cosh_FR_podd,  cosh_FR_peven    
442          nop.i 999 ;;
445 { .mfb
446          nop.m 999
447 (p0)     fma.s0   f8                   = f1, f1, cosh_FR_Y_lo                        
448 (p0)     br.ret.sptk     b0 ;;
452 L(COSH_BY_TBL): 
454 // We are now at TBL; so far all we know is that |x| >= 0.25.
455 // The first two steps are the same for TBL and EXP, but if we are HUGE
456 // we want to leave now.  The HUGE thresholds per precision are:
457 //   Double Extended: |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
458 //   Double:          |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
459 //   Single:          |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
460 // This is the long double routine and the constant loaded below is 1000d,
461 // so the double-extended threshold applies:
462 // go to HUGE if |x| >= 2^14
463 // 1000d (register-biased) is e = 14 (true)
465 { .mlx
466       nop.m 999
467 (p0)     movl            r32 = 0x000000000001000d ;;              
470 { .mfi
471 (p0)     setf.exp        f9 = r32                              
472       nop.f 999
473       nop.i 999 ;;
// p6 = (|x| >= 2^14) -> COSH_HUGE
476 { .mfi
477       nop.m 999
478 (p0)     fcmp.ge.unc     p6,p7 = cosh_FR_X,f9                  
479       nop.i 999 ;;
482 { .mib
483       nop.m 999
484       nop.i 999
485 (p6)     br.cond.spnt    L(COSH_HUGE) ;;                             
// Intended general-register roles for the rest of this section:
488 // r32 = 1
489 // r34 = N-1 
490 // r35 = N
491 // r36 = j
492 // r37 = N+1
494 // TBL can never overflow
495 // cosh(x) = cosh(B+R)
496 //         = cosh(B) cosh(R) + sinh(B) sinh(R) 
497 // cosh(R) can be approximated by 1 + p_even
498 // sinh(R) can be approximated by p_odd
500 // ******************************************************
501 // STEP 1 (TBL and EXP)
502 // ******************************************************
503 // Get the following constants.
504 // f9  = Inv_log2by64
505 // f10 = log2by64_hi
506 // f11 = log2by64_lo
508 { .mmi
509 (p0)     adds                 r32 = 0x1,r0      
510 (p0)     addl           r34   = @ltoff(double_cosh_arg_reduction), gp
511          nop.i 999
515 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
516 // put them in an exponent.
517 // cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
518 // r39 = 0xffff + (N-1)  = 0xffff +N -1
519 // r40 = 0xffff - (N +1) = 0xffff -N -1
// r38 = 0xfffe = 0xffff - 1 is the common starting value for both.
521 { .mlx
522          ld8 r34 = [r34]
523 (p0)     movl                r38 = 0x000000000000fffe ;; 
526 { .mmi
527 (p0)     ldfe            cosh_FR_Inv_log2by64 = [r34],16 ;;            
528 (p0)     ldfe            cosh_FR_log2by64_hi  = [r34],16            
529          nop.i 999 ;;
532 { .mbb
533 (p0)     ldfe            cosh_FR_log2by64_lo  = [r34],16            
534          nop.b 999
535          nop.b 999 ;;
538 // Get the A coefficients
539 // f9  = A_1
540 // f10 = A_2
541 // f11 = A_3
// NOTE: A_1..A_3 reuse f9..f11, so each ldfe below is placed after the
// last use of the argument-reduction constant held in the same register.
543 { .mmi
544       nop.m 999
545 (p0)  addl           r34   = @ltoff(double_cosh_ab_table), gp
546       nop.i 999
550 { .mmi
551       ld8 r34 = [r34]
552       nop.m 999
553       nop.i 999
558 // Calculate M and keep it as integer and floating point.
559 // M = round-to-integer(x*Inv_log2by64)
560 // cosh_FR_M = M = round-to-integer(ax/(log2/64)); fcvt.fx is used, not fcvt.fx.trunc
561 // Put the significand of M in r35
562 //    and the floating point representation of M in cosh_FR_M
564 { .mfi
565       nop.m 999
566 (p0)  fma.s1          cosh_FR_M      = cosh_FR_X, cosh_FR_Inv_log2by64, f0 
567       nop.i 999
570 { .mfi
571 (p0)  ldfe            cosh_FR_A1 = [r34],16            
572       nop.f 999
573       nop.i 999 ;;
576 { .mfi
577       nop.m 999
578 (p0)  fcvt.fx.s1      cosh_FR_M_temp = cosh_FR_M                      
579       nop.i 999 ;;
// fnorm turns the integer held in the significand back into a
// floating-point value of M.
582 { .mfi
583       nop.m 999
584 (p0)  fnorm.s1        cosh_FR_M      = cosh_FR_M_temp                 
585       nop.i 999 ;;
588 { .mfi
589 (p0)  getf.sig        r35       = cosh_FR_M_temp                 
590       nop.f 999
591       nop.i 999 ;;
594 // M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
595 // has a range of -32 thru 31.
596 // r35 = M
597 // r36 = j 
// Only the 6-bit mask is applied here; the sign extension is done further
// down with shl 2 / sxt1 / shr 2.
598 { .mii
599       nop.m 999
600       nop.i 999 ;;
601 (p0)  and            r36 = 0x3f, r35 ;;   
604 // Calculate R
605 // f13 = f44 - f38*f10 = ax - M*log2by64_hi
606 // f14 = f13 - f38*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
608 { .mfi
609       nop.m 999
610 (p0)  fnma.s1        cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X      
611       nop.i 999
614 { .mfi
615 (p0)  ldfe            cosh_FR_A2 = [r34],16            
616       nop.f 999
617       nop.i 999 ;;
620 { .mfi
621       nop.m 999
622 (p0)  fnma.s1        cosh_FR_R      = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp 
623       nop.i 999
626 // Get the B coefficients
627 // f15 = B_1
628 // f32 = B_2
629 // f33 = B_3
631 { .mmi
632 (p0)     ldfe            cosh_FR_A3 = [r34],16 ;;            
633 (p0)     ldfe            cosh_FR_B1 = [r34],16            
634          nop.i 999 ;;
637 { .mmi
638 (p0)     ldfe            cosh_FR_B2 = [r34],16 ;;            
639 (p0)     ldfe            cosh_FR_B3 = [r34],16            
640          nop.i 999 ;;
// Sign-extend the 6-bit j: move bit 5 up to bit 7, sign-extend the byte,
// then (in the next bundle) shift back down by 2.
643 { .mii
644          nop.m 999
645 (p0)     shl            r34 = r36,  0x2 ;;   
646 (p0)     sxt1           r37 = r34 ;;         
649 // ******************************************************
650 // STEP 2 (TBL and EXP)
651 // ******************************************************
652 // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
653 // f12 = R*R*R
654 // f13 = R*R
655 // f14 = R <== from above
657 { .mfi
658       nop.m 999
659 (p0)     fma.s1          cosh_FR_Rsq  = cosh_FR_R,   cosh_FR_R, f0  
660 (p0)     shr            r36 = r37,  0x2 ;;   
663 // r34 = M-j = r35 - r36
664 // r35 = N = (M-j)/64
666 { .mii
667 (p0)     sub                  r34 = r35, r36    
668          nop.i 999 ;;
669 (p0)     shr                  r35 = r34, 0x6 ;;    
// r40 = 0xfffe - N, r39 = 0xfffe + N (biased exponents for sneg / spos).
// r37 = N+1 is not read before r37 is reloaded with the j-table address
// in the next bundle.
672 { .mii
673 (p0)     sub                 r40 = r38, r35           
674 (p0)     adds                 r37 = 0x1, r35    
675 (p0)     add                 r39 = r38, r35 ;;           
678 // Get the address of the J table, add the offset,
679 // addresses are AD_mJ and AD_J, get the T value
680 // f32 = T(j)_hi
681 // f33 = T(j)_lo
682 // f34 = T(-j)_hi
683 // f35 = T(-j)_lo
// r34 = N-1 (r32 still holds 1); COSH_BY_EXP uses it for its overflow test.
685 { .mmi
686 (p0)     sub                  r34 = r35, r32    
687 (p0)     addl    r37   = @ltoff(double_cosh_j_table), gp
688          nop.i 999
692 { .mfi
693       ld8 r37 = [r37]
694 (p0)  fma.s1          cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0  
695       nop.i 999
698 // ******************************************************
699 // STEP 3 Now decide if we need to branch to EXP
700 // ******************************************************
701 // Put 32 in f9; p6 true if x < 32
// 0x10004 is the register-biased exponent for 2^5; the setf.exp and the
// compare that use it come later, after the last use of f9 as A_1.
703 { .mlx
704          nop.m 999
705 (p0)     movl                r32 = 0x0000000000010004 ;;               
708 // Calculate p_even
709 // f34 = B_2 + Rsq *B_3
710 // f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
711 // f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
713 { .mfi
714       nop.m 999
715 (p0)  fma.s1          cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3,          cosh_FR_B2  
716       nop.i 999 ;;
719 { .mfi
720       nop.m 999
721 (p0)  fma.s1          cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1  
722       nop.i 999
725 // Calculate p_odd
726 // f34 = A_2 + Rsq *A_3
727 // f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
728 // f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
730 { .mfi
731       nop.m 999
732 (p0)  fma.s1          cosh_FR_podd_temp1 = cosh_FR_Rsq,        cosh_FR_A3,         cosh_FR_A2  
733       nop.i 999 ;;
// cosh_FR_N_temp1 (f11) gets exponent 0xfffe + N; f11 was A_3 until the
// fma just above.
736 { .mfi
737 (p0)  setf.exp            cosh_FR_N_temp1 = r39            
738       nop.f 999
739       nop.i 999 ;;
742 { .mfi
743       nop.m 999
744 (p0)  fma.s1          cosh_FR_peven       = cosh_FR_Rsq, cosh_FR_peven_temp2, f0     
745       nop.i 999
748 { .mfi
749       nop.m 999
750 (p0)  fma.s1          cosh_FR_podd_temp2 = cosh_FR_Rsq,        cosh_FR_podd_temp1, cosh_FR_A1  
751       nop.i 999 ;;
// f9 = 32.0 (A_1 in f9 is no longer needed).
754 { .mfi
755 (p0)  setf.exp            f9  = r32                              
756       nop.f 999
757       nop.i 999 ;;
760 { .mfi
761       nop.m 999
762 (p0)  fma.s1          cosh_FR_podd       = cosh_FR_podd_temp2, cosh_FR_Rcub,       cosh_FR_R   
763       nop.i 999
766 // GR_mJ contains the table offset for -j
767 // GR_J  contains the table offset for +j
768 // (row index 32 - j / 32 + j, shifted left by 5 for 32-byte rows)
// p6 is set further down by the fcmp against 32: p6 = (|x| < 32), and its
// complement p7 selects COSH_BY_EXP.
770 { .mlx
771 (p0)     setf.exp            cosh_FR_N_temp2 = r40            
772 (p0)     movl                r40 = 0x0000000000000020 ;;    
// fmerge.se: exponent (and sign) from N_temp1, significand from f1,
// giving cosh_FR_spos = 2^(N-1).
775 { .mfi
776 (p0)     sub                 GR_mJ = r40,  r36           
777 (p0)     fmerge.se           cosh_FR_spos    = cosh_FR_N_temp1, f1 
778 (p0)     adds                GR_J  = 0x20, r36 ;;           
781 { .mii
782          nop.m 999
783 (p0)     shl                  GR_mJ = GR_mJ, 5 ;;   
784 (p0)     add                  AD_mJ = r37, GR_mJ ;; 
787 { .mmi
788          nop.m 999
789 (p0)     ldfe                 cosh_FR_Tmjhi = [AD_mJ],16                 
790 (p0)     shl                  GR_J  = GR_J, 5 ;;    
793 { .mfi
794 (p0)     ldfs                 cosh_FR_Tmjlo = [AD_mJ],16                 
795 (p0)     fcmp.lt.unc.s1      p6,p7 = cosh_FR_X,f9                          
796 (p0)     add                  AD_J  = r37, GR_J ;;  
799 { .mmi
800 (p0)     ldfe                 cosh_FR_Tjhi  = [AD_J],16 ;;                  
801 (p0)     ldfs                 cosh_FR_Tjlo  = [AD_J],16                  
802          nop.i 999 ;;
// cosh_FR_sneg = 2^(-N-1); then leave for COSH_BY_EXP when |x| >= 32.
805 { .mfb
806          nop.m 999
807 (p0)     fmerge.se           cosh_FR_sneg    = cosh_FR_N_temp2, f1 
808 (p7)     br.cond.spnt        L(COSH_BY_EXP) ;;                            
811 // ******************************************************
812 // If NOT branch to EXP
813 // ******************************************************
814 // Calculate C_hi
815 // ******************************************************
816 // cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
817 // cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
819 { .mfi
820       nop.m 999
821 (p0)  fma.s1         cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0                   
822       nop.i 999 ;;
825 { .mfi
826       nop.m 999
827 (p0)  fma.s1         cosh_FR_C_hi      = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi_temp    
828       nop.i 999
831 // ******************************************************
832 // Calculate S_hi
833 // ******************************************************
834 // cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
835 // cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_S_hi_temp1
837 { .mfi
838       nop.m 999
839 (p0)  fma.s1        cosh_FR_S_hi_temp1 =  cosh_FR_sneg, cosh_FR_Tmjhi, f0                
840       nop.i 999 ;;
843 // ******************************************************
844 // Calculate C_lo
845 // ******************************************************
846 // cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
847 // cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjhi + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
848 // cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
849 // cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
850 // cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
// C_lo_temp2 recovers what was lost when C_hi was rounded; C_lo_temp1 is
// then reused for the T_lo contribution.
852 { .mfi
853       nop.m 999
854 (p0)  fms.s1        cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi        
855       nop.i 999
858 { .mfi
859       nop.m 999
860 (p0)  fms.s1        cosh_FR_S_hi       =  cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1 
861       nop.i 999 ;;
864 { .mfi
865       nop.m 999
866 (p0)  fma.s1        cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1  
867       nop.i 999
870 { .mfi
871       nop.m 999
872 (p0)  fma.s1        cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0                  
873       nop.i 999 ;;
876 { .mfi
877       nop.m 999
878 (p0)  fma.s1        cosh_FR_C_lo_temp3 =  cosh_FR_spos, cosh_FR_Tjlo,  cosh_FR_C_lo_temp1 
879       nop.i 999 ;;
882 { .mfi
883       nop.m 999
884 (p0)  fma.s1        cosh_FR_C_lo       =  cosh_FR_C_lo_temp3, f1,   cosh_FR_C_lo_temp2    
885       nop.i 999 ;;
888 // ******************************************************
889 // cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
890 // cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
891 // f8 = cosh_FR_C_hi + cosh_FR_Y_lo   (C_hi plays the role of Y_hi)
893 { .mfi
894       nop.m 999
895 (p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo       
896       nop.i 999 ;;
899 { .mfi
900       nop.m 999
901 (p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp   
902       nop.i 999 ;;
// Final add in s0, then return.
905 { .mfb
906       nop.m 999
907 (p0)  fma.s0       f8 =  cosh_FR_C_hi, f1, cosh_FR_Y_lo                       
908 (p0)  br.ret.sptk     b0 ;;
911 L(COSH_BY_EXP): 
// Reached from COSH_BY_TBL when 32 <= |x| < 2^14.  Uses cosh_FR_peven,
// cosh_FR_podd, cosh_FR_Tjhi, cosh_FR_Tjlo, cosh_FR_spos and r34 = N-1
// computed there.  The T(-j) / cosh_FR_sneg terms are not used on this path.
// NOTE: p7 is recomputed by the cmp below; from then on
913 // When p7 is true,  we know that an overflow is not going to happen
914 // When p7 is false, we must check for possible overflow
915 // p7 is the over_SAFE flag
916 // f44 = Scale * (Y_hi + Y_lo)
917 //     =  cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
919 { .mfi
920       nop.m 999
921 (p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_peven, f1,       cosh_FR_podd           
922       nop.i 999
925 // Now we are in EXP. This is the only path where an overflow is possible
926 // but not for certain. So this is the only path where over_SAFE has any use.
927 // r34 still has N-1
928 // There is a danger of double-extended overflow   if N-1 > 0x3ffe = 16382
929 // There is a danger of double overflow            if N-1 > 0x3fe  = 1022
930 // There is a danger of single overflow            if N-1 > 0x7e   = 126
// This routine uses the double-extended bound 0x3ffe.
932 { .mlx
933        nop.m 999
934 (p0)   movl                r32          = 0x0000000000003ffe ;;                       
// p7 = !(N-1 > 0x3ffe), i.e. p7 set means "safe".
937 { .mfi
938 (p0)  cmp.gt.unc          p0,p7        = r34, r32                                 
939       nop.f 999
940       nop.i 999 ;;
// Y_lo = Tjhi*(peven + podd) + Tjlo
943 { .mfi
944       nop.m 999
945 (p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_Tjhi,  cosh_FR_Y_lo_temp, cosh_FR_Tjlo       
946       nop.i 999 ;;
949 { .mfi
950       nop.m 999
951 (p0)  fma.s1         cosh_FR_COSH_temp =  cosh_FR_Y_lo,  f1, cosh_FR_Tjhi                 
952       nop.i 999 ;;
// Scale by 2^(N-1) in s0; this is the operation that can overflow.
955 { .mfi
956       nop.m 999
957 (p0)  fma.s0       f44 = cosh_FR_spos,  cosh_FR_COSH_temp, f0                       
958       nop.i 999 ;;
961 // Dummy multiply to generate inexact
962 { .mfi
963          nop.m 999
964 (p7)     fmpy.s0      cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
965          nop.i 999 ;;
968 // If over_SAFE is set, return
969 { .mfb
970        nop.m 999
971 (p7)   fmerge.s            f8 = f44,f44
972 (p7)  br.ret.sptk     b0 ;;
975 // Else see if we overflowed
976 // S0 user supplied status
977 // S2 user supplied status + WRE + TD  (Overflows)
978 // If WRE is set then an overflow will not occur in EXP.
979 // The input value that would cause a register (WRE) value to overflow is about 2^15
980 // and this input would go into the HUGE path.
981 // Answer with WRE is in f43.
983 { .mfi
984       nop.m 999
985 (p0)  fsetc.s2            0x7F,0x42                                               
986       nop.i 999;;
// Redo the scaling in s2 so the unclamped magnitude can be inspected.
989 { .mfi
990       nop.m 999
991 (p0)  fma.s2            f43  = cosh_FR_spos,  cosh_FR_COSH_temp, f0                      
992       nop.i 999 ;;
// The 103FF lines below describe the double-precision bound; the constant
// actually loaded in this routine is 13FFF.
995 // 103FF => 103FF -FFFF = 400(true)
996 // 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
997 // double (7FE). So 0 103FF 8000000000000000  is one ulp more than
998 // largest double in register bias
1000 // 13FFF => 13FFF -FFFF = 4000(true)
1002 // Now  set p8 if the answer with WRE is greater than or equal this value
1003 // Also set p9 if the answer with WRE is less than or equal to negative this value
1005 { .mlx
1006        nop.m 999
1007 (p0)   movl                r32          = 0x0000000000013fff ;;                     
// f41 = 2^0x4000; the second fsetc puts s2 back to 0x7F,0x40.
1010 { .mmf
1011        nop.m 999
1012 (p0)   setf.exp            f41          = r32                                    
1013 (p0)   fsetc.s2            0x7F,0x40 ;;                                               
1016 { .mfi
1017       nop.m 999
1018 (p0)  fcmp.ge.unc.s1      p8, p0       = f43, f41                               
1019       nop.i 999
// f42 = -f41
1022 { .mfi
1023       nop.m 999
1024 (p0)  fmerge.ns           f42 = f41, f41                                          
1025       nop.i 999 ;;
1028 // The error tag for overflow is 63
1029 { .mii
1030       nop.m 999
1031       nop.i 999 ;;
1032 (p8)  mov                 GR_Parameter_TAG = 63 ;;                                               
1035 { .mfb
1036       nop.m 999
1037 (p0)  fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
1038 (p8)  br.cond.spnt __libm_error_region ;;
1041 { .mii
1042       nop.m 999
1043       nop.i 999 ;;
1044 (p9)  mov                 GR_Parameter_TAG = 63                                               
1047 { .mib
1048       nop.m 999
1049       nop.i 999
1050 (p9)  br.cond.spnt __libm_error_region ;;
// No overflow detected: raise inexact and return the s0 result from f44.
1053 // Dummy multiply to generate inexact
1054 { .mfi
1055          nop.m 999
1056 (p0)     fmpy.s0      cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
1057          nop.i 999 ;;
1060 { .mfb
1061       nop.m 999
1062 (p0)  fmerge.s            f8 = f44,f44                                            
1063 (p0)  br.ret.sptk     b0 ;;
1067 // for COSH_HUGE, put 24000 in exponent; add 1; multiply by the same power of two
1068 // SAFE: SAFE is always 0 for HUGE
// Reached from COSH_BY_TBL when |x| >= 2^14.  0x15dbf - 0xffff = 24000, so
// f9 = 2^24000, cosh_FR_hi_lo = f9 + 1 and f44 = f9 * cosh_FR_hi_lo is
// computed in s0.  The overflow tag 63 is set unconditionally, and no
// branch follows, so execution continues into __libm_error_region below.
1070 L(COSH_HUGE): 
1072 { .mlx
1073       nop.m 999
1074 (p0)  movl                r32 = 0x0000000000015dbf ;;                                
1077 { .mfi
1078 (p0)  setf.exp            f9  = r32                                               
1079       nop.f 999
1080       nop.i 999 ;;
1083 { .mfi
1084       nop.m 999
1085 (p0)  fma.s1              cosh_FR_hi_lo = f1, f9, f1                              
1086       nop.i 999 ;;
1089 { .mfi
1090       nop.m 999
1091 (p0)  fma.s0            f44 = f9, cosh_FR_hi_lo, f0                             
1092 (p0)  mov                 GR_Parameter_TAG = 63                                               
1094 .endp coshl
1095 ASM_SIZE_DIRECTIVE(coshl)
// __libm_error_region: common error exit for coshl.
// Entered by branch from COSH_BY_EXP, or by fall-through from COSH_HUGE,
// with GR_Parameter_TAG already set and the candidate result in
// FR_RESULT (f44).
//
// Creates a 64-byte frame and fills three slots with ldfe/stfe-sized values.
// Slot offsets are relative to the new sp; the X offset assumes the sp
// update has taken effect before "add GR_Parameter_X = 16,sp" executes.
// Stop bits are not visible in this listing, so that ordering is unconfirmed.
//   sp+16  FR_X      (f8, the original argument)   <- GR_Parameter_X
//   sp+32  FR_Y      (f0)                          <- GR_Parameter_Y
//   sp+48  FR_RESULT (f44)                         <- GR_Parameter_RESULT
// It then calls __libm_error_support, reloads f8 from the sp+48 slot,
// restores sp, b0, gp and ar.pfs, and returns through b0, which still
// holds coshl's return address because this code was entered by a branch
// rather than a call.
1097 .proc __libm_error_region
1098 __libm_error_region:
1099 .prologue
1100 { .mfi
1101         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
1102         nop.f 0
1103 .save   ar.pfs,GR_SAVE_PFS
1104         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
1106 { .mfi
1107 .fframe 64
1108         add sp=-64,sp                           // Create new stack
1109         nop.f 0
1110         mov GR_SAVE_GP=gp                       // Save gp
// The post-increment leaves GR_Parameter_Y pointing at the result slot.
1112 { .mmi
1113         stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
1114         add GR_Parameter_X = 16,sp              // Parameter 1 address
1115 .save   b0, GR_SAVE_B0
1116         mov GR_SAVE_B0=b0                       // Save b0
1118 .body
// The "Parameter 3 address" note on the nop.b line describes the add
// directly above it.
1119 { .mib
1120         stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
1121         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
1122         nop.b 0                                 // Parameter 3 address
// After the store, GR_Parameter_Y is moved back by 16 to the FR_Y slot.
1124 { .mib
1125         stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
1126         add   GR_Parameter_Y = -16,GR_Parameter_Y
1127         br.call.sptk b0=__libm_error_support#  // Call error handling function
// Recompute the result-slot address after the call (see the 8/15/00
// history entry): GR_Parameter_RESULT may have been overwritten.
1129 { .mmi
1130         nop.m 0
1131         nop.m 0
1132         add   GR_Parameter_RESULT = 48,sp
1134 { .mmi
1135         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1136 .restore sp
1137         add   sp = 64,sp                       // Restore stack pointer
1138         mov   b0 = GR_SAVE_B0                  // Restore return address
1140 { .mib
1141         mov   gp = GR_SAVE_GP                  // Restore gp
1142         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1143         br.ret.sptk     b0                     // Return
1146 .endp __libm_error_region
1147 ASM_SIZE_DIRECTIVE(__libm_error_region)
1149 .type   __libm_error_support#,@function
1150 .global __libm_error_support#