(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[glibc.git] / sysdeps / ia64 / fpu / e_coshl.S
blobdaac20d9a3bf59cac821c583637fe58b68ca50ed
1 .file "coshl.s"
3 // Copyright (C) 2000, 2001, Intel Corporation
4 // All rights reserved.
5 // 
6 // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
7 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
13 // * Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
16 // * Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
20 // * The name of Intel Corporation may not be used to endorse or promote
21 // products derived from this software without specific prior written
22 // permission.
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
32 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 // 
36 // Intel Corporation is the author of this code, and requests that all
37 // problem reports or change requests be submitted to it directly at 
38 // http://developer.intel.com/opensource.
40 // History
41 //==============================================================
42 // 2/02/00  Initial version 
43 // 4/04/00  Unwind support added
44 // 8/15/00  Bundle added after call to __libm_error_support to properly
45 //          set [the previously overwritten] GR_Parameter_RESULT.
46 // 1/23/01  Set inexact flag for large args.
48 // API
49 //==============================================================
50 // float       = cosh(float)
51 // double      = cosh(double)
52 // long double = coshl(long double)
53 // input  floating point f8
54 // output floating point f8
57 // Overview of operation
58 //==============================================================
59 // There are four paths
61 // 1. |x| < 0.25        COSH_BY_POLY
62 // 2. |x| < 32          COSH_BY_TBL
63 // 3. |x| < 2^14        COSH_BY_EXP
64 // 4. |x| >= 2^14       COSH_HUGE
66 // For paths 1 and 2, SAFE is always 1.
67 // For path 4, SAFE is always 0; for path 3 it is decided at run time.
68 // SAFE = 1 means the result cannot overflow.
70 #include "libm_support.h"
72 // Assembly macros
73 //==============================================================
// Symbolic names for the floating-point (f*) and general (r*) registers
// used by the code below.  Several names map to the same physical register
// (f9, for example, is Inv_log2by64, A1, J_temp, C_hi, COSH, X2 and
// TINY_THRESH), so a name is only valid from the instruction that writes
// it until the register is next written under another name.
//
// f44 holds |x| while the result is computed and is also the register the
// error region stores as the result.
74 cosh_FR_X            = f44
75 FR_RESULT            = f44
76 cosh_FR_SGNX         = f40
77 cosh_FR_all_ones     = f45
// f8 is the argument/return register; f0 is stored as the (unused) second
// argument by the error region.
79 FR_X                 = f8
80 FR_Y                 = f0
// Argument-reduction constants (TBL/EXP paths).
81 cosh_FR_Inv_log2by64 = f9
82 cosh_FR_log2by64_lo  = f11
83 cosh_FR_log2by64_hi  = f10
// Odd-polynomial coefficients; these reuse the registers of the reduction
// constants.
85 cosh_FR_A1           = f9
86 cosh_FR_A2           = f10
87 cosh_FR_A3           = f11
// Reduced argument R and its powers.
89 cosh_FR_Rcub         = f12
90 cosh_FR_M_temp       = f13
91 cosh_FR_R_temp       = f13
92 cosh_FR_Rsq          = f13
93 cosh_FR_R            = f14
95 cosh_FR_M            = f38
// Even-polynomial coefficients and a scratch register.
97 cosh_FR_tmp          = f15
98 cosh_FR_B1           = f15
99 cosh_FR_B2           = f32
100 cosh_FR_B3           = f33
// Polynomial partial results (the even and odd temporaries share f34/f35).
102 cosh_FR_peven_temp1  = f34
103 cosh_FR_peven_temp2  = f35
104 cosh_FR_peven        = f36
106 cosh_FR_podd_temp1   = f34
107 cosh_FR_podd_temp2   = f35
108 cosh_FR_podd         = f37
110 cosh_FR_J_temp       = f9
111 cosh_FR_J            = f10
113 cosh_FR_Mmj          = f39
115 cosh_FR_N_temp1      = f11
116 cosh_FR_N_temp2      = f12
117 cosh_FR_N            = f13
// Scale factors 2^(N-1) and 2^(-N-1).
119 cosh_FR_spos         = f14
120 cosh_FR_sneg         = f15
// Table values T(j) and T(-j), each as a hi/lo pair.
122 cosh_FR_Tjhi         = f32
123 cosh_FR_Tjlo         = f33
124 cosh_FR_Tmjhi        = f34
125 cosh_FR_Tmjlo        = f35
// Byte offsets into, and addresses within, double_cosh_j_table.
127 GR_mJ           = r35
128 GR_J            = r36
130 AD_mJ           = r38
131 AD_J            = r39
133 cosh_GR_all_ones     = r40
// Registers used by __libm_error_region: r41-r43 keep ar.pfs, b0 and gp
// across the call; r44-r47 are the four output registers reserved by the
// alloc in coshl.
135 GR_SAVE_PFS           = r41
136 GR_SAVE_B0            = r42
137 GR_SAVE_GP            = r43
138 GR_Parameter_X        = r44
139 GR_Parameter_Y        = r45
140 GR_Parameter_RESULT   = r46
141 GR_Parameter_TAG      = r47 
// Intermediates of the final TBL/EXP combination.
143 cosh_FR_C_hi         = f9
144 cosh_FR_C_hi_temp    = f10
145 cosh_FR_C_lo_temp1   = f11 
146 cosh_FR_C_lo_temp2   = f12 
147 cosh_FR_C_lo_temp3   = f13 
149 cosh_FR_C_lo         = f38
150 cosh_FR_S_hi         = f39
152 cosh_FR_S_hi_temp1   = f10
153 cosh_FR_Y_hi         = f11 
154 cosh_FR_Y_lo_temp    = f12 
155 cosh_FR_Y_lo         = f13 
156 cosh_FR_COSH         = f9
// POLY path: x^2, x^4 and the six polynomial coefficients.
158 cosh_FR_X2           = f9
159 cosh_FR_X4           = f10
161 cosh_FR_P1           = f14
162 cosh_FR_P2           = f15
163 cosh_FR_P3           = f32
164 cosh_FR_P4           = f33
165 cosh_FR_P5           = f34
166 cosh_FR_P6           = f35
168 cosh_FR_TINY_THRESH  = f9
170 cosh_FR_COSH_temp    = f10
171 cosh_FR_SCALE        = f11 
// HUGE path intermediate.
173 cosh_FR_hi_lo = f10
// POLY path temporaries (the odd and even temporaries share f11/f13).
175 cosh_FR_poly_podd_temp1    =  f11 
176 cosh_FR_poly_podd_temp2    =  f13
177 cosh_FR_poly_peven_temp1   =  f11
178 cosh_FR_poly_peven_temp2   =  f13
180 // Data tables
181 //==============================================================
// The tables are placed in .rodata when built as part of libc (_LIBC) and in
// .data otherwise.
183 #ifdef _LIBC
184 .rodata
185 #else
186 .data
187 #endif
189 .align 16
// Argument-reduction constants.  Each entry is a 64-bit significand followed
// by a sign/exponent word, 16 bytes per entry, read with ldfe in this order:
//   cosh_FR_Inv_log2by64  ~ 64/ln(2)
//   cosh_FR_log2by64_hi   ~ ln(2)/64, leading bits only (low 24 bits zero)
//   cosh_FR_log2by64_lo   ~ the remaining low part of ln(2)/64
190 double_cosh_arg_reduction:
191 ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
192    data8 0xB8AA3B295C17F0BC, 0x00004005
193    data8 0xB17217F7D1000000, 0x00003FF8
194    data8 0xCF79ABC9E3B39804, 0x00003FD0
195 ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
// Coefficients P1..P6 for the small-argument (COSH_BY_POLY) path, 16 bytes
// each, loaded in order with ldfe.  The values are close to
// 1/2!, 1/4!, 1/6!, 1/8!, 1/10! and 1/12!, i.e. the coefficients of
// x^2 .. x^12 in the series for cosh(x).
197 double_cosh_p_table:
198 ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
199    data8 0x8000000000000000, 0x00003FFE
200    data8 0xAAAAAAAAAAAAAB80, 0x00003FFA
201    data8 0xB60B60B60B4FE884, 0x00003FF5
202    data8 0xD00D00D1021D7370, 0x00003FEF
203    data8 0x93F27740C0C2F1CC, 0x00003FE9
204    data8 0x8FA02AC65BCBD5BC, 0x00003FE2
205 ASM_SIZE_DIRECTIVE(double_cosh_p_table)
// Coefficients for the reduced-argument polynomials of the TBL/EXP paths,
// 16 bytes each, loaded in order with ldfe as A1, A2, A3, B1, B2, B3.
//   A1..A3 ~ 1/3!, 1/5!, 1/7!  (odd polynomial, approximates sinh(R))
//   B1..B3 ~ 1/2!, 1/4!, 1/6!  (even polynomial, approximates cosh(R) - 1)
207 double_cosh_ab_table:
208 ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
209    data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
210    data8 0x88888888884ECDD5, 0x00003FF8
211    data8 0xD00D0C6DCC26A86B, 0x00003FF2
212    data8 0x8000000000000002, 0x00003FFE
213    data8 0xAAAAAAAAAA402C77, 0x00003FFA
214    data8 0xB60B6CC96BDB144D, 0x00003FF5
215 ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
// Table of T(j) ~ 2^(j/64) for j = -32 .. 32 (65 rows; row 32 is exactly 1.0).
// Each row is four data8 words = 32 bytes, which is why the code forms a row
// offset as (32 + j) << 5 or (32 - j) << 5:
//   bytes  0-15  T_hi: 64-bit significand + sign/exponent word, read with ldfe
//   bytes 16-19  T_lo: single-precision correction, read with ldfs
//                (presumably stored little-endian in the low half of the
//                third data8 -- confirm for the target byte order)
//   remaining    padding up to the 32-byte row size
217 double_cosh_j_table:
218 ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
219    data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
220    data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
221    data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
222    data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
223    data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
224    data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
225    data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
226    data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
227    data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
228    data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
229    data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
230    data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
231    data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
232    data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
233    data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
234    data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
235    data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
236    data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
237    data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
238    data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
239    data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
240    data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
241    data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
242    data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
243    data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
244    data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
245    data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
246    data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
247    data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
248    data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
249    data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
250    data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
251    data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
252    data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
253    data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
254    data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
255    data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
256    data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
257    data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
258    data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
259    data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
260    data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
261    data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
262    data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
263    data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
264    data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
265    data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
266    data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
267    data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
268    data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
269    data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
270    data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
271    data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
272    data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
273    data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
274    data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
275    data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
276    data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
277    data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
278    data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
279    data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
280    data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
281    data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
282    data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
283    data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
284 ASM_SIZE_DIRECTIVE(double_cosh_j_table)
286 .align 32
287 .global coshl#
289 .section .text
290 .proc  coshl#
291 .align 32
// coshl: long double hyperbolic cosine.
//   In:  f8 = x
//   Out: f8 = cosh(x)
// This entry sequence returns at once for NaN and infinite x, then forms |x|
// in cosh_FR_X and selects a path: execution continues into the
// COSH_BY_POLY code when |x| < 0.25 and branches to L(COSH_BY_TBL) otherwise.
293 coshl:
// When built as part of libc the same code is also exported as
// __ieee754_coshl.
295 #ifdef _LIBC
296 .global __ieee754_coshl#
297 .proc __ieee754_coshl#
298 __ieee754_coshl:
299 #endif
301 // Is x a NaN?  (p6 is set when x is a NaN of either sign.)
// alloc: 0 inputs, 12 locals (r32-r43), 4 outputs (r44-r47).
303 { .mfi
304       alloc r32 = ar.pfs,0,12,4,0                  
305 (p0)  fclass.m.unc  p6,p7 = f8, 0xc3               
306       mov cosh_GR_all_ones = -1
309 //   The register frame is more than we need but it is in preparation
310 //   for the values we add for error support: three addresses
311 //   (GR_Parameter_X, _Y, _RESULT) and one tag (GR_Parameter_TAG).
// NaN input: return x*1 + x, which is again a NaN.
313 { .mfb
314       nop.m 999
315 (p6)     fma.s0   f8 = f8,f1,f8                  
316 (p6)  br.ret.spnt     b0 ;;                          
320 // Make a constant (significand of all ones) that will generate inexact
//     when squared.
321 // Is x an infinity?  (p6 is set for +inf and -inf.)
322 { .mfi
323       setf.sig cosh_FR_all_ones = cosh_GR_all_ones 
324 (p0)  fclass.m.unc  p6,p0 = f8, 0x23               
325       nop.i 999 ;;
// Infinite input: return |x| = +inf (sign taken from f0, the rest from x).
328 { .mfb
329       nop.m 999
330 (p6)     fmerge.s      f8 = f0,f8                  
331 (p6)  br.ret.spnt     b0 ;;
336 // Put 0.25 in f9 (biased exponent 0xfffd = 2^-2); p7 true if |x| >= 0.25
// Note that r32, which received ar.pfs from the alloc, is reused as scratch
// from here on; the error region re-reads ar.pfs itself.
337 { .mlx
338          nop.m 999
339 (p0)     movl            r32 = 0x000000000000fffd ;;         
342 { .mfi
343 (p0)  setf.exp        f9 = r32                         
344       nop.f 999
345       nop.i 999 ;;
// cosh_FR_X = |x|
348 { .mfi
349       nop.m 999
350 (p0)  fmerge.s      cosh_FR_X    = f0,f8                
351       nop.i 999
// cosh_FR_SGNX = 1.0 carrying the sign of x
354 { .mfi
355       nop.m 999
356 (p0)  fmerge.s      cosh_FR_SGNX = f8,f1                
357       nop.i 999 ;;
// p7 = !(|x| < 0.25); the "less than" result itself is discarded into p0.
360 { .mfi
361       nop.m 999
362 (p0)  fcmp.lt.unc     p0,p7 = cosh_FR_X,f9                    
363       nop.i 999 ;;
366 { .mib
367       nop.m 999
368       nop.i 999
369 (p7)  br.cond.sptk    L(COSH_BY_TBL) 
374 // COSH_BY_POLY: 
// Path 1, reached by falling through when |x| < 0.25.  Computes
//   cosh(x) ~= 1 + P1*x^2 + P2*x^4 + P3*x^6 + P4*x^8 + P5*x^10 + P6*x^12
// as 1 + (x^2 * p_odd + p_even), where p_odd and p_even are polynomials in
// x^4 holding the odd- and even-numbered coefficients respectively.
375 // POLY cannot overflow so there is no need to call __libm_error_support
376 // Get the values of P_x from the table
// r34 = linkage-table slot for double_cosh_p_table, then the table address.
378 { .mmi
379       nop.m 999
380 (p0)  addl           r34   = @ltoff(double_cosh_p_table), gp
381       nop.i 999
385 { .mmi
386       ld8 r34 = [r34]
387       nop.m 999
388       nop.i 999
393 // Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax  (ax = |x|)
// The six ldfe loads walk the table with a 16-byte post-increment.
394 { .mmf
395          nop.m 999
396 (p0)     ldfe       cosh_FR_P1 = [r34],16                 
397 (p0)     fma.s1     cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;           
400 { .mmi
401 (p0)     ldfe       cosh_FR_P2 = [r34],16 ;;                 
402 (p0)     ldfe       cosh_FR_P3 = [r34],16                 
403          nop.i 999 ;;
406 { .mmi
407 (p0)     ldfe       cosh_FR_P4 = [r34],16 ;;                 
408 (p0)     ldfe       cosh_FR_P5 = [r34],16                 
409          nop.i 999 ;;
412 { .mfi
413 (p0)     ldfe       cosh_FR_P6 = [r34],16                 
414 (p0)     fma.s1     cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0         
415          nop.i 999 ;;
418 // Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
419 { .mfi
420          nop.m 999
421 (p0)     fma.s1     cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3                
422          nop.i 999 ;;
425 { .mfi
426          nop.m 999
427 (p0)     fma.s1     cosh_FR_podd            = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1   
428          nop.i 999
431 // Calculate cosh_FR_peven =  p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
// (cosh_FR_poly_peven_temp1 is the same register as cosh_FR_poly_podd_temp1.)
432 { .mfi
433          nop.m 999
434 (p0)     fma.s1     cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4               
435          nop.i 999 ;;
438 { .mfi
439          nop.m 999
440 (p0)     fma.s1     cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2 
441          nop.i 999 ;;
444 { .mfi
445          nop.m 999
446 (p0)     fma.s1     cosh_FR_peven       = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0         
447          nop.i 999 ;;
450 // Y_lo = x2*p_odd + p_even
451 // Calculate f8 = Y_hi + Y_lo, where Y_hi is the constant 1.0 (f1 * f1)
452 { .mfi
453          nop.m 999
454 (p0)     fma.s1     cosh_FR_Y_lo         = cosh_FR_X2, cosh_FR_podd,  cosh_FR_peven    
455          nop.i 999 ;;
// The final operation uses status field s0, so it is the one that rounds
// the returned value and reports flags; then return.
458 { .mfb
459          nop.m 999
460 (p0)     fma.s0   f8                   = f1, f1, cosh_FR_Y_lo                        
461 (p0)     br.ret.sptk     b0 ;;
465 L(COSH_BY_TBL): 
467 // Now that we are at TBL; so far all we know is that |x| >= 0.25.
468 // The first two steps are the same for TBL and EXP, but if we are HUGE
469 // we want to leave now.  Thresholds by precision -- Double Extended:
470 // Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
471 // Double
472 // Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
473 // Single
474 // Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
475 // This file uses the Double Extended threshold: go to HUGE if |x| >= 2^14
476 // 1000d (register-biased) is e = 14 (true)
// f9 = 2^14, built from its biased exponent 0x1000d.
478 { .mlx
479       nop.m 999
480 (p0)     movl            r32 = 0x000000000001000d ;;              
483 { .mfi
484 (p0)     setf.exp        f9 = r32                              
485       nop.f 999
486       nop.i 999 ;;
// p6 = (|x| >= 2^14)
489 { .mfi
490       nop.m 999
491 (p0)     fcmp.ge.unc     p6,p7 = cosh_FR_X,f9                  
492       nop.i 999 ;;
495 { .mib
496       nop.m 999
497       nop.i 999
498 (p6)     br.cond.spnt    L(COSH_HUGE) ;;                             
// Integer register roles once STEP 1 and STEP 2 have completed.  Before
// that, r34 serves as a table pointer and r37 as scratch:
501 // r32 = 1
502 // r34 = N-1 
503 // r35 = N
504 // r36 = j
505 // r37 = N+1
507 // TBL can never overflow
508 // cosh(x) = cosh(B+R)
509 //         = cosh(B) cosh(R) + sinh(B) sinh(R) 
510 // cosh(R) can be approximated by 1 + p_even
511 // sinh(R) can be approximated by p_odd
// Here |x| = B + R with B = M*(log2/64), M an integer, and M = 64*N + j.
513 // ******************************************************
514 // STEP 1 (TBL and EXP)
515 // ******************************************************
516 // Get the following constants.
517 // f9  = Inv_log2by64
518 // f10 = log2by64_hi
519 // f11 = log2by64_lo
521 { .mmi
522 (p0)     adds                 r32 = 0x1,r0      
523 (p0)     addl           r34   = @ltoff(double_cosh_arg_reduction), gp
524          nop.i 999
528 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
529 // put them in an exponent.
530 // cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
531 // r39 = 0xffff + (N-1)  = 0xffff +N -1  = r38 + N
532 // r40 = 0xffff - (N +1) = 0xffff -N -1  = r38 - N,  with r38 = 0xfffe
// The ld8 replaces the linkage-table slot address in r34 with the address
// of double_cosh_arg_reduction.
534 { .mlx
535          ld8 r34 = [r34]
536 (p0)     movl                r38 = 0x000000000000fffe ;; 
539 { .mmi
540 (p0)     ldfe            cosh_FR_Inv_log2by64 = [r34],16 ;;            
541 (p0)     ldfe            cosh_FR_log2by64_hi  = [r34],16            
542          nop.i 999 ;;
545 { .mbb
546 (p0)     ldfe            cosh_FR_log2by64_lo  = [r34],16            
547          nop.b 999
548          nop.b 999 ;;
551 // Get the A coefficients
552 // f9  = A_1
553 // f10 = A_2
554 // f11 = A_3
// These are the same registers as the three reduction constants; each A
// load is placed next to the last use of the constant it replaces.
556 { .mmi
557       nop.m 999
558 (p0)  addl           r34   = @ltoff(double_cosh_ab_table), gp
559       nop.i 999
563 { .mmi
564       ld8 r34 = [r34]
565       nop.m 999
566       nop.i 999
571 // Calculate M and keep it as integer and floating point.
572 // M = round-to-integer(x*Inv_log2by64)
573 // cosh_FR_M = M = round(ax/(log2/64)); fcvt.fx has no .trunc completer,
//     so the conversion rounds in the s1 rounding mode rather than truncating
574 // Put the significand of M in r35
575 //    and the floating point representation of M in cosh_FR_M
577 { .mfi
578       nop.m 999
579 (p0)  fma.s1          cosh_FR_M      = cosh_FR_X, cosh_FR_Inv_log2by64, f0 
580       nop.i 999
583 { .mfi
584 (p0)  ldfe            cosh_FR_A1 = [r34],16            
585       nop.f 999
586       nop.i 999 ;;
// cosh_FR_M_temp = M as a 64-bit integer held in the significand
589 { .mfi
590       nop.m 999
591 (p0)  fcvt.fx.s1      cosh_FR_M_temp = cosh_FR_M                      
592       nop.i 999 ;;
// cosh_FR_M = the same integer normalized back to floating point
595 { .mfi
596       nop.m 999
597 (p0)  fnorm.s1        cosh_FR_M      = cosh_FR_M_temp                 
598       nop.i 999 ;;
601 { .mfi
602 (p0)  getf.sig        r35       = cosh_FR_M_temp                 
603       nop.f 999
604       nop.i 999 ;;
607 // M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
608 // has a range of -32 thru 31.
609 // r35 = M
610 // r36 = j 
// The and only isolates the six bits; the sign extension is completed by
// the shl / sxt1 at the end of this step and the shr in STEP 2.
611 { .mii
612       nop.m 999
613       nop.i 999 ;;
614 (p0)  and            r36 = 0x3f, r35 ;;   
617 // Calculate R
618 // cosh_FR_R_temp = x - M*log2by64_hi
619 // cosh_FR_R      = R = (x - M*log2by64_hi) - M*log2by64_lo
621 { .mfi
622       nop.m 999
623 (p0)  fnma.s1        cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X      
624       nop.i 999
627 { .mfi
628 (p0)  ldfe            cosh_FR_A2 = [r34],16            
629       nop.f 999
630       nop.i 999 ;;
633 { .mfi
634       nop.m 999
635 (p0)  fnma.s1        cosh_FR_R      = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp 
636       nop.i 999
639 // Get the B coefficients (after the last A coefficient)
640 // f15 = B_1
641 // f32 = B_2
642 // f33 = B_3
644 { .mmi
645 (p0)     ldfe            cosh_FR_A3 = [r34],16 ;;            
646 (p0)     ldfe            cosh_FR_B1 = [r34],16            
647          nop.i 999 ;;
650 { .mmi
651 (p0)     ldfe            cosh_FR_B2 = [r34],16 ;;            
652 (p0)     ldfe            cosh_FR_B3 = [r34],16            
653          nop.i 999 ;;
// r34 is no longer needed as a pointer.  Shifting the 6-bit field left by 2
// puts its top bit at bit 7, so sxt1 sign-extends it; r37 = 4*j (signed).
656 { .mii
657          nop.m 999
658 (p0)     shl            r34 = r36,  0x2 ;;   
659 (p0)     sxt1           r37 = r34 ;;         
662 // ******************************************************
663 // STEP 2 (TBL and EXP)
664 // ******************************************************
665 // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
666 // f12 = R*R*R
667 // f13 = R*R
668 // f14 = R <== from above
// On entry r37 holds 4*j (sign-extended) and r35 holds M; the arithmetic
// shift right by 2 below leaves the signed j in r36.
670 { .mfi
671       nop.m 999
672 (p0)     fma.s1          cosh_FR_Rsq  = cosh_FR_R,   cosh_FR_R, f0  
673 (p0)     shr            r36 = r37,  0x2 ;;   
676 // r34 = M-j = r35 - r36
677 // r35 = N = (M-j)/64
679 { .mii
680 (p0)     sub                  r34 = r35, r36    
681          nop.i 999 ;;
682 (p0)     shr                  r35 = r34, 0x6 ;;    
// r38 = 0xfffe here, so
//   r40 = 0xfffe - N = biased exponent of 2^(-N-1)
//   r39 = 0xfffe + N = biased exponent of 2^(N-1)
//   r37 = N + 1 (overwritten by the table address below before it is read)
685 { .mii
686 (p0)     sub                 r40 = r38, r35           
687 (p0)     adds                 r37 = 0x1, r35    
688 (p0)     add                 r39 = r38, r35 ;;           
691 // Get the address of the J table, add the offset,
692 // addresses are AD_mJ and AD_J, get the T value
693 // f32 = T(j)_hi
694 // f33 = T(j)_lo
695 // f34 = T(-j)_hi
696 // f35 = T(-j)_lo
// r34 = N - 1 (r32 = 1); it is kept for the overflow test in COSH_BY_EXP.
// r37 = linkage-table slot for double_cosh_j_table, then the table address.
698 { .mmi
699 (p0)     sub                  r34 = r35, r32    
700 (p0)     addl    r37   = @ltoff(double_cosh_j_table), gp
701          nop.i 999
705 { .mfi
706       ld8 r37 = [r37]
707 (p0)  fma.s1          cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0  
708       nop.i 999
711 // ******************************************************
712 // STEP 3 Now decide if we need to branch to EXP
713 // ******************************************************
714 // Put 32 in f9 (biased exponent 0x10004 = 2^5); p6 true if |x| < 32
// The compare itself is the fcmp further down; the polynomial evaluation and
// table loads are done in between, since both TBL and EXP need them.
716 { .mlx
717          nop.m 999
718 (p0)     movl                r32 = 0x0000000000010004 ;;               
721 // Calculate p_even
722 // f34 = B_2 + Rsq *B_3
723 // f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
724 // f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
726 { .mfi
727       nop.m 999
728 (p0)  fma.s1          cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3,          cosh_FR_B2  
729       nop.i 999 ;;
732 { .mfi
733       nop.m 999
734 (p0)  fma.s1          cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1  
735       nop.i 999
738 // Calculate p_odd
739 // f34 = A_2 + Rsq *A_3
740 // f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
741 // f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
743 { .mfi
744       nop.m 999
745 (p0)  fma.s1          cosh_FR_podd_temp1 = cosh_FR_Rsq,        cosh_FR_A3,         cosh_FR_A2  
746       nop.i 999 ;;
// cosh_FR_N_temp1 = 2^(N-1), built from the biased exponent in r39
749 { .mfi
750 (p0)  setf.exp            cosh_FR_N_temp1 = r39            
751       nop.f 999
752       nop.i 999 ;;
755 { .mfi
756       nop.m 999
757 (p0)  fma.s1          cosh_FR_peven       = cosh_FR_Rsq, cosh_FR_peven_temp2, f0     
758       nop.i 999
761 { .mfi
762       nop.m 999
763 (p0)  fma.s1          cosh_FR_podd_temp2 = cosh_FR_Rsq,        cosh_FR_podd_temp1, cosh_FR_A1  
764       nop.i 999 ;;
// f9 = 32.0 (A_1, which shared f9, has been consumed by the fma above)
767 { .mfi
768 (p0)  setf.exp            f9  = r32                              
769       nop.f 999
770       nop.i 999 ;;
773 { .mfi
774       nop.m 999
775 (p0)  fma.s1          cosh_FR_podd       = cosh_FR_podd_temp2, cosh_FR_Rcub,       cosh_FR_R   
776       nop.i 999
779 // GR_mJ contains the table offset for -j:  (32 - j) * 32 bytes
780 // GR_J  contains the table offset for +j:  (32 + j) * 32 bytes
781 // p6 is true when |x| < 32 (set by the fcmp below); p7 is its complement
// cosh_FR_N_temp2 = 2^(-N-1) from the biased exponent in r40; r40 is then
// reloaded with the constant 32.
783 { .mlx
784 (p0)     setf.exp            cosh_FR_N_temp2 = r40            
785 (p0)     movl                r40 = 0x0000000000000020 ;;    
// cosh_FR_spos = 2^(N-1): sign and exponent from N_temp1, significand of 1.0
788 { .mfi
789 (p0)     sub                 GR_mJ = r40,  r36           
790 (p0)     fmerge.se           cosh_FR_spos    = cosh_FR_N_temp1, f1 
791 (p0)     adds                GR_J  = 0x20, r36 ;;           
// Shift by 5 = multiply the row index by the 32-byte row size.
794 { .mii
795          nop.m 999
796 (p0)     shl                  GR_mJ = GR_mJ, 5 ;;   
797 (p0)     add                  AD_mJ = r37, GR_mJ ;; 
// T(-j): hi part with ldfe at offset 0, lo part with ldfs at offset 16.
800 { .mmi
801          nop.m 999
802 (p0)     ldfe                 cosh_FR_Tmjhi = [AD_mJ],16                 
803 (p0)     shl                  GR_J  = GR_J, 5 ;;    
806 { .mfi
807 (p0)     ldfs                 cosh_FR_Tmjlo = [AD_mJ],16                 
808 (p0)     fcmp.lt.unc.s1      p6,p7 = cosh_FR_X,f9                          
809 (p0)     add                  AD_J  = r37, GR_J ;;  
// T(j), same layout.
812 { .mmi
813 (p0)     ldfe                 cosh_FR_Tjhi  = [AD_J],16 ;;                  
814 (p0)     ldfs                 cosh_FR_Tjlo  = [AD_J],16                  
815          nop.i 999 ;;
// cosh_FR_sneg = 2^(-N-1); then leave for EXP when |x| >= 32 (p7).
818 { .mfb
819          nop.m 999
820 (p0)     fmerge.se           cosh_FR_sneg    = cosh_FR_N_temp2, f1 
821 (p7)     br.cond.spnt        L(COSH_BY_EXP) ;;                            
824 // ******************************************************
825 // If NOT branch to EXP
826 // ******************************************************
// Path 2, 0.25 <= |x| < 32.  With spos = 2^(N-1), sneg = 2^(-N-1):
//   C = spos*T(j) + sneg*T(-j)   (plays the role of cosh(B))
//   S = spos*T(j) - sneg*T(-j)   (plays the role of sinh(B))
//   result = C_hi + (S_hi*p_odd + (C_hi*p_even + C_lo))
827 // Calculate C_hi
828 // ******************************************************
829 // cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
830 // cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
832 { .mfi
833       nop.m 999
834 (p0)  fma.s1         cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0                   
835       nop.i 999 ;;
838 { .mfi
839       nop.m 999
840 (p0)  fma.s1         cosh_FR_C_hi      = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi_temp    
841       nop.i 999
844 // ******************************************************
845 // Calculate S_hi
846 // ******************************************************
847 // cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
848 // cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_S_hi_temp1
// (cosh_FR_S_hi_temp1 and cosh_FR_C_hi_temp are the same register and
// receive the same product.)
850 { .mfi
851       nop.m 999
852 (p0)  fma.s1        cosh_FR_S_hi_temp1 =  cosh_FR_sneg, cosh_FR_Tmjhi, f0                
853       nop.i 999 ;;
856 // ******************************************************
857 // Calculate C_lo
858 // ******************************************************
859 // cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
860 // cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjhi + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
//     (temp2 is what the rounding of C_hi left out of the hi-part sum)
861 // cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
862 // cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
863 // cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
865 { .mfi
866       nop.m 999
867 (p0)  fms.s1        cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi        
868       nop.i 999
871 { .mfi
872       nop.m 999
873 (p0)  fms.s1        cosh_FR_S_hi       =  cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1 
874       nop.i 999 ;;
877 { .mfi
878       nop.m 999
879 (p0)  fma.s1        cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1  
880       nop.i 999
// cosh_FR_C_lo_temp1 is reused here for the product of the lo parts.
883 { .mfi
884       nop.m 999
885 (p0)  fma.s1        cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0                  
886       nop.i 999 ;;
889 { .mfi
890       nop.m 999
891 (p0)  fma.s1        cosh_FR_C_lo_temp3 =  cosh_FR_spos, cosh_FR_Tjlo,  cosh_FR_C_lo_temp1 
892       nop.i 999 ;;
895 { .mfi
896       nop.m 999
897 (p0)  fma.s1        cosh_FR_C_lo       =  cosh_FR_C_lo_temp3, f1,   cosh_FR_C_lo_temp2    
898       nop.i 999 ;;
901 // ******************************************************
902 // cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
903 // cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
904 // cosh_FR_COSH = Y_hi + Y_lo, with Y_hi = cosh_FR_C_hi; written to f8
906 { .mfi
907       nop.m 999
908 (p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo       
909       nop.i 999 ;;
912 { .mfi
913       nop.m 999
914 (p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp   
915       nop.i 999 ;;
// Final add on status field s0 produces the returned value; then return.
918 { .mfb
919       nop.m 999
920 (p0)  fma.s0       f8 =  cosh_FR_C_hi, f1, cosh_FR_Y_lo                       
921 (p0)  br.ret.sptk     b0 ;;
924 L(COSH_BY_EXP): 
// Path 3, 32 <= |x| < 2^14.  Only the T(j) term is used here:
//   result = spos * (Tjhi + (Tjhi*(p_even + p_odd) + Tjlo))
// The sneg/T(-j) values loaded earlier are not referenced on this path.
926 // When p7 is true,  we know that an overflow is not going to happen
927 // When p7 is false, we must check for possible overflow
928 // p7 is the over_SAFE flag
929 // f44 = Scale * (Y_hi + Y_lo)
930 //     =  cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
932 { .mfi
933       nop.m 999
934 (p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_peven, f1,       cosh_FR_podd           
935       nop.i 999
938 // Now we are in EXP. This is the only path where an overflow is possible
939 // but not for certain. So this is the only path where over_SAFE has any use.
940 // r34 still has N-1
941 // There is a danger of double-extended overflow   if N-1 > 0x3ffe = 16382
942 // There is a danger of double overflow            if N-1 > 0x3fe  = 1022
943 // There is a danger of single overflow            if N-1 > 0x7e   = 126
// This file tests against the double-extended limit, 0x3ffe.
945 { .mlx
946        nop.m 999
947 (p0)   movl                r32          = 0x0000000000003ffe ;;                       
// p7 = !(N-1 > 0x3ffe), i.e. p7 set means the result cannot overflow.
950 { .mfi
951 (p0)  cmp.gt.unc          p0,p7        = r34, r32                                 
952       nop.f 999
953       nop.i 999 ;;
956 { .mfi
957       nop.m 999
958 (p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_Tjhi,  cosh_FR_Y_lo_temp, cosh_FR_Tjlo       
959       nop.i 999 ;;
962 { .mfi
963       nop.m 999
964 (p0)  fma.s1         cosh_FR_COSH_temp =  cosh_FR_Y_lo,  f1, cosh_FR_Tjhi                 
965       nop.i 999 ;;
// Candidate result in f44, computed with the user's status field s0.
968 { .mfi
969       nop.m 999
970 (p0)  fma.s0       f44 = cosh_FR_spos,  cosh_FR_COSH_temp, f0                       
971       nop.i 999 ;;
974 // Dummy multiply to generate inexact
975 { .mfi
976          nop.m 999
977 (p7)     fmpy.s0      cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
978          nop.i 999 ;;
981 // If over_SAFE is set, return
982 { .mfb
983        nop.m 999
984 (p7)   fmerge.s            f8 = f44,f44
985 (p7)  br.ret.sptk     b0 ;;
988 // Else see if we overflowed
989 // S0 user supplied status
990 // S2 user supplied status + WRE + TD  (Overflows)
991 // If WRE is set then an overflow will not occur in EXP.
992 // The input value that would cause a register (WRE) value to overflow is about 2^15
993 // and this input would go into the HUGE path.
994 // Answer with WRE is in f43.
// fsetc.s2 0x7F,0x42: s2 controls = user controls with WRE and TD set.
996 { .mfi
997       nop.m 999
998 (p0)  fsetc.s2            0x7F,0x42                                               
999       nop.i 999;;
// Same product as f44, recomputed on s2 (widest-range exponent) into f43.
1002 { .mfi
1003       nop.m 999
1004 (p0)  fma.s2            f43  = cosh_FR_spos,  cosh_FR_COSH_temp, f0                      
1005       nop.i 999 ;;
1008 // (Double case, for reference:) 103FF => 103FF -FFFF = 400(true)
1009 // 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
1010 // double (7FE). So 0 103FF 8000000000000000  is one ulp more than
1011 // largest double in register bias
1013 // (Double-extended case, used here:) 13FFF => 13FFF -FFFF = 4000(true)
1015 // Now  set p8 if the answer with WRE is greater than or equal this value
1016 // Also set p9 if the answer with WRE is less than or equal to negative this value
1018 { .mlx
1019        nop.m 999
1020 (p0)   movl                r32          = 0x0000000000013fff ;;                     
// f41 = 2^0x4000; the second fsetc puts s2 back to user controls + TD only.
1023 { .mmf
1024        nop.m 999
1025 (p0)   setf.exp            f41          = r32                                    
1026 (p0)   fsetc.s2            0x7F,0x40 ;;                                               
1029 { .mfi
1030       nop.m 999
1031 (p0)  fcmp.ge.unc.s1      p8, p0       = f43, f41                               
1032       nop.i 999
// f42 = -f41
1035 { .mfi
1036       nop.m 999
1037 (p0)  fmerge.ns           f42 = f41, f41                                          
1038       nop.i 999 ;;
1041 // The error tag for overflow is 63
1042 { .mii
1043       nop.m 999
1044       nop.i 999 ;;
1045 (p8)  mov                 GR_Parameter_TAG = 63 ;;                                               
// Overflow: hand f8 (argument) and f44 (result) to the error region.
1048 { .mfb
1049       nop.m 999
1050 (p0)  fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
1051 (p8)  br.cond.spnt __libm_error_region ;;
1054 { .mii
1055       nop.m 999
1056       nop.i 999 ;;
1057 (p9)  mov                 GR_Parameter_TAG = 63                                               
1060 { .mib
1061       nop.m 999
1062       nop.i 999
1063 (p9)  br.cond.spnt __libm_error_region ;;
// No overflow after all: raise inexact and return the s0 result.
1066 // Dummy multiply to generate inexact
1067 { .mfi
1068          nop.m 999
1069 (p0)     fmpy.s0      cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
1070          nop.i 999 ;;
1073 { .mfb
1074       nop.m 999
1075 (p0)  fmerge.s            f8 = f44,f44                                            
1076 (p0)  br.ret.sptk     b0 ;;
1080 // for COSH_HUGE, put 24000 in exponent (0x15dbf - 0xffff = 24000); add 1.
//      No sign is taken from the input on this path.
1081 // SAFE: SAFE is always 0 for HUGE
// Path 4, |x| >= 2^14.  The product below is formed on status field s0 from
// two values of magnitude about 2^24000 and is left in f44 (FR_RESULT) for
// the error handler.
1083 L(COSH_HUGE): 
1085 { .mlx
1086       nop.m 999
1087 (p0)  movl                r32 = 0x0000000000015dbf ;;                                
// f9 = 2^24000
1090 { .mfi
1091 (p0)  setf.exp            f9  = r32                                               
1092       nop.f 999
1093       nop.i 999 ;;
// cosh_FR_hi_lo = 2^24000 + 1
1096 { .mfi
1097       nop.m 999
1098 (p0)  fma.s1              cosh_FR_hi_lo = f1, f9, f1                              
1099       nop.i 999 ;;
// f44 = 2^24000 * (2^24000 + 1); error tag 63 is the overflow tag.
// No branch follows in the visible code, so execution appears to continue
// straight into __libm_error_region, which is placed next in the file.
1102 { .mfi
1103       nop.m 999
1104 (p0)  fma.s0            f44 = f9, cosh_FR_hi_lo, f0                             
1105 (p0)  mov                 GR_Parameter_TAG = 63                                               
1107 .endp coshl
1108 ASM_SIZE_DIRECTIVE(coshl)
// __libm_error_region: common overflow exit for coshl.
//   In:  f8 (FR_X) = original argument, f44 (FR_RESULT) = computed result,
//        GR_Parameter_TAG = error tag (63 on every path that reaches here).
//   Out: f8 = value left in the result slot by __libm_error_support.
// Builds a 64-byte stack frame, stores the values into it, and passes their
// addresses plus the tag in the output registers r44-r47.  Frame layout
// relative to the new sp:
//   sp+16  argument x         (GR_Parameter_X)
//   sp+32  second argument    (GR_Parameter_Y; f0 is stored, unused by cosh)
//   sp+48  result             (GR_Parameter_RESULT)
// NOTE(review): the exact prototype of __libm_error_support comes from
// libm_support.h, which is not visible here -- confirm the argument order.
1110 .proc __libm_error_region
1111 __libm_error_region:
1112 .prologue
1113 { .mfi
1114         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
1115         nop.f 0
1116 .save   ar.pfs,GR_SAVE_PFS
1117         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
1119 { .mfi
1120 .fframe 64
1121         add sp=-64,sp                           // Create new stack
1122         nop.f 0
1123         mov GR_SAVE_GP=gp                       // Save gp
// The post-increment leaves GR_Parameter_Y pointing at the result slot.
1125 { .mmi
1126         stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
1127         add GR_Parameter_X = 16,sp              // Parameter 1 address
1128 .save   b0, GR_SAVE_B0
1129         mov GR_SAVE_B0=b0                       // Save b0
1131 .body
1132 { .mib
1133         stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
1134         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
1135         nop.b 0                                 // Parameter 3 address
// GR_Parameter_Y is moved back by 16 so it addresses Parameter 2 again.
1137 { .mib
1138         stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
1139         add   GR_Parameter_Y = -16,GR_Parameter_Y
1140         br.call.sptk b0=__libm_error_support#  // Call error handling function
// Recompute the result address from sp rather than relying on the output
// register after the call (see the 8/15/00 history entry).
1142 { .mmi
1143         nop.m 0
1144         nop.m 0
1145         add   GR_Parameter_RESULT = 48,sp
1147 { .mmi
1148         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1149 .restore sp
1150         add   sp = 64,sp                       // Restore stack pointer
1151         mov   b0 = GR_SAVE_B0                  // Restore return address
1153 { .mib
1154         mov   gp = GR_SAVE_GP                  // Restore gp
1155         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1156         br.ret.sptk     b0                     // Return
1159 .endp __libm_error_region
1160 ASM_SIZE_DIRECTIVE(__libm_error_region)
// External error-handling routine called above.
1162 .type   __libm_error_support#,@function
1163 .global __libm_error_support#