1 ;; Copyright (C) 2007-2024 Free Software Foundation, Inc.
3 ;; This file is part of GCC.
5 ;; GCC is free software; you can redistribute it and/or modify
6 ;; it under the terms of the GNU General Public License as published by
7 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; GCC is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;; GNU General Public License for more details.
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with GCC; see the file COPYING3. If not see
17 ;; <http://www.gnu.org/licenses/>.
19 ;; For the internal conditional math routines:
21 ;; operand 0 is always the result
22 ;; operand 1 is always the predicate
23 ;; operand 2, 3, and sometimes 4 are the input values.
24 ;; operand 4 or 5 is the result when the predicate is false; operand 5 or 6 is the floating point status register to use.
25 ;; operand 6 or 7 is the rounding to do. (0 = single, 1 = double, 2 = none)
27 ;; addrf3_cond - F0 = F2 + F3
28 ;; subrf3_cond - F0 = F2 - F3
29 ;; mulrf3_cond - F0 = F2 * F3
30 ;; nmulrf3_cond - F0 = - (F2 * F3)
31 ;; m1addrf4_cond - F0 = (F2 * F3) + F4
32 ;; m1subrf4_cond - F0 = (F2 * F3) - F4
33 ;; m2addrf4_cond - F0 = F2 + (F3 * F4)
34 ;; m2subrf4_cond - F0 = F2 - (F3 * F4)
36 ;; Basic plus/minus/mult operations
;; Predicated RFmode addition: F0 = F2 + F3.
;; The emitted fadd is guarded by predicate operand 1 ("(%1)").  Operand 4 is
;; the else-arm of the if_then_else: either tied to operand 0 or an "H"
;; constant.  Operand 5 picks the status field (.s%5) and operand 6 the
;; rounding completer (%R6).
38 (define_insn "addrf3_cond"
39 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
40 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
43 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
44 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
45 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
46 (use (match_operand:SI 5 "const_int_operand" ""))
47 (use (match_operand:SI 6 "const_int_operand" ""))]
49 "(%1) fadd%R6.s%5 %0 = %F2, %F3"
50 [(set_attr "itanium_class" "fmac")
51 (set_attr "predicable" "no")])
;; Predicated RFmode subtraction: F0 = F2 - F3.
;; The emitted fsub is guarded by predicate operand 1.  Operand 4 is the
;; else-arm value (tied to operand 0, or an "H" constant), operand 5 the
;; status field (.s%5) and operand 6 the rounding completer (%R6).
53 (define_insn "subrf3_cond"
54 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
55 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
58 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
59 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
60 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
61 (use (match_operand:SI 5 "const_int_operand" ""))
62 (use (match_operand:SI 6 "const_int_operand" ""))]
64 "(%1) fsub%R6.s%5 %0 = %F2, %F3"
65 [(set_attr "itanium_class" "fmac")
66 (set_attr "predicable" "no")])
;; Predicated RFmode multiplication: F0 = F2 * F3.
;; The emitted fmpy is guarded by predicate operand 1.  Operand 4 is the
;; else-arm value (tied to operand 0, or an "H" constant), operand 5 the
;; status field (.s%5) and operand 6 the rounding completer (%R6).
68 (define_insn "mulrf3_cond"
69 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
70 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
73 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
74 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
75 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
76 (use (match_operand:SI 5 "const_int_operand" ""))
77 (use (match_operand:SI 6 "const_int_operand" ""))]
79 "(%1) fmpy%R6.s%5 %0 = %F2, %F3"
80 [(set_attr "itanium_class" "fmac")
81 (set_attr "predicable" "no")])
;; Predicated RFmode negated multiplication: F0 = - (F2 * F3).
;; The emitted fnmpy is guarded by predicate operand 1.  Operand 4 is the
;; else-arm value (tied to operand 0, or an "H" constant), operand 5 the
;; status field (.s%5) and operand 6 the rounding completer (%R6).
85 (define_insn "nmulrf3_cond"
86 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
87 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
90 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
91 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")))
92 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
93 (use (match_operand:SI 5 "const_int_operand" ""))
94 (use (match_operand:SI 6 "const_int_operand" ""))]
96 "(%1) fnmpy%R6.s%5 %0 = %F2, %F3"
97 [(set_attr "itanium_class" "fmac")
98 (set_attr "predicable" "no")])
100 ;; add-mult/sub-mult operations (mult as op1)
;; Predicated RFmode multiply-add with the product first:
;; F0 = (F2 * F3) + F4, emitted as fma %0 = %F2, %F3, %F4.
;; Operand 1 is the guarding predicate, operand 5 the else-arm value (tied
;; to operand 0, or an "H" constant), operand 6 the status field (.s%6) and
;; operand 7 the rounding completer (%R7).
102 (define_insn "m1addrf4_cond"
103 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
104 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
108 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
109 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
110 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
111 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
112 (use (match_operand:SI 6 "const_int_operand" ""))
113 (use (match_operand:SI 7 "const_int_operand" ""))]
115 "(%1) fma%R7.s%6 %0 = %F2, %F3, %F4"
116 [(set_attr "itanium_class" "fmac")
117 (set_attr "predicable" "no")])
;; Predicated RFmode multiply-subtract with the product first:
;; F0 = (F2 * F3) - F4, emitted as fms %0 = %F2, %F3, %F4.
;; Operand 1 is the guarding predicate, operand 5 the else-arm value (tied
;; to operand 0, or an "H" constant), operand 6 the status field (.s%6) and
;; operand 7 the rounding completer (%R7).
119 (define_insn "m1subrf4_cond"
120 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
121 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
125 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
126 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
127 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
128 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
129 (use (match_operand:SI 6 "const_int_operand" ""))
130 (use (match_operand:SI 7 "const_int_operand" ""))]
132 "(%1) fms%R7.s%6 %0 = %F2, %F3, %F4"
133 [(set_attr "itanium_class" "fmac")
134 (set_attr "predicable" "no")])
136 ;; add-mult/sub-mult operations (mult as op2)
;; Predicated RFmode multiply-add with the product second:
;; F0 = F2 + (F3 * F4).  Note the operand order in the output template:
;; the fma is written as %0 = %F3, %F4, %F2.
;; Operand 1 is the guarding predicate, operand 5 the else-arm value (tied
;; to operand 0, or an "H" constant), operand 6 the status field (.s%6) and
;; operand 7 the rounding completer (%R7).
138 (define_insn "m2addrf4_cond"
139 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
140 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
143 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
145 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
146 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
147 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
148 (use (match_operand:SI 6 "const_int_operand" ""))
149 (use (match_operand:SI 7 "const_int_operand" ""))]
151 "(%1) fma%R7.s%6 %0 = %F3, %F4, %F2"
152 [(set_attr "itanium_class" "fmac")
153 (set_attr "predicable" "no")])
;; Predicated RFmode multiply-subtract with the product second:
;; F0 = F2 - (F3 * F4).  Note the operand order in the output template:
;; the fnma is written as %0 = %F3, %F4, %F2.
;; Operand 1 is the guarding predicate, operand 5 the else-arm value (tied
;; to operand 0, or an "H" constant), operand 6 the status field (.s%6) and
;; operand 7 the rounding completer (%R7).
155 (define_insn "m2subrf4_cond"
156 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
157 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
160 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
162 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
163 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
164 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
165 (use (match_operand:SI 6 "const_int_operand" ""))
166 (use (match_operand:SI 7 "const_int_operand" ""))]
168 "(%1) fnma%R7.s%6 %0 = %F3, %F4, %F2"
169 [(set_attr "itanium_class" "fmac")
170 (set_attr "predicable" "no")])
172 ;; Conversions to/from RF and SF/DF/XF
173 ;; These conversions should not generate any code but make it possible
174 ;; for all the instructions used to implement floating point division
175 ;; to be written for RFmode only and to not have to handle multiple
176 ;; modes or to have to handle a register in more than one mode.
;; The floating point modes (SF, DF, XF) that the extend<mode>rf2 and
;; truncrf<mode>2 patterns convert to and from RFmode.
178 (define_mode_iterator SDX_F [SF DF XF])
;; Extension of an SF/DF/XF value (a register or the 0.0/1.0 constants
;; accepted by fr_reg_or_fp01_operand) into an RF register, followed by the
;; replacement pattern that turns it into a plain (set (match_dup 0)
;; (match_dup 2)).  Operand 2 is built in the C fragment: FR_REG (0) when
;; the source is the zero constant, FR_REG (1) when it is the one constant,
;; and otherwise the source's own register number viewed in RFmode.
180 (define_insn "extend<mode>rf2"
181 [(set (match_operand:RF 0 "fr_register_operand" "=f")
182 (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "fG")))]
185 [(set_attr "itanium_class" "fmisc")
186 (set_attr "predicable" "yes")])
189 [(set (match_operand:RF 0 "fr_register_operand" "")
190 (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "")))]
192 [(set (match_dup 0) (match_dup 2))]
194 if (operands[1] == CONST0_RTX (<MODE>mode))
195 operands[2] = gen_rtx_REG (RFmode, FR_REG (0));
196 else if (operands[1] == CONST1_RTX (<MODE>mode))
197 operands[2] = gen_rtx_REG (RFmode, FR_REG (1));
199 operands[2] = gen_rtx_REG (RFmode, REGNO (operands[1]));
;; Truncation of an RF value (a register or the 0.0/1.0 constants accepted
;; by fr_reg_or_fp01_operand) into an SF/DF/XF register, followed by the
;; replacement pattern that turns it into a plain (set (match_dup 0)
;; (match_dup 2)).  Operand 2 is built in the C fragment: FR_REG (0) when
;; the source is the zero constant, FR_REG (1) when it is the one constant,
;; and otherwise the source's own register number viewed in the
;; destination mode.
203 (define_insn "truncrf<mode>2"
204 [(set (match_operand:SDX_F 0 "fr_register_operand" "=f")
205 (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))]
208 [(set_attr "itanium_class" "fmisc")
209 (set_attr "predicable" "yes")])
212 [(set (match_operand:SDX_F 0 "fr_register_operand" "")
213 (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "")))]
215 [(set (match_dup 0) (match_dup 2))]
217 if (operands[1] == CONST0_RTX (RFmode))
218 operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (0));
219 else if (operands[1] == CONST1_RTX (RFmode))
220 operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (1));
222 operands[2] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
225 ;; Float to integer truncations using an alternative status register.
;; Signed RF -> DI truncating conversion (fcvt.fx.trunc), with both source
;; and destination in FP registers.  Operand 2 is a constant that selects
;; the status field used by the instruction (.s%2).
227 (define_insn "fix_truncrfdi2_alts"
228 [(set (match_operand:DI 0 "fr_register_operand" "=f")
229 (fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
230 (use (match_operand:SI 2 "const_int_operand" ""))]
232 "fcvt.fx.trunc.s%2 %0 = %1"
233 [(set_attr "itanium_class" "fcvtfx")])
;; Unsigned RF -> DI truncating conversion (fcvt.fxu.trunc), with both
;; source and destination in FP registers.  Operand 2 is a constant that
;; selects the status field used by the instruction (.s%2).
235 (define_insn "fixuns_truncrfdi2_alts"
236 [(set (match_operand:DI 0 "fr_register_operand" "=f")
237 (unsigned_fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
238 (use (match_operand:SI 2 "const_int_operand" ""))]
240 "fcvt.fxu.trunc.s%2 %0 = %1"
241 [(set_attr "itanium_class" "fcvtfx")])
;; Builds an RF value in an FP register from a DI general register through
;; an unspec.  divsi3_internal uses it to materialise the constant 2**(-34)
;; from the integer 65501.
243 (define_insn "setf_exp_rf"
244 [(set (match_operand:RF 0 "fr_register_operand" "=f")
245 (unspec:RF [(match_operand:DI 1 "register_operand" "r")]
249 [(set_attr "itanium_class" "frfr")])
251 ;; Reciprocal approximation
;; frcpa: produces two results from inputs 1 and 2 -- an RF value in
;; operand 0 and a predicate in CCI operand 3.  The division expanders use
;; operand 0 as the initial reciprocal estimate and operand 3 as the guard
;; for every following refinement step.  Operand 4 selects the status
;; field (.s%4).
253 (define_insn "recip_approx_rf"
254 [(set (match_operand:RF 0 "fr_register_operand" "=f")
255 (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")
256 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG")]
257 UNSPEC_FR_RECIP_APPROX_RES))
258 (set (match_operand:CCI 3 "register_operand" "=c")
259 (unspec:CCI [(match_dup 1) (match_dup 2)] UNSPEC_FR_RECIP_APPROX))
260 (use (match_operand:SI 4 "const_int_operand" ""))]
262 "frcpa.s%4 %0, %3 = %F1, %F2"
263 [(set_attr "itanium_class" "fmisc")
264 (set_attr "predicable" "no")])
266 ;; Single precision floating point division
;; SFmode division entry point, enabled by TARGET_INLINE_FLOAT_DIV.
;; Dispatches to divsf3_internal_lat when TARGET_INLINE_FLOAT_DIV is
;; INL_MIN_LAT; divsf3_internal_thr is the other choice.
268 (define_expand "divsf3"
269 [(set (match_operand:SF 0 "fr_register_operand" "")
270 (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
271 (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
272 "TARGET_INLINE_FLOAT_DIV"
275 if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
276 insn = gen_divsf3_internal_lat (operands[0], operands[1], operands[2]);
278 insn = gen_divsf3_internal_thr (operands[0], operands[1], operands[2]);
283 ;; Single precision floating point division (maximum throughput algorithm).
;; SFmode division, maximum throughput sequence.
;; Both inputs are moved into RFmode, frcpa supplies a first reciprocal
;; estimate y together with predicate cond, and the estimate is refined
;; with predicated fma/fnma steps before the quotient is corrected with
;; its remainder.  Intermediate steps use status1 and no rounding
;; (trunc_off); the steps that produce q and the final result round to
;; single (trunc_sgl).
285 (define_expand "divsf3_internal_thr"
286 [(set (match_operand:SF 0 "fr_register_operand" "")
287 (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
288 (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
289 "TARGET_INLINE_FLOAT_DIV"
291 rtx y = gen_reg_rtx (RFmode);
292 rtx a = gen_reg_rtx (RFmode);
293 rtx b = gen_reg_rtx (RFmode);
294 rtx e = gen_reg_rtx (RFmode);
295 rtx y1 = gen_reg_rtx (RFmode);
296 rtx y2 = gen_reg_rtx (RFmode);
297 rtx q = gen_reg_rtx (RFmode);
298 rtx r = gen_reg_rtx (RFmode);
299 rtx q_res = gen_reg_rtx (RFmode);
300 rtx cond = gen_reg_rtx (CCImode);
301 rtx zero = CONST0_RTX (RFmode);
302 rtx one = CONST1_RTX (RFmode);
303 rtx status0 = CONST0_RTX (SImode);
304 rtx status1 = CONST1_RTX (SImode);
305 rtx trunc_sgl = CONST0_RTX (SImode);
306 rtx trunc_off = CONST2_RTX (SImode);
308 /* Empty conversions to put inputs into RFmode. */
309 emit_insn (gen_extendsfrf2 (a, operands[1]));
310 emit_insn (gen_extendsfrf2 (b, operands[2]));
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
312 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
313 /* e = 1 - (b * y) */
314 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
315 /* y1 = y + (y * e) */
316 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
317 /* y2 = y + (y1 * e) */
318 emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e, zero, status1, trunc_off));
319 /* q = single(a * y2) */
320 emit_insn (gen_mulrf3_cond (q, cond, a, y2, zero, status1, trunc_sgl));
321 /* r = a - (q * b) */
322 emit_insn (gen_m2subrf4_cond (r, cond, a, q, b, zero, status1, trunc_off));
323 /* Q = single (q + (r * y2)) */
/* The else-arm here is y (the frcpa result), and the step uses status0. */
324 emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y2, y, status0, trunc_sgl));
325 /* Conversion back into SFmode. */
326 emit_insn (gen_truncrfsf2 (operands[0], q_res));
330 ;; Single precision floating point division (minimum latency algorithm).
;; SFmode division, minimum latency sequence.
;; Both inputs are moved into RFmode, frcpa supplies a first reciprocal
;; estimate y together with predicate cond, and a quotient estimate and the
;; reciprocal are refined side by side with predicated fma/fnma steps.
;; Intermediate steps use status1; q1 and the final result round to single
;; (trunc_sgl), everything else is unrounded (trunc_off).
332 (define_expand "divsf3_internal_lat"
333 [(set (match_operand:SF 0 "fr_register_operand" "")
334 (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
335 (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
336 "TARGET_INLINE_FLOAT_DIV"
338 rtx y = gen_reg_rtx (RFmode);
339 rtx a = gen_reg_rtx (RFmode);
340 rtx b = gen_reg_rtx (RFmode);
341 rtx e = gen_reg_rtx (RFmode);
342 rtx q = gen_reg_rtx (RFmode);
343 rtx e1 = gen_reg_rtx (RFmode);
344 rtx y1 = gen_reg_rtx (RFmode);
345 rtx q1 = gen_reg_rtx (RFmode);
346 rtx r = gen_reg_rtx (RFmode);
347 rtx q_res = gen_reg_rtx (RFmode);
348 rtx cond = gen_reg_rtx (CCImode);
349 rtx zero = CONST0_RTX (RFmode);
350 rtx one = CONST1_RTX (RFmode);
351 rtx status0 = CONST0_RTX (SImode);
352 rtx status1 = CONST1_RTX (SImode);
353 rtx trunc_sgl = CONST0_RTX (SImode);
354 rtx trunc_off = CONST2_RTX (SImode);
356 /* Empty conversions to put inputs into RFmode. */
357 emit_insn (gen_extendsfrf2 (a, operands[1]));
358 emit_insn (gen_extendsfrf2 (b, operands[2]));
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
360 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
/* q = a * y (unrounded) */
362 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
363 /* e = 1 - (b * y) */
364 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
365 /* e1 = e + (e * e) */
366 emit_insn (gen_m2addrf4_cond (e1, cond, e, e, e, zero, status1, trunc_off));
367 /* q1 = single(q + (q * e1)) */
368 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e1, zero, status1, trunc_sgl));
369 /* y1 = y + (y * e1) */
370 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e1, zero, status1, trunc_off));
371 /* r = a - (q1 * b) */
372 emit_insn (gen_m2subrf4_cond (r, cond, a, q1, b, zero, status1, trunc_off));
373 /* Q = single (q1 + (r * y1)) */
/* The else-arm here is y (the frcpa result), and the step uses status0. */
374 emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r, y1, y, status0, trunc_sgl));
375 /* Conversion back into SFmode. */
376 emit_insn (gen_truncrfsf2 (operands[0], q_res));
380 ;; Double precision floating point division
;; DFmode division entry point, enabled by TARGET_INLINE_FLOAT_DIV.
;; Dispatches to divdf3_internal_lat when TARGET_INLINE_FLOAT_DIV is
;; INL_MIN_LAT; divdf3_internal_thr is the other choice.
382 (define_expand "divdf3"
383 [(set (match_operand:DF 0 "fr_register_operand" "")
384 (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
385 (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
386 "TARGET_INLINE_FLOAT_DIV"
389 if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
390 insn = gen_divdf3_internal_lat (operands[0], operands[1], operands[2]);
392 insn = gen_divdf3_internal_thr (operands[0], operands[1], operands[2]);
397 ;; Double precision floating point division (maximum throughput algorithm).
;; DFmode division, maximum throughput sequence.
;; Both inputs are moved into RFmode, frcpa supplies a first reciprocal
;; estimate y together with predicate cond, and the reciprocal is refined
;; three times (y1, y2, y3) using successively squared error terms before
;; the quotient is formed and corrected with its remainder.  Intermediate
;; steps use status1 and no rounding (trunc_off); q and the final result
;; round to double (trunc_dbl).
399 (define_expand "divdf3_internal_thr"
400 [(set (match_operand:DF 0 "fr_register_operand" "")
401 (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
402 (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
403 "TARGET_INLINE_FLOAT_DIV"
405 rtx q_res = gen_reg_rtx (RFmode);
406 rtx a = gen_reg_rtx (RFmode);
407 rtx b = gen_reg_rtx (RFmode);
408 rtx y = gen_reg_rtx (RFmode);
409 rtx e = gen_reg_rtx (RFmode);
410 rtx y1 = gen_reg_rtx (RFmode);
411 rtx e1 = gen_reg_rtx (RFmode);
412 rtx y2 = gen_reg_rtx (RFmode);
413 rtx e2 = gen_reg_rtx (RFmode);
414 rtx y3 = gen_reg_rtx (RFmode);
415 rtx q = gen_reg_rtx (RFmode);
416 rtx r = gen_reg_rtx (RFmode);
417 rtx cond = gen_reg_rtx (CCImode);
418 rtx zero = CONST0_RTX (RFmode);
419 rtx one = CONST1_RTX (RFmode);
420 rtx status0 = CONST0_RTX (SImode);
421 rtx status1 = CONST1_RTX (SImode);
422 rtx trunc_dbl = CONST1_RTX (SImode);
423 rtx trunc_off = CONST2_RTX (SImode);
424 /* Empty conversions to put inputs into RFmode */
425 emit_insn (gen_extenddfrf2 (a, operands[1]));
426 emit_insn (gen_extenddfrf2 (b, operands[2]));
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
428 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
429 /* e = 1 - (b * y) */
430 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
431 /* y1 = y + (y * e) */
432 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
/* e1 = e * e */
434 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
435 /* y2 = y1 + (y1 * e1) */
436 emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
/* e2 = e1 * e1 */
438 emit_insn (gen_mulrf3_cond (e2, cond, e1, e1, zero, status1, trunc_off));
439 /* y3 = y2 + (y2 * e2) */
440 emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e2, zero, status1, trunc_off));
441 /* q = double (a * y3) */
442 emit_insn (gen_mulrf3_cond (q, cond, a, y3, zero, status1, trunc_dbl));
443 /* r = a - (b * q) */
444 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
445 /* Q = double (q + (r * y3)) */
/* The else-arm here is y (the frcpa result), and the step uses status0. */
446 emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y3, y, status0, trunc_dbl));
447 /* Conversion back into DFmode */
448 emit_insn (gen_truncrfdf2 (operands[0], q_res));
452 ;; Double precision floating point division (minimum latency algorithm).
;; DFmode division, minimum latency sequence.
;; Both inputs are moved into RFmode, frcpa supplies a first reciprocal
;; estimate y together with predicate cond, and quotient and reciprocal
;; estimates are refined side by side with predicated fma/fnma steps.
;; Intermediate steps use status1 and no rounding (trunc_off); q2 and the
;; final result round to double (trunc_dbl).
454 (define_expand "divdf3_internal_lat"
455 [(set (match_operand:DF 0 "fr_register_operand" "")
456 (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
457 (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
458 "TARGET_INLINE_FLOAT_DIV"
460 rtx q_res = gen_reg_rtx (RFmode);
461 rtx a = gen_reg_rtx (RFmode);
462 rtx b = gen_reg_rtx (RFmode);
463 rtx y = gen_reg_rtx (RFmode);
464 rtx e = gen_reg_rtx (RFmode);
465 rtx y1 = gen_reg_rtx (RFmode);
466 rtx e1 = gen_reg_rtx (RFmode);
467 rtx q1 = gen_reg_rtx (RFmode);
468 rtx y2 = gen_reg_rtx (RFmode);
469 rtx e2 = gen_reg_rtx (RFmode);
470 rtx q2 = gen_reg_rtx (RFmode);
471 rtx e3 = gen_reg_rtx (RFmode);
472 rtx q = gen_reg_rtx (RFmode);
473 rtx r1 = gen_reg_rtx (RFmode);
474 rtx cond = gen_reg_rtx (CCImode);
475 rtx zero = CONST0_RTX (RFmode);
476 rtx one = CONST1_RTX (RFmode);
477 rtx status0 = CONST0_RTX (SImode);
478 rtx status1 = CONST1_RTX (SImode);
479 rtx trunc_dbl = CONST1_RTX (SImode);
480 rtx trunc_off = CONST2_RTX (SImode);
482 /* Empty conversions to put inputs into RFmode */
483 emit_insn (gen_extenddfrf2 (a, operands[1]));
484 emit_insn (gen_extenddfrf2 (b, operands[2]));
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
486 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
487 /* e = 1 - (b * y) */
488 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
/* q = a * y */
490 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
491 /* e2 = e + (e * e) */
492 emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
/* e1 = e * e */
494 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
495 /* e3 = e + (e1 * e1) */
496 emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
497 /* q1 = q + (q * e2) */
498 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e2, zero, status1, trunc_off));
499 /* y1 = y + (y * e2) */
500 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
501 /* q2 = double(q + (q1 * e3)) */
502 emit_insn (gen_m2addrf4_cond (q2, cond, q, q1, e3, zero, status1, trunc_dbl));
503 /* y2 = y + (y1 * e3) */
504 emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
505 /* r1 = a - (b * q2) */
506 emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q2, zero, status1, trunc_off));
507 /* Q = double (q2 + (r1 * y2)) */
/* The else-arm here is y (the frcpa result), and the step uses status0. */
508 emit_insn (gen_m2addrf4_cond (q_res, cond, q2, r1, y2, y, status0, trunc_dbl));
509 /* Conversion back into DFmode */
510 emit_insn (gen_truncrfdf2 (operands[0], q_res));
514 ;; Extended precision floating point division.
;; XFmode division, enabled by TARGET_INLINE_FLOAT_DIV.  There is a single
;; sequence (no latency/throughput variants).  Both inputs are moved into
;; RFmode, frcpa supplies a first reciprocal estimate y together with
;; predicate cond, and quotient and reciprocal estimates are refined with
;; predicated fma/fnma steps.  Every step is unrounded (trunc_off);
;; intermediate steps use status1, the frcpa and the final step status0.
516 (define_expand "divxf3"
517 [(set (match_operand:XF 0 "fr_register_operand" "")
518 (div:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "")
519 (match_operand:XF 2 "fr_reg_or_fp01_operand" "")))]
520 "TARGET_INLINE_FLOAT_DIV"
522 rtx q_res = gen_reg_rtx (RFmode);
523 rtx a = gen_reg_rtx (RFmode);
524 rtx b = gen_reg_rtx (RFmode);
525 rtx y = gen_reg_rtx (RFmode);
526 rtx e = gen_reg_rtx (RFmode);
527 rtx y1 = gen_reg_rtx (RFmode);
528 rtx e1 = gen_reg_rtx (RFmode);
529 rtx q1 = gen_reg_rtx (RFmode);
530 rtx y2 = gen_reg_rtx (RFmode);
531 rtx e2 = gen_reg_rtx (RFmode);
532 rtx y3 = gen_reg_rtx (RFmode);
533 rtx e3 = gen_reg_rtx (RFmode);
534 rtx e4 = gen_reg_rtx (RFmode);
535 rtx q = gen_reg_rtx (RFmode);
536 rtx r = gen_reg_rtx (RFmode);
537 rtx r1 = gen_reg_rtx (RFmode);
538 rtx cond = gen_reg_rtx (CCImode);
539 rtx zero = CONST0_RTX (RFmode);
540 rtx one = CONST1_RTX (RFmode);
541 rtx status0 = CONST0_RTX (SImode);
542 rtx status1 = CONST1_RTX (SImode);
543 rtx trunc_off = CONST2_RTX (SImode);
545 /* Empty conversions to put inputs into RFmode */
546 emit_insn (gen_extendxfrf2 (a, operands[1]));
547 emit_insn (gen_extendxfrf2 (b, operands[2]));
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
549 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
550 /* e = 1 - (b * y) */
551 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
/* q = a * y */
553 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
554 /* e2 = e + (e * e) */
555 emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
/* e1 = e * e */
557 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
558 /* y1 = y + (y * e2) */
559 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
560 /* e3 = e + (e1 * e1) */
561 emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
562 /* y2 = y + (y1 * e3) */
563 emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
564 /* r = a - (b * q) */
565 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
566 /* e4 = 1 - (b * y2) */
567 emit_insn (gen_m2subrf4_cond (e4, cond, one, b, y2, zero, status1, trunc_off));
568 /* q1 = q + (r * y2) */
569 emit_insn (gen_m2addrf4_cond (q1, cond, q, r, y2, zero, status1, trunc_off));
570 /* y3 = y2 + (y2 * e4) */
571 emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e4, zero, status1, trunc_off));
572 /* r1 = a - (b * q1) */
573 emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q1, zero, status1, trunc_off));
574 /* Q = q1 + (r1 * y3) */
/* The else-arm here is y (the frcpa result), and the step uses status0. */
575 emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r1, y3, y, status0, trunc_off));
576 /* Conversion back into XFmode */
577 emit_insn (gen_truncrfxf2 (operands[0], q_res));
582 ;; Integer division operations
;; Signed SImode division, enabled by TARGET_INLINE_INT_DIV.
;; Both operands are forced into registers and converted to RFmode with
;; expand_float (signed, last argument 0); the RFmode quotient comes from
;; divsi3_internal, is converted to a DImode integer with the signed
;; truncating conversion on status field 1, and its low SImode part is
;; moved into operand 0.
584 (define_expand "divsi3"
585 [(set (match_operand:SI 0 "register_operand" "")
586 (div:SI (match_operand:SI 1 "general_operand" "")
587 (match_operand:SI 2 "general_operand" "")))]
588 "TARGET_INLINE_INT_DIV"
590 rtx op1_rf, op2_rf, op0_rf, op0_di;
592 op0_rf = gen_reg_rtx (RFmode);
593 op0_di = gen_reg_rtx (DImode);
595 if (! register_operand (operands[1], SImode))
596 operands[1] = force_reg (SImode, operands[1]);
597 op1_rf = gen_reg_rtx (RFmode);
598 expand_float (op1_rf, operands[1], 0);
600 if (! register_operand (operands[2], SImode))
601 operands[2] = force_reg (SImode, operands[2]);
602 op2_rf = gen_reg_rtx (RFmode);
603 expand_float (op2_rf, operands[2], 0);
/* Conditional trap on (operands[2] EQ 0); presumably the divide-by-zero
   check -- gen_cond_trap is defined elsewhere.  */
605 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
606 CONST1_RTX (SImode)));
608 emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));
610 emit_insn (gen_fix_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
611 emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
;; Signed SImode remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is computed with divsi3, operand 2 is negated, and the
;; result is produced by gen_maddsi4 from (div, -operand 2, operand 1).
;; NOTE(review): gen_maddsi4 is defined elsewhere; presumably it computes
;; div * (-operand 2) + operand 1 -- confirm.
615 (define_expand "modsi3"
616 [(set (match_operand:SI 0 "register_operand" "")
617 (mod:SI (match_operand:SI 1 "general_operand" "")
618 (match_operand:SI 2 "general_operand" "")))]
619 "TARGET_INLINE_INT_DIV"
621 rtx op2_neg, op1_di, div;
623 div = gen_reg_rtx (SImode);
624 emit_insn (gen_divsi3 (div, operands[1], operands[2]));
626 op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
628 /* This is a trick to get us to reuse the value that we're sure to
629 have already copied to the FP regs. */
630 op1_di = gen_reg_rtx (DImode);
631 convert_move (op1_di, operands[1], 0);
633 emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
634 gen_lowpart (SImode, op1_di)));
;; Unsigned SImode division, enabled by TARGET_INLINE_INT_DIV.
;; Both operands are forced into registers and converted to RFmode with
;; expand_float (unsigned, last argument 1); the RFmode quotient comes from
;; divsi3_internal, is converted to a DImode integer with the unsigned
;; truncating conversion on status field 1, and its low SImode part is
;; moved into operand 0.
638 (define_expand "udivsi3"
639 [(set (match_operand:SI 0 "register_operand" "")
640 (udiv:SI (match_operand:SI 1 "general_operand" "")
641 (match_operand:SI 2 "general_operand" "")))]
642 "TARGET_INLINE_INT_DIV"
644 rtx op1_rf, op2_rf, op0_rf, op0_di;
646 op0_rf = gen_reg_rtx (RFmode);
647 op0_di = gen_reg_rtx (DImode);
649 if (! register_operand (operands[1], SImode))
650 operands[1] = force_reg (SImode, operands[1]);
651 op1_rf = gen_reg_rtx (RFmode);
652 expand_float (op1_rf, operands[1], 1);
654 if (! register_operand (operands[2], SImode))
655 operands[2] = force_reg (SImode, operands[2]);
656 op2_rf = gen_reg_rtx (RFmode);
657 expand_float (op2_rf, operands[2], 1);
/* Conditional trap on (operands[2] EQ 0); presumably the divide-by-zero
   check -- gen_cond_trap is defined elsewhere.  */
659 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
660 CONST1_RTX (SImode)));
662 emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));
664 emit_insn (gen_fixuns_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
665 emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
;; Unsigned SImode remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is computed with udivsi3, operand 2 is negated, and the
;; result is produced by gen_maddsi4 from (div, -operand 2, operand 1).
;; NOTE(review): gen_maddsi4 is defined elsewhere; presumably it computes
;; div * (-operand 2) + operand 1 -- confirm.
669 (define_expand "umodsi3"
670 [(set (match_operand:SI 0 "register_operand" "")
671 (umod:SI (match_operand:SI 1 "general_operand" "")
672 (match_operand:SI 2 "general_operand" "")))]
673 "TARGET_INLINE_INT_DIV"
675 rtx op2_neg, op1_di, div;
677 div = gen_reg_rtx (SImode);
678 emit_insn (gen_udivsi3 (div, operands[1], operands[2]));
680 op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
682 /* This is a trick to get us to reuse the value that we're sure to
683 have already copied to the FP regs. */
684 op1_di = gen_reg_rtx (DImode);
685 convert_move (op1_di, operands[1], 1);
687 emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
688 gen_lowpart (SImode, op1_di)));
;; RFmode core of the 32-bit integer division, shared by divsi3 and
;; udivsi3.  Operands 1 and 2 are the dividend and divisor already
;; converted to RFmode; operand 0 receives the quotient, still in RFmode
;; (the callers convert it back to an integer).  Every step, including the
;; frcpa, uses status1 and no rounding (trunc_off).
692 (define_expand "divsi3_internal"
693 [(set (match_operand:RF 0 "fr_register_operand" "")
694 (float:RF (div:SI (match_operand:RF 1 "fr_register_operand" "")
695 (match_operand:RF 2 "fr_register_operand" ""))))]
696 "TARGET_INLINE_INT_DIV"
700 rtx y = gen_reg_rtx (RFmode);
701 rtx e = gen_reg_rtx (RFmode);
702 rtx e1 = gen_reg_rtx (RFmode);
703 rtx q = gen_reg_rtx (RFmode);
704 rtx q1 = gen_reg_rtx (RFmode);
705 rtx cond = gen_reg_rtx (CCImode);
706 rtx zero = CONST0_RTX (RFmode);
707 rtx one = CONST1_RTX (RFmode);
708 rtx status1 = CONST1_RTX (SImode);
709 rtx trunc_off = CONST2_RTX (SImode);
710 rtx twon34_exp = gen_reg_rtx (DImode);
711 rtx twon34 = gen_reg_rtx (RFmode);
713 /* Load constant 2**(-34).  65501 is 65535 - 34; presumably the biased
   exponent that setf_exp_rf places in the register -- confirm.  */
714 emit_move_insn (twon34_exp, GEN_INT (65501));
715 emit_insn (gen_setf_exp_rf (twon34, twon34_exp));
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
718 emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
719 /* e = 1 - (b * y) */
720 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
/* q = a * y */
722 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
723 /* q1 = q + (q * e) */
724 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1, trunc_off));
725 /* e1 = (2**-34) + (e * e) */
726 emit_insn (gen_m2addrf4_cond (e1, cond, twon34, e, e, zero, status1, trunc_off));
727 /* q2 = q1 + (e1 * q1) */
/* The else-arm here is y (the frcpa result). */
728 emit_insn (gen_m2addrf4_cond (operands[0], cond, q1, e1, q1, y, status1, trunc_off));
;; Signed DImode division, enabled by TARGET_INLINE_INT_DIV.
;; Both operands are forced into registers and converted to RFmode with
;; expand_float (signed, last argument 0).  The RFmode quotient comes from
;; divdi3_internal_lat when TARGET_INLINE_INT_DIV is INL_MIN_LAT, with
;; divdi3_internal_thr as the other choice, and is converted straight into
;; operand 0 with the signed truncating conversion on status field 1.
732 (define_expand "divdi3"
733 [(set (match_operand:DI 0 "register_operand" "")
734 (div:DI (match_operand:DI 1 "general_operand" "")
735 (match_operand:DI 2 "general_operand" "")))]
736 "TARGET_INLINE_INT_DIV"
738 rtx op1_rf, op2_rf, op0_rf;
740 op0_rf = gen_reg_rtx (RFmode);
742 if (! register_operand (operands[1], DImode))
743 operands[1] = force_reg (DImode, operands[1]);
744 op1_rf = gen_reg_rtx (RFmode);
745 expand_float (op1_rf, operands[1], 0);
747 if (! register_operand (operands[2], DImode))
748 operands[2] = force_reg (DImode, operands[2]);
749 op2_rf = gen_reg_rtx (RFmode);
750 expand_float (op2_rf, operands[2], 0);
/* Conditional trap on (operands[2] EQ 0); presumably the divide-by-zero
   check -- gen_cond_trap is defined elsewhere.  */
752 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
753 CONST1_RTX (DImode)));
755 if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
756 emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
758 emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));
760 emit_insn (gen_fix_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
;; Signed DImode remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is computed with divdi3, operand 2 is negated, and the
;; result is produced by gen_madddi4 from (div, -operand 2, operand 1).
;; NOTE(review): gen_madddi4 is defined elsewhere; presumably it computes
;; div * (-operand 2) + operand 1 -- confirm.
;; The template uses mod:DI so that the rtx mode agrees with the DImode
;; destination and sources (it previously read mod:SI).
764 (define_expand "moddi3"
765 [(set (match_operand:DI 0 "register_operand" "")
766 (mod:DI (match_operand:DI 1 "general_operand" "")
767 (match_operand:DI 2 "general_operand" "")))]
768 "TARGET_INLINE_INT_DIV"
772 div = gen_reg_rtx (DImode);
773 emit_insn (gen_divdi3 (div, operands[1], operands[2]));
775 op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
777 emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
;; Unsigned DImode division, enabled by TARGET_INLINE_INT_DIV.
;; Both operands are forced into registers and converted to RFmode with
;; expand_float (unsigned, last argument 1).  The RFmode quotient comes
;; from divdi3_internal_lat when TARGET_INLINE_INT_DIV is INL_MIN_LAT, with
;; divdi3_internal_thr as the other choice, and is converted straight into
;; operand 0 with the unsigned truncating conversion on status field 1.
781 (define_expand "udivdi3"
782 [(set (match_operand:DI 0 "register_operand" "")
783 (udiv:DI (match_operand:DI 1 "general_operand" "")
784 (match_operand:DI 2 "general_operand" "")))]
785 "TARGET_INLINE_INT_DIV"
787 rtx op1_rf, op2_rf, op0_rf;
789 op0_rf = gen_reg_rtx (RFmode);
791 if (! register_operand (operands[1], DImode))
792 operands[1] = force_reg (DImode, operands[1]);
793 op1_rf = gen_reg_rtx (RFmode);
794 expand_float (op1_rf, operands[1], 1);
796 if (! register_operand (operands[2], DImode))
797 operands[2] = force_reg (DImode, operands[2]);
798 op2_rf = gen_reg_rtx (RFmode);
799 expand_float (op2_rf, operands[2], 1);
/* Conditional trap on (operands[2] EQ 0); presumably the divide-by-zero
   check -- gen_cond_trap is defined elsewhere.  */
801 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
802 CONST1_RTX (DImode)));
804 if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
805 emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
807 emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));
809 emit_insn (gen_fixuns_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
;; Unsigned DImode remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is computed with udivdi3, operand 2 is negated, and the
;; result is produced by gen_madddi4 from (div, -operand 2, operand 1).
;; NOTE(review): gen_madddi4 is defined elsewhere; presumably it computes
;; div * (-operand 2) + operand 1 -- confirm.
813 (define_expand "umoddi3"
814 [(set (match_operand:DI 0 "register_operand" "")
815 (umod:DI (match_operand:DI 1 "general_operand" "")
816 (match_operand:DI 2 "general_operand" "")))]
817 "TARGET_INLINE_INT_DIV"
821 div = gen_reg_rtx (DImode);
822 emit_insn (gen_udivdi3 (div, operands[1], operands[2]));
824 op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
826 emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
;; RFmode core of the 64-bit integer division, minimum latency sequence,
;; used by divdi3 and udivdi3.  Operands 1 and 2 are the dividend and
;; divisor already converted to RFmode; operand 0 receives the quotient,
;; still in RFmode.  Quotient and reciprocal estimates are refined side by
;; side.  Every step, including the frcpa, uses status1 and no rounding
;; (trunc_off).
830 (define_expand "divdi3_internal_lat"
831 [(set (match_operand:RF 0 "fr_register_operand" "")
832 (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
833 (match_operand:RF 2 "fr_register_operand" ""))))]
834 "TARGET_INLINE_INT_DIV"
838 rtx y = gen_reg_rtx (RFmode);
839 rtx y1 = gen_reg_rtx (RFmode);
840 rtx y2 = gen_reg_rtx (RFmode);
841 rtx e = gen_reg_rtx (RFmode);
842 rtx e1 = gen_reg_rtx (RFmode);
843 rtx q = gen_reg_rtx (RFmode);
844 rtx q1 = gen_reg_rtx (RFmode);
845 rtx q2 = gen_reg_rtx (RFmode);
846 rtx r = gen_reg_rtx (RFmode);
847 rtx cond = gen_reg_rtx (CCImode);
848 rtx zero = CONST0_RTX (RFmode);
849 rtx one = CONST1_RTX (RFmode);
850 rtx status1 = CONST1_RTX (SImode);
851 rtx trunc_off = CONST2_RTX (SImode);
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
854 emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
855 /* e = 1 - (b * y) */
856 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
/* q = a * y */
858 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
859 /* q1 = q + (q * e) */
860 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1, trunc_off));
/* e1 = e * e */
862 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
863 /* q2 = q1 + (e1 * q1) */
864 emit_insn (gen_m2addrf4_cond (q2, cond, q1, e1, q1, zero, status1, trunc_off));
865 /* y1 = y + (y * e) */
866 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
867 /* r = a - (b * q2) */
868 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
869 /* y2 = y1 + (y1 * e1) */
870 emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
871 /* q3 = q2 + (r * y2) */
/* The else-arm here is y (the frcpa result). */
872 emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
;; RFmode core of the 64-bit integer division, maximum throughput
;; sequence, used by divdi3 and udivdi3.  Operands 1 and 2 are the dividend
;; and divisor already converted to RFmode; operand 0 receives the
;; quotient, still in RFmode.  The reciprocal is refined twice before the
;; quotient is formed and corrected with its remainder.  Every step,
;; including the frcpa, uses status1 and no rounding (trunc_off).
876 (define_expand "divdi3_internal_thr"
877 [(set (match_operand:RF 0 "fr_register_operand" "")
878 (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
879 (match_operand:RF 2 "fr_register_operand" ""))))]
880 "TARGET_INLINE_INT_DIV"
884 rtx y = gen_reg_rtx (RFmode);
885 rtx y1 = gen_reg_rtx (RFmode);
886 rtx y2 = gen_reg_rtx (RFmode);
887 rtx e = gen_reg_rtx (RFmode);
888 rtx e1 = gen_reg_rtx (RFmode);
889 rtx q2 = gen_reg_rtx (RFmode);
890 rtx r = gen_reg_rtx (RFmode);
891 rtx cond = gen_reg_rtx (CCImode);
892 rtx zero = CONST0_RTX (RFmode);
893 rtx one = CONST1_RTX (RFmode);
894 rtx status1 = CONST1_RTX (SImode);
895 rtx trunc_off = CONST2_RTX (SImode);
/* frcpa on (a, b): y is the initial estimate, cond guards the steps below. */
898 emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
899 /* e = 1 - (b * y) */
900 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
901 /* y1 = y + (y * e) */
902 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
/* e1 = e * e */
904 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
905 /* y2 = y1 + (y1 * e1) */
906 emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
/* q2 = y2 * a */
908 emit_insn (gen_mulrf3_cond (q2, cond, y2, a, zero, status1, trunc_off));
909 /* r = a - (b * q2) */
910 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
911 /* q3 = q2 + (r * y2) */
/* The else-arm here is y (the frcpa result). */
912 emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
;; sqrt_approx_rf - emits a single frsqrta instruction.
;;   operand 0: RFmode result register (UNSPEC_FR_SQRT_RECIP_APPROX_RES
;;              of operand 1)
;;   operand 1: RFmode input (FP register or the "G" constant class)
;;   operand 2: CCImode predicate register written by the same instruction
;;   operand 3: constant printed after ".s" in the mnemonic, i.e. the
;;              floating point status field to use
;; The insn is marked non-predicable.
919 (define_insn "sqrt_approx_rf"
920 [(set (match_operand:RF 0 "fr_register_operand" "=f")
921 (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")]
922 UNSPEC_FR_SQRT_RECIP_APPROX_RES))
923 (set (match_operand:CCI 2 "register_operand" "=c")
924 (unspec:CCI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX))
925 (use (match_operand:SI 3 "const_int_operand" ""))]
927 "frsqrta.s%3 %0, %2 = %F1"
928 [(set_attr "itanium_class" "fmisc")
929 (set_attr "predicable" "no")])
;; sqrtsf2 - SFmode square root.  Forwards operands 0 and 1 to one of the
;; two internal expanders: sqrtsf2_internal_lat when TARGET_INLINE_SQRT
;; equals INL_MIN_LAT, sqrtsf2_internal_thr as the alternative.
931 (define_expand "sqrtsf2"
932 [(set (match_operand:SF 0 "fr_register_operand" "=&f")
933 (sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
937 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
938 insn = gen_sqrtsf2_internal_lat (operands[0], operands[1]);
940 insn = gen_sqrtsf2_internal_thr (operands[0], operands[1]);
;; sqrtsf2_internal_thr - inline SFmode square root (the variant sqrtsf2
;; uses when INL_MIN_LAT is not selected).  The input is widened to RFmode,
;; frsqrta supplies the starting approximation y and the predicate cond,
;; and the predicated multiply/add steps below refine it.  The constants
;; 0.5 and 0.375 are loaded as DFmode and extended to RFmode.  The
;; intermediate steps use status1; the approximation and the final step
;; use status0.  g1 and g2 are rounded to single precision (trunc_sgl).
945 (define_expand "sqrtsf2_internal_thr"
946 [(set (match_operand:SF 0 "fr_register_operand" "")
947 (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
950 rtx y = gen_reg_rtx (RFmode);
951 rtx b = gen_reg_rtx (RFmode);
952 rtx g = gen_reg_rtx (RFmode);
953 rtx e = gen_reg_rtx (RFmode);
954 rtx s = gen_reg_rtx (RFmode);
955 rtx f = gen_reg_rtx (RFmode);
956 rtx y1 = gen_reg_rtx (RFmode);
957 rtx g1 = gen_reg_rtx (RFmode);
958 rtx h = gen_reg_rtx (RFmode);
959 rtx d = gen_reg_rtx (RFmode);
960 rtx g2 = gen_reg_rtx (RFmode);
961 rtx cond = gen_reg_rtx (CCImode);
962 rtx zero = CONST0_RTX (RFmode);
963 rtx one = CONST1_RTX (RFmode);
964 rtx c1 = ia64_dconst_0_5();
965 rtx c2 = ia64_dconst_0_375();
966 rtx reg_df_c1 = gen_reg_rtx (DFmode);
967 rtx reg_df_c2 = gen_reg_rtx (DFmode);
968 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
969 rtx reg_rf_c2 = gen_reg_rtx (RFmode);
970 rtx status0 = CONST0_RTX (SImode);
971 rtx status1 = CONST1_RTX (SImode);
972 rtx trunc_sgl = CONST0_RTX (SImode);
973 rtx trunc_off = CONST2_RTX (SImode);
975 /* Put needed constants into registers. */
976 emit_insn (gen_movdf (reg_df_c1, c1));
977 emit_insn (gen_movdf (reg_df_c2, c2));
978 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
979 emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
980 /* Empty conversion to put input into RFmode. */
981 emit_insn (gen_extendsfrf2 (b, operands[1]));
982 /* y = sqrt (1 / b), approximation from frsqrta; also sets cond */
983 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
/* g = b * y */
985 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
986 /* e = 1 - (g * y) */
987 emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
988 /* s = 0.5 + (0.375 * e) */
989 emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
/* f = y * e */
991 emit_insn (gen_mulrf3_cond (f, cond, y, e, zero, status1, trunc_off));
992 /* y1 = y + (f * s) */
993 emit_insn (gen_m2addrf4_cond (y1, cond, y, f, s, zero, status1, trunc_off));
994 /* g1 = single (b * y1) */
995 emit_insn (gen_mulrf3_cond (g1, cond, b, y1, zero, status1, trunc_sgl));
/* h = 0.5 * y1 */
997 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y1, zero, status1, trunc_off));
998 /* d = b - g1 * g1 */
999 emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
1000 /* g2 = single(g1 + (d * h)); final step, run with status0 and with y
   instead of zero in the slot after the inputs */
1001 emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h, y, status0, trunc_sgl));
1002 /* Conversion back into SFmode. */
1003 emit_insn (gen_truncrfsf2 (operands[0], g2));
;; sqrtsf2_internal_lat - inline SFmode square root, the variant sqrtsf2
;; selects when TARGET_INLINE_SQRT == INL_MIN_LAT.  Same set-up as
;; sqrtsf2_internal_thr (constants 0.5 and 0.375 in RFmode, input widened
;; to RFmode, frsqrta approximation y plus predicate cond), but the
;; refinement works on g = b * y and h = 0.5 * y side by side instead of
;; first refining y.  The intermediate steps use status1; the
;; approximation and the final step use status0.
1007 (define_expand "sqrtsf2_internal_lat"
1008 [(set (match_operand:SF 0 "fr_register_operand" "")
1009 (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
1010 "TARGET_INLINE_SQRT"
1012 rtx y = gen_reg_rtx (RFmode);
1013 rtx b = gen_reg_rtx (RFmode);
1014 rtx g = gen_reg_rtx (RFmode);
1015 rtx g1 = gen_reg_rtx (RFmode);
1016 rtx g2 = gen_reg_rtx (RFmode);
1017 rtx e = gen_reg_rtx (RFmode);
1018 rtx s = gen_reg_rtx (RFmode);
1019 rtx f = gen_reg_rtx (RFmode);
1020 rtx f1 = gen_reg_rtx (RFmode);
1021 rtx h = gen_reg_rtx (RFmode);
1022 rtx h1 = gen_reg_rtx (RFmode);
1023 rtx d = gen_reg_rtx (RFmode);
1024 rtx cond = gen_reg_rtx (CCImode);
1025 rtx zero = CONST0_RTX (RFmode);
1026 rtx one = CONST1_RTX (RFmode);
1027 rtx c1 = ia64_dconst_0_5();
1028 rtx c2 = ia64_dconst_0_375();
1029 rtx reg_df_c1 = gen_reg_rtx (DFmode);
1030 rtx reg_df_c2 = gen_reg_rtx (DFmode);
1031 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
1032 rtx reg_rf_c2 = gen_reg_rtx (RFmode);
1033 rtx status0 = CONST0_RTX (SImode);
1034 rtx status1 = CONST1_RTX (SImode);
1035 rtx trunc_sgl = CONST0_RTX (SImode);
1036 rtx trunc_off = CONST2_RTX (SImode);
1038 /* Put needed constants into registers. */
1039 emit_insn (gen_movdf (reg_df_c1, c1));
1040 emit_insn (gen_movdf (reg_df_c2, c2));
1041 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
1042 emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
1043 /* Empty conversion to put input into RFmode. */
1044 emit_insn (gen_extendsfrf2 (b, operands[1]));
1045 /* y = sqrt (1 / b), approximation from frsqrta; also sets cond */
1046 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
/* g = b * y */
1048 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
1049 /* e = 1 - (g * y) */
1050 emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
/* h = 0.5 * y */
1052 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
1053 /* s = 0.5 + (0.375 * e) */
1054 emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
/* f = e * g */
1056 emit_insn (gen_mulrf3_cond (f, cond, e, g, zero, status1, trunc_off));
1057 /* g1 = single (g + (f * s)) */
1058 emit_insn (gen_m2addrf4_cond (g1, cond, g, f, s, zero, status1, trunc_sgl));
/* f1 = e * h */
1060 emit_insn (gen_mulrf3_cond (f1, cond, e, h, zero, status1, trunc_off));
1061 /* d = b - g1 * g1 */
1062 emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
1063 /* h1 = h + (f1 * s) */
1064 emit_insn (gen_m2addrf4_cond (h1, cond, h, f1, s, zero, status1, trunc_off));
1065 /* g2 = single(g1 + (d * h1)); final step, run with status0 and with y
   instead of zero in the slot after the inputs */
1066 emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h1, y, status0, trunc_sgl));
1067 /* Conversion back into SFmode. */
1068 emit_insn (gen_truncrfsf2 (operands[0], g2));
;; sqrtdf2 - DFmode square root, enabled by TARGET_INLINE_SQRT.  Forwards
;; operands 0 and 1 to an internal expander; sqrtdf2_internal_thr is the
;; one defined right after this pattern.
;; NOTE(review): gen_sqrtdf2_internal_lat is referenced below, but no
;; sqrtdf2_internal_lat pattern appears alongside sqrtdf2_internal_thr --
;; confirm that this branch is compiled out or that the pattern exists
;; elsewhere.
1072 (define_expand "sqrtdf2"
1073 [(set (match_operand:DF 0 "fr_register_operand" "=&f")
1074 (sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
1075 "TARGET_INLINE_SQRT"
1079 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
1080 insn = gen_sqrtdf2_internal_lat (operands[0], operands[1]);
1083 insn = gen_sqrtdf2_internal_thr (operands[0], operands[1]);
;; sqrtdf2_internal_thr - inline DFmode square root, enabled by
;; TARGET_INLINE_SQRT.  The input is widened to RFmode, frsqrta supplies
;; the starting approximation y and the predicate cond, and g (-> sqrt b)
;; and h (-> 0.5 / sqrt b) are refined together in two rounds (r, r1)
;; followed by two remainder-based corrections (d, d1).  The intermediate
;; steps use status1 with no rounding; the approximation and the final,
;; result-producing step use status0, and the final step rounds to double
;; precision (trunc_dbl).
1088 (define_expand "sqrtdf2_internal_thr"
1089 [(set (match_operand:DF 0 "fr_register_operand" "")
1090 (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))]
1091 "TARGET_INLINE_SQRT"
1093 rtx y = gen_reg_rtx (RFmode);
1094 rtx b = gen_reg_rtx (RFmode);
1095 rtx g = gen_reg_rtx (RFmode);
1096 rtx g1 = gen_reg_rtx (RFmode);
1097 rtx g2 = gen_reg_rtx (RFmode);
1098 rtx g3 = gen_reg_rtx (RFmode);
1099 rtx g4 = gen_reg_rtx (RFmode);
1100 rtx r = gen_reg_rtx (RFmode);
1101 rtx r1 = gen_reg_rtx (RFmode);
1102 rtx h = gen_reg_rtx (RFmode);
1103 rtx h1 = gen_reg_rtx (RFmode);
1104 rtx h2 = gen_reg_rtx (RFmode);
1105 rtx d = gen_reg_rtx (RFmode);
1106 rtx d1 = gen_reg_rtx (RFmode);
1107 rtx cond = gen_reg_rtx (CCImode);
1108 rtx zero = CONST0_RTX (RFmode);
1109 rtx c1 = ia64_dconst_0_5();
1110 rtx reg_df_c1 = gen_reg_rtx (DFmode);
1111 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
1112 rtx status0 = CONST0_RTX (SImode);
1113 rtx status1 = CONST1_RTX (SImode);
1114 rtx trunc_dbl = CONST1_RTX (SImode);
1115 rtx trunc_off = CONST2_RTX (SImode);
1117 /* Put needed constants into registers. */
1118 emit_insn (gen_movdf (reg_df_c1, c1));
1119 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
1120 /* Empty conversion to put input into RFmode. */
1121 emit_insn (gen_extenddfrf2 (b, operands[1]));
1122 /* y = sqrt (1 / b), approximation from frsqrta; also sets cond */
1123 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
/* g = b * y */
1125 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
/* h = 0.5 * y */
1127 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
1128 /* r = 0.5 - (g * h) */
1129 emit_insn (gen_m2subrf4_cond (r, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
1130 /* g1 = g + (g * r) */
1131 emit_insn (gen_m2addrf4_cond (g1, cond, g, g, r, zero, status1, trunc_off));
1132 /* h1 = h + (h * r) */
1133 emit_insn (gen_m2addrf4_cond (h1, cond, h, h, r, zero, status1, trunc_off));
1134 /* r1 = 0.5 - (g1 * h1) */
1135 emit_insn (gen_m2subrf4_cond (r1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
1136 /* g2 = g1 + (g1 * r1) */
1137 emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, r1, zero, status1, trunc_off));
1138 /* h2 = h1 + (h1 * r1) */
1139 emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, r1, zero, status1, trunc_off));
1140 /* d = b - (g2 * g2) */
1141 emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
1142 /* g3 = g2 + (d * h2) */
1143 emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
1144 /* d1 = b - (g3 * g3) */
1145 emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
1146 /* g4 = double (g3 + (d1 * h2)); final step, run with status0 like the
   frsqrta above and like the final step of the sqrtsf2 expanders, and
   with y instead of zero in the slot after the inputs */
1147 emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h2, y, status0, trunc_dbl));
1148 /* Conversion back into DFmode. */
1149 emit_insn (gen_truncrfdf2 (operands[0], g4));
;; sqrtxf2 - inline XFmode square root, enabled by TARGET_INLINE_SQRT.
;; The input is widened to RFmode, frsqrta supplies the starting
;; approximation y and the predicate cond, and g (-> sqrt b) and
;; h (-> 0.5 / sqrt b) are refined together in three rounds (e, e1, e2)
;; with two remainder-based corrections of g (d, d1).  The intermediate
;; steps use status1; the approximation and the final, result-producing
;; step use status0.  No step requests explicit rounding (trunc_off).
1153 (define_expand "sqrtxf2"
1154 [(set (match_operand:XF 0 "fr_register_operand" "")
1155 (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))]
1156 "TARGET_INLINE_SQRT"
1158 rtx y = gen_reg_rtx (RFmode);
1159 rtx b = gen_reg_rtx (RFmode);
1160 rtx g = gen_reg_rtx (RFmode);
1161 rtx g1 = gen_reg_rtx (RFmode);
1162 rtx g2 = gen_reg_rtx (RFmode);
1163 rtx g3 = gen_reg_rtx (RFmode);
1164 rtx g4 = gen_reg_rtx (RFmode);
1165 rtx e = gen_reg_rtx (RFmode);
1166 rtx e1 = gen_reg_rtx (RFmode);
1167 rtx e2 = gen_reg_rtx (RFmode);
1168 rtx h = gen_reg_rtx (RFmode);
1169 rtx h1 = gen_reg_rtx (RFmode);
1170 rtx h2 = gen_reg_rtx (RFmode);
1171 rtx h3 = gen_reg_rtx (RFmode);
1172 rtx d = gen_reg_rtx (RFmode);
1173 rtx d1 = gen_reg_rtx (RFmode);
1174 rtx cond = gen_reg_rtx (CCImode);
1175 rtx zero = CONST0_RTX (RFmode);
1176 rtx c1 = ia64_dconst_0_5();
1177 rtx reg_df_c1 = gen_reg_rtx (DFmode);
1178 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
1179 rtx status0 = CONST0_RTX (SImode);
1180 rtx status1 = CONST1_RTX (SImode);
1181 rtx trunc_off = CONST2_RTX (SImode);
1183 /* Put needed constants into registers. */
1184 emit_insn (gen_movdf (reg_df_c1, c1));
1185 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
1186 /* Empty conversion to put input into RFmode. */
1187 emit_insn (gen_extendxfrf2 (b, operands[1]));
1188 /* y = sqrt (1 / b), approximation from frsqrta; also sets cond */
1189 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
/* g = b * y */
1191 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
/* h = 0.5 * y */
1193 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
1194 /* e = 0.5 - (g * h) */
1195 emit_insn (gen_m2subrf4_cond (e, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
1196 /* g1 = g + (g * e) */
1197 emit_insn (gen_m2addrf4_cond (g1, cond, g, g, e, zero, status1, trunc_off));
1198 /* h1 = h + (h * e) */
1199 emit_insn (gen_m2addrf4_cond (h1, cond, h, h, e, zero, status1, trunc_off));
1200 /* e1 = 0.5 - (g1 * h1) */
1201 emit_insn (gen_m2subrf4_cond (e1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
1202 /* g2 = g1 + (g1 * e1) */
1203 emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, e1, zero, status1, trunc_off));
1204 /* h2 = h1 + (h1 * e1) */
1205 emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, e1, zero, status1, trunc_off));
1206 /* d = b - (g2 * g2) */
1207 emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
1208 /* e2 = 0.5 - (g2 * h2) */
1209 emit_insn (gen_m2subrf4_cond (e2, cond, reg_rf_c1, g2, h2, zero, status1, trunc_off));
1210 /* g3 = g2 + (d * h2) */
1211 emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
1212 /* h3 = h2 + (e2 * h2) */
1213 emit_insn (gen_m2addrf4_cond (h3, cond, h2, e2, h2, zero, status1, trunc_off));
1214 /* d1 = b - (g3 * g3) */
1215 emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
1216 /* g4 = g3 + (d1 * h3); final step, run with status0 like the frsqrta
   above and like the final step of the sqrtsf2 expanders, and with y
   instead of zero in the slot after the inputs */
1217 emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h3, y, status0, trunc_off));
1218 /* Conversion back into XFmode. */
1219 emit_insn (gen_truncrfxf2 (operands[0], g4));