2 ;; Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for both scalar and vector floating point types supported by VSX
;; Expands to DF, V4SF and V2DF.  The arithmetic, min/max, sqrt, int<->float
;; conversion and rounding patterns below are all written over VSX_B.
22 (define_mode_iterator VSX_B [DF V4SF V2DF])
24 ;; Iterator for the 2 64-bit vector types
;; Expands to V2DF and V2DI; vsx_concat_<mode> iterates over it.
25 (define_mode_iterator VSX_D [V2DF V2DI])
27 ;; Iterator for the 2 32-bit vector types
;; Expands to V4SF and V4SI, i.e. the float and integer vectors whose
;; elements are 32 bits wide.
28 (define_mode_iterator VSX_W [V4SF V4SI])
30 ;; Iterator for the DF types
;; Expands to V2DF and scalar DF; *vsx_float_fix_<mode>2 iterates over it.
31 (define_mode_iterator VSX_DF [V2DF DF])
33 ;; Iterator for vector floating point types supported by VSX
;; Vector-only counterpart of VSX_B (no DF).  Used by the fms/nfma patterns
;; and by the vector compare patterns (vsx_eq/gt/ge and their _p forms).
34 (define_mode_iterator VSX_F [V4SF V2DF])
36 ;; Iterator for logical types supported by VSX
;; All six 128-bit vector modes plus TI.  The xxsel patterns iterate over it.
;; Note that TI is included here but not in VSX_M.
37 (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
39 ;; Iterator for memory move. Handle TImode specially to allow
40 ;; it to use gprs as well as vsx registers.
;; Same vector modes as VSX_L but without TI.  Used by *vsx_mov<mode> and the
;; vsx_load_<mode>/vsx_store_<mode> expanders; TI moves have their own
;; *vsx_movti_64bit and *vsx_movti_32bit patterns.
41 (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
43 ;; Map into the appropriate load/store name based on the type
44 (define_mode_attr VSm [(V16QI "vw4")
53 ;; Map into the appropriate suffix based on the type
54 (define_mode_attr VSs [(V16QI "sp")
64 ;; Map the register class used
65 (define_mode_attr VSr [(V16QI "v")
75 ;; Map the register class used for float<->int conversions
76 (define_mode_attr VSr2 [(V2DF "wd")
80 (define_mode_attr VSr3 [(V2DF "wa")
84 ;; Map the register class for sp<->dp float conversions, destination
85 (define_mode_attr VSr4 [(SF "ws")
90 ;; Map the register class for sp<->dp float conversions, source
91 (define_mode_attr VSr5 [(SF "ws")
96 ;; Same size integer type for floating point data
97 (define_mode_attr VSi [(V4SF "v4si")
101 (define_mode_attr VSI [(V4SF "V4SI")
105 ;; Word size for same size conversion
106 (define_mode_attr VSc [(V4SF "w")
110 ;; Map into either s or v, depending on whether this is a scalar or vector
112 (define_mode_attr VSv [(V16QI "v")
120 ;; Appropriate type for add ops (and other simple FP ops)
121 (define_mode_attr VStype_simple [(V2DF "vecdouble")
125 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
129 ;; Appropriate type for multiply ops
130 (define_mode_attr VStype_mul [(V2DF "vecdouble")
134 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
138 ;; Appropriate type for divide ops.
139 (define_mode_attr VStype_div [(V2DF "vecdiv")
143 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
147 ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
149 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
153 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
157 ;; Iterator and modes for sp<->dp conversions
158 ;; Because scalar SF values are represented internally as double, use the
159 ;; V4SF type to represent this rather than SF.
;; Source modes for the vsx_<VS_spdp_insn> conversion pattern.  For each mode
;; the VS_spdp_res, VS_spdp_insn and VS_spdp_type attributes give the result
;; mode, the instruction name and the scheduling type respectively.
160 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
162 (define_mode_attr VS_spdp_res [(DF "V4SF")
166 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
170 (define_mode_attr VS_spdp_type [(DF "fp")
174 ;; Map the scalar mode for a vector type
175 (define_mode_attr VS_scalar [(V2DF "DF")
182 ;; Map to a double-sized vector mode
183 (define_mode_attr VS_double [(V4SI "V8SI")
188 ;; Constants for creating unspecs
189 (define_c_enum "unspec"
211 (define_insn "*vsx_mov<mode>"
212 [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
213 (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
214 "VECTOR_MEM_VSX_P (<MODE>mode)
215 && (register_operand (operands[0], <MODE>mode)
216 || register_operand (operands[1], <MODE>mode))"
218 return rs6000_output_move_128bit (operands);
220 [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
221 (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
223 ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
224 ;; use of TImode is for unions. However for plain data movement, slightly
225 ;; favor the vector loads
226 (define_insn "*vsx_movti_64bit"
227 [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
228 (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
229 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
230 && (register_operand (operands[0], TImode)
231 || register_operand (operands[1], TImode))"
233 return rs6000_output_move_128bit (operands);
235 [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
236 (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
238 (define_insn "*vsx_movti_32bit"
239 [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
240 (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))]
241 "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
242 && (register_operand (operands[0], TImode)
243 || register_operand (operands[1], TImode))"
245 switch (which_alternative)
248 return "stxvd2x %x1,%y0";
251 return "lxvd2x %x0,%y1";
254 return "xxlor %x0,%x1,%x1";
257 return "xxlxor %x0,%x0,%x0";
260 return output_vec_const_move (operands);
263 return "stvx %1,%y0";
270 return \"stswi %1,%P0,16\";
276 /* If the address is not used in the output, we can use lsi. Otherwise,
277 fall through to generating four loads. */
279 && ! reg_overlap_mentioned_p (operands[0], operands[1]))
280 return \"lswi %0,%P1,16\";
281 /* ... fall through ... */
291 [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
292 (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
293 (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
294 (const_string "always")
295 (const_string "conditional")))])
297 ;; Explicit load/store expanders for the builtin functions
298 (define_expand "vsx_load_<mode>"
299 [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
300 (match_operand:VSX_M 1 "memory_operand" ""))]
301 "VECTOR_MEM_VSX_P (<MODE>mode)"
304 (define_expand "vsx_store_<mode>"
305 [(set (match_operand:VSX_M 0 "memory_operand" "")
306 (match_operand:VSX_M 1 "vsx_register_operand" ""))]
307 "VECTOR_MEM_VSX_P (<MODE>mode)"
311 ;; VSX scalar and vector floating point arithmetic instructions
;; Floating point addition for each VSX_B mode (DF, V4SF, V2DF).
;; Alternative 1 uses the mode's own register class (<VSr>); alternative 2
;; accepts the "wa" class and is slightly disparaged ("?").  The mnemonic is
;; assembled from <VSv> (scalar/vector letter) and <VSs> (type suffix).
312 (define_insn "*vsx_add<mode>3"
313 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
314 (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
315 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
316 "VECTOR_UNIT_VSX_P (<MODE>mode)"
317 "x<VSv>add<VSs> %x0,%x1,%x2"
318 [(set_attr "type" "<VStype_simple>")
319 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point subtraction (operand 1 minus operand 2) for each VSX_B mode.
;; Same two-alternative layout and "simple" scheduling attributes as
;; *vsx_add<mode>3.
321 (define_insn "*vsx_sub<mode>3"
322 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
323 (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
324 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
325 "VECTOR_UNIT_VSX_P (<MODE>mode)"
326 "x<VSv>sub<VSs> %x0,%x1,%x2"
327 [(set_attr "type" "<VStype_simple>")
328 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point multiplication for each VSX_B mode.  Unlike add/sub, the
;; scheduling attributes come from the multiply-specific VStype_mul and
;; VSfptype_mul maps.
330 (define_insn "*vsx_mul<mode>3"
331 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
332 (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
333 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
334 "VECTOR_UNIT_VSX_P (<MODE>mode)"
335 "x<VSv>mul<VSs> %x0,%x1,%x2"
336 [(set_attr "type" "<VStype_mul>")
337 (set_attr "fp_type" "<VSfptype_mul>")])
;; Floating point division (operand 1 divided by operand 2) for each VSX_B
;; mode.  Scheduling attributes come from the divide-specific VStype_div and
;; VSfptype_div maps.
339 (define_insn "*vsx_div<mode>3"
340 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
341 (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
342 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
343 "VECTOR_UNIT_VSX_P (<MODE>mode)"
344 "x<VSv>div<VSs> %x0,%x1,%x2"
345 [(set_attr "type" "<VStype_div>")
346 (set_attr "fp_type" "<VSfptype_div>")])
348 ;; *tdiv* instruction returning the FG flag
349 (define_expand "vsx_tdiv<mode>3_fg"
351 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
352 (match_operand:VSX_B 2 "vsx_register_operand" "")]
354 (set (match_operand:SI 0 "gpc_reg_operand" "")
357 "VECTOR_UNIT_VSX_P (<MODE>mode)"
359 operands[3] = gen_reg_rtx (CCFPmode);
362 ;; *tdiv* instruction returning the FE flag
363 (define_expand "vsx_tdiv<mode>3_fe"
365 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
366 (match_operand:VSX_B 2 "vsx_register_operand" "")]
368 (set (match_operand:SI 0 "gpc_reg_operand" "")
371 "VECTOR_UNIT_VSX_P (<MODE>mode)"
373 operands[3] = gen_reg_rtx (CCFPmode);
376 (define_insn "*vsx_tdiv<mode>3_internal"
377 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
378 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
379 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
381 "VECTOR_UNIT_VSX_P (<MODE>mode)"
382 "x<VSv>tdiv<VSs> %0,%x1,%x2"
383 [(set_attr "type" "<VStype_simple>")
384 (set_attr "fp_type" "<VSfptype_simple>")])
386 (define_insn "vsx_fre<mode>2"
387 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
388 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
390 "VECTOR_UNIT_VSX_P (<MODE>mode)"
391 "x<VSv>re<VSs> %x0,%x1"
392 [(set_attr "type" "<VStype_simple>")
393 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point negation for each VSX_B mode; a single-input pattern that
;; emits x<VSv>neg<VSs> and is scheduled as a "simple" operation.
395 (define_insn "*vsx_neg<mode>2"
396 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
397 (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
398 "VECTOR_UNIT_VSX_P (<MODE>mode)"
399 "x<VSv>neg<VSs> %x0,%x1"
400 [(set_attr "type" "<VStype_simple>")
401 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point absolute value for each VSX_B mode; emits x<VSv>abs<VSs>
;; and is scheduled as a "simple" operation.
403 (define_insn "*vsx_abs<mode>2"
404 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
405 (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
406 "VECTOR_UNIT_VSX_P (<MODE>mode)"
407 "x<VSv>abs<VSs> %x0,%x1"
408 [(set_attr "type" "<VStype_simple>")
409 (set_attr "fp_type" "<VSfptype_simple>")])
411 (define_insn "vsx_nabs<mode>2"
412 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
415 (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))]
416 "VECTOR_UNIT_VSX_P (<MODE>mode)"
417 "x<VSv>nabs<VSs> %x0,%x1"
418 [(set_attr "type" "<VStype_simple>")
419 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point maximum (smax) for each VSX_B mode; emits x<VSv>max<VSs>.
;; NOTE(review): this name has no leading "*", whereas the matching minimum
;; pattern is "*vsx_smin<mode>3" -- confirm the difference is intentional.
421 (define_insn "vsx_smax<mode>3"
422 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
423 (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
424 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
425 "VECTOR_UNIT_VSX_P (<MODE>mode)"
426 "x<VSv>max<VSs> %x0,%x1,%x2"
427 [(set_attr "type" "<VStype_simple>")
428 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point minimum (smin) for each VSX_B mode; emits x<VSv>min<VSs>
;; and is scheduled as a "simple" operation.
430 (define_insn "*vsx_smin<mode>3"
431 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
432 (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
433 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
434 "VECTOR_UNIT_VSX_P (<MODE>mode)"
435 "x<VSv>min<VSs> %x0,%x1,%x2"
436 [(set_attr "type" "<VStype_simple>")
437 (set_attr "fp_type" "<VSfptype_simple>")])
439 ;; Special VSX version of smin/smax for single precision floating point. Since
440 ;; both numbers are rounded to single precision, we can just use the DP version
441 ;; of the instruction.
;; SF maximum implemented with the double precision xsmaxdp instruction.
;; The enable test is therefore on DFmode, not SFmode, and every operand uses
;; the "f" constraint.  Scheduled with the double precision add/sub fp_type.
443 (define_insn "*vsx_smaxsf3"
444 [(set (match_operand:SF 0 "vsx_register_operand" "=f")
445 (smax:SF (match_operand:SF 1 "vsx_register_operand" "f")
446 (match_operand:SF 2 "vsx_register_operand" "f")))]
447 "VECTOR_UNIT_VSX_P (DFmode)"
448 "xsmaxdp %x0,%x1,%x2"
449 [(set_attr "type" "fp")
450 (set_attr "fp_type" "fp_addsub_d")])
;; SF minimum implemented with the double precision xsmindp instruction;
;; mirrors *vsx_smaxsf3 (DFmode enable test, "f" constraints, fp_addsub_d).
452 (define_insn "*vsx_sminsf3"
453 [(set (match_operand:SF 0 "vsx_register_operand" "=f")
454 (smin:SF (match_operand:SF 1 "vsx_register_operand" "f")
455 (match_operand:SF 2 "vsx_register_operand" "f")))]
456 "VECTOR_UNIT_VSX_P (DFmode)"
457 "xsmindp %x0,%x1,%x2"
458 [(set_attr "type" "fp")
459 (set_attr "fp_type" "fp_addsub_d")])
;; Floating point square root for each VSX_B mode; emits x<VSv>sqrt<VSs>.
;; Scheduling attributes come from the sqrt-specific VStype_sqrt and
;; VSfptype_sqrt maps.
461 (define_insn "*vsx_sqrt<mode>2"
462 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
463 (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
464 "VECTOR_UNIT_VSX_P (<MODE>mode)"
465 "x<VSv>sqrt<VSs> %x0,%x1"
466 [(set_attr "type" "<VStype_sqrt>")
467 (set_attr "fp_type" "<VSfptype_sqrt>")])
469 (define_insn "*vsx_rsqrte<mode>2"
470 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
471 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
473 "VECTOR_UNIT_VSX_P (<MODE>mode)"
474 "x<VSv>rsqrte<VSs> %x0,%x1"
475 [(set_attr "type" "<VStype_simple>")
476 (set_attr "fp_type" "<VSfptype_simple>")])
478 ;; *tsqrt* returning the fg flag
479 (define_expand "vsx_tsqrt<mode>2_fg"
481 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
483 (set (match_operand:SI 0 "gpc_reg_operand" "")
486 "VECTOR_UNIT_VSX_P (<MODE>mode)"
488 operands[3] = gen_reg_rtx (CCFPmode);
491 ;; *tsqrt* returning the fe flag
492 (define_expand "vsx_tsqrt<mode>2_fe"
494 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
496 (set (match_operand:SI 0 "gpc_reg_operand" "")
499 "VECTOR_UNIT_VSX_P (<MODE>mode)"
501 operands[3] = gen_reg_rtx (CCFPmode);
504 (define_insn "*vsx_tsqrt<mode>2_internal"
505 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
506 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
508 "VECTOR_UNIT_VSX_P (<MODE>mode)"
509 "x<VSv>tsqrt<VSs> %0,%x1"
510 [(set_attr "type" "<VStype_simple>")
511 (set_attr "fp_type" "<VSfptype_simple>")])
513 ;; Fused vector multiply/add instructions.  Support the classical DF versions of
514 ;; fma, which allows the target to be a separate register from the 3 inputs.
515 ;; Under VSX, the target must be either the addend or the first multiply.
516 ;; Where we can, also do the same for the Altivec V4SF fmas.
518 (define_insn "*vsx_fmadf4"
519 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
521 (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
522 (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
523 (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))]
524 "VECTOR_UNIT_VSX_P (DFmode)"
526 xsmaddadp %x0,%x1,%x2
527 xsmaddmdp %x0,%x1,%x3
528 xsmaddadp %x0,%x1,%x2
529 xsmaddmdp %x0,%x1,%x3
531 [(set_attr "type" "fp")
532 (set_attr "fp_type" "fp_maddsub_d")])
;; V4SF fused multiply-add.  In the four VSX alternatives the destination is
;; tied (matching constraint "0") either to operand 3, giving the xvmaddasp
;; form, or to operand 2, giving the xvmaddmsp form.  The preferred
;; alternatives use the vector-float class "wf", as *vsx_nfmsv4sf4 does;
;; "ws" is the class the scalar DF patterns use.  The last alternative ("v")
;; is unchanged.
534 (define_insn "*vsx_fmav4sf4"
535 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
537 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
538 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
539 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
540 "VECTOR_UNIT_VSX_P (V4SFmode)"
542 xvmaddasp %x0,%x1,%x2
543 xvmaddmsp %x0,%x1,%x3
544 xvmaddasp %x0,%x1,%x2
545 xvmaddmsp %x0,%x1,%x3
547 [(set_attr "type" "vecfloat")])
;; V2DF fused multiply-add.  The destination is tied (matching constraint
;; "0") either to operand 3, giving the xvmaddadp form, or to operand 2,
;; giving the xvmaddmdp form.  The preferred alternatives use the
;; vector-double class "wd", as *vsx_nfmsv2df4 does; "ws" is the class the
;; scalar DF patterns use.
549 (define_insn "*vsx_fmav2df4"
550 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
552 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
553 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
554 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
555 "VECTOR_UNIT_VSX_P (V2DFmode)"
557 xvmaddadp %x0,%x1,%x2
558 xvmaddmdp %x0,%x1,%x3
559 xvmaddadp %x0,%x1,%x2
560 xvmaddmdp %x0,%x1,%x3"
561 [(set_attr "type" "vecdouble")])
563 (define_insn "*vsx_fmsdf4"
564 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
566 (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
567 (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
569 (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
570 "VECTOR_UNIT_VSX_P (DFmode)"
572 xsmsubadp %x0,%x1,%x2
573 xsmsubmdp %x0,%x1,%x3
574 xsmsubadp %x0,%x1,%x2
575 xsmsubmdp %x0,%x1,%x3
577 [(set_attr "type" "fp")
578 (set_attr "fp_type" "fp_maddsub_d")])
580 (define_insn "*vsx_fms<mode>4"
581 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
583 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
584 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
586 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
587 "VECTOR_UNIT_VSX_P (<MODE>mode)"
589 x<VSv>msuba<VSs> %x0,%x1,%x2
590 x<VSv>msubm<VSs> %x0,%x1,%x3
591 x<VSv>msuba<VSs> %x0,%x1,%x2
592 x<VSv>msubm<VSs> %x0,%x1,%x3"
593 [(set_attr "type" "<VStype_mul>")])
595 (define_insn "*vsx_nfmadf4"
596 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
599 (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d")
600 (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
601 (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
602 "VECTOR_UNIT_VSX_P (DFmode)"
604 xsnmaddadp %x0,%x1,%x2
605 xsnmaddmdp %x0,%x1,%x3
606 xsnmaddadp %x0,%x1,%x2
607 xsnmaddmdp %x0,%x1,%x3
609 [(set_attr "type" "fp")
610 (set_attr "fp_type" "fp_maddsub_d")])
612 (define_insn "*vsx_nfma<mode>4"
613 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
616 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
617 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
618 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
619 "VECTOR_UNIT_VSX_P (<MODE>mode)"
621 x<VSv>nmadda<VSs> %x0,%x1,%x2
622 x<VSv>nmaddm<VSs> %x0,%x1,%x3
623 x<VSv>nmadda<VSs> %x0,%x1,%x2
624 x<VSv>nmaddm<VSs> %x0,%x1,%x3"
625 [(set_attr "type" "<VStype_mul>")
626 (set_attr "fp_type" "<VSfptype_mul>")])
628 (define_insn "*vsx_nfmsdf4"
629 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
632 (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
633 (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
635 (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))]
636 "VECTOR_UNIT_VSX_P (DFmode)"
638 xsnmsubadp %x0,%x1,%x2
639 xsnmsubmdp %x0,%x1,%x3
640 xsnmsubadp %x0,%x1,%x2
641 xsnmsubmdp %x0,%x1,%x3
643 [(set_attr "type" "fp")
644 (set_attr "fp_type" "fp_maddsub_d")])
646 (define_insn "*vsx_nfmsv4sf4"
647 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
650 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
651 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
653 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
654 "VECTOR_UNIT_VSX_P (V4SFmode)"
656 xvnmsubasp %x0,%x1,%x2
657 xvnmsubmsp %x0,%x1,%x3
658 xvnmsubasp %x0,%x1,%x2
659 xvnmsubmsp %x0,%x1,%x3
660 vnmsubfp %0,%1,%2,%3"
661 [(set_attr "type" "vecfloat")])
663 (define_insn "*vsx_nfmsv2df4"
664 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
667 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
668 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
670 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
671 "VECTOR_UNIT_VSX_P (V2DFmode)"
673 xvnmsubadp %x0,%x1,%x2
674 xvnmsubmdp %x0,%x1,%x3
675 xvnmsubadp %x0,%x1,%x2
676 xvnmsubmdp %x0,%x1,%x3"
677 [(set_attr "type" "vecdouble")])
679 ;; Vector conditional expressions (no scalar version for these instructions)
;; Vector floating point "equal" compare for V4SF/V2DF.  The result is a
;; VSX_F-mode value written to operand 0; emits xvcmpeq<VSs>.
;; NOTE(review): this name has no leading "*", whereas "*vsx_ge<mode>" has
;; one -- confirm the difference is intentional.
680 (define_insn "vsx_eq<mode>"
681 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
682 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
683 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
684 "VECTOR_UNIT_VSX_P (<MODE>mode)"
685 "xvcmpeq<VSs> %x0,%x1,%x2"
686 [(set_attr "type" "<VStype_simple>")
687 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector floating point "greater than" compare (operand 1 > operand 2) for
;; V4SF/V2DF; emits xvcmpgt<VSs> and writes a VSX_F-mode result.
;; NOTE(review): this name has no leading "*", whereas "*vsx_ge<mode>" has
;; one -- confirm the difference is intentional.
689 (define_insn "vsx_gt<mode>"
690 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
691 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
692 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
693 "VECTOR_UNIT_VSX_P (<MODE>mode)"
694 "xvcmpgt<VSs> %x0,%x1,%x2"
695 [(set_attr "type" "<VStype_simple>")
696 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector floating point "greater than or equal" compare (operand 1 >=
;; operand 2) for V4SF/V2DF; emits xvcmpge<VSs> and writes a VSX_F-mode
;; result.
698 (define_insn "*vsx_ge<mode>"
699 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
700 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
701 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
702 "VECTOR_UNIT_VSX_P (<MODE>mode)"
703 "xvcmpge<VSs> %x0,%x1,%x2"
704 [(set_attr "type" "<VStype_simple>")
705 (set_attr "fp_type" "<VSfptype_simple>")])
707 ;; Floating point scalar compare
;; Scalar DF compare that sets a CCFP condition register (operand 0) using
;; xscmpudp.  Enabled only when hard float, FPRs and double float are all
;; available and DFmode is handled by the VSX unit.  The inputs use
;; gpc_reg_operand with the "ws" class first and the "wa" class second.
708 (define_insn "*vsx_cmpdf_internal1"
709 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y")
710 (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa")
711 (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))]
712 "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
713 && VECTOR_UNIT_VSX_P (DFmode)"
714 "xscmpudp %0,%x1,%x2"
715 [(set_attr "type" "fpcompare")])
717 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
718 ;; indicate a combined status
719 (define_insn "*vsx_eq_<mode>_p"
722 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
723 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
725 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
726 (eq:VSX_F (match_dup 1)
728 "VECTOR_UNIT_VSX_P (<MODE>mode)"
729 "xvcmpeq<VSs>. %x0,%x1,%x2"
730 [(set_attr "type" "<VStype_simple>")])
732 (define_insn "*vsx_gt_<mode>_p"
735 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
736 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
738 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
739 (gt:VSX_F (match_dup 1)
741 "VECTOR_UNIT_VSX_P (<MODE>mode)"
742 "xvcmpgt<VSs>. %x0,%x1,%x2"
743 [(set_attr "type" "<VStype_simple>")])
745 (define_insn "*vsx_ge_<mode>_p"
748 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
749 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
751 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
752 (ge:VSX_F (match_dup 1)
754 "VECTOR_UNIT_VSX_P (<MODE>mode)"
755 "xvcmpge<VSs>. %x0,%x1,%x2"
756 [(set_attr "type" "<VStype_simple>")])
759 (define_insn "*vsx_xxsel<mode>"
760 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
762 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
763 (match_operand:VSX_L 4 "zero_constant" ""))
764 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
765 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
766 "VECTOR_MEM_VSX_P (<MODE>mode)"
767 "xxsel %x0,%x3,%x2,%x1"
768 [(set_attr "type" "vecperm")])
770 (define_insn "*vsx_xxsel<mode>_uns"
771 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
773 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
774 (match_operand:VSX_L 4 "zero_constant" ""))
775 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
776 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
777 "VECTOR_MEM_VSX_P (<MODE>mode)"
778 "xxsel %x0,%x3,%x2,%x1"
779 [(set_attr "type" "vecperm")])
782 (define_insn "vsx_copysign<mode>3"
783 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
785 [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
786 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
788 "VECTOR_UNIT_VSX_P (<MODE>mode)"
789 "x<VSv>cpsgn<VSs> %x0,%x2,%x1"
790 [(set_attr "type" "<VStype_simple>")
791 (set_attr "fp_type" "<VSfptype_simple>")])
793 ;; For the conversions, limit the register class for the integer value to be
794 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
795 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
796 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
797 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md.
;; Signed integer -> floating point conversion.  The input has the same-size
;; integer mode <VSI> and takes its register classes from <VSr2>/<VSr3>; the
;; result is the VSX_B mode.  The pattern name embeds the lowercase integer
;; mode (<VSi>); the mnemonic is x<VSv>cvsx<VSc><VSs>.
798 (define_insn "vsx_float<VSi><mode>2"
799 [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
800 (float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
801 "VECTOR_UNIT_VSX_P (<MODE>mode)"
802 "x<VSv>cvsx<VSc><VSs> %x0,%x1"
803 [(set_attr "type" "<VStype_simple>")
804 (set_attr "fp_type" "<VSfptype_simple>")])
;; Unsigned integer -> floating point conversion; identical in shape to
;; vsx_float<VSi><mode>2 but uses unsigned_float and the "cvux" mnemonic.
806 (define_insn "vsx_floatuns<VSi><mode>2"
807 [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
808 (unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
809 "VECTOR_UNIT_VSX_P (<MODE>mode)"
810 "x<VSv>cvux<VSc><VSs> %x0,%x1"
811 [(set_attr "type" "<VStype_simple>")
812 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point -> signed integer conversion (fix).  The result has the
;; same-size integer mode <VSI> and takes its register classes from
;; <VSr2>/<VSr3>; the input is the VSX_B mode.  Emits
;; x<VSv>cv<VSs>sx<VSc>s.
814 (define_insn "vsx_fix_trunc<mode><VSi>2"
815 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
816 (fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
817 "VECTOR_UNIT_VSX_P (<MODE>mode)"
818 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
819 [(set_attr "type" "<VStype_simple>")
820 (set_attr "fp_type" "<VSfptype_simple>")])
;; Floating point -> unsigned integer conversion (unsigned_fix); identical in
;; shape to vsx_fix_trunc<mode><VSi>2 but emits the "ux" form of the
;; mnemonic.
822 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
823 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
824 (unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
825 "VECTOR_UNIT_VSX_P (<MODE>mode)"
826 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
827 [(set_attr "type" "<VStype_simple>")
828 (set_attr "fp_type" "<VSfptype_simple>")])
830 ;; Math rounding functions
;; Rounding pattern represented as UNSPEC_VSX_ROUND_I.  The pattern name is
;; "vsx_" followed by the mnemonic it emits, x<VSv>r<VSs>i, so each VSX_B
;; mode produces a distinct name.
831 (define_insn "vsx_x<VSv>r<VSs>i"
832 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
833 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
834 UNSPEC_VSX_ROUND_I))]
835 "VECTOR_UNIT_VSX_P (<MODE>mode)"
836 "x<VSv>r<VSs>i %x0,%x1"
837 [(set_attr "type" "<VStype_simple>")
838 (set_attr "fp_type" "<VSfptype_simple>")])
;; Rounding pattern represented as UNSPEC_VSX_ROUND_IC.  As with the "i"
;; variant, the pattern name is "vsx_" followed by the emitted mnemonic,
;; x<VSv>r<VSs>ic.
840 (define_insn "vsx_x<VSv>r<VSs>ic"
841 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
842 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
843 UNSPEC_VSX_ROUND_IC))]
844 "VECTOR_UNIT_VSX_P (<MODE>mode)"
845 "x<VSv>r<VSs>ic %x0,%x1"
846 [(set_attr "type" "<VStype_simple>")
847 (set_attr "fp_type" "<VSfptype_simple>")])
;; Truncation that keeps the floating point mode: written as (fix:VSX_B ...)
;; so source and result share one mode.  Emits x<VSv>r<VSs>iz, the same
;; mnemonic that *vsx_b2trunc<mode>2 emits for its unspec form.
849 (define_insn "vsx_btrunc<mode>2"
850 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
851 (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
852 "VECTOR_UNIT_VSX_P (<MODE>mode)"
853 "x<VSv>r<VSs>iz %x0,%x1"
854 [(set_attr "type" "<VStype_simple>")
855 (set_attr "fp_type" "<VSfptype_simple>")])
857 (define_insn "*vsx_b2trunc<mode>2"
858 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
859 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
861 "VECTOR_UNIT_VSX_P (<MODE>mode)"
862 "x<VSv>r<VSs>iz %x0,%x1"
863 [(set_attr "type" "<VStype_simple>")
864 (set_attr "fp_type" "<VSfptype_simple>")])
866 (define_insn "vsx_floor<mode>2"
867 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
868 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
870 "VECTOR_UNIT_VSX_P (<MODE>mode)"
871 "x<VSv>r<VSs>im %x0,%x1"
872 [(set_attr "type" "<VStype_simple>")
873 (set_attr "fp_type" "<VSfptype_simple>")])
875 (define_insn "vsx_ceil<mode>2"
876 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
877 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
879 "VECTOR_UNIT_VSX_P (<MODE>mode)"
880 "x<VSv>r<VSs>ip %x0,%x1"
881 [(set_attr "type" "<VStype_simple>")
882 (set_attr "fp_type" "<VSfptype_simple>")])
885 ;; VSX convert to/from double vector
887 ;; Convert between single and double precision
888 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
889 ;; scalar single precision instructions internally use the double format.
890 ;; Prefer the altivec registers, since we likely will need to do a vperm
891 (define_insn "vsx_<VS_spdp_insn>"
892 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?wa")
893 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,wa")]
895 "VECTOR_UNIT_VSX_P (<MODE>mode)"
896 "<VS_spdp_insn> %x0,%x1"
897 [(set_attr "type" "<VS_spdp_type>")])
899 ;; xscvspdp, represent the scalar SF type as V4SF
900 (define_insn "vsx_xscvspdp"
901 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
902 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
904 "VECTOR_UNIT_VSX_P (V4SFmode)"
906 [(set_attr "type" "fp")])
908 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
909 ;; format of scalars is actually DF.
910 (define_insn "vsx_xscvdpsp_scalar"
911 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
912 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
914 "VECTOR_UNIT_VSX_P (V4SFmode)"
916 [(set_attr "type" "fp")])
918 ;; Same as vsx_xscvspdp, but use SF as the type
919 (define_insn "vsx_xscvspdp_scalar2"
920 [(set (match_operand:SF 0 "vsx_register_operand" "=f")
921 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
923 "VECTOR_UNIT_VSX_P (V4SFmode)"
925 [(set_attr "type" "fp")])
927 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
928 (define_insn "vsx_xscvdpspn"
929 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
930 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
931 UNSPEC_VSX_CVDPSPN))]
934 [(set_attr "type" "fp")])
936 (define_insn "vsx_xscvspdpn"
937 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
938 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
939 UNSPEC_VSX_CVSPDPN))]
942 [(set_attr "type" "fp")])
944 (define_insn "vsx_xscvdpspn_scalar"
945 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
946 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
947 UNSPEC_VSX_CVDPSPN))]
950 [(set_attr "type" "fp")])
952 ;; Used by direct move to move a SFmode value from GPR to VSX register
953 (define_insn "vsx_xscvspdpn_directmove"
954 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
955 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
956 UNSPEC_VSX_CVSPDPN))]
959 [(set_attr "type" "fp")])
961 ;; Convert from 64-bit to 32-bit types
962 ;; Note, favor the Altivec registers since the usual use of these instructions
963 ;; is in vector converts and we need to use the Altivec vperm instruction.
965 (define_insn "vsx_xvcvdpsxws"
966 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
967 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
968 UNSPEC_VSX_CVDPSXWS))]
969 "VECTOR_UNIT_VSX_P (V2DFmode)"
971 [(set_attr "type" "vecdouble")])
973 (define_insn "vsx_xvcvdpuxws"
974 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
975 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
976 UNSPEC_VSX_CVDPUXWS))]
977 "VECTOR_UNIT_VSX_P (V2DFmode)"
979 [(set_attr "type" "vecdouble")])
981 (define_insn "vsx_xvcvsxdsp"
982 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
983 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
984 UNSPEC_VSX_CVSXDSP))]
985 "VECTOR_UNIT_VSX_P (V2DFmode)"
987 [(set_attr "type" "vecfloat")])
989 (define_insn "vsx_xvcvuxdsp"
990 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
991 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
992 UNSPEC_VSX_CVUXDSP))]
993 "VECTOR_UNIT_VSX_P (V2DFmode)"
995 [(set_attr "type" "vecdouble")])
997 ;; Convert from 32-bit to 64-bit types
998 (define_insn "vsx_xvcvsxwdp"
999 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1000 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1001 UNSPEC_VSX_CVSXWDP))]
1002 "VECTOR_UNIT_VSX_P (V2DFmode)"
1004 [(set_attr "type" "vecdouble")])
;; Same shape as vsx_xvcvsxwdp (V4SI source, V2DF result) but wrapping
;; UNSPEC_VSX_CVUXWDP.  Enabled when VECTOR_UNIT_VSX_P (V2DFmode).
1006 (define_insn "vsx_xvcvuxwdp"
1007 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1008 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1009 UNSPEC_VSX_CVUXWDP))]
1010 "VECTOR_UNIT_VSX_P (V2DFmode)"
1012 [(set_attr "type" "vecdouble")])
;; V4SF source -> V2DI result through UNSPEC_VSX_CVSPSXDS; emits a single
;; xvcvspsxds.  The result prefers an Altivec register ("v"), with "?wa" as
;; the disparaged fallback.  Enabled when VECTOR_UNIT_VSX_P (V2DFmode).
1014 (define_insn "vsx_xvcvspsxds"
1015 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1016 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1017 UNSPEC_VSX_CVSPSXDS))]
1018 "VECTOR_UNIT_VSX_P (V2DFmode)"
1019 "xvcvspsxds %x0,%x1"
1020 [(set_attr "type" "vecdouble")])
;; V4SF source -> V2DI result through UNSPEC_VSX_CVSPUXDS; emits a single
;; xvcvspuxds.  The result prefers an Altivec register ("v"), with "?wa" as
;; the disparaged fallback.  Enabled when VECTOR_UNIT_VSX_P (V2DFmode).
1022 (define_insn "vsx_xvcvspuxds"
1023 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1024 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1025 UNSPEC_VSX_CVSPUXDS))]
1026 "VECTOR_UNIT_VSX_P (V2DFmode)"
1027 "xvcvspuxds %x0,%x1"
1028 [(set_attr "type" "vecdouble")])
1030 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
1031 ;; the xsrdpiz instruction does not truncate the value if the floating
1032 ;; point value is < LONG_MIN or > LONG_MAX.
;; Instantiated for each mode in VSX_DF (V2DF and DF); emits one
;; x<VSv>r<VSs>iz instruction from operand 1 into operand 0.
;; The condition requires hard float with FPRs and double float, VSX support
;; for the mode, flag_unsafe_math_optimizations set, flag_trapping_math clear,
;; and TARGET_FRIZ.
1033 (define_insn "*vsx_float_fix_<mode>2"
1034 [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?wa")
1037 (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?wa"))))]
1038 "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1039 && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations
1040 && !flag_trapping_math && TARGET_FRIZ"
1041 "x<VSv>r<VSs>iz %x0,%x1"
1042 [(set_attr "type" "<VStype_simple>")
1043 (set_attr "fp_type" "<VSfptype_simple>")])
1046 ;; Permute operations
1048 ;; Build a V2DF/V2DI vector from two scalars
;; Operands 1 and 2 are <VS_scalar> values held in VSX registers; the result
;; is a VSX_D vector (V2DF or V2DI) formed with "xxpermdi %x0,%x1,%x2,0".
;; Enabled when VECTOR_MEM_VSX_P (<MODE>mode).
1049 (define_insn "vsx_concat_<mode>"
1050 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa")
1052 (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
1053 (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))]
1054 "VECTOR_MEM_VSX_P (<MODE>mode)"
1055 "xxpermdi %x0,%x1,%x2,0"
1056 [(set_attr "type" "vecperm")])
1058 ;; Special purpose concat using xxpermdi to glue two single precision values
1059 ;; together, relying on the fact that internally scalar floats are represented
1060 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
;; Operands 1 and 2 are SF values constrained to "f" registers; the V2DF
;; result is described with UNSPEC_VSX_CONCAT and produced by
;; "xxpermdi %x0,%x1,%x2,0".  Enabled when VECTOR_MEM_VSX_P (V2DFmode).
1061 (define_insn "vsx_concat_v2sf"
1062 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1064 [(match_operand:SF 1 "vsx_register_operand" "f,f")
1065 (match_operand:SF 2 "vsx_register_operand" "f,f")]
1066 UNSPEC_VSX_CONCAT))]
1067 "VECTOR_MEM_VSX_P (V2DFmode)"
1068 "xxpermdi %x0,%x1,%x2,0"
1069 [(set_attr "type" "vecperm")])
1071 ;; Set the element of a V2DI/V2DF mode
;; Operand 1 is the incoming vector, operand 2 the new <VS_scalar> value and
;; operand 3 a constant element index.  The output code picks the xxpermdi
;; form from the index:
;;   index 0 -> "xxpermdi %x0,%x2,%x1,1"
;;   index 1 -> "xxpermdi %x0,%x1,%x2,0"
1072 (define_insn "vsx_set_<mode>"
1073 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
1074 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wd,wa")
1075 (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")
1076 (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
1078 "VECTOR_MEM_VSX_P (<MODE>mode)"
1080 if (INTVAL (operands[3]) == 0)
1081 return \"xxpermdi %x0,%x2,%x1,1\";
1082 else if (INTVAL (operands[3]) == 1)
1083 return \"xxpermdi %x0,%x1,%x2,0\";
1087 [(set_attr "type" "vecperm")])
1089 ;; Extract a DF/DI element from V2DF/V2DI
;; Selects the element of VSX_D operand 1 named by constant operand 2 into a
;; <VS_scalar> register.  The output code asserts the index is 0 or 1, stores
;; (index << 1) in operands[3] and emits "xxpermdi %x0,%x1,%x1,%3", i.e. the
;; source register is used for both xxpermdi inputs.
1090 (define_insn "vsx_extract_<mode>"
1091 [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
1092 (vec_select:<VS_scalar> (match_operand:VSX_D 1 "vsx_register_operand" "wd,wd,wa")
1094 [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
1095 "VECTOR_MEM_VSX_P (<MODE>mode)"
1097 gcc_assert (UINTVAL (operands[2]) <= 1);
1098 operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
1099 return \"xxpermdi %x0,%x1,%x1,%3\";
1101 [(set_attr "type" "vecperm")])
1103 ;; Optimize extracting element 0 from memory
;; Matches a vec_select of element 0 from a VSX_D vector that lives in memory
;; (indexed_or_indirect_operand, constraint "Z").  Only enabled when
;; WORDS_BIG_ENDIAN.  The "type" attribute is "fpload_ux" when
;; update_indexed_address_mem accepts operand 1 and "fpload" otherwise; the
;; insn length is 4 bytes.
1104 (define_insn "*vsx_extract_<mode>_zero"
1105 [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
1106 (vec_select:<VS_scalar>
1107 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
1108 (parallel [(const_int 0)])))]
1109 "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
1113 (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
1114 (const_string "fpload_ux")
1115 (const_string "fpload")))
1116 (set_attr "length" "4")])
1118 ;; Extract a SF element from V4SF
;; insn-and-split: SF result in an "f" register, V4SF source in "wa", constant
;; element index in operand 2.  Alternative 0 restricts the index to
;; constraint "O" and gives the scratch constraint "X" (length 4); alternative
;; 1 accepts any immediate and ties the V4SF scratch to operand 0 (length 8).
;; The visible split code allocates a fresh V4SF pseudo when operand 3 is
;; still a SCRATCH, emits xxsldwi of the source with itself using operand 2
;; as the shift count, and finishes with gen_vsx_xscvspdp_scalar2 into
;; operand 0.
1119 (define_insn_and_split "vsx_extract_v4sf"
1120 [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
1122 (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
1123 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
1124 (clobber (match_scratch:V4SF 3 "=X,0"))]
1125 "VECTOR_UNIT_VSX_P (V4SFmode)"
1133 rtx op0 = operands[0];
1134 rtx op1 = operands[1];
1135 rtx op2 = operands[2];
1136 rtx op3 = operands[3];
1138 HOST_WIDE_INT ele = INTVAL (op2);
1144 if (GET_CODE (op3) == SCRATCH)
1145 op3 = gen_reg_rtx (V4SFmode);
1146 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2));
1149 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
1152 [(set_attr "length" "4,8")
1153 (set_attr "type" "fp")])
1155 ;; Expand the builtin form of xxpermdi to canonical rtl.
;; Expander for every mode in VSX_L.  Operand 3 is the xxpermdi immediate; it
;; is split into two selector indices for the vec_select form:
;;   perm0 = (mask >> 1) & 1     index 0..1
;;   perm1 = (mask & 1) + 2      index 2..3
;; V2DF uses gen_vsx_xxpermdi2_v2df_1 directly.  The other path uses
;; gen_vsx_xxpermdi2_v2di_1 and, for modes other than V2DI, first rewrites
;; the target and both inputs with gen_lowpart (V2DImode, ...).
1156 (define_expand "vsx_xxpermdi_<mode>"
1157 [(match_operand:VSX_L 0 "vsx_register_operand" "")
1158 (match_operand:VSX_L 1 "vsx_register_operand" "")
1159 (match_operand:VSX_L 2 "vsx_register_operand" "")
1160 (match_operand:QI 3 "u5bit_cint_operand" "")]
1161 "VECTOR_MEM_VSX_P (<MODE>mode)"
1163 rtx target = operands[0];
1164 rtx op0 = operands[1];
1165 rtx op1 = operands[2];
1166 int mask = INTVAL (operands[3]);
1167 rtx perm0 = GEN_INT ((mask >> 1) & 1);
1168 rtx perm1 = GEN_INT ((mask & 1) + 2);
1169 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
1171 if (<MODE>mode == V2DFmode)
1172 gen = gen_vsx_xxpermdi2_v2df_1;
1175 gen = gen_vsx_xxpermdi2_v2di_1;
1176 if (<MODE>mode != V2DImode)
1178 target = gen_lowpart (V2DImode, target);
1179 op0 = gen_lowpart (V2DImode, op0);
1180 op1 = gen_lowpart (V2DImode, op1);
1183 emit_insn (gen (target, op0, op1, perm0, perm1));
;; Selects two elements from the concatenation of operands 1 and 2: operand 3
;; is an index in 0..1 and operand 4 an index in 2..3.  The output code folds
;; both back into the xxpermdi immediate, (op3 << 1) | (op4 - 2), overwrites
;; operands[3] with it and emits "xxpermdi %x0,%x1,%x2,%3".
1187 (define_insn "vsx_xxpermdi2_<mode>_1"
1188 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
1190 (vec_concat:<VS_double>
1191 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
1192 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
1193 (parallel [(match_operand 3 "const_0_to_1_operand" "")
1194 (match_operand 4 "const_2_to_3_operand" "")])))]
1195 "VECTOR_MEM_VSX_P (<MODE>mode)"
1197 int mask = (INTVAL (operands[3]) << 1) | (INTVAL (operands[4]) - 2);
1198 operands[3] = GEN_INT (mask);
1199 return "xxpermdi %x0,%x1,%x2,%3";
1201 [(set_attr "type" "vecperm")])
;; Constant-permute expander for VSX_D modes.  Operands 1 and 2 are the input
;; vectors and operand 3 is a V2DI operand with no predicate; the expansion
;; is delegated to rs6000_expand_vec_perm_const (operands).
;; NOTE(review): what happens on each outcome of that call is not visible in
;; this excerpt.
1203 (define_expand "vec_perm_const<mode>"
1204 [(match_operand:VSX_D 0 "vsx_register_operand" "")
1205 (match_operand:VSX_D 1 "vsx_register_operand" "")
1206 (match_operand:VSX_D 2 "vsx_register_operand" "")
1207 (match_operand:V2DI 3 "" "")]
1208 "VECTOR_MEM_VSX_P (<MODE>mode)"
1210 if (rs6000_expand_vec_perm_const (operands))
1216 ;; Expanders for builtins
;; Builtin expander: selects elements 1 and 3 from the concatenation of
;; operands 1 and 2, i.e. element 1 of each input vector.
1217 (define_expand "vsx_mergel_<mode>"
1218 [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
1220 (vec_concat:<VS_double>
1221 (match_operand:VSX_D 1 "vsx_register_operand" "")
1222 (match_operand:VSX_D 2 "vsx_register_operand" ""))
1223 (parallel [(const_int 1) (const_int 3)])))]
1224 "VECTOR_MEM_VSX_P (<MODE>mode)"
;; Builtin expander: selects elements 0 and 2 from the concatenation of
;; operands 1 and 2, i.e. element 0 of each input vector.
1227 (define_expand "vsx_mergeh_<mode>"
1228 [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
1230 (vec_concat:<VS_double>
1231 (match_operand:VSX_D 1 "vsx_register_operand" "")
1232 (match_operand:VSX_D 2 "vsx_register_operand" ""))
1233 (parallel [(const_int 0) (const_int 2)])))]
1234 "VECTOR_MEM_VSX_P (<MODE>mode)"
;; Duplicates <VS_scalar> operand 1 into both elements of a VSX_D vector.
;; There are six alternatives: the four with a register source are typed
;; "vecperm" and use "xxpermdi %x0,%x1,%x1,0"; the two with a memory source
;; (constraint "Z") are typed "vecload".
;; NOTE(review): the templates for the two memory alternatives are not
;; visible in this excerpt.
1238 (define_insn "vsx_splat_<mode>"
1239 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
1240 (vec_duplicate:VSX_D
1241 (match_operand:<VS_scalar> 1 "splat_input_operand" "ws,f,Z,wa,wa,Z")))]
1242 "VECTOR_MEM_VSX_P (<MODE>mode)"
1244 xxpermdi %x0,%x1,%x1,0
1245 xxpermdi %x0,%x1,%x1,0
1247 xxpermdi %x0,%x1,%x1,0
1248 xxpermdi %x0,%x1,%x1,0
1250 [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
;; Word splat for VSX_W modes (V4SF, V4SI): duplicates the element of operand
;; 1 selected by constant operand 2 across the result, using
;; "xxspltw %x0,%x1,%2".
1253 (define_insn "vsx_xxspltw_<mode>"
1254 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1255 (vec_duplicate:VSX_W
1256 (vec_select:<VS_scalar>
1257 (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1259 [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
1260 "VECTOR_MEM_VSX_P (<MODE>mode)"
1261 "xxspltw %x0,%x1,%2"
1262 [(set_attr "type" "vecperm")])
1264 ;; V4SF/V4SI interleave
;; High-word interleave: selects elements 0, 4, 1, 5 from the concatenation
;; of operands 1 and 2 (elements 0 and 1 of each input, alternating) and
;; emits a single xxmrghw.
1265 (define_insn "vsx_xxmrghw_<mode>"
1266 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1268 (vec_concat:<VS_double>
1269 (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1270 (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa"))
1271 (parallel [(const_int 0) (const_int 4)
1272 (const_int 1) (const_int 5)])))]
1273 "VECTOR_MEM_VSX_P (<MODE>mode)"
1274 "xxmrghw %x0,%x1,%x2"
1275 [(set_attr "type" "vecperm")])
;; Low-word interleave: selects elements 2, 6, 3, 7 from the concatenation of
;; operands 1 and 2 (elements 2 and 3 of each input, alternating) and emits a
;; single xxmrglw.
;; Both inputs of the second alternative use a plain "wa" constraint, the
;; same as vsx_xxmrghw_<mode>; that alternative is already disparaged once by
;; the "?wa" on the output operand, so no extra "?" is placed on operand 2.
1277 (define_insn "vsx_xxmrglw_<mode>"
1278 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1280 (vec_concat:<VS_double>
1281 (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1282 (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa"))
1283 (parallel [(const_int 2) (const_int 6)
1284 (const_int 3) (const_int 7)])))]
1285 "VECTOR_MEM_VSX_P (<MODE>mode)"
1286 "xxmrglw %x0,%x1,%x2"
1287 [(set_attr "type" "vecperm")])
1289 ;; Shift left double by word immediate
;; Defined for every mode in VSX_L, with all register operands in "wa".
;; Operands 1 and 2 are the two source vectors and operand 3 is the immediate
;; passed straight through to "xxsldwi %x0,%x1,%x2,%3".
1290 (define_insn "vsx_xxsldwi_<mode>"
1291 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
1292 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
1293 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
1294 (match_operand:QI 3 "u5bit_cint_operand" "i")]
1296 "VECTOR_MEM_VSX_P (<MODE>mode)"
1297 "xxsldwi %x0,%x1,%x2,%3"
1298 [(set_attr "type" "vecperm")])
1301 ;; Vector reduction insns and splitters
;; V2DF reduction, insn-and-split, total length 8 bytes.
;; The scratch (operand 2) is tied to operand 0 in the first two alternatives,
;; where the output is earlyclobber, and is a separate earlyclobber register
;; in the last two.  The visible split code:
;;   tmp  <- xxsldwi (op1, op1, 2), using a new pseudo if operand 2 is still
;;           a SCRATCH
;;   op0  <- <VEC_reduc_rtx>v2df3 (tmp, op1)
1303 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
1304 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
1308 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
1309 (parallel [(const_int 1)]))
1312 (parallel [(const_int 0)])))
1314 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
1315 "VECTOR_UNIT_VSX_P (V2DFmode)"
1321 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
1322 ? gen_reg_rtx (V2DFmode)
1324 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
1325 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
1328 [(set_attr "length" "8")
1329 (set_attr "type" "veccomplex")])
;; V4SF reduction, insn-and-split, total length 16 bytes (four instructions).
;; Two earlyclobber V4SF scratches are declared.  When pseudos can still be
;; created, the split uses fresh V4SF pseudos for its temporaries.  Emitted
;; sequence:
;;   tmp2 <- xxsldwi (op1, op1, 2)
;;   tmp3 <- <VEC_reduc_rtx>v4sf3 (tmp2, op1)
;;   tmp4 <- xxsldwi (tmp3, tmp3, 3)
;;   op0  <- <VEC_reduc_rtx>v4sf3 (tmp4, tmp3)
1331 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
1332 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
1334 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
1335 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
1336 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
1337 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
1338 "VECTOR_UNIT_VSX_P (V4SFmode)"
1344 rtx op0 = operands[0];
1345 rtx op1 = operands[1];
1346 rtx tmp2, tmp3, tmp4;
1348 if (can_create_pseudo_p ())
1350 tmp2 = gen_reg_rtx (V4SFmode);
1351 tmp3 = gen_reg_rtx (V4SFmode);
1352 tmp4 = gen_reg_rtx (V4SFmode);
1361 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
1362 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
1363 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
1364 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
1367 [(set_attr "length" "16")
1368 (set_attr "type" "veccomplex")])
1370 ;; Combiner patterns with the vector reduction patterns that know we can get
1371 ;; to the top element of the V2DF array without doing an extract.
;; Scalar (DF) result of a V2DF reduction, insn-and-split, total length 8
;; bytes.  The visible split code:
;;   hi  <- gen_highpart (DFmode, op1), so no instruction is emitted for it
;;   lo  <- element 1 of op1 via gen_vsx_extract_v2df, using a new pseudo if
;;          operand 2 is still a SCRATCH
;;   op0 <- <VEC_reduc_rtx>df3 (hi, lo)
1373 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
1374 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa")
1379 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
1380 (parallel [(const_int 1)]))
1383 (parallel [(const_int 0)])))
1385 (parallel [(const_int 1)])))
1386 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
1387 "VECTOR_UNIT_VSX_P (V2DFmode)"
1393 rtx hi = gen_highpart (DFmode, operands[1]);
1394 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
1395 ? gen_reg_rtx (DFmode)
1398 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
1399 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
1402 [(set_attr "length" "8")
1403 (set_attr "type" "veccomplex")])
;; Scalar (SF) result of a V4SF reduction, selecting element 3; insn-and-split
;; with total length 20 bytes (five instructions).  Scratches 2 and 3 are
;; earlyclobber vector registers and scratch 4 is tied to operand 0.  When
;; pseudos can still be created, the split uses fresh V4SF pseudos for its
;; temporaries.  Emitted sequence:
;;   tmp2 <- xxsldwi (op1, op1, 2)
;;   tmp3 <- <VEC_reduc_rtx>v4sf3 (tmp2, op1)
;;   tmp4 <- xxsldwi (tmp3, tmp3, 3)
;;   tmp5 <- <VEC_reduc_rtx>v4sf3 (tmp4, tmp3)
;;   op0  <- gen_vsx_xscvspdp_scalar2 (tmp5)
1405 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
1406 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
1409 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
1410 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
1411 (parallel [(const_int 3)])))
1412 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
1413 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
1414 (clobber (match_scratch:V4SF 4 "=0,0"))]
1415 "VECTOR_UNIT_VSX_P (V4SFmode)"
1421 rtx op0 = operands[0];
1422 rtx op1 = operands[1];
1423 rtx tmp2, tmp3, tmp4, tmp5;
1425 if (can_create_pseudo_p ())
1427 tmp2 = gen_reg_rtx (V4SFmode);
1428 tmp3 = gen_reg_rtx (V4SFmode);
1429 tmp4 = gen_reg_rtx (V4SFmode);
1430 tmp5 = gen_reg_rtx (V4SFmode);
1440 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
1441 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
1442 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
1443 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
1444 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
1447 [(set_attr "length" "20")
1448 (set_attr "type" "veccomplex")])