2 ;; Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for both scalar and vector floating point types supported by VSX
;; (DF is the scalar mode; V4SF/V2DF are the vector modes.)
22 (define_mode_iterator VSX_B [DF V4SF V2DF])
24 ;; Iterator for the 2 64-bit vector types
25 (define_mode_iterator VSX_D [V2DF V2DI])
27 ;; Iterator for the 2 32-bit vector types
28 (define_mode_iterator VSX_W [V4SF V4SI])
30 ;; Iterator for the DF types
31 (define_mode_iterator VSX_DF [V2DF DF])
33 ;; Iterator for vector floating point types supported by VSX
34 (define_mode_iterator VSX_F [V4SF V2DF])
36 ;; Iterator for logical types supported by VSX
37 (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
39 ;; Iterator for memory move. Handle TImode specially to allow
40 ;; it to use gprs as well as vsx registers.
41 (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
;; Like VSX_M, but the final TI entry is conditional on TARGET_VSX_TIMODE.
;; NOTE(review): the interior mode entries of this iterator appear to be
;; missing from this extraction -- confirm against the upstream vsx.md.
43 (define_mode_iterator VSX_M2 [V16QI
50 (TI "TARGET_VSX_TIMODE")])
52 ;; Map into the appropriate load/store name based on the type
53 (define_mode_attr VSm [(V16QI "vw4")
63 ;; Map into the appropriate suffix based on the type
64 (define_mode_attr VSs [(V16QI "sp")
75 ;; Map the register class used
76 (define_mode_attr VSr [(V16QI "v")
87 ;; Map the register class used for float<->int conversions
88 (define_mode_attr VSr2 [(V2DF "wd")
92 (define_mode_attr VSr3 [(V2DF "wa")
96 ;; Map the register class for sp<->dp float conversions, destination
97 (define_mode_attr VSr4 [(SF "ws")
102 ;; Map the register class for sp<->dp float conversions, source
103 (define_mode_attr VSr5 [(SF "ws")
108 ;; Same size integer type for floating point data
109 (define_mode_attr VSi [(V4SF "v4si")
113 (define_mode_attr VSI [(V4SF "V4SI")
117 ;; Word size for same size conversion
118 (define_mode_attr VSc [(V4SF "w")
122 ;; Map into either s or v, depending on whether this is a scalar or vector
124 (define_mode_attr VSv [(V16QI "v")
133 ;; Appropriate type for add ops (and other simple FP ops)
134 (define_mode_attr VStype_simple [(V2DF "vecdouble")
138 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
142 ;; Appropriate type for multiply ops
143 (define_mode_attr VStype_mul [(V2DF "vecdouble")
147 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
151 ;; Appropriate type for divide ops.
152 (define_mode_attr VStype_div [(V2DF "vecdiv")
156 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
160 ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
162 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
166 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
170 ;; Iterator and modes for sp<->dp conversions
171 ;; Because scalar SF values are represented internally as double, use the
172 ;; V4SF type to represent this rather than SF.
173 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
175 (define_mode_attr VS_spdp_res [(DF "V4SF")
179 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
183 (define_mode_attr VS_spdp_type [(DF "fp")
187 ;; Map the scalar mode for a vector type
188 (define_mode_attr VS_scalar [(V1TI "TI")
196 ;; Map to a double-sized vector mode
197 (define_mode_attr VS_double [(V4SI "V8SI")
203 ;; Constants for creating unspecs
204 (define_c_enum "unspec"
228 ;; The patterns for LE permuted loads and stores come before the general
229 ;; VSX moves so they match first.
230 (define_insn_and_split "*vsx_le_perm_load_<mode>"
231 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
232 (match_operand:VSX_D 1 "memory_operand" "Z"))]
233 "!BYTES_BIG_ENDIAN && TARGET_VSX"
235 "!BYTES_BIG_ENDIAN && TARGET_VSX"
239 (parallel [(const_int 1) (const_int 0)])))
243 (parallel [(const_int 1) (const_int 0)])))]
246 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
250 [(set_attr "type" "vecload")
251 (set_attr "length" "8")])
253 (define_insn_and_split "*vsx_le_perm_load_<mode>"
254 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
255 (match_operand:VSX_W 1 "memory_operand" "Z"))]
256 "!BYTES_BIG_ENDIAN && TARGET_VSX"
258 "!BYTES_BIG_ENDIAN && TARGET_VSX"
262 (parallel [(const_int 2) (const_int 3)
263 (const_int 0) (const_int 1)])))
267 (parallel [(const_int 2) (const_int 3)
268 (const_int 0) (const_int 1)])))]
271 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
275 [(set_attr "type" "vecload")
276 (set_attr "length" "8")])
278 (define_insn_and_split "*vsx_le_perm_load_v8hi"
279 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
280 (match_operand:V8HI 1 "memory_operand" "Z"))]
281 "!BYTES_BIG_ENDIAN && TARGET_VSX"
283 "!BYTES_BIG_ENDIAN && TARGET_VSX"
287 (parallel [(const_int 4) (const_int 5)
288 (const_int 6) (const_int 7)
289 (const_int 0) (const_int 1)
290 (const_int 2) (const_int 3)])))
294 (parallel [(const_int 4) (const_int 5)
295 (const_int 6) (const_int 7)
296 (const_int 0) (const_int 1)
297 (const_int 2) (const_int 3)])))]
300 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
304 [(set_attr "type" "vecload")
305 (set_attr "length" "8")])
307 (define_insn_and_split "*vsx_le_perm_load_v16qi"
308 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
309 (match_operand:V16QI 1 "memory_operand" "Z"))]
310 "!BYTES_BIG_ENDIAN && TARGET_VSX"
312 "!BYTES_BIG_ENDIAN && TARGET_VSX"
316 (parallel [(const_int 8) (const_int 9)
317 (const_int 10) (const_int 11)
318 (const_int 12) (const_int 13)
319 (const_int 14) (const_int 15)
320 (const_int 0) (const_int 1)
321 (const_int 2) (const_int 3)
322 (const_int 4) (const_int 5)
323 (const_int 6) (const_int 7)])))
327 (parallel [(const_int 8) (const_int 9)
328 (const_int 10) (const_int 11)
329 (const_int 12) (const_int 13)
330 (const_int 14) (const_int 15)
331 (const_int 0) (const_int 1)
332 (const_int 2) (const_int 3)
333 (const_int 4) (const_int 5)
334 (const_int 6) (const_int 7)])))]
337 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
341 [(set_attr "type" "vecload")
342 (set_attr "length" "8")])
344 (define_insn "*vsx_le_perm_store_<mode>"
345 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
346 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
347 "!BYTES_BIG_ENDIAN && TARGET_VSX"
349 [(set_attr "type" "vecstore")
350 (set_attr "length" "12")])
353 [(set (match_operand:VSX_D 0 "memory_operand" "")
354 (match_operand:VSX_D 1 "vsx_register_operand" ""))]
355 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
359 (parallel [(const_int 1) (const_int 0)])))
363 (parallel [(const_int 1) (const_int 0)])))]
365 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
369 ;; The post-reload split requires that we re-permute the source
370 ;; register in case it is still live.
372 [(set (match_operand:VSX_D 0 "memory_operand" "")
373 (match_operand:VSX_D 1 "vsx_register_operand" ""))]
374 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
378 (parallel [(const_int 1) (const_int 0)])))
382 (parallel [(const_int 1) (const_int 0)])))
386 (parallel [(const_int 1) (const_int 0)])))]
389 (define_insn "*vsx_le_perm_store_<mode>"
390 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
391 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
392 "!BYTES_BIG_ENDIAN && TARGET_VSX"
394 [(set_attr "type" "vecstore")
395 (set_attr "length" "12")])
398 [(set (match_operand:VSX_W 0 "memory_operand" "")
399 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
400 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
404 (parallel [(const_int 2) (const_int 3)
405 (const_int 0) (const_int 1)])))
409 (parallel [(const_int 2) (const_int 3)
410 (const_int 0) (const_int 1)])))]
412 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
416 ;; The post-reload split requires that we re-permute the source
417 ;; register in case it is still live.
419 [(set (match_operand:VSX_W 0 "memory_operand" "")
420 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
421 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
425 (parallel [(const_int 2) (const_int 3)
426 (const_int 0) (const_int 1)])))
430 (parallel [(const_int 2) (const_int 3)
431 (const_int 0) (const_int 1)])))
435 (parallel [(const_int 2) (const_int 3)
436 (const_int 0) (const_int 1)])))]
439 (define_insn "*vsx_le_perm_store_v8hi"
440 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
441 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
442 "!BYTES_BIG_ENDIAN && TARGET_VSX"
444 [(set_attr "type" "vecstore")
445 (set_attr "length" "12")])
448 [(set (match_operand:V8HI 0 "memory_operand" "")
449 (match_operand:V8HI 1 "vsx_register_operand" ""))]
450 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
454 (parallel [(const_int 4) (const_int 5)
455 (const_int 6) (const_int 7)
456 (const_int 0) (const_int 1)
457 (const_int 2) (const_int 3)])))
461 (parallel [(const_int 4) (const_int 5)
462 (const_int 6) (const_int 7)
463 (const_int 0) (const_int 1)
464 (const_int 2) (const_int 3)])))]
466 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
470 ;; The post-reload split requires that we re-permute the source
471 ;; register in case it is still live.
473 [(set (match_operand:V8HI 0 "memory_operand" "")
474 (match_operand:V8HI 1 "vsx_register_operand" ""))]
475 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
479 (parallel [(const_int 4) (const_int 5)
480 (const_int 6) (const_int 7)
481 (const_int 0) (const_int 1)
482 (const_int 2) (const_int 3)])))
486 (parallel [(const_int 4) (const_int 5)
487 (const_int 6) (const_int 7)
488 (const_int 0) (const_int 1)
489 (const_int 2) (const_int 3)])))
493 (parallel [(const_int 4) (const_int 5)
494 (const_int 6) (const_int 7)
495 (const_int 0) (const_int 1)
496 (const_int 2) (const_int 3)])))]
499 (define_insn "*vsx_le_perm_store_v16qi"
500 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
501 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
502 "!BYTES_BIG_ENDIAN && TARGET_VSX"
504 [(set_attr "type" "vecstore")
505 (set_attr "length" "12")])
508 [(set (match_operand:V16QI 0 "memory_operand" "")
509 (match_operand:V16QI 1 "vsx_register_operand" ""))]
510 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
514 (parallel [(const_int 8) (const_int 9)
515 (const_int 10) (const_int 11)
516 (const_int 12) (const_int 13)
517 (const_int 14) (const_int 15)
518 (const_int 0) (const_int 1)
519 (const_int 2) (const_int 3)
520 (const_int 4) (const_int 5)
521 (const_int 6) (const_int 7)])))
525 (parallel [(const_int 8) (const_int 9)
526 (const_int 10) (const_int 11)
527 (const_int 12) (const_int 13)
528 (const_int 14) (const_int 15)
529 (const_int 0) (const_int 1)
530 (const_int 2) (const_int 3)
531 (const_int 4) (const_int 5)
532 (const_int 6) (const_int 7)])))]
534 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
538 ;; The post-reload split requires that we re-permute the source
539 ;; register in case it is still live.
541 [(set (match_operand:V16QI 0 "memory_operand" "")
542 (match_operand:V16QI 1 "vsx_register_operand" ""))]
543 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
547 (parallel [(const_int 8) (const_int 9)
548 (const_int 10) (const_int 11)
549 (const_int 12) (const_int 13)
550 (const_int 14) (const_int 15)
551 (const_int 0) (const_int 1)
552 (const_int 2) (const_int 3)
553 (const_int 4) (const_int 5)
554 (const_int 6) (const_int 7)])))
558 (parallel [(const_int 8) (const_int 9)
559 (const_int 10) (const_int 11)
560 (const_int 12) (const_int 13)
561 (const_int 14) (const_int 15)
562 (const_int 0) (const_int 1)
563 (const_int 2) (const_int 3)
564 (const_int 4) (const_int 5)
565 (const_int 6) (const_int 7)])))
569 (parallel [(const_int 8) (const_int 9)
570 (const_int 10) (const_int 11)
571 (const_int 12) (const_int 13)
572 (const_int 14) (const_int 15)
573 (const_int 0) (const_int 1)
574 (const_int 2) (const_int 3)
575 (const_int 4) (const_int 5)
576 (const_int 6) (const_int 7)])))]
580 (define_insn "*vsx_mov<mode>"
581 [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
582 (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
583 "VECTOR_MEM_VSX_P (<MODE>mode)
584 && (register_operand (operands[0], <MODE>mode)
585 || register_operand (operands[1], <MODE>mode))"
587 return rs6000_output_move_128bit (operands);
589 [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
590 (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
592 ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
593 ;; use of TImode is for unions. However for plain data movement, slightly
594 ;; favor the vector loads
595 (define_insn "*vsx_movti_64bit"
596 [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
597 (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
598 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
599 && (register_operand (operands[0], TImode)
600 || register_operand (operands[1], TImode))"
602 return rs6000_output_move_128bit (operands);
604 [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
605 (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
607 (define_insn "*vsx_movti_32bit"
608 [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
609 (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))]
610 "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
611 && (register_operand (operands[0], TImode)
612 || register_operand (operands[1], TImode))"
614 switch (which_alternative)
617 return "stxvd2x %x1,%y0";
620 return "lxvd2x %x0,%y1";
623 return "xxlor %x0,%x1,%x1";
626 return "xxlxor %x0,%x0,%x0";
629 return output_vec_const_move (operands);
632 return "stvx %1,%y0";
639 return \"stswi %1,%P0,16\";
645 /* If the address is not used in the output, we can use lsi. Otherwise,
646 fall through to generating four loads. */
648 && ! reg_overlap_mentioned_p (operands[0], operands[1]))
649 return \"lswi %0,%P1,16\";
650 /* ... fall through ... */
660 [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
661 (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
662 (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
663 (const_string "always")
664 (const_string "conditional")))])
666 ;; Explicit load/store expanders for the builtin functions
667 (define_expand "vsx_load_<mode>"
668 [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
669 (match_operand:VSX_M 1 "memory_operand" ""))]
670 "VECTOR_MEM_VSX_P (<MODE>mode)"
673 (define_expand "vsx_store_<mode>"
674 [(set (match_operand:VSX_M 0 "memory_operand" "")
675 (match_operand:VSX_M 1 "vsx_register_operand" ""))]
676 "VECTOR_MEM_VSX_P (<MODE>mode)"
680 ;; VSX vector floating point arithmetic instructions. The VSX scalar
681 ;; instructions are now combined with the insn for the traditional floating
;; Vector FP add; <VSs> supplies the precision suffix for the xvadd mnemonic.
;; First alternative uses the mode's preferred VSX class <VSr>; the
;; discouraged "?wa" alternative accepts any VSX register.
683 (define_insn "*vsx_add<mode>3"
684 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
685 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
686 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
687 "VECTOR_UNIT_VSX_P (<MODE>mode)"
688 "xvadd<VSs> %x0,%x1,%x2"
689 [(set_attr "type" "<VStype_simple>")
690 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP subtract; same operand/constraint scheme as *vsx_add<mode>3.
692 (define_insn "*vsx_sub<mode>3"
693 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
694 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
695 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
696 "VECTOR_UNIT_VSX_P (<MODE>mode)"
697 "xvsub<VSs> %x0,%x1,%x2"
698 [(set_attr "type" "<VStype_simple>")
699 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP multiply; <VSs> supplies the precision suffix for xvmul.
;; Fix: use the multiply-specific scheduling type <VStype_mul> to match
;; the <VSfptype_mul> fp_type below (and the VStype_div/VSfptype_div
;; pairing used by *vsx_div<mode>3); <VStype_simple> understates the
;; latency of vector multiplies for the scheduler.
701 (define_insn "*vsx_mul<mode>3"
702 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
703 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
704 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
705 "VECTOR_UNIT_VSX_P (<MODE>mode)"
706 "xvmul<VSs> %x0,%x1,%x2"
707 [(set_attr "type" "<VStype_mul>")
708 (set_attr "fp_type" "<VSfptype_mul>")])
;; Vector FP divide; uses the division-specific scheduling attributes
;; <VStype_div>/<VSfptype_div> since divides are long-latency.
710 (define_insn "*vsx_div<mode>3"
711 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
712 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
713 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
714 "VECTOR_UNIT_VSX_P (<MODE>mode)"
715 "xvdiv<VSs> %x0,%x1,%x2"
716 [(set_attr "type" "<VStype_div>")
717 (set_attr "fp_type" "<VSfptype_div>")])
719 ;; *tdiv* instruction returning the FG flag
720 (define_expand "vsx_tdiv<mode>3_fg"
722 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
723 (match_operand:VSX_B 2 "vsx_register_operand" "")]
725 (set (match_operand:SI 0 "gpc_reg_operand" "")
728 "VECTOR_UNIT_VSX_P (<MODE>mode)"
730 operands[3] = gen_reg_rtx (CCFPmode);
733 ;; *tdiv* instruction returning the FE flag
734 (define_expand "vsx_tdiv<mode>3_fe"
736 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
737 (match_operand:VSX_B 2 "vsx_register_operand" "")]
739 (set (match_operand:SI 0 "gpc_reg_operand" "")
742 "VECTOR_UNIT_VSX_P (<MODE>mode)"
744 operands[3] = gen_reg_rtx (CCFPmode);
747 (define_insn "*vsx_tdiv<mode>3_internal"
748 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
749 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
750 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
752 "VECTOR_UNIT_VSX_P (<MODE>mode)"
753 "x<VSv>tdiv<VSs> %0,%x1,%x2"
754 [(set_attr "type" "<VStype_simple>")
755 (set_attr "fp_type" "<VSfptype_simple>")])
757 (define_insn "vsx_fre<mode>2"
758 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
759 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
761 "VECTOR_UNIT_VSX_P (<MODE>mode)"
763 [(set_attr "type" "<VStype_simple>")
764 (set_attr "fp_type" "<VSfptype_simple>")])
766 (define_insn "*vsx_neg<mode>2"
767 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
768 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
769 "VECTOR_UNIT_VSX_P (<MODE>mode)"
771 [(set_attr "type" "<VStype_simple>")
772 (set_attr "fp_type" "<VSfptype_simple>")])
774 (define_insn "*vsx_abs<mode>2"
775 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
776 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
777 "VECTOR_UNIT_VSX_P (<MODE>mode)"
779 [(set_attr "type" "<VStype_simple>")
780 (set_attr "fp_type" "<VSfptype_simple>")])
782 (define_insn "vsx_nabs<mode>2"
783 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
786 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))]
787 "VECTOR_UNIT_VSX_P (<MODE>mode)"
788 "xvnabs<VSs> %x0,%x1"
789 [(set_attr "type" "<VStype_simple>")
790 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP signed maximum (xvmaxsp/xvmaxdp via the <VSs> suffix).
792 (define_insn "vsx_smax<mode>3"
793 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
794 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
795 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
796 "VECTOR_UNIT_VSX_P (<MODE>mode)"
797 "xvmax<VSs> %x0,%x1,%x2"
798 [(set_attr "type" "<VStype_simple>")
799 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP signed minimum; mirror of vsx_smax<mode>3.
801 (define_insn "*vsx_smin<mode>3"
802 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
803 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
804 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
805 "VECTOR_UNIT_VSX_P (<MODE>mode)"
806 "xvmin<VSs> %x0,%x1,%x2"
807 [(set_attr "type" "<VStype_simple>")
808 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP square root; uses the sqrt-specific scheduling attributes
;; since square root is long-latency.
810 (define_insn "*vsx_sqrt<mode>2"
811 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
812 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
813 "VECTOR_UNIT_VSX_P (<MODE>mode)"
814 "xvsqrt<VSs> %x0,%x1"
815 [(set_attr "type" "<VStype_sqrt>")
816 (set_attr "fp_type" "<VSfptype_sqrt>")])
818 (define_insn "*vsx_rsqrte<mode>2"
819 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
820 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
822 "VECTOR_UNIT_VSX_P (<MODE>mode)"
823 "xvrsqrte<VSs> %x0,%x1"
824 [(set_attr "type" "<VStype_simple>")
825 (set_attr "fp_type" "<VSfptype_simple>")])
827 ;; *tsqrt* returning the fg flag
828 (define_expand "vsx_tsqrt<mode>2_fg"
830 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
832 (set (match_operand:SI 0 "gpc_reg_operand" "")
835 "VECTOR_UNIT_VSX_P (<MODE>mode)"
837 operands[3] = gen_reg_rtx (CCFPmode);
840 ;; *tsqrt* returning the fe flag
841 (define_expand "vsx_tsqrt<mode>2_fe"
843 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
845 (set (match_operand:SI 0 "gpc_reg_operand" "")
848 "VECTOR_UNIT_VSX_P (<MODE>mode)"
850 operands[3] = gen_reg_rtx (CCFPmode);
853 (define_insn "*vsx_tsqrt<mode>2_internal"
854 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
855 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
857 "VECTOR_UNIT_VSX_P (<MODE>mode)"
858 "x<VSv>tsqrt<VSs> %0,%x1"
859 [(set_attr "type" "<VStype_simple>")
860 (set_attr "fp_type" "<VSfptype_simple>")])
862 ;; Fused vector multiply/add instructions. Support the classical Altivec
863 ;; versions of fma, which allows the target to be a separate register from the
864 ;; 3 inputs. Under VSX, the target must be either the addend or the first
867 (define_insn "*vsx_fmav4sf4"
868 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
870 (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v")
871 (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v")
872 (match_operand:V4SF 3 "vsx_register_operand" "0,ws,0,wa,v")))]
873 "VECTOR_UNIT_VSX_P (V4SFmode)"
875 xvmaddasp %x0,%x1,%x2
876 xvmaddmsp %x0,%x1,%x3
877 xvmaddasp %x0,%x1,%x2
878 xvmaddmsp %x0,%x1,%x3
880 [(set_attr "type" "vecfloat")])
882 (define_insn "*vsx_fmav2df4"
883 [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa")
885 (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa")
886 (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0")
887 (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))]
888 "VECTOR_UNIT_VSX_P (V2DFmode)"
890 xvmaddadp %x0,%x1,%x2
891 xvmaddmdp %x0,%x1,%x3
892 xvmaddadp %x0,%x1,%x2
893 xvmaddmdp %x0,%x1,%x3"
894 [(set_attr "type" "vecdouble")])
896 (define_insn "*vsx_fms<mode>4"
897 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
899 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
900 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
902 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
903 "VECTOR_UNIT_VSX_P (<MODE>mode)"
905 xvmsuba<VSs> %x0,%x1,%x2
906 xvmsubm<VSs> %x0,%x1,%x3
907 xvmsuba<VSs> %x0,%x1,%x2
908 xvmsubm<VSs> %x0,%x1,%x3"
909 [(set_attr "type" "<VStype_mul>")])
911 (define_insn "*vsx_nfma<mode>4"
912 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
915 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
916 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0")
917 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
918 "VECTOR_UNIT_VSX_P (<MODE>mode)"
920 xvnmadda<VSs> %x0,%x1,%x2
921 xvnmaddm<VSs> %x0,%x1,%x3
922 xvnmadda<VSs> %x0,%x1,%x2
923 xvnmaddm<VSs> %x0,%x1,%x3"
924 [(set_attr "type" "<VStype_mul>")
925 (set_attr "fp_type" "<VSfptype_mul>")])
927 (define_insn "*vsx_nfmsv4sf4"
928 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
931 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
932 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
934 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
935 "VECTOR_UNIT_VSX_P (V4SFmode)"
937 xvnmsubasp %x0,%x1,%x2
938 xvnmsubmsp %x0,%x1,%x3
939 xvnmsubasp %x0,%x1,%x2
940 xvnmsubmsp %x0,%x1,%x3
941 vnmsubfp %0,%1,%2,%3"
942 [(set_attr "type" "vecfloat")])
944 (define_insn "*vsx_nfmsv2df4"
945 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
948 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
949 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
951 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
952 "VECTOR_UNIT_VSX_P (V2DFmode)"
954 xvnmsubadp %x0,%x1,%x2
955 xvnmsubmdp %x0,%x1,%x3
956 xvnmsubadp %x0,%x1,%x2
957 xvnmsubmdp %x0,%x1,%x3"
958 [(set_attr "type" "vecdouble")])
960 ;; Vector conditional expressions (no scalar version for these instructions)
;; Each lane of the result is set to all-ones when the comparison holds,
;; all-zeros otherwise (xvcmpeq/xvcmpgt/xvcmpge with the <VSs> suffix).
961 (define_insn "vsx_eq<mode>"
962 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
963 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
964 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
965 "VECTOR_UNIT_VSX_P (<MODE>mode)"
966 "xvcmpeq<VSs> %x0,%x1,%x2"
967 [(set_attr "type" "<VStype_simple>")
968 (set_attr "fp_type" "<VSfptype_simple>")])
;; Per-lane greater-than comparison.
970 (define_insn "vsx_gt<mode>"
971 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
972 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
973 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
974 "VECTOR_UNIT_VSX_P (<MODE>mode)"
975 "xvcmpgt<VSs> %x0,%x1,%x2"
976 [(set_attr "type" "<VStype_simple>")
977 (set_attr "fp_type" "<VSfptype_simple>")])
;; Per-lane greater-than-or-equal comparison.
979 (define_insn "*vsx_ge<mode>"
980 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
981 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
982 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
983 "VECTOR_UNIT_VSX_P (<MODE>mode)"
984 "xvcmpge<VSs> %x0,%x1,%x2"
985 [(set_attr "type" "<VStype_simple>")
986 (set_attr "fp_type" "<VSfptype_simple>")])
988 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
989 ;; indicate a combined status
990 (define_insn "*vsx_eq_<mode>_p"
993 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
994 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
996 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
997 (eq:VSX_F (match_dup 1)
999 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1000 "xvcmpeq<VSs>. %x0,%x1,%x2"
1001 [(set_attr "type" "<VStype_simple>")])
1003 (define_insn "*vsx_gt_<mode>_p"
1006 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
1007 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
1009 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1010 (gt:VSX_F (match_dup 1)
1012 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1013 "xvcmpgt<VSs>. %x0,%x1,%x2"
1014 [(set_attr "type" "<VStype_simple>")])
1016 (define_insn "*vsx_ge_<mode>_p"
1019 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
1020 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
1022 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1023 (ge:VSX_F (match_dup 1)
1025 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1026 "xvcmpge<VSs>. %x0,%x1,%x2"
1027 [(set_attr "type" "<VStype_simple>")])
1030 (define_insn "*vsx_xxsel<mode>"
1031 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1033 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1034 (match_operand:VSX_L 4 "zero_constant" ""))
1035 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
1036 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
1037 "VECTOR_MEM_VSX_P (<MODE>mode)"
1038 "xxsel %x0,%x3,%x2,%x1"
1039 [(set_attr "type" "vecperm")])
1041 (define_insn "*vsx_xxsel<mode>_uns"
1042 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1044 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1045 (match_operand:VSX_L 4 "zero_constant" ""))
1046 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
1047 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
1048 "VECTOR_MEM_VSX_P (<MODE>mode)"
1049 "xxsel %x0,%x3,%x2,%x1"
1050 [(set_attr "type" "vecperm")])
1053 (define_insn "vsx_copysign<mode>3"
1054 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1056 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
1057 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")]
1059 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1060 "xvcpsgn<VSs> %x0,%x2,%x1"
1061 [(set_attr "type" "<VStype_simple>")
1062 (set_attr "fp_type" "<VSfptype_simple>")])
1064 ;; For the conversions, limit the register class for the integer value to be
1065 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1066 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1067 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1068 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md.
;; Signed integer -> FP conversion; the mnemonic is assembled from the
;; scalar/vector prefix <VSv>, word size <VSc> and precision suffix <VSs>.
1069 (define_insn "vsx_float<VSi><mode>2"
1070 [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
1071 (float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1072 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1073 "x<VSv>cvsx<VSc><VSs> %x0,%x1"
1074 [(set_attr "type" "<VStype_simple>")
1075 (set_attr "fp_type" "<VSfptype_simple>")])
;; Unsigned integer -> FP conversion (cvux form of the above).
1077 (define_insn "vsx_floatuns<VSi><mode>2"
1078 [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
1079 (unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1080 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1081 "x<VSv>cvux<VSc><VSs> %x0,%x1"
1082 [(set_attr "type" "<VStype_simple>")
1083 (set_attr "fp_type" "<VSfptype_simple>")])
;; FP -> signed integer conversion, truncating (RTL "fix").
1085 (define_insn "vsx_fix_trunc<mode><VSi>2"
1086 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1087 (fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
1088 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1089 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1090 [(set_attr "type" "<VStype_simple>")
1091 (set_attr "fp_type" "<VSfptype_simple>")])
;; FP -> unsigned integer conversion, truncating.
1093 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1094 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1095 (unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
1096 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1097 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1098 [(set_attr "type" "<VStype_simple>")
1099 (set_attr "fp_type" "<VSfptype_simple>")])
1101 ;; Math rounding functions
;; Round FP to an integral FP value; the "i" mnemonic form (the exact
;; rounding semantics follow the ISA's ri form for this unspec).
1102 (define_insn "vsx_x<VSv>r<VSs>i"
1103 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
1104 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
1105 UNSPEC_VSX_ROUND_I))]
1106 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1107 "x<VSv>r<VSs>i %x0,%x1"
1108 [(set_attr "type" "<VStype_simple>")
1109 (set_attr "fp_type" "<VSfptype_simple>")])
;; Round FP to an integral FP value; the "ic" mnemonic form (ISA ric form,
;; distinguished from the plain ri form by a separate unspec).
1111 (define_insn "vsx_x<VSv>r<VSs>ic"
1112 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
1113 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
1114 UNSPEC_VSX_ROUND_IC))]
1115 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1116 "x<VSv>r<VSs>ic %x0,%x1"
1117 [(set_attr "type" "<VStype_simple>")
1118 (set_attr "fp_type" "<VSfptype_simple>")])
;; Truncate toward zero (RTL "fix" on an FP result -> xvr...iz).
1120 (define_insn "vsx_btrunc<mode>2"
1121 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1122 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
1123 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1124 "xvr<VSs>iz %x0,%x1"
1125 [(set_attr "type" "<VStype_simple>")
1126 (set_attr "fp_type" "<VSfptype_simple>")])
1128 (define_insn "*vsx_b2trunc<mode>2"
1129 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
1130 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
1132 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1133 "x<VSv>r<VSs>iz %x0,%x1"
1134 [(set_attr "type" "<VStype_simple>")
1135 (set_attr "fp_type" "<VSfptype_simple>")])
1137 (define_insn "vsx_floor<mode>2"
1138 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1139 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
1141 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1142 "xvr<VSs>im %x0,%x1"
1143 [(set_attr "type" "<VStype_simple>")
1144 (set_attr "fp_type" "<VSfptype_simple>")])
1146 (define_insn "vsx_ceil<mode>2"
1147 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
1148 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
1150 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1151 "xvr<VSs>ip %x0,%x1"
1152 [(set_attr "type" "<VStype_simple>")
1153 (set_attr "fp_type" "<VSfptype_simple>")])
1156 ;; VSX convert to/from double vector
1158 ;; Convert between single and double precision
1159 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1160 ;; scalar single precision instructions internally use the double format.
1161 ;; Prefer the altivec registers, since we likely will need to do a vperm
;; The VS_spdp_* mode attributes select the direction (V4SF<->V2DF) plus the
;; corresponding instruction name, result mode, and scheduling type.
1162 (define_insn "vsx_<VS_spdp_insn>"
1163 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?wa")
1164 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,wa")]
1165 UNSPEC_VSX_CVSPDP))]
1166 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1167 "<VS_spdp_insn> %x0,%x1"
1168 [(set_attr "type" "<VS_spdp_type>")])
1170 ;; xscvspdp, represent the scalar SF type as V4SF
1171 (define_insn "vsx_xscvspdp"
1172 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
1173 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
1174 UNSPEC_VSX_CVSPDP))]
1175 "VECTOR_UNIT_VSX_P (V4SFmode)"
1177 [(set_attr "type" "fp")])
1179 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1180 ;; format of scalars is actually DF.
1181 (define_insn "vsx_xscvdpsp_scalar"
1182 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1183 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1184 UNSPEC_VSX_CVSPDP))]
1185 "VECTOR_UNIT_VSX_P (V4SFmode)"
1187 [(set_attr "type" "fp")])
1189 ;; Same as vsx_xscvspdp, but use SF as the type
;; Used elsewhere in this file (e.g. vsx_extract_v4sf and the V4SF
;; reductions) to produce a scalar SF result from element 0 of a V4SF.
1190 (define_insn "vsx_xscvspdp_scalar2"
1191 [(set (match_operand:SF 0 "vsx_register_operand" "=f")
1192 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1193 UNSPEC_VSX_CVSPDP))]
1194 "VECTOR_UNIT_VSX_P (V4SFmode)"
1196 [(set_attr "type" "fp")])
1198 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1199 (define_insn "vsx_xscvdpspn"
1200 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
1201 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1202 UNSPEC_VSX_CVDPSPN))]
1205 [(set_attr "type" "fp")])
;; Non-signalling SP->DP direction of the pair above.
1207 (define_insn "vsx_xscvspdpn"
1208 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
1209 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
1210 UNSPEC_VSX_CVSPDPN))]
1213 [(set_attr "type" "fp")])
;; SF-typed source variant; scalar SF is held internally in DF format.
1215 (define_insn "vsx_xscvdpspn_scalar"
1216 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1217 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1218 UNSPEC_VSX_CVDPSPN))]
1221 [(set_attr "type" "fp")])
1223 ;; Used by direct move to move a SFmode value from GPR to VSX register
1224 (define_insn "vsx_xscvspdpn_directmove"
1225 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1226 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1227 UNSPEC_VSX_CVSPDPN))]
1230 [(set_attr "type" "fp")])
1232 ;; Convert from 64-bit to 32-bit types
1233 ;; Note, favor the Altivec registers since the usual use of these instructions
1234 ;; is in vector converts and we need to use the Altivec vperm instruction.
;; V2DF -> V4SI, truncating toward zero, signed.
1236 (define_insn "vsx_xvcvdpsxws"
1237 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1238 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1239 UNSPEC_VSX_CVDPSXWS))]
1240 "VECTOR_UNIT_VSX_P (V2DFmode)"
1241 "xvcvdpsxws %x0,%x1"
1242 [(set_attr "type" "vecdouble")])
;; V2DF -> V4SI, truncating toward zero, unsigned.
1244 (define_insn "vsx_xvcvdpuxws"
1245 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1246 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1247 UNSPEC_VSX_CVDPUXWS))]
1248 "VECTOR_UNIT_VSX_P (V2DFmode)"
1249 "xvcvdpuxws %x0,%x1"
1250 [(set_attr "type" "vecdouble")])
;; Signed 64-bit integer elements -> single precision (xvcvsxdsp).
1252 (define_insn "vsx_xvcvsxdsp"
1253 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1254 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1255 UNSPEC_VSX_CVSXDSP))]
1256 "VECTOR_UNIT_VSX_P (V2DFmode)"
1258 [(set_attr "type" "vecfloat")])
;; Unsigned 64-bit integer elements -> single precision (xvcvuxdsp).
1260 (define_insn "vsx_xvcvuxdsp"
1261 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1262 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1263 UNSPEC_VSX_CVUXDSP))]
1264 "VECTOR_UNIT_VSX_P (V2DFmode)"
1266 [(set_attr "type" "vecdouble")])
1268 ;; Convert from 32-bit to 64-bit types
;; Signed word elements -> double precision (xvcvsxwdp).
1269 (define_insn "vsx_xvcvsxwdp"
1270 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1271 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1272 UNSPEC_VSX_CVSXWDP))]
1273 "VECTOR_UNIT_VSX_P (V2DFmode)"
1275 [(set_attr "type" "vecdouble")])
;; Unsigned word elements -> double precision (xvcvuxwdp).
1277 (define_insn "vsx_xvcvuxwdp"
1278 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1279 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1280 UNSPEC_VSX_CVUXWDP))]
1281 "VECTOR_UNIT_VSX_P (V2DFmode)"
1283 [(set_attr "type" "vecdouble")])
;; Single precision -> signed doubleword integers (xvcvspsxds).  Prefer the
;; Altivec registers (v) since the result typically feeds a vperm.
1285 (define_insn "vsx_xvcvspsxds"
1286 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1287 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1288 UNSPEC_VSX_CVSPSXDS))]
1289 "VECTOR_UNIT_VSX_P (V2DFmode)"
1290 "xvcvspsxds %x0,%x1"
1291 [(set_attr "type" "vecdouble")])
;; Single precision -> unsigned doubleword integers (xvcvspuxds).
1293 (define_insn "vsx_xvcvspuxds"
1294 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1295 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1296 UNSPEC_VSX_CVSPUXDS))]
1297 "VECTOR_UNIT_VSX_P (V2DFmode)"
1298 "xvcvspuxds %x0,%x1"
1299 [(set_attr "type" "vecdouble")])
1301 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
1302 ;; since the xsrdpiz instruction does not truncate the value if the floating
1303 ;; point value is < LONG_MIN or > LONG_MAX.
;; Combiner pattern: collapses round-trip float->int->float into a single
;; round-toward-zero instruction, guarded by -ffast-math style flags and
;; TARGET_FRIZ so the out-of-range caveat above cannot matter.
1304 (define_insn "*vsx_float_fix_<mode>2"
1305 [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?wa")
1308 (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?wa"))))]
1309 "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1310 && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations
1311 && !flag_trapping_math && TARGET_FRIZ"
1312 "x<VSv>r<VSs>iz %x0,%x1"
1313 [(set_attr "type" "<VStype_simple>")
1314 (set_attr "fp_type" "<VSfptype_simple>")])
1317 ;; Permute operations
1319 ;; Build a V2DF/V2DI vector from two scalars
;; On little endian the operands are swapped so that the element order in
;; the register matches the RTL vec_concat order.
1320 (define_insn "vsx_concat_<mode>"
1321 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa")
1323 (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
1324 (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))]
1325 "VECTOR_MEM_VSX_P (<MODE>mode)"
1327 if (BYTES_BIG_ENDIAN)
1328 return "xxpermdi %x0,%x1,%x2,0";
1330 return "xxpermdi %x0,%x2,%x1,0";
1332 [(set_attr "type" "vecperm")])
1334 ;; Special purpose concat using xxpermdi to glue two single precision values
1335 ;; together, relying on the fact that internally scalar floats are represented
1336 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
1337 (define_insn "vsx_concat_v2sf"
1338 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1340 [(match_operand:SF 1 "vsx_register_operand" "f,f")
1341 (match_operand:SF 2 "vsx_register_operand" "f,f")]
1342 UNSPEC_VSX_CONCAT))]
1343 "VECTOR_MEM_VSX_P (V2DFmode)"
1345 if (BYTES_BIG_ENDIAN)
1346 return "xxpermdi %x0,%x1,%x2,0";
1348 return "xxpermdi %x0,%x2,%x1,0";
1350 [(set_attr "type" "vecperm")])
1352 ;; xxpermdi for little endian loads and stores. We need several of
1353 ;; these since the form of the PARALLEL differs by mode.
;; Each pattern below is the same doubleword swap (xxpermdi ...,2); only the
;; element-permutation PARALLEL differs because it is written per-mode.
1354 (define_insn "*vsx_xxpermdi2_le_<mode>"
1355 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
1357 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
1358 (parallel [(const_int 1) (const_int 0)])))]
1359 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1360 "xxpermdi %x0,%x1,%x1,2"
1361 [(set_attr "type" "vecperm")])
;; 4 x 32-bit element variant of the doubleword swap.
1363 (define_insn "*vsx_xxpermdi4_le_<mode>"
1364 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
1366 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
1367 (parallel [(const_int 2) (const_int 3)
1368 (const_int 0) (const_int 1)])))]
1369 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1370 "xxpermdi %x0,%x1,%x1,2"
1371 [(set_attr "type" "vecperm")])
;; 8 x 16-bit element variant of the doubleword swap.
1373 (define_insn "*vsx_xxpermdi8_le_V8HI"
1374 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1376 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1377 (parallel [(const_int 4) (const_int 5)
1378 (const_int 6) (const_int 7)
1379 (const_int 0) (const_int 1)
1380 (const_int 2) (const_int 3)])))]
1381 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1382 "xxpermdi %x0,%x1,%x1,2"
1383 [(set_attr "type" "vecperm")])
;; 16 x 8-bit element variant of the doubleword swap.
1385 (define_insn "*vsx_xxpermdi16_le_V16QI"
1386 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1388 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1389 (parallel [(const_int 8) (const_int 9)
1390 (const_int 10) (const_int 11)
1391 (const_int 12) (const_int 13)
1392 (const_int 14) (const_int 15)
1393 (const_int 0) (const_int 1)
1394 (const_int 2) (const_int 3)
1395 (const_int 4) (const_int 5)
1396 (const_int 6) (const_int 7)])))]
1397 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1398 "xxpermdi %x0,%x1,%x1,2"
1399 [(set_attr "type" "vecperm")])
1401 ;; lxvd2x for little endian loads. We need several of
1402 ;; these since the form of the PARALLEL differs by mode.
;; lxvd2x loads two doublewords without the element swap lvx would need, so
;; on LE the load is expressed as a vec_select that swaps the doublewords.
1403 (define_insn "*vsx_lxvd2x2_le_<mode>"
1404 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
1406 (match_operand:VSX_D 1 "memory_operand" "Z")
1407 (parallel [(const_int 1) (const_int 0)])))]
1408 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1410 [(set_attr "type" "vecload")])
;; 4 x 32-bit element form of the LE lxvd2x load.
1412 (define_insn "*vsx_lxvd2x4_le_<mode>"
1413 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
1415 (match_operand:VSX_W 1 "memory_operand" "Z")
1416 (parallel [(const_int 2) (const_int 3)
1417 (const_int 0) (const_int 1)])))]
1418 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1420 [(set_attr "type" "vecload")])
;; 8 x 16-bit element form of the LE lxvd2x load.
1422 (define_insn "*vsx_lxvd2x8_le_V8HI"
1423 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1425 (match_operand:V8HI 1 "memory_operand" "Z")
1426 (parallel [(const_int 4) (const_int 5)
1427 (const_int 6) (const_int 7)
1428 (const_int 0) (const_int 1)
1429 (const_int 2) (const_int 3)])))]
1430 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1432 [(set_attr "type" "vecload")])
;; 16 x 8-bit element form of the LE lxvd2x load.
1434 (define_insn "*vsx_lxvd2x16_le_V16QI"
1435 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1437 (match_operand:V16QI 1 "memory_operand" "Z")
1438 (parallel [(const_int 8) (const_int 9)
1439 (const_int 10) (const_int 11)
1440 (const_int 12) (const_int 13)
1441 (const_int 14) (const_int 15)
1442 (const_int 0) (const_int 1)
1443 (const_int 2) (const_int 3)
1444 (const_int 4) (const_int 5)
1445 (const_int 6) (const_int 7)])))]
1446 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1448 [(set_attr "type" "vecload")])
1450 ;; stxvd2x for little endian stores. We need several of
1451 ;; these since the form of the PARALLEL differs by mode.
;; Mirror of the lxvd2x patterns above: the store swaps doublewords on LE.
1452 (define_insn "*vsx_stxvd2x2_le_<mode>"
1453 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
1455 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
1456 (parallel [(const_int 1) (const_int 0)])))]
1457 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1459 [(set_attr "type" "vecstore")])
;; 4 x 32-bit element form of the LE stxvd2x store.
1461 (define_insn "*vsx_stxvd2x4_le_<mode>"
1462 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
1464 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
1465 (parallel [(const_int 2) (const_int 3)
1466 (const_int 0) (const_int 1)])))]
1467 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1469 [(set_attr "type" "vecstore")])
;; 8 x 16-bit element form of the LE stxvd2x store.
1471 (define_insn "*vsx_stxvd2x8_le_V8HI"
1472 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1474 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1475 (parallel [(const_int 4) (const_int 5)
1476 (const_int 6) (const_int 7)
1477 (const_int 0) (const_int 1)
1478 (const_int 2) (const_int 3)])))]
1479 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1481 [(set_attr "type" "vecstore")])
;; 16 x 8-bit element form of the LE stxvd2x store.
1483 (define_insn "*vsx_stxvd2x16_le_V16QI"
1484 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1486 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1487 (parallel [(const_int 8) (const_int 9)
1488 (const_int 10) (const_int 11)
1489 (const_int 12) (const_int 13)
1490 (const_int 14) (const_int 15)
1491 (const_int 0) (const_int 1)
1492 (const_int 2) (const_int 3)
1493 (const_int 4) (const_int 5)
1494 (const_int 6) (const_int 7)])))]
1495 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1497 [(set_attr "type" "vecstore")])
1499 ;; Convert a TImode value into V1TImode
;; Expander only: element index must be 0 (V1TI has one element); the move
;; is done through a lowpart subreg, so no instruction is emitted beyond
;; the register move itself.
1500 (define_expand "vsx_set_v1ti"
1501 [(match_operand:V1TI 0 "nonimmediate_operand" "")
1502 (match_operand:V1TI 1 "nonimmediate_operand" "")
1503 (match_operand:TI 2 "input_operand" "")
1504 (match_operand:QI 3 "u5bit_cint_operand" "")]
1505 "VECTOR_MEM_VSX_P (V1TImode)"
1507 if (operands[3] != const0_rtx)
1510 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]))
1514 ;; Set the element of a V2DI/VD2F mode
;; Insert scalar operand 2 into element operand[3] of vector operand 1.
;; idx_first accounts for the reversed element numbering on little endian.
1515 (define_insn "vsx_set_<mode>"
1516 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
1517 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wd,wa")
1518 (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")
1519 (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
1521 "VECTOR_MEM_VSX_P (<MODE>mode)"
1523 int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
1524 if (INTVAL (operands[3]) == idx_first)
1525 return \"xxpermdi %x0,%x2,%x1,1\";
1526 else if (INTVAL (operands[3]) == 1 - idx_first)
1527 return \"xxpermdi %x0,%x1,%x2,0\";
1531 [(set_attr "type" "vecperm")])
1533 ;; Extract a DF/DI element from V2DF/V2DI
;; Expander; the insn patterns that follow pick register/register,
;; register/memory and memory forms of the extraction.
1534 (define_expand "vsx_extract_<mode>"
1535 [(set (match_operand:<VS_scalar> 0 "register_operand" "")
1536 (vec_select:<VS_scalar> (match_operand:VSX_D 1 "register_operand" "")
1538 [(match_operand:QI 2 "u5bit_cint_operand" "")])))]
1539 "VECTOR_MEM_VSX_P (<MODE>mode)"
1542 ;; Optimize cases were we can do a simple or direct move.
1543 ;; Or see if we can avoid doing the move at all
;; internal1 handles the power8 direct-move case: the selected element is
;; the scalar (upper) doubleword (wD constraint), so the result is either
;; already in place, a GPR direct move (mfvsrd), or a register copy.
1544 (define_insn "*vsx_extract_<mode>_internal1"
1545 [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,ws,?wa,r")
1546 (vec_select:<VS_scalar>
1547 (match_operand:VSX_D 1 "register_operand" "d,wd,wa,wm")
1549 [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD,wD")])))]
1550 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
1552 int op0_regno = REGNO (operands[0]);
1553 int op1_regno = REGNO (operands[1]);
1555 if (op0_regno == op1_regno)
1558 if (INT_REGNO_P (op0_regno))
1559 return "mfvsrd %0,%x1";
1561 if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
1564 return "xxlor %x0,%x1,%x1";
1566 [(set_attr "type" "fp,vecsimple,vecsimple,mftgpr")
1567 (set_attr "length" "4")])
;; internal2 handles the general constant-index extract: element index may
;; be either doubleword, falling back to xxpermdi with a computed DM field
;; (endian-adjusted via fldDM) when a plain copy will not do.
1569 (define_insn "*vsx_extract_<mode>_internal2"
1570 [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d,ws,ws,?wa")
1571 (vec_select:<VS_scalar>
1572 (match_operand:VSX_D 1 "vsx_register_operand" "d,wd,wd,wa")
1573 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "wD,wD,i,i")])))]
1574 "VECTOR_MEM_VSX_P (<MODE>mode)
1575 && (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE
1576 || INTVAL (operands[2]) != VECTOR_ELEMENT_SCALAR_64BIT)"
1579 gcc_assert (UINTVAL (operands[2]) <= 1);
1581 if (INTVAL (operands[2]) == VECTOR_ELEMENT_SCALAR_64BIT)
1583 int op0_regno = REGNO (operands[0]);
1584 int op1_regno = REGNO (operands[1]);
1586 if (op0_regno == op1_regno)
1589 if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
1592 return "xxlor %x0,%x1,%x1";
1595 fldDM = INTVAL (operands[2]) << 1;
1596 if (!BYTES_BIG_ENDIAN)
1598 operands[3] = GEN_INT (fldDM);
1599 return "xxpermdi %x0,%x1,%x1,%3";
1601 [(set_attr "type" "fp,vecsimple,vecperm,vecperm")
1602 (set_attr "length" "4")])
1604 ;; Optimize extracting a single scalar element from memory if the scalar is in
1605 ;; the correct location to use a single load.
;; The wD constraint restricts the index to the element that lies at the
;; start of the in-memory vector, so a plain scalar load suffices; the
;; attribute alternatives pick update/indexed flavors of the load type.
1606 (define_insn "*vsx_extract_<mode>_load"
1607 [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
1608 (vec_select:<VS_scalar>
1609 (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
1610 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
1611 "VECTOR_MEM_VSX_P (<MODE>mode)"
1616 [(set_attr_alternative "type"
1618 (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
1619 (const_string "fpload_ux")
1621 (match_test "update_address_mem (operands[1], VOIDmode)")
1622 (const_string "fpload_u")
1623 (const_string "fpload")))
1624 (const_string "fpload")
1626 (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
1627 (const_string "load_ux")
1629 (match_test "update_address_mem (operands[1], VOIDmode)")
1630 (const_string "load_u")
1631 (const_string "load")))])
1632 (set_attr "length" "4")])
1634 ;; Optimize storing a single scalar element that is the right location to
;; Store-side twin of the pattern above: a single scalar store replaces the
;; extract + store sequence when the index is the wD element.
1636 (define_insn "*vsx_extract_<mode>_store"
1637 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
1638 (vec_select:<VS_scalar>
1639 (match_operand:VSX_D 1 "register_operand" "d,wd,wa")
1640 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
1641 "VECTOR_MEM_VSX_P (<MODE>mode)"
1646 [(set_attr_alternative "type"
1648 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
1649 (const_string "fpstore_ux")
1651 (match_test "update_address_mem (operands[0], VOIDmode)")
1652 (const_string "fpstore_u")
1653 (const_string "fpstore")))
1654 (const_string "fpstore")
1655 (const_string "fpstore")])
1656 (set_attr "length" "4")])
1658 ;; Extract a SF element from V4SF
;; Split into: rotate the wanted element into position with xxsldwi (unless
;; it is already there, alternative "O"), then convert it to a scalar with
;; vsx_xscvspdp_scalar2.  Element index is endian-adjusted (3 - n on LE).
1659 (define_insn_and_split "vsx_extract_v4sf"
1660 [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
1662 (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
1663 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
1664 (clobber (match_scratch:V4SF 3 "=X,0"))]
1665 "VECTOR_UNIT_VSX_P (V4SFmode)"
1673 rtx op0 = operands[0];
1674 rtx op1 = operands[1];
1675 rtx op2 = operands[2];
1676 rtx op3 = operands[3];
1678 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
1684 if (GET_CODE (op3) == SCRATCH)
1685 op3 = gen_reg_rtx (V4SFmode);
1686 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2));
1689 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
1692 [(set_attr "length" "4,8")
1693 (set_attr "type" "fp")])
1695 ;; Expand the builtin form of xxpermdi to canonical rtl.
;; Decomposes the 2-bit xxpermdi mask into two element selectors and
;; funnels all modes through the V2DF/V2DI canonical pattern (other modes
;; are punned to V2DI via lowpart subregs).  On LE the selectors/operands
;; are pre-swapped to undo the LE adjustment vsx_xxpermdi2_<mode>_1 makes.
1696 (define_expand "vsx_xxpermdi_<mode>"
1697 [(match_operand:VSX_L 0 "vsx_register_operand" "")
1698 (match_operand:VSX_L 1 "vsx_register_operand" "")
1699 (match_operand:VSX_L 2 "vsx_register_operand" "")
1700 (match_operand:QI 3 "u5bit_cint_operand" "")]
1701 "VECTOR_MEM_VSX_P (<MODE>mode)"
1703 rtx target = operands[0];
1704 rtx op0 = operands[1];
1705 rtx op1 = operands[2];
1706 int mask = INTVAL (operands[3]);
1707 rtx perm0 = GEN_INT ((mask >> 1) & 1);
1708 rtx perm1 = GEN_INT ((mask & 1) + 2);
1709 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
1711 if (<MODE>mode == V2DFmode)
1712 gen = gen_vsx_xxpermdi2_v2df_1;
1715 gen = gen_vsx_xxpermdi2_v2di_1;
1716 if (<MODE>mode != V2DImode)
1718 target = gen_lowpart (V2DImode, target);
1719 op0 = gen_lowpart (V2DImode, op0);
1720 op1 = gen_lowpart (V2DImode, op1);
1723 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
1724 transformation we don't want; it is necessary for
1725 rs6000_expand_vec_perm_const_1 but not for this use. So we
1726 prepare for that by reversing the transformation here. */
1727 if (BYTES_BIG_ENDIAN)
1728 emit_insn (gen (target, op0, op1, perm0, perm1));
1731 rtx p0 = GEN_INT (3 - INTVAL (perm1));
1732 rtx p1 = GEN_INT (3 - INTVAL (perm0));
1733 emit_insn (gen (target, op1, op0, p0, p1));
;; Canonical vec_select-of-vec_concat form of xxpermdi.  Selectors use
;; big-endian RTL numbering: operand 3 picks from operand 1 (0..1) and
;; operand 4 from operand 2 (2..3); the output code recomputes the 2-bit
;; DM immediate and swaps inputs for little endian.
1738 (define_insn "vsx_xxpermdi2_<mode>_1"
1739 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
1741 (vec_concat:<VS_double>
1742 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
1743 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
1744 (parallel [(match_operand 3 "const_0_to_1_operand" "")
1745 (match_operand 4 "const_2_to_3_operand" "")])))]
1746 "VECTOR_MEM_VSX_P (<MODE>mode)"
1750 /* For little endian, swap operands and invert/swap selectors
1751 to get the correct xxpermdi. The operand swap sets up the
1752 inputs as a little endian array. The selectors are swapped
1753 because they are defined to use big endian ordering. The
1754 selectors are inverted to get the correct doublewords for
1755 little endian ordering. */
1756 if (BYTES_BIG_ENDIAN)
1758 op3 = INTVAL (operands[3]);
1759 op4 = INTVAL (operands[4]);
1763 op3 = 3 - INTVAL (operands[4]);
1764 op4 = 3 - INTVAL (operands[3]);
1767 mask = (op3 << 1) | (op4 - 2);
1768 operands[3] = GEN_INT (mask);
1770 if (BYTES_BIG_ENDIAN)
1771 return "xxpermdi %x0,%x1,%x2,%3";
1773 return "xxpermdi %x0,%x2,%x1,%3";
1775 [(set_attr "type" "vecperm")])
;; Standard-name expander for constant vector permutes on V2DF/V2DI;
;; delegates the real work to rs6000_expand_vec_perm_const.
1777 (define_expand "vec_perm_const<mode>"
1778 [(match_operand:VSX_D 0 "vsx_register_operand" "")
1779 (match_operand:VSX_D 1 "vsx_register_operand" "")
1780 (match_operand:VSX_D 2 "vsx_register_operand" "")
1781 (match_operand:V2DI 3 "" "")]
1782 "VECTOR_MEM_VSX_P (<MODE>mode)"
1784 if (rs6000_expand_vec_perm_const (operands))
1790 ;; Expanders for builtins
;; vec_mergel: build the result from the low (odd-numbered) elements of the
;; two inputs.  For LE with -maltivec=be the selector/operand order is
;; swapped so the builtin keeps its big-endian element semantics.
1791 (define_expand "vsx_mergel_<mode>"
1792 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
1793 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
1794 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
1795 "VECTOR_MEM_VSX_P (<MODE>mode)"
1800 /* Special handling for LE with -maltivec=be. */
1801 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
1803 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
1804 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
1808 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
1809 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
1812 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
1813 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
;; vec_mergeh: same structure as vsx_mergel above but selects the high
;; (even-numbered) elements.
1817 (define_expand "vsx_mergeh_<mode>"
1818 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
1819 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
1820 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
1821 "VECTOR_MEM_VSX_P (<MODE>mode)"
1826 /* Special handling for LE with -maltivec=be. */
1827 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
1829 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
1830 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
1834 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
1835 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
1838 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
1839 emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
;; Splat a 64-bit scalar to both elements of a V2DF/V2DI vector; register
;; sources use xxpermdi with the same input twice, memory sources load.
1844 (define_insn "vsx_splat_<mode>"
1845 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
1846 (vec_duplicate:VSX_D
1847 (match_operand:<VS_scalar> 1 "splat_input_operand" "ws,f,Z,wa,wa,Z")))]
1848 "VECTOR_MEM_VSX_P (<MODE>mode)"
1850 xxpermdi %x0,%x1,%x1,0
1851 xxpermdi %x0,%x1,%x1,0
1853 xxpermdi %x0,%x1,%x1,0
1854 xxpermdi %x0,%x1,%x1,0
1856 [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
;; Splat one 32-bit element of a V4SF/V4SI vector to all four positions;
;; the element number is endian-adjusted (3 - n on little endian).
1859 (define_insn "vsx_xxspltw_<mode>"
1860 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1861 (vec_duplicate:VSX_W
1862 (vec_select:<VS_scalar>
1863 (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1865 [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
1866 "VECTOR_MEM_VSX_P (<MODE>mode)"
1868 if (!BYTES_BIG_ENDIAN)
1869 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
1871 return "xxspltw %x0,%x1,%2";
1873 [(set_attr "type" "vecperm")])
;; Unspec form of xxspltw with NO endian adjustment -- the builtin passes
;; the raw hardware element number straight through.
1875 (define_insn "vsx_xxspltw_<mode>_direct"
1876 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1877 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1878 (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
1879 UNSPEC_VSX_XXSPLTW))]
1880 "VECTOR_MEM_VSX_P (<MODE>mode)"
1881 "xxspltw %x0,%x1,%2"
1882 [(set_attr "type" "vecperm")])
1884 ;; V4SF/V4SI interleave
;; xxmrghw: interleave the two high words of each input (BE numbering).
1885 (define_insn "vsx_xxmrghw_<mode>"
1886 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1888 (vec_concat:<VS_double>
1889 (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1890 (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa"))
1891 (parallel [(const_int 0) (const_int 4)
1892 (const_int 1) (const_int 5)])))]
1893 "VECTOR_MEM_VSX_P (<MODE>mode)"
1894 "xxmrghw %x0,%x1,%x2"
1895 [(set_attr "type" "vecperm")])
;; xxmrglw: interleave the two low words of each input (BE numbering).
1897 (define_insn "vsx_xxmrglw_<mode>"
1898 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
1900 (vec_concat:<VS_double>
1901 (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
1902 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa"))
1903 (parallel [(const_int 2) (const_int 6)
1904 (const_int 3) (const_int 7)])))]
1905 "VECTOR_MEM_VSX_P (<MODE>mode)"
1906 "xxmrglw %x0,%x1,%x2"
1907 [(set_attr "type" "vecperm")])
1909 ;; Shift left double by word immediate
;; Concatenates operands 1 and 2 and extracts a 16-byte window starting at
;; word operand[3]; used heavily by the extract/reduction splitters below.
1910 (define_insn "vsx_xxsldwi_<mode>"
1911 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
1912 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
1913 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
1914 (match_operand:QI 3 "u5bit_cint_operand" "i")]
1916 "VECTOR_MEM_VSX_P (<MODE>mode)"
1917 "xxsldwi %x0,%x1,%x2,%3"
1918 [(set_attr "type" "vecperm")])
1921 ;; Vector reduction insns and splitters
;; V2DF reduction: shift the high element down with xxsldwi, then apply the
;; reduction operation (plus/smin/smax via VEC_reduc iterator) so the
;; result lands in both elements.
1923 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
1924 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
1928 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
1929 (parallel [(const_int 1)]))
1932 (parallel [(const_int 0)])))
1934 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
1935 "VECTOR_UNIT_VSX_P (V2DFmode)"
1941 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
1942 ? gen_reg_rtx (V2DFmode)
1944 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
1945 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
1948 [(set_attr "length" "8")
1949 (set_attr "type" "veccomplex")])
;; V4SF reduction: two shift+op rounds (by 2 words, then by 3 words) fold
;; all four elements; result represented with UNSPEC_REDUC.
1951 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
1952 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
1954 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
1955 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
1956 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
1957 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
1958 "VECTOR_UNIT_VSX_P (V4SFmode)"
1964 rtx op0 = operands[0];
1965 rtx op1 = operands[1];
1966 rtx tmp2, tmp3, tmp4;
1968 if (can_create_pseudo_p ())
1970 tmp2 = gen_reg_rtx (V4SFmode);
1971 tmp3 = gen_reg_rtx (V4SFmode);
1972 tmp4 = gen_reg_rtx (V4SFmode);
1981 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
1982 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
1983 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
1984 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
1987 [(set_attr "length" "16")
1988 (set_attr "type" "veccomplex")])
1990 ;; Combiner patterns with the vector reduction patterns that knows we can get
1991 ;; to the top element of the V2DF array without doing an extract.
;; Scalar-result V2DF reduction: the high element is already a DF scalar
;; (gen_highpart), so only the low element needs an extract before the op.
1993 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
1994 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa")
1999 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2000 (parallel [(const_int 1)]))
2003 (parallel [(const_int 0)])))
2005 (parallel [(const_int 1)])))
2006 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2007 "VECTOR_UNIT_VSX_P (V2DFmode)"
2013 rtx hi = gen_highpart (DFmode, operands[1]);
2014 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2015 ? gen_reg_rtx (DFmode)
2018 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2019 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2022 [(set_attr "length" "8")
2023 (set_attr "type" "veccomplex")])
;; Scalar-result V4SF reduction: same two shift+op rounds as the vector
;; form, plus a final xscvspdp_scalar2 to produce the SF scalar.
2025 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2026 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2029 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2030 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2031 (parallel [(const_int 3)])))
2032 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2033 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2034 (clobber (match_scratch:V4SF 4 "=0,0"))]
2035 "VECTOR_UNIT_VSX_P (V4SFmode)"
2041 rtx op0 = operands[0];
2042 rtx op1 = operands[1];
2043 rtx tmp2, tmp3, tmp4, tmp5;
2045 if (can_create_pseudo_p ())
2047 tmp2 = gen_reg_rtx (V4SFmode);
2048 tmp3 = gen_reg_rtx (V4SFmode);
2049 tmp4 = gen_reg_rtx (V4SFmode);
2050 tmp5 = gen_reg_rtx (V4SFmode);
2060 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2061 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2062 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2063 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2064 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2067 [(set_attr "length" "20")
2068 (set_attr "type" "veccomplex")])
2071 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
;; First form: the li result is the base register of the fused load
;; (reg + index); emits li followed immediately by the indexed vector load
;; so the hardware can fuse the pair.
2073 [(set (match_operand:P 0 "base_reg_operand" "")
2074 (match_operand:P 1 "short_cint_operand" ""))
2075 (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2076 (mem:VSX_M2 (plus:P (match_dup 0)
2077 (match_operand:P 3 "int_reg_operand" ""))))]
2078 "TARGET_VSX && TARGET_P8_FUSION"
2079 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2080 [(set_attr "length" "8")
2081 (set_attr "type" "vecload")])
;; Second form: same fusion with the plus operands commuted (index first).
2084 [(set (match_operand:P 0 "base_reg_operand" "")
2085 (match_operand:P 1 "short_cint_operand" ""))
2086 (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2087 (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
2089 "TARGET_VSX && TARGET_P8_FUSION"
2090 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2091 [(set_attr "length" "8")
2092 (set_attr "type" "vecload")])