1 ;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
20 ;; Define an insn type attribute. This is used in function unit delay
22 ;; multi0 is a multiple insn rtl whose first insn is in pipe0
23 ;; multi1 is a multiple insn rtl whose first insn is in pipe1
24 (define_attr "type" "fx2,shuf,fx3,load,store,br,spr,lnop,nop,fxb,fp6,fp7,fpd,iprefetch,multi0,multi1,hbr,convert"
28 (define_attr "length" ""
;; Tuning selector: "cell" is the base SPU model, "celledp" the
;; enhanced-double-precision variant; the value comes from spu_tune.
31 (define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
32 ;; Processor type -- this attribute must exactly match the processor_type
33 ;; enumeration in spu.h.
;; Processor attribute; single value, read from spu_cpu_attr (see spu.h
;; comment above: must match the processor_type enumeration).
35 (define_attr "cpu" "spu"
36 (const (symbol_ref "spu_cpu_attr")))
38 ; (define_function_unit NAME MULTIPLICITY SIMULTANEITY
39 ; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
;; Scheduling resources: the two issue pipes plus the floating-point
;; unit and the load/store unit.
41 (define_cpu_unit "pipe0,pipe1,fp,ls")
43 (define_insn_reservation "NOP" 1 (eq_attr "type" "nop")
46 (define_insn_reservation "FX2" 2 (eq_attr "type" "fx2")
49 (define_insn_reservation "FX3" 4 (eq_attr "type" "fx3,fxb")
;; Single-precision FP: 6-cycle latency; reserves pipe0 and the fp unit
;; together for the first cycle, then is fully pipelined.
52 (define_insn_reservation "FP6" 6 (eq_attr "type" "fp6")
53 "pipe0 + fp, nothing*5")
;; 7-cycle FP ops: pipe0 in the first cycle, the fp unit in the second
;; (note "pipe0, fp" — sequential — unlike FP6's "pipe0 + fp").
55 (define_insn_reservation "FP7" 7 (eq_attr "type" "fp7")
56 "pipe0, fp, nothing*5")
58 ;; The behavior of the double precision is that both pipes stall
59 ;; for 6 cycles and the rest of the operation pipelines for
60 ;; 7 cycles. The simplest way to model this is to simply ignore
;; Double-precision FP with base Cell tuning: both issue pipes are
;; reserved for the first cycle (see the stall comment above).
62 (define_insn_reservation "FPD" 7
63 (and (eq_attr "tune" "cell")
64 (eq_attr "type" "fpd"))
65 "pipe0 + pipe1, fp, nothing*5")
67 ;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined
;; celledp tuning: 9-cycle latency, pipe0 + fp in cycle one, then
;; pipelined — only pipe0 is tied up, so dual issue remains possible.
68 (define_insn_reservation "FPD_CELLEDP" 9
69 (and (eq_attr "tune" "celledp")
70 (eq_attr "type" "fpd"))
71 "pipe0 + fp, nothing*8")
73 (define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
76 (define_insn_reservation "STORE" 1 (eq_attr "type" "store")
79 (define_insn_reservation "IPREFETCH" 1 (eq_attr "type" "iprefetch")
82 (define_insn_reservation "SHUF" 4 (eq_attr "type" "shuf,br,spr")
;; Loads: 6-cycle latency; issue on pipe1 together with the ls unit.
85 (define_insn_reservation "LOAD" 6 (eq_attr "type" "load")
86 "pipe1 + ls, nothing*5")
88 (define_insn_reservation "HBR" 18 (eq_attr "type" "hbr")
;; multi0: multi-insn sequence whose first insn is in pipe0 (see the
;; type-attr comment at the top); reserves both pipes in cycle one.
91 (define_insn_reservation "MULTI0" 4 (eq_attr "type" "multi0")
92 "pipe0+pipe1, nothing*3")
94 (define_insn_reservation "MULTI1" 4 (eq_attr "type" "multi1")
97 (define_insn_reservation "CONVERT" 0 (eq_attr "type" "convert")
100 ;; Force pipe0 to occur before pipe1 in a cycle.
;; (absence_set "pipe0" "pipe1"): pipe0 may only be reserved in a cycle
;; while pipe1 is still unreserved, so pipe0 insns issue first.
101 (absence_set "pipe0" "pipe1")
110 (UNSPEC_EXTEND_CMP 5)
153 (UNSPEC_SPU_REALIGN_LOAD 49)
154 (UNSPEC_SPU_MASK_FOR_LOAD 50)
158 (include "predicates.md")
159 (include "constraints.md")
164 (define_mode_iterator ALL [QI V16QI
172 ; Everything except DI and TI which are handled separately because
173 ; they need different constraints to correctly test VOIDmode constants
174 (define_mode_iterator MOV [QI V16QI
;; DI and TI are kept out of MOV because their move patterns need
;; different constraints (see the comment above the MOV iterator).
181 (define_mode_iterator DTI [DI TI])
183 (define_mode_iterator VINT [QI V16QI
189 (define_mode_iterator VQHSI [QI V16QI
193 (define_mode_iterator VHSI [HI V8HI
196 (define_mode_iterator VSDF [SF V4SF
;; Scalar/vector pairs: each iterator expands a pattern for a scalar
;; mode and its same-element vector counterpart.
199 (define_mode_iterator VSI [SI V4SI])
200 (define_mode_iterator VDI [DI V2DI])
201 (define_mode_iterator VSF [SF V4SF])
202 (define_mode_iterator VDF [DF V2DF])
204 (define_mode_iterator VCMP [V16QI
210 (define_mode_iterator VCMPU [V16QI
214 (define_mode_attr bh [(QI "b") (V16QI "b")
;; Mnemonic suffix: "d" for double-precision modes, empty for single.
218 (define_mode_attr d [(SF "") (V4SF "")
219 (DF "d") (V2DF "d")])
;; Insn-type suffix: "6" (type fp6) for single precision, "d" (type
;; fpd) for double — used as "fp<d6>" in set_attr, e.g. mul<mode>3.
220 (define_mode_attr d6 [(SF "6") (V4SF "6")
221 (DF "d") (V2DF "d")])
;; Map each float mode to the equal-sized integer mode: lower case for
;; splicing into pattern names, upper case for use as a mode.
223 (define_mode_attr f2i [(SF "si") (V4SF "v4si")
224 (DF "di") (V2DF "v2di")])
225 (define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
226 (DF "DI") (V2DF "V2DI")])
;; NOTE(review): asymmetric mapping — DF yields SI while V2DF yields
;; V2DI; confirm against the patterns that use <DF2I> before changing.
228 (define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
;; Per-mode letters spliced into patterns: f/F for halfword modes,
;; g/G for word modes.  NOTE(review): presumably constraint letters
;; for unsigned/negated immediate masks — verify in constraints.md.
230 (define_mode_attr umask [(HI "f") (V8HI "f")
231 (SI "g") (V4SI "g")])
232 (define_mode_attr nmask [(HI "F") (V8HI "F")
233 (SI "G") (V4SI "G")])
235 ;; Used for carry and borrow instructions.
;; Modes shared by the carry/borrow-generate and extended add/subtract
;; patterns below (cg_, cgx_, addx_, bg_, bgx_, sfx_).
236 (define_mode_iterator CBOP [SI DI V4SI V2DI])
238 ;; Used in vec_set and vec_extract
;; Every full 16-byte vector mode (used in vec_set / vec_extract).
239 (define_mode_iterator V [V2DI V4SI V8HI V16QI V2DF V4SF])
240 (define_mode_attr inner [(V16QI "QI")
246 (define_mode_attr vmult [(V16QI "1")
252 (define_mode_attr voff [(V16QI "13")
262 (define_expand "mov<mode>"
263 [(set (match_operand:ALL 0 "spu_nonimm_operand" "=r,r,r,m")
264 (match_operand:ALL 1 "general_operand" "r,i,m,r"))]
267 if (spu_expand_mov(operands, <MODE>mode))
272 [(set (match_operand 0 "spu_reg_operand")
273 (match_operand 1 "immediate_operand"))]
277 (high (match_dup 1)))
279 (lo_sum (match_dup 0)
282 if (spu_split_immediate (operands))
288 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
289 (match_operand:SI 1 "immediate_operand" "s"))
294 ;; Whenever a function generates the 'pic' pattern above we need to
295 ;; load the pic_offset_table register.
296 ;; GCC doesn't deal well with labels in the middle of a block so we
297 ;; hardcode the offsets in the asm here.
298 (define_insn "load_pic_offset"
299 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
300 (unspec:SI [(const_int 0)] 0))
301 (set (match_operand:SI 1 "spu_reg_operand" "=r")
302 (unspec:SI [(const_int 0)] 0))]
304 "ila\t%1,.+8\;brsl\t%0,4"
305 [(set_attr "length" "8")
306 (set_attr "type" "multi0")])
311 (define_insn "_mov<mode>"
312 [(set (match_operand:MOV 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
313 (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))]
314 "spu_valid_move (operands)"
322 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
324 (define_insn "low_<mode>"
325 [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
326 (lo_sum:VSI (match_operand:VSI 1 "spu_reg_operand" "0")
327 (match_operand:VSI 2 "immediate_operand" "i")))]
331 (define_insn "_movdi"
332 [(set (match_operand:DI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
333 (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))]
334 "spu_valid_move (operands)"
342 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
344 (define_insn "_movti"
345 [(set (match_operand:TI 0 "spu_nonimm_operand" "=r,r,r,r,r,m")
346 (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))]
347 "spu_valid_move (operands)"
355 [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
357 (define_insn_and_split "load"
358 [(set (match_operand 0 "spu_reg_operand" "=r")
359 (match_operand 1 "memory_operand" "m"))
360 (clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
361 (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))]
362 "GET_MODE(operands[0]) == GET_MODE(operands[1])"
367 { spu_split_load(operands); DONE; })
369 (define_insn_and_split "store"
370 [(set (match_operand 0 "memory_operand" "=m")
371 (match_operand 1 "spu_reg_operand" "r"))
372 (clobber (match_operand:TI 2 "spu_reg_operand" "=&r"))
373 (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
374 "GET_MODE(operands[0]) == GET_MODE(operands[1])"
379 { spu_split_store(operands); DONE; })
381 ;; Operand 3 is the number of bytes. 1:b 2:h 4:w 8:d
383 (define_expand "cpat"
384 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
385 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
386 (match_operand:SI 2 "spu_nonmem_operand" "r,n")
387 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
390 rtx x = gen_cpat_const (operands);
393 emit_move_insn (operands[0], x);
399 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
400 (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
401 (match_operand:SI 2 "spu_nonmem_operand" "r,n")
402 (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
407 [(set_attr "type" "shuf")])
410 [(set (match_operand:TI 0 "spu_reg_operand")
411 (unspec:TI [(match_operand:SI 1 "spu_nonmem_operand")
412 (match_operand:SI 2 "immediate_operand")
413 (match_operand:SI 3 "immediate_operand")] UNSPEC_CPAT))]
415 [(set (match_dup:TI 0)
418 operands[4] = gen_cpat_const (operands);
425 (define_insn "extendqihi2"
426 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
427 (sign_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
431 (define_insn "extendhisi2"
432 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
433 (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))]
437 (define_expand "extendsidi2"
438 [(set (match_dup:DI 2)
439 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "")))
440 (set (match_operand:DI 0 "spu_reg_operand" "")
441 (sign_extend:DI (vec_select:SI (match_dup:V2SI 3)
442 (parallel [(const_int 1)]))))]
445 operands[2] = gen_reg_rtx (DImode);
446 operands[3] = spu_gen_subreg (V2SImode, operands[2]);
450 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
453 (match_operand:V2SI 1 "spu_reg_operand" "r")
454 (parallel [(const_int 1) ]))))]
458 (define_expand "extendqiti2"
459 [(set (match_operand:TI 0 "register_operand" "")
460 (sign_extend:TI (match_operand:QI 1 "register_operand" "")))]
462 "spu_expand_sign_extend(operands);
465 (define_expand "extendhiti2"
466 [(set (match_operand:TI 0 "register_operand" "")
467 (sign_extend:TI (match_operand:HI 1 "register_operand" "")))]
469 "spu_expand_sign_extend(operands);
472 (define_expand "extendsiti2"
473 [(set (match_operand:TI 0 "register_operand" "")
474 (sign_extend:TI (match_operand:SI 1 "register_operand" "")))]
476 "spu_expand_sign_extend(operands);
479 (define_expand "extendditi2"
480 [(set (match_operand:TI 0 "register_operand" "")
481 (sign_extend:TI (match_operand:DI 1 "register_operand" "")))]
483 "spu_expand_sign_extend(operands);
489 (define_insn "zero_extendqihi2"
490 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
491 (zero_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
493 "andi\t%0,%1,0x00ff")
495 (define_insn "zero_extendqisi2"
496 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
497 (zero_extend:SI (match_operand:QI 1 "spu_reg_operand" "r")))]
499 "andi\t%0,%1,0x00ff")
501 (define_expand "zero_extendhisi2"
502 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
503 (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))
504 (clobber (match_scratch:SI 2 "=&r"))]
507 rtx mask = gen_reg_rtx (SImode);
508 rtx op1 = simplify_gen_subreg (SImode, operands[1], HImode, 0);
509 emit_move_insn (mask, GEN_INT (0xffff));
510 emit_insn (gen_andsi3(operands[0], op1, mask));
514 (define_insn "zero_extendsidi2"
515 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
516 (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "r")))]
519 [(set_attr "type" "shuf")])
521 (define_insn "zero_extendsiti2"
522 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
523 (zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))]
525 "rotqmbyi\t%0,%1,-12"
526 [(set_attr "type" "shuf")])
528 (define_insn "zero_extendditi2"
529 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
530 (zero_extend:TI (match_operand:DI 1 "spu_reg_operand" "r")))]
533 [(set_attr "type" "shuf")])
538 (define_insn "truncdiqi2"
539 [(set (match_operand:QI 0 "spu_reg_operand" "=r")
540 (truncate:QI (match_operand:DI 1 "spu_reg_operand" "r")))]
543 [(set_attr "type" "shuf")])
545 (define_insn "truncdihi2"
546 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
547 (truncate:HI (match_operand:DI 1 "spu_reg_operand" "r")))]
550 [(set_attr "type" "shuf")])
552 (define_insn "truncdisi2"
553 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
554 (truncate:SI (match_operand:DI 1 "spu_reg_operand" "r")))]
557 [(set_attr "type" "shuf")])
559 (define_insn "trunctiqi2"
560 [(set (match_operand:QI 0 "spu_reg_operand" "=r")
561 (truncate:QI (match_operand:TI 1 "spu_reg_operand" "r")))]
564 [(set_attr "type" "shuf")])
566 (define_insn "trunctihi2"
567 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
568 (truncate:HI (match_operand:TI 1 "spu_reg_operand" "r")))]
571 [(set_attr "type" "shuf")])
573 (define_insn "trunctisi2"
574 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
575 (truncate:SI (match_operand:TI 1 "spu_reg_operand" "r")))]
578 [(set_attr "type" "shuf")])
580 (define_insn "trunctidi2"
581 [(set (match_operand:DI 0 "spu_reg_operand" "=r")
582 (truncate:DI (match_operand:TI 1 "spu_reg_operand" "r")))]
585 [(set_attr "type" "shuf")])
590 (define_insn "floatsisf2"
591 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
592 (float:SF (match_operand:SI 1 "spu_reg_operand" "r")))]
595 [(set_attr "type" "fp7")])
597 (define_insn "floatv4siv4sf2"
598 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
599 (float:V4SF (match_operand:V4SI 1 "spu_reg_operand" "r")))]
602 [(set_attr "type" "fp7")])
604 (define_insn "fix_truncsfsi2"
605 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
606 (fix:SI (match_operand:SF 1 "spu_reg_operand" "r")))]
609 [(set_attr "type" "fp7")])
611 (define_insn "fix_truncv4sfv4si2"
612 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
613 (fix:V4SI (match_operand:V4SF 1 "spu_reg_operand" "r")))]
616 [(set_attr "type" "fp7")])
618 (define_insn "floatunssisf2"
619 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
620 (unsigned_float:SF (match_operand:SI 1 "spu_reg_operand" "r")))]
623 [(set_attr "type" "fp7")])
625 (define_insn "floatunsv4siv4sf2"
626 [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
627 (unsigned_float:V4SF (match_operand:V4SI 1 "spu_reg_operand" "r")))]
630 [(set_attr "type" "fp7")])
632 (define_insn "fixuns_truncsfsi2"
633 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
634 (unsigned_fix:SI (match_operand:SF 1 "spu_reg_operand" "r")))]
637 [(set_attr "type" "fp7")])
639 (define_insn "fixuns_truncv4sfv4si2"
640 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
641 (unsigned_fix:V4SI (match_operand:V4SF 1 "spu_reg_operand" "r")))]
644 [(set_attr "type" "fp7")])
646 (define_insn "extendsfdf2"
647 [(set (match_operand:DF 0 "spu_reg_operand" "=r")
648 (float_extend:DF (match_operand:SF 1 "spu_reg_operand" "r")))]
651 [(set_attr "type" "fpd")])
653 (define_insn "truncdfsf2"
654 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
655 (float_truncate:SF (match_operand:DF 1 "spu_reg_operand" "r")))]
658 [(set_attr "type" "fpd")])
660 ;; Do (double)(operands[1]+0x80000000u)-(double)0x80000000
661 (define_expand "floatsidf2"
662 [(set (match_operand:DF 0 "register_operand" "")
663 (float:DF (match_operand:SI 1 "register_operand" "")))]
667 rtx c0 = gen_reg_rtx (SImode);
668 rtx c1 = gen_reg_rtx (DFmode);
669 rtx r0 = gen_reg_rtx (SImode);
670 rtx r1 = gen_reg_rtx (DFmode);
672 emit_move_insn (c0, GEN_INT (-0x80000000ll));
673 emit_move_insn (c1, spu_float_const ("2147483648", DFmode));
675 emit_insn (gen_xorsi3 (r0, operands[1], c0));
679 emit_library_call_value (convert_optab_libfunc (ufloat_optab,
681 NULL_RTX, LCT_NORMAL, DFmode, 1, r0, SImode);
683 insns = get_insns ();
685 emit_libcall_block (insns, r1, value,
686 gen_rtx_UNSIGNED_FLOAT (DFmode, r0));
688 emit_insn (gen_subdf3 (operands[0], r1, c1));
692 (define_expand "floatdidf2"
693 [(set (match_operand:DF 0 "register_operand" "")
694 (float:DF (match_operand:DI 1 "register_operand" "")))]
698 rtx c0 = gen_reg_rtx (DImode);
699 rtx r0 = gen_reg_rtx (DImode);
700 rtx r1 = gen_reg_rtx (DFmode);
701 rtx r2 = gen_reg_rtx (DImode);
702 rtx setneg = gen_reg_rtx (DImode);
703 rtx isneg = gen_reg_rtx (SImode);
704 rtx neg = gen_reg_rtx (DImode);
705 rtx mask = gen_reg_rtx (DImode);
707 emit_move_insn (c0, GEN_INT (0x8000000000000000ull));
709 emit_insn (gen_negdi2 (neg, operands[1]));
710 emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
711 emit_insn (gen_extend_compare (mask, isneg));
712 emit_insn (gen_selb (r0, neg, operands[1], mask));
713 emit_insn (gen_andc_di (setneg, c0, mask));
718 emit_library_call_value (convert_optab_libfunc (ufloat_optab,
720 NULL_RTX, LCT_NORMAL, DFmode, 1, r0, DImode);
722 insns = get_insns ();
724 emit_libcall_block (insns, r1, value,
725 gen_rtx_UNSIGNED_FLOAT (DFmode, r0));
727 emit_insn (gen_iordi3 (r2, gen_rtx_SUBREG (DImode, r1, 0), setneg));
728 emit_move_insn (operands[0], gen_rtx_SUBREG (DFmode, r2, 0));
734 (define_expand "addv16qi3"
735 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
736 (plus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
737 (match_operand:V16QI 2 "spu_reg_operand" "r")))]
740 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
741 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
742 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
743 rtx rhs_and = gen_reg_rtx (V8HImode);
744 rtx hi_char = gen_reg_rtx (V8HImode);
745 rtx lo_char = gen_reg_rtx (V8HImode);
746 rtx mask = gen_reg_rtx (V8HImode);
748 emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
749 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
750 emit_insn (gen_addv8hi3 (hi_char, lhs_short, rhs_and));
751 emit_insn (gen_addv8hi3 (lo_char, lhs_short, rhs_short));
752 emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
756 (define_insn "add<mode>3"
757 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
758 (plus:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
759 (match_operand:VHSI 2 "spu_arith_operand" "r,B")))]
765 (define_expand "add<mode>3"
766 [(set (match_dup:VDI 3)
767 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
768 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_CG))
769 (set (match_dup:VDI 5)
770 (unspec:VDI [(match_dup 3)
772 (match_dup:TI 4)] UNSPEC_SHUFB))
773 (set (match_operand:VDI 0 "spu_reg_operand" "")
774 (unspec:VDI [(match_dup 1)
776 (match_dup 5)] UNSPEC_ADDX))]
779 unsigned char pat[16] = {
780 0x04, 0x05, 0x06, 0x07,
781 0x80, 0x80, 0x80, 0x80,
782 0x0c, 0x0d, 0x0e, 0x0f,
783 0x80, 0x80, 0x80, 0x80
785 operands[3] = gen_reg_rtx (<MODE>mode);
786 operands[4] = gen_reg_rtx (TImode);
787 operands[5] = gen_reg_rtx (<MODE>mode);
788 emit_move_insn (operands[4], array_to_constant (TImode, pat));
791 (define_insn "cg_<mode>"
792 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
793 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
794 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_CG))]
798 (define_insn "cgx_<mode>"
799 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
800 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
801 (match_operand 2 "spu_reg_operand" "r")
802 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_CGX))]
806 (define_insn "addx_<mode>"
807 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
808 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
809 (match_operand 2 "spu_reg_operand" "r")
810 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_ADDX))]
815 ;; This is not the most efficient implementation of addti3.
816 ;; We include this here because 1) the compiler needs it to be
817 ;; defined as the word size is 128-bit and 2) sometimes gcc
818 ;; substitutes an add for a constant left-shift. 2) is unlikely
819 ;; because we also give addti3 a high cost. In case gcc does
820 ;; generate TImode add, here is the code to do it.
821 ;; operand 2 is a nonmemory because the compiler requires it.
822 (define_insn "addti3"
823 [(set (match_operand:TI 0 "spu_reg_operand" "=&r")
824 (plus:TI (match_operand:TI 1 "spu_reg_operand" "r")
825 (match_operand:TI 2 "spu_nonmem_operand" "r")))
826 (clobber (match_scratch:TI 3 "=&r"))]
835 [(set_attr "type" "multi0")
836 (set_attr "length" "28")])
838 (define_insn "add<mode>3"
839 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
840 (plus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
841 (match_operand:VSF 2 "spu_reg_operand" "r")))]
844 [(set_attr "type" "fp6")])
846 (define_insn "add<mode>3"
847 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
848 (plus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
849 (match_operand:VDF 2 "spu_reg_operand" "r")))]
852 [(set_attr "type" "fpd")])
857 (define_expand "subv16qi3"
858 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
859 (minus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
860 (match_operand:V16QI 2 "spu_reg_operand" "r")))]
863 rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
864 rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
865 rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
866 rtx rhs_and = gen_reg_rtx (V8HImode);
867 rtx hi_char = gen_reg_rtx (V8HImode);
868 rtx lo_char = gen_reg_rtx (V8HImode);
869 rtx mask = gen_reg_rtx (V8HImode);
871 emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
872 emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
873 emit_insn (gen_subv8hi3 (hi_char, lhs_short, rhs_and));
874 emit_insn (gen_subv8hi3 (lo_char, lhs_short, rhs_short));
875 emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
879 (define_insn "sub<mode>3"
880 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
881 (minus:VHSI (match_operand:VHSI 1 "spu_arith_operand" "r,B")
882 (match_operand:VHSI 2 "spu_reg_operand" "r,r")))]
888 (define_expand "sub<mode>3"
889 [(set (match_dup:VDI 3)
890 (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
891 (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_BG))
892 (set (match_dup:VDI 5)
893 (unspec:VDI [(match_dup 3)
895 (match_dup:TI 4)] UNSPEC_SHUFB))
896 (set (match_operand:VDI 0 "spu_reg_operand" "")
897 (unspec:VDI [(match_dup 1)
899 (match_dup 5)] UNSPEC_SFX))]
902 unsigned char pat[16] = {
903 0x04, 0x05, 0x06, 0x07,
904 0xc0, 0xc0, 0xc0, 0xc0,
905 0x0c, 0x0d, 0x0e, 0x0f,
906 0xc0, 0xc0, 0xc0, 0xc0
908 operands[3] = gen_reg_rtx (<MODE>mode);
909 operands[4] = gen_reg_rtx (TImode);
910 operands[5] = gen_reg_rtx (<MODE>mode);
911 emit_move_insn (operands[4], array_to_constant (TImode, pat));
914 (define_insn "bg_<mode>"
915 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
916 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
917 (match_operand 2 "spu_reg_operand" "r")] UNSPEC_BG))]
921 (define_insn "bgx_<mode>"
922 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
923 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
924 (match_operand 2 "spu_reg_operand" "r")
925 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_BGX))]
929 (define_insn "sfx_<mode>"
930 [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
931 (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
932 (match_operand 2 "spu_reg_operand" "r")
933 (match_operand 3 "spu_reg_operand" "0")] UNSPEC_SFX))]
937 (define_insn "subti3"
938 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
939 (minus:TI (match_operand:TI 1 "spu_reg_operand" "r")
940 (match_operand:TI 2 "spu_reg_operand" "r")))
941 (clobber (match_scratch:TI 3 "=&r"))
942 (clobber (match_scratch:TI 4 "=&r"))
943 (clobber (match_scratch:TI 5 "=&r"))]
954 [(set_attr "type" "multi0")
955 (set_attr "length" "36")])
957 (define_insn "sub<mode>3"
958 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
959 (minus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
960 (match_operand:VSF 2 "spu_reg_operand" "r")))]
963 [(set_attr "type" "fp6")])
965 (define_insn "sub<mode>3"
966 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
967 (minus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
968 (match_operand:VDF 2 "spu_reg_operand" "r")))]
971 [(set_attr "type" "fpd")])
976 (define_expand "negv16qi2"
977 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
978 (neg:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")))]
981 rtx zero = gen_reg_rtx (V16QImode);
982 emit_move_insn (zero, CONST0_RTX (V16QImode));
983 emit_insn (gen_subv16qi3 (operands[0], zero, operands[1]));
987 (define_insn "neg<mode>2"
988 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
989 (neg:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")))]
993 (define_expand "negdi2"
994 [(set (match_operand:DI 0 "spu_reg_operand" "")
995 (neg:DI (match_operand:DI 1 "spu_reg_operand" "")))]
998 rtx zero = gen_reg_rtx(DImode);
999 emit_move_insn(zero, GEN_INT(0));
1000 emit_insn (gen_subdi3(operands[0], zero, operands[1]));
1004 (define_expand "negti2"
1005 [(set (match_operand:TI 0 "spu_reg_operand" "")
1006 (neg:TI (match_operand:TI 1 "spu_reg_operand" "")))]
1009 rtx zero = gen_reg_rtx(TImode);
1010 emit_move_insn(zero, GEN_INT(0));
1011 emit_insn (gen_subti3(operands[0], zero, operands[1]));
1015 (define_expand "neg<mode>2"
1017 [(set (match_operand:VSF 0 "spu_reg_operand" "")
1018 (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1019 (use (match_dup 2))])]
1021 "operands[2] = gen_reg_rtx (<F2I>mode);
1022 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
1024 (define_expand "neg<mode>2"
1026 [(set (match_operand:VDF 0 "spu_reg_operand" "")
1027 (neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1028 (use (match_dup 2))])]
1030 "operands[2] = gen_reg_rtx (<F2I>mode);
1031 emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
1033 (define_insn_and_split "_neg<mode>2"
1034 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1035 (neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1036 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1040 [(set (match_dup:<F2I> 3)
1041 (xor:<F2I> (match_dup:<F2I> 4)
1042 (match_dup:<F2I> 2)))]
1044 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1045 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1051 (define_expand "abs<mode>2"
1053 [(set (match_operand:VSF 0 "spu_reg_operand" "")
1054 (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
1055 (use (match_dup 2))])]
1057 "operands[2] = gen_reg_rtx (<F2I>mode);
1058 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
1060 (define_expand "abs<mode>2"
1062 [(set (match_operand:VDF 0 "spu_reg_operand" "")
1063 (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
1064 (use (match_dup 2))])]
1066 "operands[2] = gen_reg_rtx (<F2I>mode);
1067 emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
1069 (define_insn_and_split "_abs<mode>2"
1070 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1071 (abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
1072 (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
1076 [(set (match_dup:<F2I> 3)
1077 (and:<F2I> (match_dup:<F2I> 4)
1078 (match_dup:<F2I> 2)))]
1080 operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
1081 operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
1087 (define_insn "mulhi3"
1088 [(set (match_operand:HI 0 "spu_reg_operand" "=r,r")
1089 (mult:HI (match_operand:HI 1 "spu_reg_operand" "r,r")
1090 (match_operand:HI 2 "spu_arith_operand" "r,B")))]
1095 [(set_attr "type" "fp7")])
1097 (define_expand "mulv8hi3"
1098 [(set (match_operand:V8HI 0 "spu_reg_operand" "")
1099 (mult:V8HI (match_operand:V8HI 1 "spu_reg_operand" "")
1100 (match_operand:V8HI 2 "spu_reg_operand" "")))]
1103 rtx result = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1104 rtx low = gen_reg_rtx (V4SImode);
1105 rtx high = gen_reg_rtx (V4SImode);
1106 rtx shift = gen_reg_rtx (V4SImode);
1107 rtx mask = gen_reg_rtx (V4SImode);
1109 emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff));
1110 emit_insn (gen_spu_mpyhh (high, operands[1], operands[2]));
1111 emit_insn (gen_spu_mpy (low, operands[1], operands[2]));
1112 emit_insn (gen_ashlv4si3 (shift, high, spu_const(V4SImode, 16)));
1113 emit_insn (gen_selb (result, shift, low, mask));
1117 (define_expand "mul<mode>3"
1119 [(set (match_operand:VSI 0 "spu_reg_operand" "")
1120 (mult:VSI (match_operand:VSI 1 "spu_reg_operand" "")
1121 (match_operand:VSI 2 "spu_reg_operand" "")))
1122 (clobber (match_dup:VSI 3))
1123 (clobber (match_dup:VSI 4))
1124 (clobber (match_dup:VSI 5))
1125 (clobber (match_dup:VSI 6))])]
1128 operands[3] = gen_reg_rtx(<MODE>mode);
1129 operands[4] = gen_reg_rtx(<MODE>mode);
1130 operands[5] = gen_reg_rtx(<MODE>mode);
1131 operands[6] = gen_reg_rtx(<MODE>mode);
1134 (define_insn_and_split "_mulsi3"
1135 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1136 (mult:SI (match_operand:SI 1 "spu_reg_operand" "r")
1137 (match_operand:SI 2 "spu_arith_operand" "rK")))
1138 (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))
1139 (clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
1140 (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))
1141 (clobber (match_operand:SI 6 "spu_reg_operand" "=&r"))]
1145 [(set (match_dup:SI 0)
1146 (mult:SI (match_dup:SI 1)
1149 HOST_WIDE_INT val = 0;
1150 rtx a = operands[3];
1151 rtx b = operands[4];
1152 rtx c = operands[5];
1153 rtx d = operands[6];
1154 if (GET_CODE(operands[2]) == CONST_INT)
1156 val = INTVAL(operands[2]);
1157 emit_move_insn(d, operands[2]);
1160 if (val && (val & 0xffff) == 0)
1162 emit_insn (gen_mpyh_si(operands[0], operands[2], operands[1]));
1164 else if (val > 0 && val < 0x10000)
1166 rtx cst = satisfies_constraint_K (GEN_INT (val)) ? GEN_INT(val) : d;
1167 emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
1168 emit_insn (gen_mpyu_si(c, operands[1], cst));
1169 emit_insn (gen_addsi3(operands[0], a, c));
1173 emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
1174 emit_insn (gen_mpyh_si(b, operands[2], operands[1]));
1175 emit_insn (gen_mpyu_si(c, operands[1], operands[2]));
1176 emit_insn (gen_addsi3(d, a, b));
1177 emit_insn (gen_addsi3(operands[0], d, c));
1182 (define_insn_and_split "_mulv4si3"
1183 [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
1184 (mult:V4SI (match_operand:V4SI 1 "spu_reg_operand" "r")
1185 (match_operand:V4SI 2 "spu_reg_operand" "r")))
1186 (clobber (match_operand:V4SI 3 "spu_reg_operand" "=&r"))
1187 (clobber (match_operand:V4SI 4 "spu_reg_operand" "=&r"))
1188 (clobber (match_operand:V4SI 5 "spu_reg_operand" "=&r"))
1189 (clobber (match_operand:V4SI 6 "spu_reg_operand" "=&r"))]
1193 [(set (match_dup:V4SI 0)
1194 (mult:V4SI (match_dup:V4SI 1)
1195 (match_dup:V4SI 2)))]
1197 rtx a = operands[3];
1198 rtx b = operands[4];
1199 rtx c = operands[5];
1200 rtx d = operands[6];
1201 rtx op1 = simplify_gen_subreg (V8HImode, operands[1], V4SImode, 0);
1202 rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0);
1203 emit_insn (gen_spu_mpyh(a, op1, op2));
1204 emit_insn (gen_spu_mpyh(b, op2, op1));
1205 emit_insn (gen_spu_mpyu(c, op1, op2));
1206 emit_insn (gen_addv4si3(d, a, b));
1207 emit_insn (gen_addv4si3(operands[0], d, c));
1211 (define_insn "mulhisi3"
1212 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1213 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1214 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1217 [(set_attr "type" "fp7")])
1219 (define_insn "mulhisi3_imm"
1220 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1221 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1222 (match_operand:SI 2 "imm_K_operand" "K")))]
1225 [(set_attr "type" "fp7")])
1227 (define_insn "umulhisi3"
1228 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1229 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1230 (zero_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
1233 [(set_attr "type" "fp7")])
1235 (define_insn "umulhisi3_imm"
1236 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1237 (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1238 (and:SI (match_operand:SI 2 "imm_K_operand" "K") (const_int 65535))))]
1241 [(set_attr "type" "fp7")])
1243 (define_insn "mpyu_si"
1244 [(set (match_operand:SI 0 "spu_reg_operand" "=r,r")
1245 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r,r")
1247 (and:SI (match_operand:SI 2 "spu_arith_operand" "r,K")
1248 (const_int 65535))))]
1253 [(set_attr "type" "fp7")])
1255 ;; This isn't always profitable to use. Consider r = a * b + c * d.
1256 ;; It's faster to do the multiplies in parallel then add them. If we
1257 ;; merge a multiply and add it prevents the multiplies from happening in
1259 (define_insn "mpya_si"
1260 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1261 (plus:SI (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1262 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1263 (match_operand:SI 3 "spu_reg_operand" "r")))]
1266 [(set_attr "type" "fp7")])
1268 (define_insn "mpyh_si"
1269 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1270 (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r")
1272 (and:SI (match_operand:SI 2 "spu_reg_operand" "r")
1273 (const_int 65535))))]
1276 [(set_attr "type" "fp7")])
1278 (define_insn "mpys_si"
1279 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1281 (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
1282 (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
1286 [(set_attr "type" "fp7")])
1288 (define_insn "mpyhh_si"
1289 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1290 (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1292 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1296 [(set_attr "type" "fp7")])
1298 (define_insn "mpyhhu_si"
1299 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1300 (mult:SI (lshiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1302 (lshiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1306 [(set_attr "type" "fp7")])
1308 (define_insn "mpyhha_si"
1309 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1310 (plus:SI (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
1312 (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
1314 (match_operand:SI 3 "spu_reg_operand" "0")))]
1317 [(set_attr "type" "fp7")])
1319 (define_insn "mul<mode>3"
1320 [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
1321 (mult:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")
1322 (match_operand:VSDF 2 "spu_reg_operand" "r")))]
1325 [(set_attr "type" "fp<d6>")])
1327 (define_insn "fma_<mode>"
1328 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1329 (plus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1330 (match_operand:VSF 2 "spu_reg_operand" "r"))
1331 (match_operand:VSF 3 "spu_reg_operand" "r")))]
1334 [(set_attr "type" "fp6")])
1336 (define_insn "fnms_<mode>"
1337 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1338 (minus:VSF (match_operand:VSF 3 "spu_reg_operand" "r")
1339 (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1340 (match_operand:VSF 2 "spu_reg_operand" "r"))))]
1343 [(set_attr "type" "fp6")])
1345 (define_insn "fms_<mode>"
1346 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1347 (minus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1348 (match_operand:VSF 2 "spu_reg_operand" "r"))
1349 (match_operand:VSF 3 "spu_reg_operand" "r")))]
1352 [(set_attr "type" "fp6")])
1354 (define_insn "fma_<mode>"
1355 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1356 (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1357 (match_operand:VDF 2 "spu_reg_operand" "r"))
1358 (match_operand:VDF 3 "spu_reg_operand" "0")))]
1361 [(set_attr "type" "fpd")])
1363 (define_insn "fnma_<mode>"
1364 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1365 (neg:VDF (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1366 (match_operand:VDF 2 "spu_reg_operand" "r"))
1367 (match_operand:VDF 3 "spu_reg_operand" "0"))))]
1370 [(set_attr "type" "fpd")])
1372 (define_insn "fnms_<mode>"
1373 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1374 (minus:VDF (match_operand:VDF 3 "spu_reg_operand" "0")
1375 (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1376 (match_operand:VDF 2 "spu_reg_operand" "r"))))]
1379 [(set_attr "type" "fpd")])
1381 (define_insn "fms_<mode>"
1382 [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
1383 (minus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
1384 (match_operand:VDF 2 "spu_reg_operand" "r"))
1385 (match_operand:VDF 3 "spu_reg_operand" "0")))]
1388 [(set_attr "type" "fpd")])
1391 ;; mul highpart, used for divide by constant optimizations.
1393 (define_expand "smulsi3_highpart"
1394 [(set (match_operand:SI 0 "register_operand" "")
1397 (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
1398 (sign_extend:DI (match_operand:SI 2 "register_operand" "")))
1402 rtx t0 = gen_reg_rtx (SImode);
1403 rtx t1 = gen_reg_rtx (SImode);
1404 rtx t2 = gen_reg_rtx (SImode);
1405 rtx t3 = gen_reg_rtx (SImode);
1406 rtx t4 = gen_reg_rtx (SImode);
1407 rtx t5 = gen_reg_rtx (SImode);
1408 rtx t6 = gen_reg_rtx (SImode);
1409 rtx t7 = gen_reg_rtx (SImode);
1410 rtx t8 = gen_reg_rtx (SImode);
1411 rtx t9 = gen_reg_rtx (SImode);
1412 rtx t11 = gen_reg_rtx (SImode);
1413 rtx t12 = gen_reg_rtx (SImode);
1414 rtx t14 = gen_reg_rtx (SImode);
1415 rtx t15 = gen_reg_rtx (HImode);
1416 rtx t16 = gen_reg_rtx (HImode);
1417 rtx t17 = gen_reg_rtx (HImode);
1418 rtx t18 = gen_reg_rtx (HImode);
1419 rtx t19 = gen_reg_rtx (SImode);
1420 rtx t20 = gen_reg_rtx (SImode);
1421 rtx t21 = gen_reg_rtx (SImode);
1422 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1423 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1424 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1425 rtx t1_hi = gen_rtx_SUBREG (HImode, t1, 2);
1427 rtx insn = emit_insn (gen_lshrsi3 (t0, operands[1], GEN_INT (16)));
1428 emit_insn (gen_lshrsi3 (t1, operands[2], GEN_INT (16)));
1429 emit_insn (gen_umulhisi3 (t2, op1_hi, op2_hi));
1430 emit_insn (gen_mpyh_si (t3, operands[1], operands[2]));
1431 emit_insn (gen_mpyh_si (t4, operands[2], operands[1]));
1432 emit_insn (gen_mpyhh_si (t5, operands[1], operands[2]));
1433 emit_insn (gen_mpys_si (t6, t0_hi, op2_hi));
1434 emit_insn (gen_mpys_si (t7, t1_hi, op1_hi));
1436 /* Gen carry bits (in t9 and t11). */
1437 emit_insn (gen_addsi3 (t8, t2, t3));
1438 emit_insn (gen_cg_si (t9, t2, t3));
1439 emit_insn (gen_cg_si (t11, t8, t4));
1441 /* Gen high 32 bits in operand[0]. Correct for mpys. */
1442 emit_insn (gen_addx_si (t12, t5, t6, t9));
1443 emit_insn (gen_addx_si (t14, t12, t7, t11));
1445 /* mpys treats both operands as signed when we really want it to treat
1446 the first operand as signed and the second operand as unsigned.
1447 The code below corrects for that difference. */
1448 emit_insn (gen_cgt_hi (t15, op1_hi, GEN_INT (-1)));
1449 emit_insn (gen_cgt_hi (t16, op2_hi, GEN_INT (-1)));
1450 emit_insn (gen_andc_hi (t17, t1_hi, t15));
1451 emit_insn (gen_andc_hi (t18, t0_hi, t16));
1452 emit_insn (gen_extendhisi2 (t19, t17));
1453 emit_insn (gen_extendhisi2 (t20, t18));
1454 emit_insn (gen_addsi3 (t21, t19, t20));
1455 emit_insn (gen_addsi3 (operands[0], t14, t21));
1456 unshare_all_rtl_in_chain (insn);
1460 (define_expand "umulsi3_highpart"
1461 [(set (match_operand:SI 0 "register_operand" "")
1464 (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
1465 (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
1470 rtx t0 = gen_reg_rtx (SImode);
1471 rtx t1 = gen_reg_rtx (SImode);
1472 rtx t2 = gen_reg_rtx (SImode);
1473 rtx t3 = gen_reg_rtx (SImode);
1474 rtx t4 = gen_reg_rtx (SImode);
1475 rtx t5 = gen_reg_rtx (SImode);
1476 rtx t6 = gen_reg_rtx (SImode);
1477 rtx t7 = gen_reg_rtx (SImode);
1478 rtx t8 = gen_reg_rtx (SImode);
1479 rtx t9 = gen_reg_rtx (SImode);
1480 rtx t10 = gen_reg_rtx (SImode);
1481 rtx t12 = gen_reg_rtx (SImode);
1482 rtx t13 = gen_reg_rtx (SImode);
1483 rtx t14 = gen_reg_rtx (SImode);
1484 rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
1485 rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
1486 rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
1488 rtx insn = emit_insn (gen_rotlsi3 (t0, operands[2], GEN_INT (16)));
1489 emit_insn (gen_umulhisi3 (t1, op1_hi, op2_hi));
1490 emit_insn (gen_umulhisi3 (t2, op1_hi, t0_hi));
1491 emit_insn (gen_mpyhhu_si (t3, operands[1], t0));
1492 emit_insn (gen_mpyhhu_si (t4, operands[1], operands[2]));
1493 emit_insn (gen_ashlsi3 (t5, t2, GEN_INT (16)));
1494 emit_insn (gen_ashlsi3 (t6, t3, GEN_INT (16)));
1495 emit_insn (gen_lshrsi3 (t7, t2, GEN_INT (16)));
1496 emit_insn (gen_lshrsi3 (t8, t3, GEN_INT (16)));
1498 /* Gen carry bits (in t10 and t12). */
1499 emit_insn (gen_addsi3 (t9, t1, t5));
1500 emit_insn (gen_cg_si (t10, t1, t5));
1501 emit_insn (gen_cg_si (t12, t9, t6));
1503 /* Gen high 32 bits in operand[0]. */
1504 emit_insn (gen_addx_si (t13, t4, t7, t10));
1505 emit_insn (gen_addx_si (t14, t13, t8, t12));
1506 emit_insn (gen_movsi (operands[0], t14));
1507 unshare_all_rtl_in_chain (insn);
1514 ;; Not necessarily the best implementation of divide but faster then
1515 ;; the default that gcc provides because this is inlined and it uses
1517 (define_insn "divmodsi4"
1518 [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1519 (div:SI (match_operand:SI 1 "spu_reg_operand" "r")
1520 (match_operand:SI 2 "spu_reg_operand" "r")))
1521 (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1522 (mod:SI (match_dup 1)
1524 (clobber (match_scratch:SI 4 "=&r"))
1525 (clobber (match_scratch:SI 5 "=&r"))
1526 (clobber (match_scratch:SI 6 "=&r"))
1527 (clobber (match_scratch:SI 7 "=&r"))
1528 (clobber (match_scratch:SI 8 "=&r"))
1529 (clobber (match_scratch:SI 9 "=&r"))
1530 (clobber (match_scratch:SI 10 "=&r"))
1531 (clobber (match_scratch:SI 11 "=&r"))
1532 (clobber (match_scratch:SI 12 "=&r"))
1533 (clobber (reg:SI 130))]
1541 selb %8,%8,%1,%10\\n\\
1542 selb %9,%9,%2,%11\\n\\
1548 shlqbyi %3,%8,0\\n\\
1549 xor %11,%10,%11\\n\\
1553 1: or %12,%0,%5\\n\\
1554 rotqmbii %5,%5,-1\\n\\
1558 rotqmbii %4,%4,-1\\n\\
1559 selb %0,%12,%0,%6\\n\\
1561 selb %3,%7,%3,%6\\n\\
1565 selb %3,%8,%3,%10\\n\\
1567 [(set_attr "type" "multi0")
1568 (set_attr "length" "128")])
1570 (define_insn "udivmodsi4"
1571 [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
1572 (udiv:SI (match_operand:SI 1 "spu_reg_operand" "r")
1573 (match_operand:SI 2 "spu_reg_operand" "r")))
1574 (set (match_operand:SI 3 "spu_reg_operand" "=&r")
1575 (umod:SI (match_dup 1)
1577 (clobber (match_scratch:SI 4 "=&r"))
1578 (clobber (match_scratch:SI 5 "=&r"))
1579 (clobber (match_scratch:SI 6 "=&r"))
1580 (clobber (match_scratch:SI 7 "=&r"))
1581 (clobber (match_scratch:SI 8 "=&r"))
1582 (clobber (reg:SI 130))]
1595 rotqmbii %5,%5,-1\\n\\
1599 rotqmbii %4,%4,-1\\n\\
1600 selb %0,%8,%0,%6\\n\\
1602 selb %3,%7,%3,%6\\n\\
1605 [(set_attr "type" "multi0")
1606 (set_attr "length" "80")])
1608 (define_insn_and_split "div<mode>3"
1609 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1610 (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
1611 (match_operand:VSF 2 "spu_reg_operand" "r")))
1612 (clobber (match_scratch:VSF 3 "=&r"))
1613 (clobber (match_scratch:VSF 4 "=&r"))]
1617 [(set (match_dup:VSF 0)
1618 (div:VSF (match_dup:VSF 1)
1620 (clobber (match_dup:VSF 3))
1621 (clobber (match_dup:VSF 4))]
1623 emit_insn (gen_frest_<mode>(operands[3], operands[2]));
1624 emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3]));
1625 emit_insn (gen_mul<mode>3(operands[4], operands[1], operands[3]));
1626 emit_insn (gen_fnms_<mode>(operands[0], operands[4], operands[2], operands[1]));
1627 emit_insn (gen_fma_<mode>(operands[0], operands[0], operands[3], operands[4]));
1634 (define_insn_and_split "sqrtsf2"
1635 [(set (match_operand:SF 0 "spu_reg_operand" "=r")
1636 (sqrt:SF (match_operand:SF 1 "spu_reg_operand" "r")))
1637 (clobber (match_scratch:SF 2 "=&r"))
1638 (clobber (match_scratch:SF 3 "=&r"))
1639 (clobber (match_scratch:SF 4 "=&r"))
1640 (clobber (match_scratch:SF 5 "=&r"))]
1644 [(set (match_dup:SF 0)
1645 (sqrt:SF (match_dup:SF 1)))
1646 (clobber (match_dup:SF 2))
1647 (clobber (match_dup:SF 3))
1648 (clobber (match_dup:SF 4))
1649 (clobber (match_dup:SF 5))]
1651 emit_move_insn (operands[3],spu_float_const(\"0.5\",SFmode));
1652 emit_move_insn (operands[4],spu_float_const(\"1.00000011920928955078125\",SFmode));
1653 emit_insn (gen_frsqest_sf(operands[2],operands[1]));
1654 emit_insn (gen_fi_sf(operands[2],operands[1],operands[2]));
1655 emit_insn (gen_mulsf3(operands[5],operands[2],operands[1]));
1656 emit_insn (gen_mulsf3(operands[3],operands[5],operands[3]));
1657 emit_insn (gen_fnms_sf(operands[4],operands[2],operands[5],operands[4]));
1658 emit_insn (gen_fma_sf(operands[0],operands[4],operands[3],operands[5]));
1662 (define_insn "frest_<mode>"
1663 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1664 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FREST))]
1667 [(set_attr "type" "shuf")])
1669 (define_insn "frsqest_<mode>"
1670 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1671 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FRSQEST))]
1674 [(set_attr "type" "shuf")])
1676 (define_insn "fi_<mode>"
1677 [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
1678 (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")
1679 (match_operand:VSF 2 "spu_reg_operand" "r")] UNSPEC_FI))]
1682 [(set_attr "type" "fp7")])
1687 (define_insn "and<mode>3"
1688 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
1689 (and:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
1690 (match_operand:MOV 2 "spu_logical_operand" "r,C")))]
1694 and%j2i\t%0,%1,%J2")
1696 (define_insn "anddi3"
1697 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
1698 (and:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
1699 (match_operand:DI 2 "spu_logical_operand" "r,c")))]
1703 and%k2i\t%0,%1,%K2")
1705 (define_insn "andti3"
1706 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
1707 (and:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
1708 (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
1712 and%m2i\t%0,%1,%L2")
1714 (define_insn "andc_<mode>"
1715 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1716 (and:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
1717 (match_operand:ALL 1 "spu_reg_operand" "r")))]
1721 (define_insn "nand_<mode>"
1722 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1723 (not:ALL (and:ALL (match_operand:ALL 2 "spu_reg_operand" "r")
1724 (match_operand:ALL 1 "spu_reg_operand" "r"))))]
1731 (define_insn "ior<mode>3"
1732 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r,r")
1733 (ior:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r,0")
1734 (match_operand:MOV 2 "spu_ior_operand" "r,C,D")))]
1741 (define_insn "iordi3"
1742 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r,r")
1743 (ior:DI (match_operand:DI 1 "spu_reg_operand" "r,r,0")
1744 (match_operand:DI 2 "spu_ior_operand" "r,c,d")))]
1751 (define_insn "iorti3"
1752 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r,r")
1753 (ior:TI (match_operand:TI 1 "spu_reg_operand" "r,r,0")
1754 (match_operand:TI 2 "spu_ior_operand" "r,Y,Z")))]
1761 (define_insn "orc_<mode>"
1762 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1763 (ior:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
1764 (match_operand:ALL 1 "spu_reg_operand" "r")))]
1768 (define_insn "nor_<mode>"
1769 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1770 (not:ALL (ior:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
1771 (match_operand:ALL 2 "spu_reg_operand" "r"))))]
1777 (define_insn "xor<mode>3"
1778 [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
1779 (xor:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
1780 (match_operand:MOV 2 "spu_logical_operand" "r,B")))]
1784 xor%j2i\t%0,%1,%J2")
1786 (define_insn "xordi3"
1787 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
1788 (xor:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
1789 (match_operand:DI 2 "spu_logical_operand" "r,c")))]
1793 xor%k2i\t%0,%1,%K2")
1795 (define_insn "xorti3"
1796 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
1797 (xor:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
1798 (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
1802 xor%m2i\t%0,%1,%L2")
1804 (define_insn "eqv_<mode>"
1805 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1806 (not:ALL (xor:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
1807 (match_operand:ALL 2 "spu_reg_operand" "r"))))]
1813 (define_insn "one_cmpl<mode>2"
1814 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
1815 (not:ALL (match_operand:ALL 1 "spu_reg_operand" "r")))]
1822 (define_expand "selb"
1823 [(set (match_operand 0 "spu_reg_operand" "")
1824 (unspec [(match_operand 1 "spu_reg_operand" "")
1825 (match_operand 2 "spu_reg_operand" "")
1826 (match_operand 3 "spu_reg_operand" "")] UNSPEC_SELB))]
1829 rtx s = gen__selb (operands[0], operands[1], operands[2], operands[3]);
1830 PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
1835 ;; This could be defined as a combination of logical operations, but at
1836 ;; one time it caused a crash due to recursive expansion of rtl during CSE.
1837 (define_insn "_selb"
1838 [(set (match_operand 0 "spu_reg_operand" "=r")
1839 (unspec [(match_operand 1 "spu_reg_operand" "r")
1840 (match_operand 2 "spu_reg_operand" "r")
1841 (match_operand 3 "spu_reg_operand" "r")] UNSPEC_SELB))]
1842 "GET_MODE(operands[0]) == GET_MODE(operands[1])
1843 && GET_MODE(operands[1]) == GET_MODE(operands[2])"
1844 "selb\t%0,%1,%2,%3")
1847 ;; Misc. byte/bit operations
1848 ;; clz/ctz/ffs/popcount/parity
1851 (define_insn "clz<mode>2"
1852 [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
1853 (clz:VSI (match_operand:VSI 1 "spu_reg_operand" "r")))]
1857 (define_expand "ctz<mode>2"
1859 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
1860 (set (match_dup 3) (and:VSI (match_dup 1)
1862 (set (match_dup 4) (clz:VSI (match_dup 3)))
1863 (set (match_operand:VSI 0 "spu_reg_operand" "")
1864 (minus:VSI (match_dup 5) (match_dup 4)))]
1867 operands[2] = gen_reg_rtx (<MODE>mode);
1868 operands[3] = gen_reg_rtx (<MODE>mode);
1869 operands[4] = gen_reg_rtx (<MODE>mode);
1870 operands[5] = spu_const(<MODE>mode, 31);
1873 (define_expand "ffs<mode>2"
1875 (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
1876 (set (match_dup 3) (and:VSI (match_dup 1)
1878 (set (match_dup 4) (clz:VSI (match_dup 3)))
1879 (set (match_operand:VSI 0 "spu_reg_operand" "")
1880 (minus:VSI (match_dup 5) (match_dup 4)))]
1883 operands[2] = gen_reg_rtx (<MODE>mode);
1884 operands[3] = gen_reg_rtx (<MODE>mode);
1885 operands[4] = gen_reg_rtx (<MODE>mode);
1886 operands[5] = spu_const(<MODE>mode, 32);
1889 (define_expand "popcountsi2"
1891 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "")]
1894 (unspec:HI [(match_dup 2)] UNSPEC_SUMB))
1895 (set (match_operand:SI 0 "spu_reg_operand" "")
1896 (sign_extend:SI (match_dup 3)))]
1899 operands[2] = gen_reg_rtx (SImode);
1900 operands[3] = gen_reg_rtx (HImode);
1903 (define_expand "paritysi2"
1904 [(set (match_operand:SI 0 "spu_reg_operand" "")
1905 (parity:SI (match_operand:SI 1 "spu_reg_operand" "")))]
1908 operands[2] = gen_reg_rtx (SImode);
1909 emit_insn (gen_popcountsi2(operands[2], operands[1]));
1910 emit_insn (gen_andsi3(operands[0], operands[2], GEN_INT (1)));
1914 (define_insn "cntb_si"
1915 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
1916 (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "r")]
1920 [(set_attr "type" "fxb")])
1922 (define_insn "cntb_v16qi"
1923 [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
1924 (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")]
1928 [(set_attr "type" "fxb")])
1930 (define_insn "sumb_si"
1931 [(set (match_operand:HI 0 "spu_reg_operand" "=r")
1932 (unspec:HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_SUMB))]
1935 [(set_attr "type" "fxb")])
1940 (define_insn "ashl<mode>3"
1941 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
1942 (ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
1943 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
1947 shl<bh>i\t%0,%1,%<umask>2"
1948 [(set_attr "type" "fx3")])
1950 (define_insn_and_split "ashldi3"
1951 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
1952 (ashift:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
1953 (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
1954 (clobber (match_scratch:SI 3 "=&r,X"))]
1958 [(set (match_dup:DI 0)
1959 (ashift:DI (match_dup:DI 1)
1962 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
1963 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1964 rtx op2 = operands[2];
1965 rtx op3 = operands[3];
1967 if (GET_CODE (operands[2]) == REG)
1969 emit_insn (gen_addsi3 (op3, op2, GEN_INT (64)));
1970 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
1971 emit_insn (gen_shlqbybi_ti (op0, op0, op3));
1972 emit_insn (gen_shlqbi_ti (op0, op0, op3));
1976 HOST_WIDE_INT val = INTVAL (operands[2]);
1977 emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
1978 emit_insn (gen_shlqby_ti (op0, op0, GEN_INT (val / 8 + 8)));
1980 emit_insn (gen_shlqbi_ti (op0, op0, GEN_INT (val % 8)));
1985 (define_expand "ashlti3"
1986 [(parallel [(set (match_operand:TI 0 "spu_reg_operand" "")
1987 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "")
1988 (match_operand:SI 2 "spu_nonmem_operand" "")))
1989 (clobber (match_dup:TI 3))])]
1991 "if (GET_CODE (operands[2]) == CONST_INT)
1993 emit_insn (gen_ashlti3_imm(operands[0], operands[1], operands[2]));
1996 operands[3] = gen_reg_rtx (TImode);")
1998 (define_insn_and_split "ashlti3_imm"
1999 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2000 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2001 (match_operand:SI 2 "immediate_operand" "O,P")))]
2006 "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
2007 [(set (match_dup:TI 0)
2008 (ashift:TI (match_dup:TI 1)
2010 (set (match_dup:TI 0)
2011 (ashift:TI (match_dup:TI 0)
2014 HOST_WIDE_INT val = INTVAL(operands[2]);
2015 operands[3] = GEN_INT (val&7);
2016 operands[4] = GEN_INT (val&-8);
2018 [(set_attr "type" "shuf,shuf")])
2020 (define_insn_and_split "ashlti3_reg"
2021 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2022 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r")
2023 (match_operand:SI 2 "spu_reg_operand" "r")))
2024 (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
2028 [(set (match_dup:TI 3)
2029 (ashift:TI (match_dup:TI 1)
2030 (and:SI (match_dup:SI 2)
2032 (set (match_dup:TI 0)
2033 (ashift:TI (match_dup:TI 3)
2034 (and:SI (match_dup:SI 2)
2038 (define_insn "shlqbybi_ti"
2039 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2040 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2041 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2047 [(set_attr "type" "shuf,shuf")])
2049 (define_insn "shlqbi_ti"
2050 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2051 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2052 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2058 [(set_attr "type" "shuf,shuf")])
2060 (define_insn "shlqby_ti"
2061 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2062 (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2063 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2069 [(set_attr "type" "shuf,shuf")])
2074 (define_insn_and_split "lshr<mode>3"
2075 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2076 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2077 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2078 (clobber (match_scratch:VHSI 3 "=&r,X"))]
2082 rot<bh>mi\t%0,%1,-%<umask>2"
2083 "reload_completed && GET_CODE (operands[2]) == REG"
2084 [(set (match_dup:VHSI 3)
2085 (neg:VHSI (match_dup:VHSI 2)))
2086 (set (match_dup:VHSI 0)
2087 (lshiftrt:VHSI (match_dup:VHSI 1)
2088 (neg:VHSI (match_dup:VHSI 3))))]
2090 [(set_attr "type" "*,fx3")])
2093 (define_insn "rotm_<mode>"
2094 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2095 (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2096 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2100 rot<bh>mi\t%0,%1,-%<nmask>2"
2101 [(set_attr "type" "fx3")])
2103 (define_expand "lshr<mode>3"
2104 [(parallel [(set (match_operand:DTI 0 "spu_reg_operand" "")
2105 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "")
2106 (match_operand:SI 2 "spu_nonmem_operand" "")))
2107 (clobber (match_dup:DTI 3))
2108 (clobber (match_dup:SI 4))
2109 (clobber (match_dup:SI 5))])]
2111 "if (GET_CODE (operands[2]) == CONST_INT)
2113 emit_insn (gen_lshr<mode>3_imm(operands[0], operands[1], operands[2]));
2116 operands[3] = gen_reg_rtx (<MODE>mode);
2117 operands[4] = gen_reg_rtx (SImode);
2118 operands[5] = gen_reg_rtx (SImode);")
2120 (define_insn_and_split "lshr<mode>3_imm"
2121 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2122 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2123 (match_operand:SI 2 "immediate_operand" "O,P")))]
2126 rotqmbyi\t%0,%1,-%h2
2127 rotqmbii\t%0,%1,-%e2"
2128 "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
2129 [(set (match_dup:DTI 0)
2130 (lshiftrt:DTI (match_dup:DTI 1)
2132 (set (match_dup:DTI 0)
2133 (lshiftrt:DTI (match_dup:DTI 0)
2136 HOST_WIDE_INT val = INTVAL(operands[2]);
2137 operands[4] = GEN_INT (val&7);
2138 operands[5] = GEN_INT (val&-8);
2140 [(set_attr "type" "shuf,shuf")])
2142 (define_insn_and_split "lshr<mode>3_reg"
2143 [(set (match_operand:DTI 0 "spu_reg_operand" "=r")
2144 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r")
2145 (match_operand:SI 2 "spu_reg_operand" "r")))
2146 (clobber (match_operand:DTI 3 "spu_reg_operand" "=&r"))
2147 (clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
2148 (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))]
2152 [(set (match_dup:DTI 3)
2153 (lshiftrt:DTI (match_dup:DTI 1)
2154 (and:SI (neg:SI (match_dup:SI 4))
2156 (set (match_dup:DTI 0)
2157 (lshiftrt:DTI (match_dup:DTI 3)
2158 (and:SI (neg:SI (and:SI (match_dup:SI 5)
2162 emit_insn (gen_subsi3(operands[4], GEN_INT(0), operands[2]));
2163 emit_insn (gen_subsi3(operands[5], GEN_INT(7), operands[2]));
2166 (define_insn "rotqmbybi_<mode>"
2167 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2168 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2169 (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2175 rotqmbyi\t%0,%1,-%H2"
2176 [(set_attr "type" "shuf")])
2178 (define_insn "rotqmbi_<mode>"
2179 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2180 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2181 (and:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2186 rotqmbii\t%0,%1,-%E2"
2187 [(set_attr "type" "shuf")])
2189 (define_insn "rotqmby_<mode>"
2190 [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
2191 (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
2192 (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
2197 rotqmbyi\t%0,%1,-%F2"
2198 [(set_attr "type" "shuf")])
2203 (define_insn_and_split "ashr<mode>3"
2204 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2205 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2206 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
2207 (clobber (match_scratch:VHSI 3 "=&r,X"))]
2211 rotma<bh>i\t%0,%1,-%<umask>2"
2212 "reload_completed && GET_CODE (operands[2]) == REG"
2213 [(set (match_dup:VHSI 3)
2214 (neg:VHSI (match_dup:VHSI 2)))
2215 (set (match_dup:VHSI 0)
2216 (ashiftrt:VHSI (match_dup:VHSI 1)
2217 (neg:VHSI (match_dup:VHSI 3))))]
2219 [(set_attr "type" "*,fx3")])
2222 (define_insn "rotma_<mode>"
2223 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2224 (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2225 (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
2229 rotma<bh>i\t%0,%1,-%<nmask>2"
2230 [(set_attr "type" "fx3")])
2232 (define_insn_and_split "ashrdi3"
2233 [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
2234 (ashiftrt:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
2235 (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
2236 (clobber (match_scratch:TI 3 "=&r,&r"))
2237 (clobber (match_scratch:TI 4 "=&r,&r"))
2238 (clobber (match_scratch:SI 5 "=&r,&r"))]
2242 [(set (match_dup:DI 0)
2243 (ashiftrt:DI (match_dup:DI 1)
2246 rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
2247 rtx op0v = gen_rtx_REG (V4SImode, REGNO (op0));
2248 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
2249 rtx op1s = gen_rtx_REG (SImode, REGNO (op1));
2250 rtx op2 = operands[2];
2251 rtx op3 = operands[3];
2252 rtx op4 = operands[4];
2253 rtx op5 = operands[5];
2255 if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 63)
2257 rtx op0s = gen_rtx_REG (SImode, REGNO (op0));
2258 emit_insn (gen_ashrsi3 (op0s, op1s, GEN_INT (32)));
2259 emit_insn (gen_spu_fsm (op0v, op0s));
2261 else if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 32)
2263 rtx op0d = gen_rtx_REG (V2DImode, REGNO (op0));
2264 HOST_WIDE_INT val = INTVAL (op2);
2265 emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32)));
2266 emit_insn (gen_spu_xswd (op0d, op0v));
2268 emit_insn (gen_ashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32)));
2272 rtx op3v = gen_rtx_REG (V4SImode, REGNO (op3));
2273 unsigned char arr[16] = {
2274 0xff, 0xff, 0xff, 0xff,
2275 0xff, 0xff, 0xff, 0xff,
2276 0x00, 0x00, 0x00, 0x00,
2277 0x00, 0x00, 0x00, 0x00
2280 emit_insn (gen_ashrsi3 (op5, op1s, GEN_INT (31)));
2281 emit_move_insn (op4, array_to_constant (TImode, arr));
2282 emit_insn (gen_spu_fsm (op3v, op5));
2284 if (GET_CODE (operands[2]) == REG)
2286 emit_insn (gen_selb (op4, op3, op1, op4));
2287 emit_insn (gen_negsi2 (op5, op2));
2288 emit_insn (gen_rotqbybi_ti (op0, op4, op5));
2289 emit_insn (gen_rotqbi_ti (op0, op0, op5));
2293 HOST_WIDE_INT val = -INTVAL (op2);
2294 emit_insn (gen_selb (op0, op3, op1, op4));
2296 emit_insn (gen_rotqby_ti (op0, op0, GEN_INT ((val - 7) / 8)));
2298 emit_insn (gen_rotqbi_ti (op0, op0, GEN_INT (val % 8)));
2305 (define_expand "ashrti3"
2306 [(set (match_operand:TI 0 "spu_reg_operand" "")
2307 (ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "")
2308 (match_operand:SI 2 "spu_nonmem_operand" "")))]
2311 rtx sign_shift = gen_reg_rtx (SImode);
2312 rtx sign_mask = gen_reg_rtx (TImode);
2313 rtx sign_mask_v4si = gen_rtx_SUBREG (V4SImode, sign_mask, 0);
2314 rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]);
2315 rtx t = gen_reg_rtx (TImode);
2316 emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2])));
2317 emit_insn (gen_ashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31)));
2318 emit_insn (gen_fsm_ti (sign_mask, sign_mask));
2319 emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift));
2320 emit_insn (gen_lshrti3 (t, operands[1], operands[2]));
2321 emit_insn (gen_iorti3 (operands[0], t, sign_mask));
2325 ;; fsm is used after rotam to replicate the sign across the whole register.
2326 (define_insn "fsm_ti"
2327 [(set (match_operand:TI 0 "spu_reg_operand" "=r")
2328 (unspec:TI [(match_operand:TI 1 "spu_reg_operand" "r")] UNSPEC_FSM))]
2331 [(set_attr "type" "shuf")])
2336 (define_insn "rotl<mode>3"
2337 [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
2338 (rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
2339 (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
2343 rot<bh>i\t%0,%1,%<umask>2"
2344 [(set_attr "type" "fx3")])
2346 (define_insn "rotlti3"
2347 [(set (match_operand:TI 0 "spu_reg_operand" "=&r,r,r,r")
2348 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r,r,r")
2349 (match_operand:SI 2 "spu_nonmem_operand" "r,O,P,I")))]
2352 rotqbybi\t%0,%1,%2\;rotqbi\t%0,%0,%2
2355 rotqbyi\t%0,%1,%h2\;rotqbii\t%0,%0,%e2"
2356 [(set_attr "length" "8,4,4,8")
2357 (set_attr "type" "multi1,shuf,shuf,multi1")])
2359 (define_insn "rotqbybi_ti"
2360 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2361 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2362 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2368 [(set_attr "type" "shuf,shuf")])
2370 (define_insn "rotqby_ti"
2371 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2372 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2373 (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2379 [(set_attr "type" "shuf,shuf")])
2381 (define_insn "rotqbi_ti"
2382 [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
2383 (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
2384 (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
2390 [(set_attr "type" "shuf,shuf")])
;; Bit-field extract/insert expanders.  All three delegate directly to C
;; helpers in spu.c (spu_expand_extv / spu_expand_insv) and finish with
;; DONE, so no RTL from the patterns themselves is emitted.
2393 ;; struct extract/insert
2394 ;; We have to handle mem's because GCC will generate invalid SUBREG's
2395 ;; if it handles them. We generate better code anyway.
2397 (define_expand "extv"
2398 [(set (match_operand 0 "register_operand" "")
2399 (sign_extract (match_operand 1 "register_operand" "")
2400 (match_operand:SI 2 "const_int_operand" "")
2401 (match_operand:SI 3 "const_int_operand" "")))]
2403 { spu_expand_extv(operands, 0); DONE; })
;; extzv: zero-extending variant -- second argument 1 selects unsigned
;; extraction in spu_expand_extv.
2405 (define_expand "extzv"
2406 [(set (match_operand 0 "register_operand" "")
2407 (zero_extract (match_operand 1 "register_operand" "")
2408 (match_operand:SI 2 "const_int_operand" "")
2409 (match_operand:SI 3 "const_int_operand" "")))]
2411 { spu_expand_extv(operands, 1); DONE; })
;; insv: store operand 3 into a bit-field of operand 0.
2413 (define_expand "insv"
2414 [(set (zero_extract (match_operand 0 "register_operand" "")
2415 (match_operand:SI 1 "const_int_operand" "")
2416 (match_operand:SI 2 "const_int_operand" ""))
2417 (match_operand 3 "nonmemory_operand" ""))]
2419 { spu_expand_insv(operands); DONE; })
;; Block-move expander: hands the whole operation to
;; spu_expand_block_move.  NOTE(review): the tail of this expander
;; (the DONE/FAIL paths after the call) is elided in this excerpt.
2422 ;; String/block move insn.
2423 ;; Argument 0 is the destination
2424 ;; Argument 1 is the source
2425 ;; Argument 2 is the length
2426 ;; Argument 3 is the alignment
2428 (define_expand "movstrsi"
2429 [(parallel [(set (match_operand:BLK 0 "" "")
2430 (match_operand:BLK 1 "" ""))
2431 (use (match_operand:SI 2 "" ""))
2432 (use (match_operand:SI 3 "" ""))])]
2436 if (spu_expand_block_move (operands))
;; indirect_jump: jump through a register.  NOTE(review): the insn
;; condition and output template are elided in this excerpt.
2445 (define_insn "indirect_jump"
2446 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))]
2449 [(set_attr "type" "br")])
2453 (label_ref (match_operand 0 "" "")))]
2456 [(set_attr "type" "br")])
;; Function-return patterns.  NOTE(review): most of both definitions
;; (patterns, conditions, templates) is elided in this excerpt; only
;; the headers and one attribute line are visible.
2461 ;; This will be used for leaf functions, that don't save any regs and
2462 ;; don't have locals on stack, maybe... that is for functions that
2463 ;; don't change $sp and don't need to save $lr.
2464 (define_expand "return"
2469 ;; used in spu_expand_epilogue to generate return from a function and
2470 ;; explicitly set use of $lr.
2472 (define_insn "_return"
2476 [(set_attr "type" "br")])
;; ceq_<mode>: element-wise equality for byte/half/word vector and
;; scalar modes (VQHSI).  Register-register and register-immediate
;; ("B") alternatives; the visible template line is the immediate form
;; ceq<bh>i.  NOTE(review): the condition and the first alternative's
;; template are elided in this excerpt.
2482 (define_insn "ceq_<mode>"
2483 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
2484 (eq:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
2485 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
2489 ceq<bh>i\t%0,%1,%2")
;; ceq_di: DImode equality producing an SImode 0/-1 result.  Splits into
;; word-wise compare on the V4SI view of the same registers: ceq on
;; 32-bit lanes, gather bits (gb) packs per-lane flags, then cgt_si
;; against 11 (0b1011) tests that both words of the DI value matched.
;; NOTE(review): the split condition and trailing DONE/brace lines are
;; elided in this excerpt.
2491 (define_insn_and_split "ceq_di"
2492 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2493 (eq:SI (match_operand:DI 1 "spu_reg_operand" "r")
2494 (match_operand:DI 2 "spu_reg_operand" "r")))]
2498 [(set (match_dup:SI 0)
2499 (eq:SI (match_dup:DI 1)
2502 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
2503 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
2504 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
2505 emit_insn (gen_ceq_v4si (op0, op1, op2));
2506 emit_insn (gen_spu_gb (op0, op0));
2507 emit_insn (gen_cgt_si (operands[0], operands[0], GEN_INT (11)));
;; ceq_ti: 128-bit equality as one multi-insn sequence -- lane-wise ceq,
;; gb to gather the four lane flags, then ceqi against 15 (all four
;; lanes equal).  Three 4-byte insns, hence length 12.
2512 ;; We provide the TI compares for completeness and because some parts of
2513 ;; gcc/libgcc use them, even though user code might never see it.
2514 (define_insn "ceq_ti"
2515 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2516 (eq:SI (match_operand:TI 1 "spu_reg_operand" "r")
2517 (match_operand:TI 2 "spu_reg_operand" "r")))]
2519 "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
2520 [(set_attr "type" "multi0")
2521 (set_attr "length" "12")])
;; Single-precision float equality (ceq) and magnitude equality (cmeq,
;; i.e. |a| == |b|), result in the corresponding integer mode <F2I>.
;; NOTE(review): conditions and output templates are elided in this
;; excerpt.
2523 (define_insn "ceq_<mode>"
2524 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2525 (eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
2526 (match_operand:VSF 2 "spu_reg_operand" "r")))]
2530 (define_insn "cmeq_<mode>"
2531 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2532 (eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
2533 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
;; ceq_df: DFmode compare against zero (note operand 2's
;; const_zero_operand predicate).  On PROCESSOR_CELL with
;; -funsafe-math-optimizations it is emulated with V4SI integer ops:
;; test each 32-bit half against 0 / against 0x80000000 (so -0.0 also
;; compares equal), rotate the TI view by 4 bytes to line up the low
;; word's result with the high word's, then AND the two lane results.
;; NOTE(review): the expander condition and tail (DONE/FAIL) are elided
;; in this excerpt.
2537 ;; These implementations of ceq_df and cgt_df do not correctly handle
2538 ;; NAN or INF. We will also get incorrect results when the result
2539 ;; of the double subtract is too small.
2540 (define_expand "ceq_df"
2541 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2542 (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
2543 (match_operand:DF 2 "const_zero_operand" "i")))]
2546 if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
2548 rtx s0_ti = gen_reg_rtx(TImode);
2549 rtx s1_v4 = gen_reg_rtx(V4SImode);
2550 rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
2551 rtx to_ti = gen_reg_rtx(TImode);
2552 rtx to_v4 = gen_reg_rtx(V4SImode);
2553 rtx l_v4 = gen_reg_rtx(V4SImode);
2554 emit_insn (gen_spu_convert (l_v4, operands[1]));
2555 emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll)));
2556 emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode)));
2557 emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4));
2558 emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
2559 emit_insn (gen_spu_convert (to_v4, to_ti));
2560 emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4));
2561 emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4));
2562 emit_insn (gen_spu_convert (operands[0], to_v4));
;; Double-precision (and V2DF) equality and magnitude equality as single
;; hardware insns -- only on PROCESSOR_CELLEDP, which has enhanced
;; double-precision support (type "fpd").  NOTE(review): the output
;; templates are elided in this excerpt.
2567 (define_insn "ceq_<mode>_celledp"
2568 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2569 (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
2570 (match_operand:VDF 2 "spu_reg_operand" "r")))]
2571 "spu_arch == PROCESSOR_CELLEDP"
2573 [(set_attr "type" "fpd")])
2575 (define_insn "cmeq_<mode>_celledp"
2576 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2577 (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
2578 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
2579 "spu_arch == PROCESSOR_CELLEDP"
2581 [(set_attr "type" "fpd")])
;; ceq_v2df: IEEE-aware V2DF equality emulated with V4SI integer ops on
;; PROCESSOR_CELL.  Outline of the emitted sequence: bitwise-equal test
;; per 32-bit word combined across each DF's two words (biteq); NaN
;; detection by comparing |a|'s high word against the exponent mask
;; 0x7FF00000 (a_nan/ahi_inf); a +0/-0 == -0/+0 case via iszero; the
;; final shufb with hihi_promote replicates each DF lane's high-word
;; result across the whole 64-bit lane.  Result is all-ones/all-zeros
;; per V2DI lane.  NOTE(review): the expander condition, some constant
;; lines, and the tail (DONE) are elided in this excerpt.
2583 (define_expand "ceq_v2df"
2584 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2585 (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
2586 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
2589 if (spu_arch == PROCESSOR_CELL)
2591 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2592 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2593 rtx temp = gen_reg_rtx (TImode);
2594 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2595 rtx temp2 = gen_reg_rtx (V4SImode);
2596 rtx biteq = gen_reg_rtx (V4SImode);
2597 rtx ahi_inf = gen_reg_rtx (V4SImode);
2598 rtx a_nan = gen_reg_rtx (V4SImode);
2599 rtx a_abs = gen_reg_rtx (V4SImode);
2600 rtx b_abs = gen_reg_rtx (V4SImode);
2601 rtx iszero = gen_reg_rtx (V4SImode);
2602 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2603 0x7FFFFFFF, 0xFFFFFFFF);
2604 rtx sign_mask = gen_reg_rtx (V4SImode);
2605 rtx nan_mask = gen_reg_rtx (V4SImode);
2606 rtx hihi_promote = gen_reg_rtx (TImode);
2608 emit_move_insn (sign_mask, pat);
2609 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2611 emit_move_insn (nan_mask, pat);
2612 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
2613 0x08090A0B, 0x18191A1B);
2614 emit_move_insn (hihi_promote, pat);
2616 emit_insn (gen_ceq_v4si (biteq, ra, rb));
2617 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
2619 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
2620 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2621 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2622 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
2623 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
2624 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2626 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
2627 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2628 emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
2629 emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
2630 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
2632 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
2633 emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
2634 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
2635 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
;; cmeq_v2df: |a| == |b| for V2DF, emulated on PROCESSOR_CELL.  Same
;; scheme as ceq_v2df above but both inputs are masked to their
;; absolute values first, so no separate +0/-0 handling is needed;
;; result is cleared wherever operand 1 is NaN (andc with a_nan).
;; NOTE(review): the expander condition, some constant lines, and the
;; tail are elided in this excerpt.
2640 (define_expand "cmeq_v2df"
2641 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2642 (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
2643 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
2646 if (spu_arch == PROCESSOR_CELL)
2648 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2649 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2650 rtx temp = gen_reg_rtx (TImode);
2651 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2652 rtx temp2 = gen_reg_rtx (V4SImode);
2653 rtx biteq = gen_reg_rtx (V4SImode);
2654 rtx ahi_inf = gen_reg_rtx (V4SImode);
2655 rtx a_nan = gen_reg_rtx (V4SImode);
2656 rtx a_abs = gen_reg_rtx (V4SImode);
2657 rtx b_abs = gen_reg_rtx (V4SImode);
2659 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2660 0x7FFFFFFF, 0xFFFFFFFF);
2661 rtx sign_mask = gen_reg_rtx (V4SImode);
2662 rtx nan_mask = gen_reg_rtx (V4SImode);
2663 rtx hihi_promote = gen_reg_rtx (TImode);
2665 emit_move_insn (sign_mask, pat);
2667 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2669 emit_move_insn (nan_mask, pat);
2670 pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
2671 0x08090A0B, 0x18191A1B);
2672 emit_move_insn (hihi_promote, pat);
2674 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2675 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2676 emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
2677 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
2679 emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
2680 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
2681 emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
2682 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2684 emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
2685 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2686 emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
2687 emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
;; cgt_<mode>: element-wise signed greater-than for VQHSI modes;
;; register and immediate ("B") alternatives, immediate form cgt<bh>i.
;; NOTE(review): the condition and first alternative are elided here.
2695 (define_insn "cgt_<mode>"
2696 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
2697 (gt:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
2698 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
2702 cgt<bh>i\t%0,%1,%2")
;; cgt_di_m1: special-case DImode signed greater-than; the pattern's
;; second operand, condition, and template are elided in this excerpt
;; (the name suggests a compare against constant -1 -- confirm against
;; the full file).
2704 (define_insn "cgt_di_m1"
2705 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2706 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
;; cgt_di: DImode signed greater-than, split into V4SI word operations:
;; unsigned-gt (clgt) and eq on 32-bit lanes plus signed-gt (cgt);
;; xswd sign-extends the low-word result across the doubleword, and
;; selb merges "high words differ -> use signed cgt of high word" with
;; "high words equal -> use unsigned clgt of low word".  Three scratch
;; V4SI registers are clobbered.  NOTE(review): split condition and
;; trailing lines are elided in this excerpt.
2711 (define_insn_and_split "cgt_di"
2712 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2713 (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
2714 (match_operand:DI 2 "spu_reg_operand" "r")))
2715 (clobber (match_scratch:V4SI 3 "=&r"))
2716 (clobber (match_scratch:V4SI 4 "=&r"))
2717 (clobber (match_scratch:V4SI 5 "=&r"))]
2721 [(set (match_dup:SI 0)
2722 (gt:SI (match_dup:DI 1)
2725 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
2726 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
2727 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
2728 rtx op3 = operands[3];
2729 rtx op4 = operands[4];
2730 rtx op5 = operands[5];
2731 rtx op3d = gen_rtx_REG (V2DImode, REGNO (operands[3]));
2732 emit_insn (gen_clgt_v4si (op3, op1, op2));
2733 emit_insn (gen_ceq_v4si (op4, op1, op2));
2734 emit_insn (gen_cgt_v4si (op5, op1, op2));
2735 emit_insn (gen_spu_xswd (op3d, op3));
2736 emit_insn (gen_selb (op0, op5, op3, op4));
;; cgt_ti: 128-bit signed greater-than as one long multi-insn sequence
;; (length 36 = nine 4-byte insns) using three scratch V4SI registers.
;; NOTE(review): most of the output template is elided in this excerpt;
;; only two selb lines are visible.
2740 (define_insn "cgt_ti"
2741 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2742 (gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
2743 (match_operand:TI 2 "spu_reg_operand" "r")))
2744 (clobber (match_scratch:V4SI 3 "=&r"))
2745 (clobber (match_scratch:V4SI 4 "=&r"))
2746 (clobber (match_scratch:V4SI 5 "=&r"))]
2752 selb\t%0,%4,%0,%3\;\
2754 selb\t%0,%4,%0,%3\;\
2757 [(set_attr "type" "multi0")
2758 (set_attr "length" "36")])
;; Single-precision float greater-than (cgt) and magnitude greater-than
;; (cmgt, |a| > |b|), result in integer mode <F2I>.  NOTE(review):
;; conditions and templates are elided in this excerpt.
2760 (define_insn "cgt_<mode>"
2761 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2762 (gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
2763 (match_operand:VSF 2 "spu_reg_operand" "r")))]
2767 (define_insn "cmgt_<mode>"
2768 [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
2769 (gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
2770 (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
;; cgt_df: DFmode greater-than-zero (operand 2 is const_zero_operand).
;; On PROCESSOR_CELL with -funsafe-math-optimizations: compute per-word
;; eq-0 and gt-0 masks on the V4SI view, rotate by 4 bytes to combine
;; each DF's two word results, then (high==0 & low!=0 handling via
;; andc) | (high > 0).  See the NaN/INF caveat comment above ceq_df.
;; NOTE(review): the expander condition and tail are elided here.
2774 (define_expand "cgt_df"
2775 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2776 (gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
2777 (match_operand:DF 2 "const_zero_operand" "i")))]
2780 if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
2782 rtx s0_ti = gen_reg_rtx(TImode);
2783 rtx s1_v4 = gen_reg_rtx(V4SImode);
2784 rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
2785 rtx to_ti = gen_reg_rtx(TImode);
2786 rtx to_v4 = gen_reg_rtx(V4SImode);
2787 rtx l_v4 = gen_reg_rtx(V4SImode);
2788 emit_insn (gen_spu_convert(l_v4, operands[1]));
2789 emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx));
2790 emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx));
2791 emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
2792 emit_insn (gen_spu_convert(to_v4, to_ti));
2793 emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4));
2794 emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4));
2795 emit_insn (gen_spu_convert(operands[0], to_v4));
;; Double-precision greater-than and magnitude greater-than as single
;; hardware insns on PROCESSOR_CELLEDP (type "fpd").  NOTE(review):
;; output templates are elided in this excerpt.
2800 (define_insn "cgt_<mode>_celledp"
2801 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2802 (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
2803 (match_operand:VDF 2 "spu_reg_operand" "r")))]
2804 "spu_arch == PROCESSOR_CELLEDP"
2806 [(set_attr "type" "fpd")])
2808 (define_insn "cmgt_<mode>_celledp"
2809 [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
2810 (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
2811 (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
2812 "spu_arch == PROCESSOR_CELLEDP"
2814 [(set_attr "type" "fpd")])
;; cgt_v2df: IEEE-aware V2DF signed greater-than, emulated on
;; PROCESSOR_CELL.  Strategy: detect NaNs in either input (exponent
;; field > 0x7FF00000 pattern, combined across each DF's two words and
;; replicated lane-wide with hi_promote); convert each input to a
;; sign-magnitude-ordered integer key -- for negative values, negate
;; the magnitude with a borrow-propagating bg/sfx pair (borrow_shuffle
;; aligns the borrow bits) and select via the replicated sign (selb) --
;; then do a 64-bit integer compare out of 32-bit pieces:
;; gt_hi | (eq_hi & gt_lo), with the low-word result rotated into
;; position.  NaN lanes force the result to 0 (andc).  NOTE(review):
;; the expander condition, some constant lines, and the tail are elided
;; in this excerpt.
2816 (define_expand "cgt_v2df"
2817 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2818 (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
2819 (match_operand:V2DF 2 "spu_reg_operand" "r")))]
2822 if (spu_arch == PROCESSOR_CELL)
2824 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2825 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2826 rtx zero = gen_reg_rtx (V4SImode);
2827 rtx temp = gen_reg_rtx (TImode);
2828 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2829 rtx temp2 = gen_reg_rtx (V4SImode);
2830 rtx hi_inf = gen_reg_rtx (V4SImode);
2831 rtx a_nan = gen_reg_rtx (V4SImode);
2832 rtx b_nan = gen_reg_rtx (V4SImode);
2833 rtx a_abs = gen_reg_rtx (V4SImode);
2834 rtx b_abs = gen_reg_rtx (V4SImode);
2835 rtx asel = gen_reg_rtx (V4SImode);
2836 rtx bsel = gen_reg_rtx (V4SImode);
2837 rtx abor = gen_reg_rtx (V4SImode);
2838 rtx bbor = gen_reg_rtx (V4SImode);
2839 rtx gt_hi = gen_reg_rtx (V4SImode);
2840 rtx gt_lo = gen_reg_rtx (V4SImode);
2841 rtx sign_mask = gen_reg_rtx (V4SImode);
2842 rtx nan_mask = gen_reg_rtx (V4SImode);
2843 rtx hi_promote = gen_reg_rtx (TImode);
2844 rtx borrow_shuffle = gen_reg_rtx (TImode);
2845 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2846 0x7FFFFFFF, 0xFFFFFFFF);
2847 emit_move_insn (sign_mask, pat);
2848 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2850 emit_move_insn (nan_mask, pat);
2851 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
2852 0x08090A0B, 0x08090A0B);
2853 emit_move_insn (hi_promote, pat);
2854 pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
2855 0x0C0D0E0F, 0xC0C0C0C0);
2856 emit_move_insn (borrow_shuffle, pat);
2858 emit_insn (gen_andv4si3 (a_nan, ra, sign_mask));
2859 emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask));
2860 emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask));
2861 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2863 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2864 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2865 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
2866 emit_insn (gen_andv4si3 (b_nan, rb, sign_mask));
2867 emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask));
2868 emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask));
2869 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
2871 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2872 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
2873 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
2874 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
2875 emit_move_insn (zero, CONST0_RTX (V4SImode));
2876 emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
2877 emit_insn (gen_shufb (asel, asel, asel, hi_promote));
2878 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2879 emit_insn (gen_bg_v4si (abor, zero, a_abs));
2880 emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
2881 emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
2882 emit_insn (gen_selb (abor, a_abs, abor, asel));
2883 emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
2884 emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
2885 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2886 emit_insn (gen_bg_v4si (bbor, zero, b_abs));
2887 emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
2888 emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
2889 emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
2890 emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
2891 emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
2892 emit_insn (gen_ceq_v4si (temp2, abor, bbor));
2893 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
2895 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
2896 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
2898 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
2899 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
2900 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
;; cmgt_v2df: |a| > |b| for V2DF, emulated on PROCESSOR_CELL.  Same
;; framework as cgt_v2df above, but since both operands are absolute
;; values the sign-handling (asel/bsel, borrow negate) is unnecessary:
;; compare a_abs/b_abs directly as 64-bit unsigned integers built from
;; gt_hi | (eq_hi & gt_lo), then clear NaN lanes (andc with a_nan).
;; NOTE(review): the expander condition, some constant lines, and the
;; tail are elided in this excerpt.  Also NOTE(review): gt_hi is
;; computed with clgt (unsigned) here rather than cgt as in cgt_v2df --
;; consistent with comparing magnitudes, but verify against the full
;; file.
2905 (define_expand "cmgt_v2df"
2906 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
2907 (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
2908 (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
2911 if (spu_arch == PROCESSOR_CELL)
2913 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
2914 rtx rb = spu_gen_subreg (V4SImode, operands[2]);
2915 rtx temp = gen_reg_rtx (TImode);
2916 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
2917 rtx temp2 = gen_reg_rtx (V4SImode);
2918 rtx hi_inf = gen_reg_rtx (V4SImode);
2919 rtx a_nan = gen_reg_rtx (V4SImode);
2920 rtx b_nan = gen_reg_rtx (V4SImode);
2921 rtx a_abs = gen_reg_rtx (V4SImode);
2922 rtx b_abs = gen_reg_rtx (V4SImode);
2923 rtx gt_hi = gen_reg_rtx (V4SImode);
2924 rtx gt_lo = gen_reg_rtx (V4SImode);
2925 rtx sign_mask = gen_reg_rtx (V4SImode);
2926 rtx nan_mask = gen_reg_rtx (V4SImode);
2927 rtx hi_promote = gen_reg_rtx (TImode);
2928 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
2929 0x7FFFFFFF, 0xFFFFFFFF);
2930 emit_move_insn (sign_mask, pat);
2931 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
2933 emit_move_insn (nan_mask, pat);
2934 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
2935 0x08090A0B, 0x08090A0B);
2936 emit_move_insn (hi_promote, pat);
2938 emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
2939 emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
2940 emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
2941 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
2943 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2944 emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
2945 emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
2946 emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
2947 emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
2948 emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
2949 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
2951 emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
2952 emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
2953 emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
2954 emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
2956 emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
2957 emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
2958 emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
2959 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
2961 emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
2962 emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
2963 emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
2964 emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
2965 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
;; clgt_<mode>: element-wise UNSIGNED greater-than for VQHSI modes;
;; register and immediate ("B") alternatives, immediate form clgt<bh>i.
;; NOTE(review): the condition and first alternative are elided here.
2973 (define_insn "clgt_<mode>"
2974 [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
2975 (gtu:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
2976 (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
2980 clgt<bh>i\t%0,%1,%2")
;; clgt_di: DImode UNSIGNED greater-than; same splitting scheme as
;; cgt_di but purely unsigned, so only clgt + ceq lane compares are
;; needed: xswd extends the (rotated) low-word result and selb picks
;; high-word clgt vs. low-word clgt depending on high-word equality.
;; NOTE(review): split condition and trailing lines are elided in this
;; excerpt.
2982 (define_insn_and_split "clgt_di"
2983 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
2984 (gtu:SI (match_operand:DI 1 "spu_reg_operand" "r")
2985 (match_operand:DI 2 "spu_reg_operand" "r")))
2986 (clobber (match_scratch:V4SI 3 "=&r"))
2987 (clobber (match_scratch:V4SI 4 "=&r"))
2988 (clobber (match_scratch:V4SI 5 "=&r"))]
2992 [(set (match_dup:SI 0)
2993 (gtu:SI (match_dup:DI 1)
2996 rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
2997 rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
2998 rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
2999 rtx op3 = operands[3];
3000 rtx op4 = operands[4];
3001 rtx op5 = operands[5];
3002 rtx op5d = gen_rtx_REG (V2DImode, REGNO (operands[5]));
3003 emit_insn (gen_clgt_v4si (op3, op1, op2));
3004 emit_insn (gen_ceq_v4si (op4, op1, op2));
3005 emit_insn (gen_spu_xswd (op5d, op3));
3006 emit_insn (gen_selb (op0, op3, op5, op4));
;; clgt_ti: 128-bit unsigned greater-than as a multi-insn sequence
;; (length 32 = eight 4-byte insns) with two scratch V4SI registers.
;; NOTE(review): most of the output template is elided in this excerpt.
3010 (define_insn "clgt_ti"
3011 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3012 (gtu:SI (match_operand:TI 1 "spu_reg_operand" "r")
3013 (match_operand:TI 2 "spu_reg_operand" "r")))
3014 (clobber (match_scratch:V4SI 3 "=&r"))
3015 (clobber (match_scratch:V4SI 4 "=&r"))]
3020 selb\t%0,%4,%0,%3\;\
3022 selb\t%0,%4,%0,%3\;\
3025 [(set_attr "type" "multi0")
3026 (set_attr "length" "32")])
;; dftsv_celledp: double-float "test special value" as a single hardware
;; insn on PROCESSOR_CELLEDP; operand 2 is a constant selector (see the
;; bit decoding in the dftsv expander below).  NOTE(review): the unspec
;; name and output template are elided in this excerpt.
3030 (define_insn "dftsv_celledp"
3031 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3032 (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r")
3033 (match_operand:SI 2 "const_int_operand" "i")]
3035 "spu_arch == PROCESSOR_CELLEDP"
3037 [(set_attr "type" "fpd")])
;; dftsv: classify V2DF lanes against "special values" selected by the
;; constant bitmask in operand 2, producing an all-ones/all-zeros mask
;; per lane.  Emulated on PROCESSOR_CELL by integer tests on the V4SI
;; view.  Bit decoding as implemented below: 0x40 = NaN (magnitude
;; above the 0x7FF00000 exponent pattern), 0x20 = +inf, 0x10 = -inf,
;; 0x08 = +0, 0x04 = -0, 0x02 = positive denorm, 0x01 = negative
;; denorm (magnitude <= 0xFFFFF and not zero).  Sign selection uses the
;; replicated high-word sign (ashr by 31 + shufb hi_promote); the two
;; 32-bit words of each lane are combined via rotlti3.  NOTE(review):
;; the expander condition, rotate amounts, closing braces, and tail are
;; elided in this excerpt.
3039 (define_expand "dftsv"
3040 [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
3041 (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r")
3042 (match_operand:SI 2 "const_int_operand" "i")]
3046 if (spu_arch == PROCESSOR_CELL)
3048 rtx result = gen_reg_rtx (V4SImode);
3049 emit_move_insn (result, CONST0_RTX (V4SImode));
3051 if (INTVAL (operands[2]))
3053 rtx ra = spu_gen_subreg (V4SImode, operands[1]);
3054 rtx abs = gen_reg_rtx (V4SImode);
3055 rtx sign = gen_reg_rtx (V4SImode);
3056 rtx temp = gen_reg_rtx (TImode);
3057 rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
3058 rtx temp2 = gen_reg_rtx (V4SImode);
3059 rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
3060 0x7FFFFFFF, 0xFFFFFFFF);
3061 rtx sign_mask = gen_reg_rtx (V4SImode);
3062 rtx hi_promote = gen_reg_rtx (TImode);
3063 emit_move_insn (sign_mask, pat);
3064 pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
3065 0x08090A0B, 0x08090A0B);
3066 emit_move_insn (hi_promote, pat);
3068 emit_insn (gen_ashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
3069 emit_insn (gen_shufb (sign, sign, sign, hi_promote));
3070 emit_insn (gen_andv4si3 (abs, ra, sign_mask));
3072 /* NaN or +inf or -inf */
3073 if (INTVAL (operands[2]) & 0x70)
3075 rtx nan_mask = gen_reg_rtx (V4SImode);
3076 rtx isinf = gen_reg_rtx (V4SImode);
3077 pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
3079 emit_move_insn (nan_mask, pat);
3080 emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
3083 if (INTVAL (operands[2]) & 0x40)
3085 rtx isnan = gen_reg_rtx (V4SImode);
3086 emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
3087 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
3089 emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
3090 emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
3091 emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
3092 emit_insn (gen_iorv4si3 (result, result, isnan));
3095 if (INTVAL (operands[2]) & 0x30)
3097 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
3099 emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
3100 emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
3103 if (INTVAL (operands[2]) & 0x20)
3105 emit_insn (gen_andc_v4si (temp2, isinf, sign));
3106 emit_insn (gen_iorv4si3 (result, result, temp2));
3109 if (INTVAL (operands[2]) & 0x10)
3111 emit_insn (gen_andv4si3 (temp2, isinf, sign));
3112 emit_insn (gen_iorv4si3 (result, result, temp2));
3118 if (INTVAL (operands[2]) & 0xF)
3120 rtx iszero = gen_reg_rtx (V4SImode);
3121 emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
3122 emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
3124 emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
3127 if (INTVAL (operands[2]) & 0x3)
3129 rtx isdenorm = gen_reg_rtx (V4SImode);
3130 rtx denorm_mask = gen_reg_rtx (V4SImode);
3131 emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
3132 emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
3133 emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
3134 emit_insn (gen_shufb (isdenorm, isdenorm,
3135 isdenorm, hi_promote));
3137 if (INTVAL (operands[2]) & 0x2)
3139 emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
3140 emit_insn (gen_iorv4si3 (result, result, temp2));
3143 if (INTVAL (operands[2]) & 0x1)
3145 emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
3146 emit_insn (gen_iorv4si3 (result, result, temp2));
3151 if (INTVAL (operands[2]) & 0xC)
3153 emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
3155 if (INTVAL (operands[2]) & 0x8)
3157 emit_insn (gen_andc_v4si (temp2, iszero, sign));
3158 emit_insn (gen_iorv4si3 (result, result, temp2));
3161 if (INTVAL (operands[2]) & 0x4)
3163 emit_insn (gen_andv4si3 (temp2, iszero, sign));
3164 emit_insn (gen_iorv4si3 (result, result, temp2));
3169 emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
3179 (if_then_else (match_operator 1 "branch_comparison_operator"
3181 "spu_reg_operand" "r")
3183 (label_ref (match_operand 0 "" ""))
3187 [(set_attr "type" "br")])
3191 (if_then_else (match_operator 0 "branch_comparison_operator"
3193 "spu_reg_operand" "r")
3199 [(set_attr "type" "br")])
3203 (if_then_else (match_operator 1 "branch_comparison_operator"
3205 "spu_reg_operand" "r")
3208 (label_ref (match_operand 0 "" ""))))]
3211 [(set_attr "type" "br")])
3215 (if_then_else (match_operator 0 "branch_comparison_operator"
3217 "spu_reg_operand" "r")
3223 [(set_attr "type" "br")])
;; cmp<mode>/cmpdf expanders (pre-GCC-4.4 cbranch scheme): they do not
;; emit RTL, they only record the operands in the spu_compare_op0/1
;; globals for the following b<cond>/s<cond> expanders to consume.
;; cmpdf is only available when it can be implemented (celledp hardware
;; or the unsafe-math Cell emulation).  NOTE(review): pattern tails
;; (the cc0/DONE lines) are elided in this excerpt.
3226 ;; Compare insns are next. Note that the spu has two types of compares,
3227 ;; signed & unsigned, and one type of branch.
3229 ;; Start with the DEFINE_EXPANDs to generate the rtl for compares, scc
3230 ;; insns, and branches. We store the operands of compares until we see
3233 (define_expand "cmp<mode>"
3235 (compare (match_operand:VQHSI 0 "spu_reg_operand" "")
3236 (match_operand:VQHSI 1 "spu_nonmem_operand" "")))]
3239 spu_compare_op0 = operands[0];
3240 spu_compare_op1 = operands[1];
;; DI/TI compares: both operands must be registers.
3244 (define_expand "cmp<mode>"
3246 (compare (match_operand:DTI 0 "spu_reg_operand" "")
3247 (match_operand:DTI 1 "spu_reg_operand" "")))]
3250 spu_compare_op0 = operands[0];
3251 spu_compare_op1 = operands[1];
;; SF/V4SF compares.
3255 (define_expand "cmp<mode>"
3257 (compare (match_operand:VSF 0 "spu_reg_operand" "")
3258 (match_operand:VSF 1 "spu_reg_operand" "")))]
3261 spu_compare_op0 = operands[0];
3262 spu_compare_op1 = operands[1];
3266 (define_expand "cmpdf"
3268 (compare (match_operand:DF 0 "register_operand" "")
3269 (match_operand:DF 1 "register_operand" "")))]
3270 "(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
3271 || spu_arch == PROCESSOR_CELLEDP "
3273 spu_compare_op0 = operands[0];
3274 spu_compare_op1 = operands[1];
;; Vector select: vcond (signed/float) and vcondu (unsigned) delegate
;; entirely to spu_emit_vector_cond_expr.  NOTE(review): the
;; if_then_else wrapper line and the DONE/FAIL tail are elided in this
;; excerpt.
3278 ;; vector conditional compare patterns
3279 (define_expand "vcond<mode>"
3280 [(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
3282 (match_operator 3 "comparison_operator"
3283 [(match_operand:VCMP 4 "spu_reg_operand" "r")
3284 (match_operand:VCMP 5 "spu_reg_operand" "r")])
3285 (match_operand:VCMP 1 "spu_reg_operand" "r")
3286 (match_operand:VCMP 2 "spu_reg_operand" "r")))]
3289 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3290 operands[3], operands[4], operands[5]))
3296 (define_expand "vcondu<mode>"
3297 [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
3299 (match_operator 3 "comparison_operator"
3300 [(match_operand:VCMPU 4 "spu_reg_operand" "r")
3301 (match_operand:VCMPU 5 "spu_reg_operand" "r")])
3302 (match_operand:VCMPU 1 "spu_reg_operand" "r")
3303 (match_operand:VCMPU 2 "spu_reg_operand" "r")))]
3306 if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
3307 operands[3], operands[4], operands[5]))
;; Conditional-branch expanders: each passes its rtx code to
;; spu_emit_branch_or_set with first argument 0 (= emit a branch),
;; using the operands stashed by the cmp<mode> expanders above.
3314 ;; branch on condition
3316 (define_expand "beq"
3317 [(use (match_operand 0 "" ""))]
3319 { spu_emit_branch_or_set (0, EQ, operands); DONE; })
3321 (define_expand "bne"
3322 [(use (match_operand 0 "" ""))]
3324 { spu_emit_branch_or_set (0, NE, operands); DONE; })
3326 (define_expand "bge"
3327 [(use (match_operand 0 "" ""))]
3329 { spu_emit_branch_or_set (0, GE, operands); DONE; })
3331 (define_expand "bgt"
3332 [(use (match_operand 0 "" ""))]
3334 { spu_emit_branch_or_set (0, GT, operands); DONE; })
3336 (define_expand "ble"
3337 [(use (match_operand 0 "" ""))]
3339 { spu_emit_branch_or_set (0, LE, operands); DONE; })
3341 (define_expand "blt"
3342 [(use (match_operand 0 "" ""))]
3344 { spu_emit_branch_or_set (0, LT, operands); DONE; })
3346 (define_expand "bgeu"
3347 [(use (match_operand 0 "" ""))]
3349 { spu_emit_branch_or_set (0, GEU, operands); DONE; })
3351 (define_expand "bgtu"
3352 [(use (match_operand 0 "" ""))]
3354 { spu_emit_branch_or_set (0, GTU, operands); DONE; })
3356 (define_expand "bleu"
3357 [(use (match_operand 0 "" ""))]
3359 { spu_emit_branch_or_set (0, LEU, operands); DONE; })
3361 (define_expand "bltu"
3362 [(use (match_operand 0 "" ""))]
3364 { spu_emit_branch_or_set (0, LTU, operands); DONE; })
;; Store-condition (scc) expanders: same helper as the branches above
;; but with first argument 1 (= set operand 0 to the comparison result
;; instead of branching).
3369 (define_expand "seq"
3370 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3372 { spu_emit_branch_or_set (1, EQ, operands); DONE; })
3374 (define_expand "sne"
3375 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3377 { spu_emit_branch_or_set (1, NE, operands); DONE; })
3379 (define_expand "sgt"
3380 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3382 { spu_emit_branch_or_set (1, GT, operands); DONE; })
3384 (define_expand "slt"
3385 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3387 { spu_emit_branch_or_set (1, LT, operands); DONE; })
3389 (define_expand "sge"
3390 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3392 { spu_emit_branch_or_set (1, GE, operands); DONE; })
3394 (define_expand "sle"
3395 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3397 { spu_emit_branch_or_set (1, LE, operands); DONE; })
3399 (define_expand "sgtu"
3400 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3402 { spu_emit_branch_or_set (1, GTU, operands); DONE; })
3404 (define_expand "sltu"
3405 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3407 { spu_emit_branch_or_set (1, LTU, operands); DONE; })
3409 (define_expand "sgeu"
3410 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3412 { spu_emit_branch_or_set (1, GEU, operands); DONE; })
3414 (define_expand "sleu"
3415 [(clobber (match_operand:SI 0 "spu_reg_operand" ""))]
3417 { spu_emit_branch_or_set (1, LEU, operands); DONE; })
;; Conditional move: movcc_dummy only exists so genconfig defines
;; HAVE_conditional_move (see original comment); the real work happens
;; in mov<mode>cc, which calls spu_emit_branch_or_set with first
;; argument 2 (= emit a conditional move).  NOTE(review): conditions
;; and trailing DONE lines are elided in this excerpt.
3422 ;; Define this first one so HAVE_conditional_move is defined.
3423 (define_insn "movcc_dummy"
3424 [(set (match_operand 0 "" "")
3425 (if_then_else (match_operand 1 "" "")
3426 (match_operand 2 "" "")
3427 (match_operand 3 "" "")))]
3431 (define_expand "mov<mode>cc"
3432 [(set (match_operand:ALL 0 "spu_reg_operand" "")
3433 (if_then_else:ALL (match_operand 1 "comparison_operator" "")
3434 (match_operand:ALL 2 "spu_reg_operand" "")
3435 (match_operand:ALL 3 "spu_reg_operand" "")))]
3438 spu_emit_branch_or_set(2, GET_CODE(operands[1]), operands);
;; extend_compare: widen a compare-result mask to the mode of operand 0
;; (needed before selb in conditional-move expansion).  The modeless
;; expander rebuilds the SET with the destination's mode around the
;; UNSPEC; the modeful insn below it does the actual work (type "shuf").
;; NOTE(review): conditions, the insn template, and trailing lines are
;; elided in this excerpt.
3442 ;; This pattern is used when the result of a compare is not large
3443 ;; enough to use in a selb when expanding conditional moves.
3444 (define_expand "extend_compare"
3445 [(set (match_operand 0 "spu_reg_operand" "=r")
3446 (unspec [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
3449 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3450 gen_rtx_UNSPEC (GET_MODE (operands[0]),
3451 gen_rtvec (1, operands[1]),
3452 UNSPEC_EXTEND_CMP)));
3456 (define_insn "extend_compare<mode>"
3457 [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
3458 (unspec:ALL [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
3461 [(set_attr "type" "shuf")])
;; casesi: switch dispatch through a relative jump table.  Computes
;; index - min, scales by 4, loads the table entry, adds the table base
;; (entries are table-relative), range-checks with an unsigned compare
;; branching to the default label, then emits a tablejump.
;; NOTE(review): the expander condition and closing lines are elided in
;; this excerpt.
3466 ;; operand 0 is index
3467 ;; operand 1 is the minimum bound
3468 ;; operand 2 is the maximum bound - minimum bound + 1
3469 ;; operand 3 is CODE_LABEL for the table;
3470 ;; operand 4 is the CODE_LABEL to go to if index out of range.
3471 (define_expand "casesi"
3472 [(match_operand:SI 0 "spu_reg_operand" "")
3473 (match_operand:SI 1 "immediate_operand" "")
3474 (match_operand:SI 2 "immediate_operand" "")
3475 (match_operand 3 "" "")
3476 (match_operand 4 "" "")]
3479 rtx table = gen_reg_rtx (SImode);
3480 rtx index = gen_reg_rtx (SImode);
3481 rtx sindex = gen_reg_rtx (SImode);
3482 rtx addr = gen_reg_rtx (Pmode);
3484 emit_move_insn (table, gen_rtx_LABEL_REF (SImode, operands[3]));
3486 emit_insn (gen_subsi3(index, operands[0], force_reg(SImode, operands[1])));
3487 emit_insn (gen_ashlsi3(sindex, index, GEN_INT (2)));
3488 emit_move_insn (addr, gen_rtx_MEM (SImode,
3489 gen_rtx_PLUS (SImode, table, sindex)));
3491 emit_insn (gen_addsi3 (addr, addr, table));
3493 emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1, operands[4]);
3494 emit_jump_insn (gen_tablejump (addr, operands[3]));
;; tablejump: register-indirect jump with a (use (label_ref ...)) so
;; the jump table stays live.  NOTE(review): condition/template elided.
3498 (define_insn "tablejump"
3499 [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))
3500 (use (label_ref (match_operand 1 "" "")))]
3503 [(set_attr "type" "br")])
;; Call patterns.  Each expander forces a non-call_operand address into a
;; register; each matching insn is a branch (type "br").
;; NOTE(review): output templates and some condition lines in this region
;; appear to have been dropped in extraction.
3508 ;; Note that operand 1 is total size of args, in bytes,
3509 ;; and what the call insn wants is the number of words.
3510 (define_expand "sibcall"
3512 [(call (match_operand:QI 0 "call_operand" "")
3513 (match_operand:QI 1 "" ""))
3517 if (! call_operand (operands[0], QImode))
3518 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
;; Sibling (tail) call; only matches when the RTL is flagged as a
;; sibling call.
3521 (define_insn "_sibcall"
3523 [(call (match_operand:QI 0 "call_operand" "R,S")
3524 (match_operand:QI 1 "" "i,i"))
3526 "SIBLING_CALL_P(insn)"
3530 [(set_attr "type" "br,br")])
;; Sibling call that also sets a return value (operand 0).
3532 (define_expand "sibcall_value"
3534 [(set (match_operand 0 "" "")
3535 (call (match_operand:QI 1 "call_operand" "")
3536 (match_operand:QI 2 "" "")))
3540 if (! call_operand (operands[1], QImode))
3541 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0))
3544 (define_insn "_sibcall_value"
3546 [(set (match_operand 0 "" "")
3547 (call (match_operand:QI 1 "call_operand" "R,S")
3548 (match_operand:QI 2 "" "i,i")))
3550 "SIBLING_CALL_P(insn)"
3554 [(set_attr "type" "br,br")])
3556 ;; Note that operand 1 is total size of args, in bytes,
3557 ;; and what the call insn wants is the number of words.
;; Ordinary call: clobbers hard regs 0 and 130 — presumably the link
;; register and a call-clobbered scratch; verify against spu.h register
;; definitions.
3558 (define_expand "call"
3560 [(call (match_operand:QI 0 "call_operand" "")
3561 (match_operand:QI 1 "" ""))
3562 (clobber (reg:SI 0))
3563 (clobber (reg:SI 130))])]
3566 if (! call_operand (operands[0], QImode))
3567 XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
3570 (define_insn "_call"
3572 [(call (match_operand:QI 0 "call_operand" "R,S,T")
3573 (match_operand:QI 1 "" "i,i,i"))
3574 (clobber (reg:SI 0))
3575 (clobber (reg:SI 130))])]
3581 [(set_attr "type" "br")])
;; Call returning a value in operand 0.
3583 (define_expand "call_value"
3585 [(set (match_operand 0 "" "")
3586 (call (match_operand:QI 1 "call_operand" "")
3587 (match_operand:QI 2 "" "")))
3588 (clobber (reg:SI 0))
3589 (clobber (reg:SI 130))])]
3592 if (! call_operand (operands[1], QImode))
3593 XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
3596 (define_insn "_call_value"
3598 [(set (match_operand 0 "" "")
3599 (call (match_operand:QI 1 "call_operand" "R,S,T")
3600 (match_operand:QI 2 "" "i,i,i")))
3601 (clobber (reg:SI 0))
3602 (clobber (reg:SI 130))])]
3608 [(set_attr "type" "br")])
;; untyped_call (used by __builtin_apply): call via call_value with a
;; TImode destination in hard reg 3, then copy each (set ...) in the
;; result block (operand 2) out, and emit a blockage so the optimizer
;; does not move anything across the copies.
3610 (define_expand "untyped_call"
3611 [(parallel [(call (match_operand 0 "" "")
3613 (match_operand 1 "" "")
3614 (match_operand 2 "" "")])]
3618 rtx reg = gen_rtx_REG (TImode, 3);
3620 /* We need to use call_value so the return value registers don't get
3622 emit_call_insn (gen_call_value (reg, operands[0], const0_rtx));
3624 for (i = 0; i < XVECLEN (operands[2], 0); i++)
3626 rtx set = XVECEXP (operands[2], 0, i);
3627 emit_move_insn (SET_DEST (set), SET_SRC (set));
3630 /* The optimizer does not know that the call sets the function value
3631 registers we stored in the result block.  We avoid problems by
3632 claiming that all hard registers are used and clobbered at this
3634 emit_insn (gen_blockage ());
3640 ;; Patterns used for splitting and combining.
3643 ;; Function prologue and epilogue.
;; Both expanders delegate entirely to spu.c; the boolean passed to
;; spu_expand_epilogue distinguishes a sibcall epilogue (true) from a
;; normal one (false).
3645 (define_expand "prologue"
3648 { spu_expand_prologue (); DONE; })
3650 ;; "blockage" is only emitted in the epilogue.  This is what it took to
3651 ;; make "basic block reordering" work with the insn sequence
3652 ;; generated by spu_expand_epilogue (taken from mips.md).
;; Zero-length scheduling barrier: emits no code (length 0) but acts as
;; a volatile unspec that nothing may be moved across.
3654 (define_insn "blockage"
3655 [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)]
3658 [(set_attr "type" "convert")
3659 (set_attr "length" "0")])
3661 (define_expand "epilogue"
3664 { spu_expand_epilogue (false); DONE; })
3666 (define_expand "sibcall_epilogue"
3669 { spu_expand_epilogue (true); DONE; })
3672 ;; stack manipulations
3674 ;; An insn to allocate new stack space for dynamic use (e.g., alloca).
3675 ;; We move the back-chain and decrement the stack pointer.
;; Delegates to spu_allocate_stack; operand 0 receives the new stack
;; address, operand 1 is the amount to allocate.
3676 (define_expand "allocate_stack"
3677 [(set (match_operand 0 "spu_reg_operand" "")
3678 (minus (reg 1) (match_operand 1 "spu_nonmem_operand" "")))
3680 (minus (reg 1) (match_dup 1)))]
3682 "spu_allocate_stack (operands[0], operands[1]); DONE;")
3684 ;; These patterns say how to save and restore the stack pointer.  We need not
3685 ;; save the stack pointer at function level since we are careful to preserve
3689 ;; At block level the stack pointer is saved and restored, so that the
3690 ;; stack space allocated within a block is deallocated when leaving
3691 ;; block scope.  By default, according to the SPU ABI, the stack
3692 ;; pointer and available stack size are saved in a register.  Upon
3693 ;; restoration, the stack pointer is simply copied back, and the
3694 ;; current available stack size is calculated against the restored
3697 ;; For nonlocal gotos, we must save the stack pointer and its
3698 ;; backchain and restore both.  Note that in the nonlocal case, the
3699 ;; save area is a memory location.
;; Function-level save/restore are deliberately no-ops (see comment
;; above); the expanders exist only so GCC does not emit its default
;; save/restore sequence.
3701 (define_expand "save_stack_function"
3702 [(match_operand 0 "general_operand" "")
3703 (match_operand 1 "general_operand" "")]
3707 (define_expand "restore_stack_function"
3708 [(match_operand 0 "general_operand" "")
3709 (match_operand 1 "general_operand" "")]
;; Block-level restore: handled in spu.c.
3713 (define_expand "restore_stack_block"
3714 [(match_operand 0 "spu_reg_operand" "")
3715 (match_operand 1 "memory_operand" "")]
3719 spu_restore_stack_block (operands[0], operands[1]);
;; Nonlocal save: store the back chain in the first word of the save
;; area and the stack pointer in the second.
3723 (define_expand "save_stack_nonlocal"
3724 [(match_operand 0 "memory_operand" "")
3725 (match_operand 1 "spu_reg_operand" "")]
3729 rtx temp = gen_reg_rtx (Pmode);
3731 /* Copy the backchain to the first word, sp to the second.  We need to
3732 save the back chain because __builtin_apply appears to clobber it. */
3733 emit_move_insn (temp, gen_rtx_MEM (Pmode, operands[1]));
3734 emit_move_insn (adjust_address_nv (operands[0], SImode, 0), temp);
3735 emit_move_insn (adjust_address_nv (operands[0], SImode, 4), operands[1]);
;; Nonlocal restore: handled in spu.c (restores sp and back chain).
3739 (define_expand "restore_stack_nonlocal"
3740 [(match_operand 0 "spu_reg_operand" "")
3741 (match_operand 1 "memory_operand" "")]
3745 spu_restore_stack_nonlocal(operands[0], operands[1]);
3752 ;; Vector initialization
;; vec_init: build a vector from the rtvec in operand 1; all the work is
;; done in spu_expand_vector_init.
3753 (define_expand "vec_init<mode>"
3754 [(match_operand:V 0 "register_operand" "")
3755 (match_operand 1 "" "")]
3758 spu_expand_vector_init (operands[0], operands[1]);
;; vec_set: insert scalar operand 1 into element operand 2 of vector
;; operand 0.  First a CPAT unspec builds an insertion-control mask in a
;; TImode temp (operand 3) from the element's byte offset and size, then
;; a SHUFB unspec merges the scalar in under that mask.
;; NOTE(review): lines 3766 and 3770 (presumably match_dup:SI 5 and the
;; vector input operand) appear to have been dropped in extraction.
3762 (define_expand "vec_set<mode>"
3763 [(use (match_operand:SI 2 "spu_nonmem_operand" ""))
3764 (set (match_dup:TI 3)
3765 (unspec:TI [(match_dup:SI 4)
3767 (match_dup:SI 6)] UNSPEC_CPAT))
3768 (set (match_operand:V 0 "spu_reg_operand" "")
3769 (unspec:V [(match_operand:<inner> 1 "spu_reg_operand" "")
3771 (match_dup:TI 3)] UNSPEC_SHUFB))]
3774 HOST_WIDE_INT size = GET_MODE_SIZE (<inner>mode);
3775 rtx offset = GEN_INT (INTVAL (operands[2]) * size);
3776 operands[3] = gen_reg_rtx (TImode);
3777 operands[4] = stack_pointer_rtx;
3778 operands[5] = offset;
3779 operands[6] = GEN_INT (size);
;; vec_extract: if the requested element already sits at the preferred
;; slot (offset multiple of 16 bytes), a plain mode "convert" suffices;
;; otherwise fall through to the rotate insn below.
3782 (define_expand "vec_extract<mode>"
3783 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
3784 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
3785 (parallel [(match_operand 2 "const_int_operand" "i")])))]
3788 if ((INTVAL (operands[2]) * <vmult> + <voff>) % 16 == 0)
3790 emit_insn (gen_spu_convert (operands[0], operands[1]));
;; Rotate the element's bytes into the preferred slot.
3795 (define_insn "_vec_extract<mode>"
3796 [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
3797 (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
3798 (parallel [(match_operand 2 "const_int_operand" "i")])))]
3800 "rotqbyi\t%0,%1,(%2*<vmult>+<voff>)%%16"
3801 [(set_attr "type" "shuf")])
;; Zero-extended extract of V8HI element 0: a right rotate by 2 bytes
;; places the halfword in the low half of the preferred word with zeros
;; above it.
3803 (define_insn "_vec_extractv8hi_ze"
3804 [(set (match_operand:SI 0 "spu_reg_operand" "=r")
3805 (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "spu_reg_operand" "r")
3806 (parallel [(const_int 0)]))))]
3808 "rotqmbyi\t%0,%1,-2"
3809 [(set_attr "type" "shuf")])
;; shufb: byte shuffle of operands 1 and 2 under control mask operand 3.
;; The expander is modeless: it builds the _shufb insn and then forces
;; the SET_SRC mode to match operand 0, so one insn pattern serves every
;; vector/scalar mode.
3814 (define_expand "shufb"
3815 [(set (match_operand 0 "spu_reg_operand" "")
3816 (unspec [(match_operand 1 "spu_reg_operand" "")
3817 (match_operand 2 "spu_reg_operand" "")
3818 (match_operand:TI 3 "spu_reg_operand" "")] UNSPEC_SHUFB))]
3821 rtx s = gen__shufb (operands[0], operands[1], operands[2], operands[3]);
3822 PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
;; The actual SPU shufb instruction.
3827 (define_insn "_shufb"
3828 [(set (match_operand 0 "spu_reg_operand" "=r")
3829 (unspec [(match_operand 1 "spu_reg_operand" "r")
3830 (match_operand 2 "spu_reg_operand" "r")
3831 (match_operand:TI 3 "spu_reg_operand" "r")] UNSPEC_SHUFB))]
3833 "shufb\t%0,%1,%2,%3"
3834 [(set_attr "type" "shuf")])
;; No-ops, branch hints and synchronization insns.
;; NOTE(review): several define_insn header lines and output templates in
;; this region appear to have been dropped in extraction; the patterns
;; below are identified by their unspec codes and type attributes.
;; Pipe-0 nop (UNSPEC_NOP, const_int 0 form).
3837 [(unspec_volatile [(const_int 0)] UNSPEC_NOP)]
3840 [(set_attr "type" "nop")])
;; Parameterized nop taking an immediate (constraint "K").
3843 [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "K")] UNSPEC_NOP)]
3846 [(set_attr "type" "nop")])
;; Pipe-1 nop (lnop).
3849 [(unspec_volatile [(const_int 0)] UNSPEC_LNOP)]
3852 [(set_attr "type" "lnop")])
;; Instruction prefetch hint.
3854 (define_insn "iprefetch"
3855 [(unspec [(const_int 0)] UNSPEC_IPREFETCH)]
3858 [(set_attr "type" "iprefetch")])
;; Branch hint (hbr): operand 0 is an immediate, operand 1 the hinted
;; target (register, symbol, or immediate per the three alternatives).
3862 (unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i,i")
3863 (match_operand:SI 1 "nonmemory_operand" "r,s,i")] UNSPEC_HBR))
3864 (unspec [(const_int 0)] UNSPEC_HBR)]
3870 [(set_attr "type" "hbr")])
;; sync / syncc / dsync: volatile UNSPEC_SYNC variants distinguished by
;; their const_int argument (0, 1, 2); each clobbers all of memory
;; (mem:BLK scratch) so no memory access is moved across it.
3873 [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)
3874 (clobber (mem:BLK (scratch)))]
3877 [(set_attr "type" "br")])
3879 (define_insn "syncc"
3880 [(unspec_volatile [(const_int 1)] UNSPEC_SYNC)
3881 (clobber (mem:BLK (scratch)))]
3884 [(set_attr "type" "br")])
3886 (define_insn "dsync"
3887 [(unspec_volatile [(const_int 2)] UNSPEC_SYNC)
3888 (clobber (mem:BLK (scratch)))]
3891 [(set_attr "type" "br")])
3895 ;; Define the subtract-one-and-jump insns so loop.c
3896 ;; knows what to generate.
;; doloop_end: emitted at the bottom of a counted loop.  Decrements the
;; SImode counter (operand 0) and branches back to the label (operand 4)
;; while it is nonzero.  Only handles innermost SImode loops; other
;; cases fall through (presumably to FAIL — the bail-out lines were
;; dropped in extraction, verify against the original source).
3897 (define_expand "doloop_end"
3898 [(use (match_operand 0 "" "")) ; loop pseudo
3899 (use (match_operand 1 "" "")) ; iterations; zero if unknown
3900 (use (match_operand 2 "" "")) ; max iterations
3901 (use (match_operand 3 "" "")) ; loop level
3902 (use (match_operand 4 "" ""))] ; label
3906 /* Currently SMS relies on the do-loop pattern to recognize loops
3907 where (1) the control part comprises of all insns defining and/or
3908 using a certain 'count' register and (2) the loop count can be
3909 adjusted by modifying this register prior to the loop.
3910 ??? The possible introduction of a new block to initialize the
3911 new IV can potentially affect branch optimizations. */
3912 if (optimize > 0 && flag_modulo_sched)
3918 /* Only use this on innermost loops. */
3919 if (INTVAL (operands[3]) > 1)
3921 if (GET_MODE (operands[0]) != SImode)
/* counter -= 1; if (counter != 0) goto label.  */
3925 emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1)));
3926 bcomp = gen_rtx_NE(SImode, s0, const0_rtx);
3927 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
3928 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
3929 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
3937 ;; convert between any two modes, avoiding any GCC assumptions
;; spu_convert: modeless expander; like shufb above it patches the
;; SET_SRC mode to match operand 0 so one insn serves all mode pairs.
3938 (define_expand "spu_convert"
3939 [(set (match_operand 0 "spu_reg_operand" "")
3940 (unspec [(match_operand 1 "spu_reg_operand" "")] UNSPEC_CONVERT))]
3943 rtx c = gen__spu_convert (operands[0], operands[1]);
3944 PUT_MODE (SET_SRC (c), GET_MODE (operands[0]));
;; The "0" constraint ties input and output to the same register, and
;; length 0 means no code is emitted — the convert is a pure
;; reinterpretation of the register's contents.
3949 (define_insn "_spu_convert"
3950 [(set (match_operand 0 "spu_reg_operand" "=r")
3951 (unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))]
3954 [(set_attr "type" "convert")
3955 (set_attr "length" "0")])
;; Split the no-op convert away entirely, replacing it with a harmless
;; (use (const_int 0)).  NOTE(review): the define_split/define_peephole2
;; header line appears to have been dropped in extraction.
3958 [(set (match_operand 0 "spu_reg_operand")
3959 (unspec [(match_operand 1 "spu_reg_operand")] UNSPEC_CONVERT))]
3961 [(use (const_int 0))]
3966 (include "spu-builtins.md")
;; Vector float min/max, implemented as compare-greater-than + select:
;; cgt produces an all-ones/all-zeros mask per element and selb picks
;; from the two inputs under that mask.  max selects operand 1 where
;; op1 > op2; min swaps the selb data operands to select operand 2 there.
;; NOTE(review): with cgt rather than an IEEE-complete compare, NaN
;; handling follows whatever cgt_v4sf/cgt_v2df do — verify if strict
;; IEEE min/max semantics are required.
3969 (define_expand "smaxv4sf3"
3970 [(set (match_operand:V4SF 0 "register_operand" "=r")
3971 (smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
3972 (match_operand:V4SF 2 "register_operand" "r")))]
3976 rtx mask = gen_reg_rtx (V4SImode);
3978 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
3979 emit_insn (gen_selb (operands[0], operands[2], operands[1], mask));
3983 (define_expand "sminv4sf3"
3984 [(set (match_operand:V4SF 0 "register_operand" "=r")
3985 (smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
3986 (match_operand:V4SF 2 "register_operand" "r")))]
3990 rtx mask = gen_reg_rtx (V4SImode);
3992 emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
3993 emit_insn (gen_selb (operands[0], operands[1], operands[2], mask));
;; Double-precision versions: cgt yields a V2DI mask, which selb needs
;; viewed as V4SI — hence the spu_gen_subreg.
3997 (define_expand "smaxv2df3"
3998 [(set (match_operand:V2DF 0 "register_operand" "=r")
3999 (smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
4000 (match_operand:V2DF 2 "register_operand" "r")))]
4004 rtx mask = gen_reg_rtx (V2DImode);
4005 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
4006 emit_insn (gen_selb (operands[0], operands[2], operands[1],
4007 spu_gen_subreg (V4SImode, mask)));
4011 (define_expand "sminv2df3"
4012 [(set (match_operand:V2DF 0 "register_operand" "=r")
4013 (smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
4014 (match_operand:V2DF 2 "register_operand" "r")))]
4018 rtx mask = gen_reg_rtx (V2DImode);
4019 emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
4020 emit_insn (gen_selb (operands[0], operands[1], operands[2],
4021 spu_gen_subreg (V4SImode, mask)));
;; Widening 16x16->32 vector multiplies.  Each expander computes two
;; V4SI partial products — mpyhh(u) over the high halfwords and mpy(u)
;; over the low halfwords of each word — then interleaves the 32-bit
;; words of the requested half (hi = elements 0..3, lo = 4..7) with a
;; shufb whose mask picks bytes from ve (0x0x) and vo (0x1x).  The
;; u-suffixed expanders use the unsigned mpyhhu/mpyu insns, the s ones
;; the signed mpyhh/mpy.
4025 (define_expand "vec_widen_umult_hi_v8hi"
4026 [(set (match_operand:V4SI 0 "register_operand" "=r")
4030 (match_operand:V8HI 1 "register_operand" "r")
4031 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
4034 (match_operand:V8HI 2 "register_operand" "r")
4035 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
4039 rtx ve = gen_reg_rtx (V4SImode);
4040 rtx vo = gen_reg_rtx (V4SImode);
4041 rtx mask = gen_reg_rtx (TImode);
4042 unsigned char arr[16] = {
4043 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
4044 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
4046 emit_move_insn (mask, array_to_constant (TImode, arr));
4047 emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
4048 emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
4049 emit_insn (gen_shufb (operands[0], ve, vo, mask));
4053 (define_expand "vec_widen_umult_lo_v8hi"
4054 [(set (match_operand:V4SI 0 "register_operand" "=r")
4058 (match_operand:V8HI 1 "register_operand" "r")
4059 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4062 (match_operand:V8HI 2 "register_operand" "r")
4063 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4067 rtx ve = gen_reg_rtx (V4SImode);
4068 rtx vo = gen_reg_rtx (V4SImode);
4069 rtx mask = gen_reg_rtx (TImode);
4070 unsigned char arr[16] = {
4071 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4072 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4074 emit_move_insn (mask, array_to_constant (TImode, arr));
4075 emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
4076 emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
4077 emit_insn (gen_shufb (operands[0], ve, vo, mask));
4081 (define_expand "vec_widen_smult_hi_v8hi"
4082 [(set (match_operand:V4SI 0 "register_operand" "=r")
4086 (match_operand:V8HI 1 "register_operand" "r")
4087 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
4090 (match_operand:V8HI 2 "register_operand" "r")
4091 (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
4095 rtx ve = gen_reg_rtx (V4SImode);
4096 rtx vo = gen_reg_rtx (V4SImode);
4097 rtx mask = gen_reg_rtx (TImode);
4098 unsigned char arr[16] = {
4099 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
4100 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
4102 emit_move_insn (mask, array_to_constant (TImode, arr));
4103 emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
4104 emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
4105 emit_insn (gen_shufb (operands[0], ve, vo, mask));
4109 (define_expand "vec_widen_smult_lo_v8hi"
4110 [(set (match_operand:V4SI 0 "register_operand" "=r")
4114 (match_operand:V8HI 1 "register_operand" "r")
4115 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
4118 (match_operand:V8HI 2 "register_operand" "r")
4119 (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
4123 rtx ve = gen_reg_rtx (V4SImode);
4124 rtx vo = gen_reg_rtx (V4SImode);
4125 rtx mask = gen_reg_rtx (TImode);
4126 unsigned char arr[16] = {
4127 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
4128 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
4130 emit_move_insn (mask, array_to_constant (TImode, arr));
4131 emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
4132 emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
4133 emit_insn (gen_shufb (operands[0], ve, vo, mask));
;; Realign a load that straddles a 16-byte boundary: operands 1 and 2
;; are the two quadwords covering the data and operand 3 a precomputed
;; shuffle mask (e.g. from spu_lvsr below); the result is just a shufb.
4137 (define_expand "vec_realign_load_<mode>"
4138 [(set (match_operand:ALL 0 "register_operand" "=r")
4139 (unspec:ALL [(match_operand:ALL 1 "register_operand" "r")
4140 (match_operand:ALL 2 "register_operand" "r")
4141 (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))]
4145 emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3]));
4149 (define_expand "spu_lvsr"
4150 [(set (match_operand:V16QI 0 "register_operand" "")
4151 (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))]
4156 rtx offset = gen_reg_rtx (V8HImode);
4157 rtx addr_bits = gen_reg_rtx (SImode);
4158 rtx addr_bits_vec = gen_reg_rtx (V8HImode);
4159 rtx splatqi = gen_reg_rtx (TImode);
4160 rtx result = gen_reg_rtx (V8HImode);
4161 unsigned char arr[16] = {
4162 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
4163 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
4164 unsigned char arr2[16] = {
4165 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
4166 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};
4168 emit_move_insn (offset, array_to_constant (V8HImode, arr));
4169 emit_move_insn (splatqi, array_to_constant (TImode, arr2));
4171 gcc_assert (GET_CODE (operands[1]) == MEM);
4172 addr = force_reg (Pmode, XEXP (operands[1], 0));
4173 emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF)));
4174 emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi));
4176 /* offset - (addr & 0xF)
4177 It is safe to use a single sfh, because each byte of offset is > 15 and
4178 each byte of addr is <= 15. */
4179 emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec));
4181 result = simplify_gen_subreg (V16QImode, result, V8HImode, 0);
4182 emit_move_insn (operands[0], result);