1 ;; Copyright (C) 2016-2019 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;; {{{ Vector iterators
19 ; Vector modes for one vector register
20 (define_mode_iterator VEC_1REG_MODE
21 [V64QI V64HI V64SI V64HF V64SF])
22 (define_mode_iterator VEC_1REG_ALT
23 [V64QI V64HI V64SI V64HF V64SF])
25 (define_mode_iterator VEC_1REG_INT_MODE
27 (define_mode_iterator VEC_1REG_INT_ALT
30 ; Vector modes for two vector registers
31 (define_mode_iterator VEC_2REG_MODE
35 (define_mode_iterator VEC_REG_MODE
36 [V64QI V64HI V64SI V64HF V64SF ; Single reg
37 V64DI V64DF]) ; Double reg
39 (define_mode_attr scalar_mode
40 [(V64QI "qi") (V64HI "hi") (V64SI "si")
41 (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
43 (define_mode_attr SCALAR_MODE
44 [(V64QI "QI") (V64HI "HI") (V64SI "SI")
45 (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
50 (define_subst_attr "exec" "vec_merge"
52 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
54 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
56 (define_subst_attr "exec_scatter" "scatter_store"
59 (define_subst "vec_merge"
60 [(set (match_operand:VEC_REG_MODE 0)
61 (match_operand:VEC_REG_MODE 1))]
64 (vec_merge:VEC_REG_MODE
66 (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
67 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
69 (define_subst "vec_merge_with_clobber"
70 [(set (match_operand:VEC_REG_MODE 0)
71 (match_operand:VEC_REG_MODE 1))
72 (clobber (match_operand 2))]
75 (vec_merge:VEC_REG_MODE
77 (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
78 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
79 (clobber (match_dup 2))])
81 (define_subst "vec_merge_with_vcc"
82 [(set (match_operand:VEC_REG_MODE 0)
83 (match_operand:VEC_REG_MODE 1))
84 (set (match_operand:DI 2)
85 (match_operand:DI 3))]
89 (vec_merge:VEC_REG_MODE
91 (match_operand:VEC_REG_MODE 4
92 "gcn_register_or_unspec_operand" "U0")
93 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
96 (reg:DI EXEC_REG)))])])
98 (define_subst "scatter_store"
99 [(set (mem:BLK (scratch))
107 [(set (mem:BLK (scratch))
113 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
119 ; This is the entry point for all vector register moves. Memory accesses can
120 ; come this way also, but will more usually use the reload_in/out,
121 ; gather/scatter, maskload/store, etc.
123 (define_expand "mov<mode>"
124 [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
125 (match_operand:VEC_REG_MODE 1 "general_operand"))]
128 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
130 operands[1] = force_reg (<MODE>mode, operands[1]);
131 rtx scratch = gen_rtx_SCRATCH (V64DImode);
132 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
133 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
134 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
137 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
140 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
142 rtx scratch = gen_rtx_SCRATCH (V64DImode);
143 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
144 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
145 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
148 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
151 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
153 gcc_assert (!reload_completed);
154 rtx scratch = gen_reg_rtx (V64DImode);
155 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
160 ; A pseudo instruction that helps LRA use the "U0" constraint.
162 (define_insn "mov<mode>_unspec"
163 [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
164 (match_operand:VEC_REG_MODE 1 "gcn_unspec_operand" " U"))]
167 [(set_attr "type" "unknown")
168 (set_attr "length" "0")])
170 (define_insn "*mov<mode>"
171 [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
172 (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B"))]
175 [(set_attr "type" "vop1,vop1")
176 (set_attr "length" "4,8")])
178 (define_insn "mov<mode>_exec"
179 [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
181 (vec_merge:VEC_1REG_MODE
182 (match_operand:VEC_1REG_MODE 1 "general_operand"
184 (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
186 (match_operand:DI 2 "register_operand" " e, e,cV,Sv, e, e")))
187 (clobber (match_scratch:V64DI 4 "=X, X, X, X,&v,&v"))]
188 "!MEM_P (operands[0]) || REG_P (operands[1])"
192 v_cndmask_b32\t%0, %3, %1, vcc
193 v_cndmask_b32\t%0, %3, %1, %2
196 [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
197 (set_attr "length" "4,8,4,8,16,16")])
199 ; This variant does not accept an unspec, but does permit MEM
200 ; read/modify/write which is necessary for maskstore.
202 ;(define_insn "*mov<mode>_exec_match"
203 ; [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
204 ; (vec_merge:VEC_1REG_MODE
205 ; (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B, m, v")
207 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
208 ; (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))]
209 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
215 ; [(set_attr "type" "vop1,vop1,*,*")
216 ; (set_attr "length" "4,8,16,16")])
218 (define_insn "*mov<mode>"
219 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
220 (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB"))]
223 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
224 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
226 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
228 [(set_attr "type" "vmult")
229 (set_attr "length" "16")])
231 (define_insn "mov<mode>_exec"
232 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
234 (vec_merge:VEC_2REG_MODE
235 (match_operand:VEC_2REG_MODE 1 "general_operand"
237 (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
238 " U0,vDA0,vDA0,U0,U0")
239 (match_operand:DI 2 "register_operand" " e, cV, Sv, e, e")))
240 (clobber (match_scratch:V64DI 4 "= X, X, X,&v,&v"))]
241 "!MEM_P (operands[0]) || REG_P (operands[1])"
243 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
244 switch (which_alternative)
247 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
249 return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
250 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
252 return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
253 "v_cndmask_b32\t%H0, %H3, %H1, %2";
256 switch (which_alternative)
259 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
261 return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
262 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
264 return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
265 "v_cndmask_b32\t%L0, %L3, %L1, %2";
270 [(set_attr "type" "vmult,vmult,vmult,*,*")
271 (set_attr "length" "16,16,16,16,16")])
273 ; This variant does not accept an unspec, but does permit MEM
274 ; read/modify/write which is necessary for maskstore.
276 ;(define_insn "*mov<mode>_exec_match"
277 ; [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
278 ; (vec_merge:VEC_2REG_MODE
279 ; (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
281 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
282 ; (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
283 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
285 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
286 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
288 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
291 ; [(set_attr "type" "vmult,*,*")
292 ; (set_attr "length" "16,16,16")])
294 ; A SGPR-base load looks like:
297 ; There's no hardware instruction that corresponds to this, but vector base
298 ; addresses are placed in an SGPR because it is easier to add to a vector.
299 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
302 ; vT = v1 << log2(element-size)
306 (define_insn "mov<mode>_sgprbase"
307 [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
308 (unspec:VEC_1REG_MODE
309 [(match_operand:VEC_1REG_MODE 1 "general_operand" " vA,vB, m, v")]
311 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
312 "lra_in_progress || reload_completed"
318 [(set_attr "type" "vop1,vop1,*,*")
319 (set_attr "length" "4,8,12,12")])
321 (define_insn "mov<mode>_sgprbase"
322 [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
323 (unspec:VEC_2REG_MODE
324 [(match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")]
326 (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v"))]
327 "lra_in_progress || reload_completed"
329 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
330 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
332 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
335 [(set_attr "type" "vmult,*,*")
336 (set_attr "length" "8,12,12")])
338 ; reload_in was once a standard name, but here it's only referenced by
339 ; gcn_secondary_reload. It allows a reload with a scratch register.
341 (define_expand "reload_in<mode>"
342 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
343 (match_operand:VEC_REG_MODE 1 "memory_operand" " m"))
344 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
347 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
351 ; reload_out is similar to reload_in, above.
353 (define_expand "reload_out<mode>"
354 [(set (match_operand:VEC_REG_MODE 0 "memory_operand" "= m")
355 (match_operand:VEC_REG_MODE 1 "register_operand" " v"))
356 (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
359 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
363 ; Expand scalar addresses into gather/scatter patterns
366 [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
368 [(match_operand:VEC_REG_MODE 1 "general_operand")]
370 (clobber (match_scratch:V64DI 2))]
372 [(set (mem:BLK (scratch))
373 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
376 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
379 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
380 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
384 [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
385 (vec_merge:VEC_REG_MODE
386 (match_operand:VEC_REG_MODE 1 "general_operand")
387 (match_operand:VEC_REG_MODE 2 "")
388 (match_operand:DI 3 "gcn_exec_reg_operand")))
389 (clobber (match_scratch:V64DI 4))]
391 [(set (mem:BLK (scratch))
392 (unspec:BLK [(match_dup 5) (match_dup 1)
393 (match_dup 6) (match_dup 7) (match_dup 3)]
396 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
400 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
401 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
405 [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
407 [(match_operand:VEC_REG_MODE 1 "memory_operand")]
409 (clobber (match_scratch:V64DI 2))]
412 (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
416 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
419 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
420 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
424 [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
425 (vec_merge:VEC_REG_MODE
426 (match_operand:VEC_REG_MODE 1 "memory_operand")
427 (match_operand:VEC_REG_MODE 2 "")
428 (match_operand:DI 3 "gcn_exec_reg_operand")))
429 (clobber (match_scratch:V64DI 4))]
432 (vec_merge:VEC_REG_MODE
433 (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
439 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
443 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
444 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
447 ; TODO: Add zero/sign extending variants.
452 ; v_writelane and v_readlane work regardless of exec flags.
453 ; We allow source to be scratch.
455 ; FIXME these should take A immediates
457 (define_insn "*vec_set<mode>"
458 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "= v")
459 (vec_merge:VEC_1REG_MODE
460 (vec_duplicate:VEC_1REG_MODE
461 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
462 (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
464 (ashift (const_int 1)
465 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
467 "v_writelane_b32 %0, %1, %2"
468 [(set_attr "type" "vop3a")
469 (set_attr "length" "8")
470 (set_attr "exec" "none")
471 (set_attr "laneselect" "yes")])
473 ; FIXME: 64bit operations really should be splitters, but I am not sure how
474 ; to represent vertical subregs.
475 (define_insn "*vec_set<mode>"
476 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
477 (vec_merge:VEC_2REG_MODE
478 (vec_duplicate:VEC_2REG_MODE
479 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
480 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
482 (ashift (const_int 1)
483 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
485 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
486 [(set_attr "type" "vmult")
487 (set_attr "length" "16")
488 (set_attr "exec" "none")
489 (set_attr "laneselect" "yes")])
491 (define_expand "vec_set<mode>"
492 [(set (match_operand:VEC_REG_MODE 0 "register_operand")
493 (vec_merge:VEC_REG_MODE
494 (vec_duplicate:VEC_REG_MODE
495 (match_operand:<SCALAR_MODE> 1 "register_operand"))
497 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
500 (define_insn "*vec_set<mode>_1"
501 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
502 (vec_merge:VEC_1REG_MODE
503 (vec_duplicate:VEC_1REG_MODE
504 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
505 (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
507 (match_operand:SI 2 "const_int_operand" " i")))]
508 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
510 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
511 return "v_writelane_b32 %0, %1, %2";
513 [(set_attr "type" "vop3a")
514 (set_attr "length" "8")
515 (set_attr "exec" "none")
516 (set_attr "laneselect" "yes")])
518 (define_insn "*vec_set<mode>_1"
519 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v")
520 (vec_merge:VEC_2REG_MODE
521 (vec_duplicate:VEC_2REG_MODE
522 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
523 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
525 (match_operand:SI 2 "const_int_operand" " i")))]
526 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
528 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
529 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
531 [(set_attr "type" "vmult")
532 (set_attr "length" "16")
533 (set_attr "exec" "none")
534 (set_attr "laneselect" "yes")])
536 (define_insn "vec_duplicate<mode><exec>"
537 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
538 (vec_duplicate:VEC_1REG_MODE
539 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
542 [(set_attr "type" "vop3a")
543 (set_attr "length" "8")])
545 (define_insn "vec_duplicate<mode><exec>"
546 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
547 (vec_duplicate:VEC_2REG_MODE
548 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
550 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
551 [(set_attr "type" "vop3a")
552 (set_attr "length" "16")])
554 (define_insn "vec_extract<mode><scalar_mode>"
555 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
556 (vec_select:<SCALAR_MODE>
557 (match_operand:VEC_1REG_MODE 1 "register_operand" " v")
558 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
560 "v_readlane_b32 %0, %1, %2"
561 [(set_attr "type" "vop3a")
562 (set_attr "length" "8")
563 (set_attr "exec" "none")
564 (set_attr "laneselect" "yes")])
566 (define_insn "vec_extract<mode><scalar_mode>"
567 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
568 (vec_select:<SCALAR_MODE>
569 (match_operand:VEC_2REG_MODE 1 "register_operand" " v")
570 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
572 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
573 [(set_attr "type" "vmult")
574 (set_attr "length" "16")
575 (set_attr "exec" "none")
576 (set_attr "laneselect" "yes")])
578 (define_expand "vec_init<mode><scalar_mode>"
579 [(match_operand:VEC_REG_MODE 0 "register_operand")
583 gcn_expand_vector_init (operands[0], operands[1]);
588 ;; {{{ Scatter / Gather
590 ;; GCN does not have an instruction for loading a vector from contiguous
591 ;; memory so *all* loads and stores are eventually converted to scatter
594 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
595 ;; unspec. The unspec formats are as follows:
598 ;; [(<address expression>)
601 ;; (mem:BLK (scratch))]
605 ;; [(<address expression>)
606 ;; (<source register>)
612 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
613 ;; - The mem:BLK does not contain any real information, but indicates that an
614 ;; unknown memory read is taking place. Stores are expected to use a similar
615 ;; mem:BLK outside the unspec.
616 ;; - The address space and glc (volatile) fields are there to replace the
617 ;; fields normally found in a MEM.
618 ;; - Multiple forms of address expression are supported, below.
620 (define_expand "gather_load<mode>"
621 [(match_operand:VEC_REG_MODE 0 "register_operand")
622 (match_operand:DI 1 "register_operand")
623 (match_operand 2 "register_operand")
624 (match_operand 3 "immediate_operand")
625 (match_operand:SI 4 "gcn_alu_operand")]
628 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
629 operands[2], operands[4],
630 INTVAL (operands[3]), NULL);
632 if (GET_MODE (addr) == V64DImode)
633 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
634 const0_rtx, const0_rtx));
636 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
637 addr, const0_rtx, const0_rtx,
642 (define_expand "gather<mode>_exec"
643 [(match_operand:VEC_REG_MODE 0 "register_operand")
644 (match_operand:DI 1 "register_operand")
645 (match_operand:V64SI 2 "register_operand")
646 (match_operand 3 "immediate_operand")
647 (match_operand:SI 4 "gcn_alu_operand")
648 (match_operand:DI 5 "gcn_exec_reg_operand")]
651 rtx undefmode = gcn_gen_undef (<MODE>mode);
653 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
654 operands[2], operands[4],
655 INTVAL (operands[3]), operands[5]);
657 if (GET_MODE (addr) == V64DImode)
658 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
659 const0_rtx, const0_rtx,
660 const0_rtx, undefmode,
663 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
665 const0_rtx, const0_rtx,
666 undefmode, operands[5]));
670 ; Allow any address expression
671 (define_expand "gather<mode>_expr<exec>"
672 [(set (match_operand:VEC_REG_MODE 0 "register_operand")
674 [(match_operand 1 "")
675 (match_operand 2 "immediate_operand")
676 (match_operand 3 "immediate_operand")
682 (define_insn "gather<mode>_insn_1offset<exec>"
683 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v")
685 [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
687 (match_operand 2 "immediate_operand" " n")))
688 (match_operand 3 "immediate_operand" " n")
689 (match_operand 4 "immediate_operand" " n")
692 "(AS_FLAT_P (INTVAL (operands[3]))
693 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
694 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
695 || (AS_GLOBAL_P (INTVAL (operands[3]))
696 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
698 addr_space_t as = INTVAL (operands[3]);
699 const char *glc = INTVAL (operands[4]) ? " glc" : "";
701 static char buf[200];
704 if (TARGET_GCN5_PLUS)
705 sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
708 sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc);
710 else if (AS_GLOBAL_P (as))
711 sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;"
712 "s_waitcnt\tvmcnt(0)", glc);
718 [(set_attr "type" "flat")
719 (set_attr "length" "12")])
721 (define_insn "gather<mode>_insn_1offset_ds<exec>"
722 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v")
724 [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
726 (match_operand 2 "immediate_operand" " n")))
727 (match_operand 3 "immediate_operand" " n")
728 (match_operand 4 "immediate_operand" " n")
731 "(AS_ANY_DS_P (INTVAL (operands[3]))
732 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
734 addr_space_t as = INTVAL (operands[3]);
735 static char buf[200];
736 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
737 (AS_GDS_P (as) ? " gds" : ""));
740 [(set_attr "type" "ds")
741 (set_attr "length" "12")])
743 (define_insn "gather<mode>_insn_2offsets<exec>"
744 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v")
749 (match_operand:DI 1 "register_operand" "Sv"))
751 (match_operand:V64SI 2 "register_operand" " v")))
752 (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
753 (match_operand 4 "immediate_operand" " n")
754 (match_operand 5 "immediate_operand" " n")
757 "(AS_GLOBAL_P (INTVAL (operands[4]))
758 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
760 addr_space_t as = INTVAL (operands[4]);
761 const char *glc = INTVAL (operands[5]) ? " glc" : "";
763 static char buf[200];
764 if (AS_GLOBAL_P (as))
766 /* Work around assembler bug in which a 64-bit register is expected,
767 but a 32-bit value would be correct. */
768 int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
769 sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
770 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
777 [(set_attr "type" "flat")
778 (set_attr "length" "12")])
780 (define_expand "scatter_store<mode>"
781 [(match_operand:DI 0 "register_operand")
782 (match_operand 1 "register_operand")
783 (match_operand 2 "immediate_operand")
784 (match_operand:SI 3 "gcn_alu_operand")
785 (match_operand:VEC_REG_MODE 4 "register_operand")]
788 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
789 operands[1], operands[3],
790 INTVAL (operands[2]), NULL);
792 if (GET_MODE (addr) == V64DImode)
793 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
794 const0_rtx, const0_rtx));
796 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
797 const0_rtx, operands[4],
798 const0_rtx, const0_rtx));
802 (define_expand "scatter<mode>_exec"
803 [(match_operand:DI 0 "register_operand")
804 (match_operand 1 "register_operand")
805 (match_operand 2 "immediate_operand")
806 (match_operand:SI 3 "gcn_alu_operand")
807 (match_operand:VEC_REG_MODE 4 "register_operand")
808 (match_operand:DI 5 "gcn_exec_reg_operand")]
811 operands[5] = force_reg (DImode, operands[5]);
813 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
814 operands[1], operands[3],
815 INTVAL (operands[2]), operands[5]);
817 if (GET_MODE (addr) == V64DImode)
818 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
819 operands[4], const0_rtx,
823 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
824 const0_rtx, operands[4],
825 const0_rtx, const0_rtx,
830 ; Allow any address expression
831 (define_expand "scatter<mode>_expr<exec_scatter>"
832 [(set (mem:BLK (scratch))
834 [(match_operand:V64DI 0 "")
835 (match_operand:VEC_REG_MODE 1 "register_operand")
836 (match_operand 2 "immediate_operand")
837 (match_operand 3 "immediate_operand")]
842 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
843 [(set (mem:BLK (scratch))
845 [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
847 (match_operand 1 "immediate_operand" "n")))
848 (match_operand:VEC_REG_MODE 2 "register_operand" "v")
849 (match_operand 3 "immediate_operand" "n")
850 (match_operand 4 "immediate_operand" "n")]
852 "(AS_FLAT_P (INTVAL (operands[3]))
853 && (INTVAL(operands[1]) == 0
855 && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
856 || (AS_GLOBAL_P (INTVAL (operands[3]))
857 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
859 addr_space_t as = INTVAL (operands[3]);
860 const char *glc = INTVAL (operands[4]) ? " glc" : "";
862 static char buf[200];
865 if (TARGET_GCN5_PLUS)
866 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;"
867 "s_waitcnt\texpcnt(0)", glc);
869 sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\texpcnt(0)",
872 else if (AS_GLOBAL_P (as))
873 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;"
874 "s_waitcnt\texpcnt(0)", glc);
880 [(set_attr "type" "flat")
881 (set_attr "length" "12")])
883 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
884 [(set (mem:BLK (scratch))
886 [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
888 (match_operand 1 "immediate_operand" "n")))
889 (match_operand:VEC_REG_MODE 2 "register_operand" "v")
890 (match_operand 3 "immediate_operand" "n")
891 (match_operand 4 "immediate_operand" "n")]
893 "(AS_ANY_DS_P (INTVAL (operands[3]))
894 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
896 addr_space_t as = INTVAL (operands[3]);
897 static char buf[200];
898 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\texpcnt(0)",
899 (AS_GDS_P (as) ? " gds" : ""));
902 [(set_attr "type" "ds")
903 (set_attr "length" "12")])
905 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
906 [(set (mem:BLK (scratch))
911 (match_operand:DI 0 "register_operand" "Sv"))
913 (match_operand:V64SI 1 "register_operand" " v")))
914 (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
916 (match_operand:VEC_REG_MODE 3 "register_operand" " v")
917 (match_operand 4 "immediate_operand" " n")
918 (match_operand 5 "immediate_operand" " n")]
920 "(AS_GLOBAL_P (INTVAL (operands[4]))
921 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
923 addr_space_t as = INTVAL (operands[4]);
924 const char *glc = INTVAL (operands[5]) ? " glc" : "";
926 static char buf[200];
927 if (AS_GLOBAL_P (as))
929 /* Work around assembler bug in which a 64-bit register is expected,
930 but a 32-bit value would be correct. */
931 int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
932 sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;"
933 "s_waitcnt\texpcnt(0)", reg, reg + 1, glc);
940 [(set_attr "type" "flat")
941 (set_attr "length" "12")])
946 (define_insn "ds_bpermute<mode>"
947 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
948 (unspec:VEC_1REG_MODE
949 [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
950 (match_operand:V64SI 1 "register_operand" " v")
951 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
954 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
955 [(set_attr "type" "vop2")
956 (set_attr "length" "12")])
958 (define_insn_and_split "ds_bpermute<mode>"
959 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
960 (unspec:VEC_2REG_MODE
961 [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
962 (match_operand:V64SI 1 "register_operand" " v")
963 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
968 [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
970 (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
973 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
974 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
975 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
976 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
978 [(set_attr "type" "vmult")
979 (set_attr "length" "24")])
982 ;; {{{ ALU special case: add/sub
984 (define_insn "addv64si3<exec_clobber>"
985 [(set (match_operand:V64SI 0 "register_operand" "= v")
987 (match_operand:V64SI 1 "register_operand" "% v")
988 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB")))
989 (clobber (reg:DI VCC_REG))]
991 "v_add%^_u32\t%0, vcc, %2, %1"
992 [(set_attr "type" "vop2")
993 (set_attr "length" "8")])
995 (define_insn "addv64si3_dup<exec_clobber>"
996 [(set (match_operand:V64SI 0 "register_operand" "= v")
999 (match_operand:SI 2 "gcn_alu_operand" "SvB"))
1000 (match_operand:V64SI 1 "register_operand" " v")))
1001 (clobber (reg:DI VCC_REG))]
1003 "v_add%^_u32\t%0, vcc, %2, %1"
1004 [(set_attr "type" "vop2")
1005 (set_attr "length" "8")])
1007 (define_insn "addv64si3_vcc<exec_vcc>"
1008 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1010 (match_operand:V64SI 1 "register_operand" "% v, v")
1011 (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
1012 (set (match_operand:DI 3 "register_operand" "= cV, Sg")
1013 (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
1016 "v_add%^_u32\t%0, %3, %2, %1"
1017 [(set_attr "type" "vop2,vop3b")
1018 (set_attr "length" "8")])
1020 ; This pattern only changes the VCC bits when the corresponding lane is
1021 ; enabled, so the set must be described as an ior.
1023 (define_insn "addv64si3_vcc_dup<exec_vcc>"
1024 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1026 (vec_duplicate:V64SI
1027 (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
1028 (match_operand:V64SI 2 "register_operand" " v, v")))
1029 (set (match_operand:DI 3 "register_operand" "=cV, Sg")
1030 (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
1032 (vec_duplicate:V64SI (match_dup 2))))]
1034 "v_add%^_u32\t%0, %3, %2, %1"
1035 [(set_attr "type" "vop2,vop3b")
1036 (set_attr "length" "8,8")])
1038 ; This pattern does not accept SGPR because VCC read already counts as an
1039 ; SGPR use and number of SGPR operands is limited to 1.
1041 (define_insn "addcv64si3<exec_vcc>"
1042 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
1046 (vec_duplicate:V64SI (const_int 1))
1047 (vec_duplicate:V64SI (const_int 0))
1048 (match_operand:DI 3 "register_operand" " cV,Sv"))
1049 (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
1050 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB")))
1051 (set (match_operand:DI 4 "register_operand" "=cV,Sg")
1052 (ior:DI (ltu:DI (plus:V64SI
1055 (vec_duplicate:V64SI (const_int 1))
1056 (vec_duplicate:V64SI (const_int 0))
1063 (vec_duplicate:V64SI (const_int 1))
1064 (vec_duplicate:V64SI (const_int 0))
1069 "v_addc%^_u32\t%0, %4, %1, %2, %3"
1070 [(set_attr "type" "vop2,vop3b")
1071 (set_attr "length" "4,8")])
1073 (define_insn "addcv64si3_dup<exec_vcc>"
1074 [(set (match_operand:V64SI 0 "register_operand" "=v,v")
1078 (vec_duplicate:V64SI (const_int 1))
1079 (vec_duplicate:V64SI (const_int 0))
1080 (match_operand:DI 3 "register_operand" " cV, Sv"))
1081 (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
1082 (vec_duplicate:V64SI
1083 (match_operand:SI 2 "gcn_alu_operand" "SvB,SvB"))))
1084 (set (match_operand:DI 4 "register_operand" "=cV, Sg")
1085 (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
1087 (vec_duplicate:V64SI (const_int 1))
1088 (vec_duplicate:V64SI (const_int 0))
1091 (vec_duplicate:V64SI
1093 (vec_duplicate:V64SI
1095 (ltu:DI (plus:V64SI (vec_merge:V64SI
1096 (vec_duplicate:V64SI (const_int 1))
1097 (vec_duplicate:V64SI (const_int 0))
1102 "v_addc%^_u32\t%0, %4, %1, %2, %3"
1103 [(set_attr "type" "vop2,vop3b")
1104 (set_attr "length" "4,8")])
; subv64si3: V64SI subtract; VCC is clobbered as a side effect.  The second
; alternative uses v_subrev (reversed operands) so that the non-vgpr operand
; can sit in operand 2.
1106 (define_insn "subv64si3<exec_clobber>"
1107 [(set (match_operand:V64SI 0 "register_operand" "= v, v")
1109 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB, v")
1110 (match_operand:V64SI 2 "gcn_alu_operand" " v,vSvB")))
1111 (clobber (reg:DI VCC_REG))]
1114 v_sub%^_u32\t%0, vcc, %1, %2
1115 v_subrev%^_u32\t%0, vcc, %2, %1"
1116 [(set_attr "type" "vop2")
1117 (set_attr "length" "8,8")])
; subv64si3_vcc: subtract that additionally stores the per-lane borrow-out
; mask (gtu of the result vs operand 1) in DI operand 3 — either VCC (cV)
; or an SGPR pair (Sg).
1119 (define_insn "subv64si3_vcc<exec_vcc>"
1120 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1122 (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1123 (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1124 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1125 (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
1129 v_sub%^_u32\t%0, %3, %1, %2
1130 v_sub%^_u32\t%0, %3, %1, %2
1131 v_subrev%^_u32\t%0, %3, %2, %1
1132 v_subrev%^_u32\t%0, %3, %2, %1"
1133 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1134 (set_attr "length" "8")])
1136 ; This pattern does not accept SGPR because VCC read already counts
1137 ; as a SGPR use and number of SGPR operands is limited to 1.
; subcv64si3: V64SI subtract with borrow-in (operand 3) and borrow-out
; (operand 4).  The last two alternatives swap operands 1/2 and emit
; v_subbrev instead of v_subb.
1139 (define_insn "subcv64si3<exec_vcc>"
1140 [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
1144 (vec_duplicate:V64SI (const_int 1))
1145 (vec_duplicate:V64SI (const_int 0))
1146 (match_operand:DI 3 "gcn_alu_operand" " cV,Sv,cV,Sv"))
1147 (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
1148 (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB,vA,vA")))
1149 (set (match_operand:DI 4 "register_operand" "=cV,Sg,cV,Sg")
1150 (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
1152 (vec_duplicate:V64SI (const_int 1))
1153 (vec_duplicate:V64SI (const_int 0))
1158 (ltu:DI (minus:V64SI (vec_merge:V64SI
1159 (vec_duplicate:V64SI (const_int 1))
1160 (vec_duplicate:V64SI (const_int 0))
1166 v_subb%^_u32\t%0, %4, %1, %2, %3
1167 v_subb%^_u32\t%0, %4, %1, %2, %3
1168 v_subbrev%^_u32\t%0, %4, %2, %1, %3
1169 v_subbrev%^_u32\t%0, %4, %2, %1, %3"
1170 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1171 (set_attr "length" "8")])
; addv64di3: 64-bit vector add.  Split (when gcn_can_split_p allows) into a
; low-part addv64si3_vcc that produces the carry in VCC, followed by a
; high-part addcv64si3 that consumes it.
1173 (define_insn_and_split "addv64di3"
1174 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1176 (match_operand:V64DI 1 "register_operand" "% v0")
1177 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0")))
1178 (clobber (reg:DI VCC_REG))]
1181 "gcn_can_split_p (V64DImode, operands[0])
1182 && gcn_can_split_p (V64DImode, operands[1])
1183 && gcn_can_split_p (V64DImode, operands[2])"
1186 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1187 emit_insn (gen_addv64si3_vcc
1188 (gcn_operand_part (V64DImode, operands[0], 0),
1189 gcn_operand_part (V64DImode, operands[1], 0),
1190 gcn_operand_part (V64DImode, operands[2], 0),
1192 emit_insn (gen_addcv64si3
1193 (gcn_operand_part (V64DImode, operands[0], 1),
1194 gcn_operand_part (V64DImode, operands[1], 1),
1195 gcn_operand_part (V64DImode, operands[2], 1),
1199 [(set_attr "type" "vmult")
1200 (set_attr "length" "8")])
; addv64di3_exec: as addv64di3 but predicated by the EXEC mask (operand 4),
; with operand 3 supplying values for masked-off lanes; the split emits the
; corresponding _exec sub-patterns.
1202 (define_insn_and_split "addv64di3_exec"
1203 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1206 (match_operand:V64DI 1 "register_operand" "% v0")
1207 (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0"))
1208 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1209 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1210 (clobber (reg:DI VCC_REG))]
1213 "gcn_can_split_p (V64DImode, operands[0])
1214 && gcn_can_split_p (V64DImode, operands[1])
1215 && gcn_can_split_p (V64DImode, operands[2])
1216 && gcn_can_split_p (V64DImode, operands[4])"
1219 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1220 emit_insn (gen_addv64si3_vcc_exec
1221 (gcn_operand_part (V64DImode, operands[0], 0),
1222 gcn_operand_part (V64DImode, operands[1], 0),
1223 gcn_operand_part (V64DImode, operands[2], 0),
1225 gcn_operand_part (V64DImode, operands[3], 0),
1227 emit_insn (gen_addcv64si3_exec
1228 (gcn_operand_part (V64DImode, operands[0], 1),
1229 gcn_operand_part (V64DImode, operands[1], 1),
1230 gcn_operand_part (V64DImode, operands[2], 1),
1232 gcn_operand_part (V64DImode, operands[3], 1),
1236 [(set_attr "type" "vmult")
1237 (set_attr "length" "8")])
; subv64di3: 64-bit vector subtract, split into a low-part subv64si3_vcc
; (borrow in VCC) plus a high-part subcv64si3 consuming the borrow.
1239 (define_insn_and_split "subv64di3"
1240 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
1242 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
1243 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0")))
1244 (clobber (reg:DI VCC_REG))]
1247 "gcn_can_split_p (V64DImode, operands[0])
1248 && gcn_can_split_p (V64DImode, operands[1])
1249 && gcn_can_split_p (V64DImode, operands[2])"
1252 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1253 emit_insn (gen_subv64si3_vcc
1254 (gcn_operand_part (V64DImode, operands[0], 0),
1255 gcn_operand_part (V64DImode, operands[1], 0),
1256 gcn_operand_part (V64DImode, operands[2], 0),
1258 emit_insn (gen_subcv64si3
1259 (gcn_operand_part (V64DImode, operands[0], 1),
1260 gcn_operand_part (V64DImode, operands[1], 1),
1261 gcn_operand_part (V64DImode, operands[2], 1),
1265 [(set_attr "type" "vmult")
1266 (set_attr "length" "8,8")])
; subv64di3_exec: EXEC-predicated variant; note the extra insn condition
; requiring at least one of operands 1/2 to be a register.
1268 (define_insn_and_split "subv64di3_exec"
1269 [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
1272 (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
1273 (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0"))
1274 (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
1276 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1277 (clobber (reg:DI VCC_REG))]
1278 "register_operand (operands[1], VOIDmode)
1279 || register_operand (operands[2], VOIDmode)"
1281 "gcn_can_split_p (V64DImode, operands[0])
1282 && gcn_can_split_p (V64DImode, operands[1])
1283 && gcn_can_split_p (V64DImode, operands[2])
1284 && gcn_can_split_p (V64DImode, operands[3])"
1287 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1288 emit_insn (gen_subv64si3_vcc_exec
1289 (gcn_operand_part (V64DImode, operands[0], 0),
1290 gcn_operand_part (V64DImode, operands[1], 0),
1291 gcn_operand_part (V64DImode, operands[2], 0),
1293 gcn_operand_part (V64DImode, operands[3], 0),
1295 emit_insn (gen_subcv64si3_exec
1296 (gcn_operand_part (V64DImode, operands[0], 1),
1297 gcn_operand_part (V64DImode, operands[1], 1),
1298 gcn_operand_part (V64DImode, operands[2], 1),
1300 gcn_operand_part (V64DImode, operands[3], 1),
1304 [(set_attr "type" "vmult")
1305 (set_attr "length" "8,8")])
; addv64di3_dup: V64DI plus a scalar DI duplicated across lanes.  The split
; uses the _vcc_dup / _dup 32-bit patterns, taking each 32-bit half of the
; scalar with gcn_operand_part (DImode, ...).
1307 (define_insn_and_split "addv64di3_dup"
1308 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1310 (match_operand:V64DI 1 "register_operand" " v0")
1311 (vec_duplicate:V64DI
1312 (match_operand:DI 2 "gcn_alu_operand" "SvDB"))))
1313 (clobber (reg:DI VCC_REG))]
1316 "gcn_can_split_p (V64DImode, operands[0])
1317 && gcn_can_split_p (V64DImode, operands[1])
1318 && gcn_can_split_p (V64DImode, operands[2])"
1321 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1322 emit_insn (gen_addv64si3_vcc_dup
1323 (gcn_operand_part (V64DImode, operands[0], 0),
1324 gcn_operand_part (DImode, operands[2], 0),
1325 gcn_operand_part (V64DImode, operands[1], 0),
1327 emit_insn (gen_addcv64si3_dup
1328 (gcn_operand_part (V64DImode, operands[0], 1),
1329 gcn_operand_part (V64DImode, operands[1], 1),
1330 gcn_operand_part (DImode, operands[2], 1),
1334 [(set_attr "type" "vmult")
1335 (set_attr "length" "8")])
; addv64di3_dup_exec: EXEC-predicated variant of addv64di3_dup.
1337 (define_insn_and_split "addv64di3_dup_exec"
1338 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1341 (match_operand:V64DI 1 "register_operand" " v0")
1342 (vec_duplicate:V64DI
1343 (match_operand:DI 2 "gcn_alu_operand" "SvDB")))
1344 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1345 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1346 (clobber (reg:DI VCC_REG))]
1349 "gcn_can_split_p (V64DImode, operands[0])
1350 && gcn_can_split_p (V64DImode, operands[1])
1351 && gcn_can_split_p (V64DImode, operands[2])
1352 && gcn_can_split_p (V64DImode, operands[3])"
1355 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1356 emit_insn (gen_addv64si3_vcc_dup_exec
1357 (gcn_operand_part (V64DImode, operands[0], 0),
1358 gcn_operand_part (DImode, operands[2], 0),
1359 gcn_operand_part (V64DImode, operands[1], 0),
1361 gcn_operand_part (V64DImode, operands[3], 0),
1363 emit_insn (gen_addcv64si3_dup_exec
1364 (gcn_operand_part (V64DImode, operands[0], 1),
1365 gcn_operand_part (V64DImode, operands[1], 1),
1366 gcn_operand_part (DImode, operands[2], 1),
1368 gcn_operand_part (V64DImode, operands[3], 1),
1372 [(set_attr "type" "vmult")
1373 (set_attr "length" "8")])
; addv64di3_zext: add a zero-extended V64SI (operand 1) to a V64DI
; (operand 2).  The high part is just operand 2's high half plus the carry,
; hence the const0_rtx addend in the addcv64si3 call.
1375 (define_insn_and_split "addv64di3_zext"
1376 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
1379 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
1380 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")))
1381 (clobber (reg:DI VCC_REG))]
1384 "gcn_can_split_p (V64DImode, operands[0])
1385 && gcn_can_split_p (V64DImode, operands[2])"
1388 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1389 emit_insn (gen_addv64si3_vcc
1390 (gcn_operand_part (V64DImode, operands[0], 0),
1392 gcn_operand_part (V64DImode, operands[2], 0),
1394 emit_insn (gen_addcv64si3
1395 (gcn_operand_part (V64DImode, operands[0], 1),
1396 gcn_operand_part (V64DImode, operands[2], 1),
1397 const0_rtx, vcc, vcc));
1400 [(set_attr "type" "vmult")
1401 (set_attr "length" "8,8")])
; addv64di3_zext_exec: EXEC-predicated variant of addv64di3_zext.
1403 (define_insn_and_split "addv64di3_zext_exec"
1404 [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
1408 (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
1409 (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA"))
1410 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1411 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1412 (clobber (reg:DI VCC_REG))]
1415 "gcn_can_split_p (V64DImode, operands[0])
1416 && gcn_can_split_p (V64DImode, operands[2])
1417 && gcn_can_split_p (V64DImode, operands[3])"
1420 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1421 emit_insn (gen_addv64si3_vcc_exec
1422 (gcn_operand_part (V64DImode, operands[0], 0),
1424 gcn_operand_part (V64DImode, operands[2], 0),
1426 gcn_operand_part (V64DImode, operands[3], 0),
1428 emit_insn (gen_addcv64si3_exec
1429 (gcn_operand_part (V64DImode, operands[0], 1),
1430 gcn_operand_part (V64DImode, operands[2], 1),
1431 const0_rtx, vcc, vcc,
1432 gcn_operand_part (V64DImode, operands[3], 1),
1436 [(set_attr "type" "vmult")
1437 (set_attr "length" "8,8")])
; addv64di3_zext_dup: add a zero-extended, lane-duplicated scalar SI
; (operand 1) to a V64DI (operand 2); high part is operand 2's high half
; plus carry only (const0_rtx addend).
1439 (define_insn_and_split "addv64di3_zext_dup"
1440 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1443 (vec_duplicate:V64SI
1444 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
1445 (match_operand:V64DI 2 "gcn_alu_operand" "vA0")))
1446 (clobber (reg:DI VCC_REG))]
1449 "gcn_can_split_p (V64DImode, operands[0])
1450 && gcn_can_split_p (V64DImode, operands[2])"
1453 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1454 emit_insn (gen_addv64si3_vcc_dup
1455 (gcn_operand_part (V64DImode, operands[0], 0),
1456 gcn_operand_part (DImode, operands[1], 0),
1457 gcn_operand_part (V64DImode, operands[2], 0),
1459 emit_insn (gen_addcv64si3
1460 (gcn_operand_part (V64DImode, operands[0], 1),
1461 gcn_operand_part (V64DImode, operands[2], 1),
1462 const0_rtx, vcc, vcc));
1465 [(set_attr "type" "vmult")
1466 (set_attr "length" "8")])
; addv64di3_zext_dup_exec: EXEC-predicated variant of addv64di3_zext_dup.
1468 (define_insn_and_split "addv64di3_zext_dup_exec"
1469 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1473 (vec_duplicate:V64SI
1474 (match_operand:SI 1 "gcn_alu_operand" "BSv")))
1475 (match_operand:V64DI 2 "gcn_alu_operand" "vA0"))
1476 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1477 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1478 (clobber (reg:DI VCC_REG))]
1481 "gcn_can_split_p (V64DImode, operands[0])
1482 && gcn_can_split_p (V64DImode, operands[2])
1483 && gcn_can_split_p (V64DImode, operands[3])"
1486 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1487 emit_insn (gen_addv64si3_vcc_dup_exec
1488 (gcn_operand_part (V64DImode, operands[0], 0),
1489 gcn_operand_part (DImode, operands[1], 0),
1490 gcn_operand_part (V64DImode, operands[2], 0),
1492 gcn_operand_part (V64DImode, operands[3], 0),
1494 emit_insn (gen_addcv64si3_exec
1495 (gcn_operand_part (V64DImode, operands[0], 1),
1496 gcn_operand_part (V64DImode, operands[2], 1),
1497 const0_rtx, vcc, vcc,
1498 gcn_operand_part (V64DImode, operands[3], 1),
1502 [(set_attr "type" "vmult")
1503 (set_attr "length" "8")])
; addv64di3_zext_dup2: zero-extended V64SI (operand 1) plus a lane-duplicated
; scalar DI (operand 2).  The split first broadcasts the scalar's high word
; into the destination's high half, then adds only the carry to it.
1505 (define_insn_and_split "addv64di3_zext_dup2"
1506 [(set (match_operand:V64DI 0 "register_operand" "= v")
1508 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1509 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1510 (clobber (reg:DI VCC_REG))]
1513 "gcn_can_split_p (V64DImode, operands[0])"
1516 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1517 emit_insn (gen_addv64si3_vcc_dup
1518 (gcn_operand_part (V64DImode, operands[0], 0),
1519 gcn_operand_part (DImode, operands[2], 0),
1522 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1523 emit_insn (gen_vec_duplicatev64si
1524 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1525 emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
1528 [(set_attr "type" "vmult")
1529 (set_attr "length" "8")])
; addv64di3_zext_dup2_exec: EXEC-predicated variant of addv64di3_zext_dup2.
1531 (define_insn_and_split "addv64di3_zext_dup2_exec"
1532 [(set (match_operand:V64DI 0 "register_operand" "= v")
1535 (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1537 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1538 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1539 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1540 (clobber (reg:DI VCC_REG))]
1543 "gcn_can_split_p (V64DImode, operands[0])
1544 && gcn_can_split_p (V64DImode, operands[3])"
1547 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1548 emit_insn (gen_addv64si3_vcc_dup_exec
1549 (gcn_operand_part (V64DImode, operands[0], 0),
1550 gcn_operand_part (DImode, operands[2], 0),
1553 gcn_operand_part (V64DImode, operands[3], 0),
1555 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1556 emit_insn (gen_vec_duplicatev64si_exec
1557 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1558 gcn_gen_undef (V64SImode), operands[4]));
1559 emit_insn (gen_addcv64si3_exec
1560 (dsthi, dsthi, const0_rtx, vcc, vcc,
1561 gcn_operand_part (V64DImode, operands[3], 1),
1565 [(set_attr "type" "vmult")
1566 (set_attr "length" "8")])
; addv64di3_sext_dup2: sign-extended V64SI plus lane-duplicated scalar DI.
; A scratch V64SI holds the sign words (arithmetic shift right by 31), which
; become the extra addend to the high half in the addcv64si3 step.
1568 (define_insn_and_split "addv64di3_sext_dup2"
1569 [(set (match_operand:V64DI 0 "register_operand" "= v")
1571 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
1572 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
1573 (clobber (match_scratch:V64SI 3 "=&v"))
1574 (clobber (reg:DI VCC_REG))]
1577 "gcn_can_split_p (V64DImode, operands[0])"
1580 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1581 emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
1582 emit_insn (gen_addv64si3_vcc_dup
1583 (gcn_operand_part (V64DImode, operands[0], 0),
1584 gcn_operand_part (DImode, operands[2], 0),
1587 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1588 emit_insn (gen_vec_duplicatev64si
1589 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1590 emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
1593 [(set_attr "type" "vmult")
1594 (set_attr "length" "8")])
; addv64di3_sext_dup2_exec: EXEC-predicated variant; the scratch is
; operand 5 here because operands 3/4 hold the merge value and EXEC mask.
1596 (define_insn_and_split "addv64di3_sext_dup2_exec"
1597 [(set (match_operand:V64DI 0 "register_operand" "= v")
1600 (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
1602 (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
1603 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1604 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1605 (clobber (match_scratch:V64SI 5 "=&v"))
1606 (clobber (reg:DI VCC_REG))]
1609 "gcn_can_split_p (V64DImode, operands[0])
1610 && gcn_can_split_p (V64DImode, operands[3])"
1613 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1614 emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
1615 gcn_gen_undef (V64SImode), operands[4]));
1616 emit_insn (gen_addv64si3_vcc_dup_exec
1617 (gcn_operand_part (V64DImode, operands[0], 0),
1618 gcn_operand_part (DImode, operands[2], 0),
1621 gcn_operand_part (V64DImode, operands[3], 0),
1623 rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
1624 emit_insn (gen_vec_duplicatev64si_exec
1625 (dsthi, gcn_operand_part (DImode, operands[2], 1),
1626 gcn_gen_undef (V64SImode), operands[4]));
1627 emit_insn (gen_addcv64si3_exec
1628 (dsthi, dsthi, operands[5], vcc, vcc,
1629 gcn_operand_part (V64DImode, operands[3], 1),
1633 [(set_attr "type" "vmult")
1634 (set_attr "length" "8")])
1637 ;; {{{ DS memory ALU: add/sub
; Mode iterators for the LDS ("DS") read-modify-write arithmetic patterns
; below: vector forms and their scalar counterparts.
1639 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
1640 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1642 ;; FIXME: the vector patterns probably need RD expanded to a vector of
1643 ;; addresses. For now, the only way a vector can get into LDS is
1644 ;; if the user puts it there manually.
1646 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1647 ;; checked to see if anything can ever use them.
; DS read-modify-write arithmetic.  Each pattern requires the destination to
; be the same DS memory location as the memory source (rtx_equal_p condition)
; and emits a single ds_add/ds_sub/ds_rsub instruction.
1649 (define_insn "add<mode>3_ds<exec>"
1650 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1652 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
1653 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1654 "rtx_equal_p (operands[0], operands[1])"
1655 "ds_add%u0\t%A0, %2%O0"
1656 [(set_attr "type" "ds")
1657 (set_attr "length" "8")])
; Scalar (SI/SF/DI) counterpart of add<mode>3_ds.
1659 (define_insn "add<mode>3_ds_scalar"
1660 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1661 (plus:DS_ARITH_SCALAR_MODE
1662 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1664 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1665 "rtx_equal_p (operands[0], operands[1])"
1666 "ds_add%u0\t%A0, %2%O0"
1667 [(set_attr "type" "ds")
1668 (set_attr "length" "8")])
; mem = mem - reg.
1670 (define_insn "sub<mode>3_ds<exec>"
1671 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1672 (minus:DS_ARITH_MODE
1673 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
1674 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
1675 "rtx_equal_p (operands[0], operands[1])"
1676 "ds_sub%u0\t%A0, %2%O0"
1677 [(set_attr "type" "ds")
1678 (set_attr "length" "8")])
; Scalar counterpart of sub<mode>3_ds.
1680 (define_insn "sub<mode>3_ds_scalar"
1681 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1682 (minus:DS_ARITH_SCALAR_MODE
1683 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1685 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
1686 "rtx_equal_p (operands[0], operands[1])"
1687 "ds_sub%u0\t%A0, %2%O0"
1688 [(set_attr "type" "ds")
1689 (set_attr "length" "8")])
; Reverse subtract: mem = reg - mem (ds_rsub).
1691 (define_insn "subr<mode>3_ds<exec>"
1692 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
1693 (minus:DS_ARITH_MODE
1694 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
1695 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
1696 "rtx_equal_p (operands[0], operands[1])"
1697 "ds_rsub%u0\t%A0, %2%O0"
1698 [(set_attr "type" "ds")
1699 (set_attr "length" "8")])
; Scalar counterpart of subr<mode>3_ds.
1701 (define_insn "subr<mode>3_ds_scalar"
1702 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
1703 (minus:DS_ARITH_SCALAR_MODE
1704 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
1705 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
1707 "rtx_equal_p (operands[0], operands[1])"
1708 "ds_rsub%u0\t%A0, %2%O0"
1709 [(set_attr "type" "ds")
1710 (set_attr "length" "8")])
1713 ;; {{{ ALU special case: mult
; <su>mulv64si3_highpart: signed/unsigned widening multiply keeping the high
; 32 bits of each lane (v_mul_hi_i32 / v_mul_hi_u32 via <sgnsuffix>).
1715 (define_insn "<su>mulv64si3_highpart<exec>"
1716 [(set (match_operand:V64SI 0 "register_operand" "= v")
1721 (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
1723 (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
1726 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
1727 [(set_attr "type" "vop3a")
1728 (set_attr "length" "8")])
; mulv64si3: plain low-32-bit lane-wise multiply.
1730 (define_insn "mulv64si3<exec>"
1731 [(set (match_operand:V64SI 0 "register_operand" "= v")
1733 (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
1734 (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
1736 "v_mul_lo_u32\t%0, %1, %2"
1737 [(set_attr "type" "vop3a")
1738 (set_attr "length" "8")])
; mulv64si3_dup: low multiply with a scalar SI operand duplicated across
; all lanes.
1740 (define_insn "mulv64si3_dup<exec>"
1741 [(set (match_operand:V64SI 0 "register_operand" "= v")
1743 (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
1744 (vec_duplicate:V64SI
1745 (match_operand:SI 2 "gcn_alu_operand" " SvA"))))]
1747 "v_mul_lo_u32\t%0, %1, %2"
1748 [(set_attr "type" "vop3a")
1749 (set_attr "length" "8")])
; mulv64di3: 64x64->64 multiply composed from 32-bit pieces:
;   lo  = lo1*lo2 (low part)
;   hi  = highpart(lo1*lo2) + lo(hi1*lo2) + lo(lo1*hi2) + lo(hi1*hi2)
; using a scratch V64SI (operand 3) for the partial products.
1751 (define_insn_and_split "mulv64di3"
1752 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1754 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1755 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1756 (clobber (match_scratch:V64SI 3 "=&v"))]
1762 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1763 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1764 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1765 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1766 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1767 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1768 rtx tmp = operands[3];
1770 emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
1771 emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
1772 emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
1773 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1774 emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
1775 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
1776 emit_insn (gen_mulv64si3 (tmp, left_hi, right_hi));
1777 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; mulv64di3_exec: EXEC-predicated variant.  An UNSPEC merge operand means
; "previous value undefined", so undef placeholders are used instead of
; real merge sources for the lo/hi halves.
1781 (define_insn_and_split "mulv64di3_exec"
1782 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1785 (match_operand:V64DI 1 "gcn_alu_operand" "% v")
1786 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1787 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1788 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1789 (clobber (match_scratch:V64SI 5 "=&v"))]
1795 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1796 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1797 rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
1798 rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
1799 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1800 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1801 rtx exec = operands[4];
1802 rtx tmp = operands[5];
1805 if (GET_CODE (operands[3]) == UNSPEC)
1807 old_lo = old_hi = gcn_gen_undef (V64SImode);
1811 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1812 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1815 rtx undef = gcn_gen_undef (V64SImode);
1817 emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
1818 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
1820 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
1821 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1822 emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
1823 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
1824 emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
1825 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
; mulv64di3_zext: zero-extended V64SI (operand 1) times V64DI (operand 2);
; only three 32-bit partial products are needed because the high half of
; the extended operand is zero.
1829 (define_insn_and_split "mulv64di3_zext"
1830 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1833 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1834 (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
1835 (clobber (match_scratch:V64SI 3 "=&v"))]
1841 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1842 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1843 rtx left = operands[1];
1844 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1845 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1846 rtx tmp = operands[3];
1848 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1849 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1850 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1851 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; mulv64di3_zext_exec: EXEC-predicated variant of mulv64di3_zext.
1855 (define_insn_and_split "mulv64di3_zext_exec"
1856 [(set (match_operand:V64DI 0 "register_operand" "=&v")
1860 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1861 (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
1862 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1863 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1864 (clobber (match_scratch:V64SI 5 "=&v"))]
1870 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1871 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1872 rtx left = operands[1];
1873 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1874 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1875 rtx exec = operands[4];
1876 rtx tmp = operands[5];
1879 if (GET_CODE (operands[3]) == UNSPEC)
1881 old_lo = old_hi = gcn_gen_undef (V64SImode);
1885 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1886 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1889 rtx undef = gcn_gen_undef (V64SImode);
1891 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1892 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1894 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1895 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
; mulv64di3_zext_dup2: zero-extended V64SI times a lane-duplicated scalar DI;
; same three-partial-product scheme as mulv64di3_zext.
1899 (define_insn_and_split "mulv64di3_zext_dup2"
1900 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1903 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1904 (vec_duplicate:V64DI
1905 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
1906 (clobber (match_scratch:V64SI 3 "= &v"))]
1912 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1913 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1914 rtx left = operands[1];
1915 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1916 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1917 rtx tmp = operands[3];
1919 emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
1920 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
1921 emit_insn (gen_mulv64si3 (tmp, left, right_hi));
1922 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp))
; mulv64di3_zext_dup2_exec: EXEC-predicated variant.
1926 (define_insn_and_split "mulv64di3_zext_dup2_exec"
1927 [(set (match_operand:V64DI 0 "register_operand" "= &v")
1931 (match_operand:V64SI 1 "gcn_alu_operand" " v"))
1932 (vec_duplicate:V64DI
1933 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
1934 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
1935 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1936 (clobber (match_scratch:V64SI 5 "= &v"))]
1942 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
1943 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
1944 rtx left = operands[1];
1945 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
1946 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
1947 rtx exec = operands[4];
1948 rtx tmp = operands[5];
1951 if (GET_CODE (operands[3]) == UNSPEC)
1953 old_lo = old_hi = gcn_gen_undef (V64SImode);
1957 old_lo = gcn_operand_part (V64DImode, operands[3], 0);
1958 old_hi = gcn_operand_part (V64DImode, operands[3], 1);
1961 rtx undef = gcn_gen_undef (V64SImode);
1963 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
1964 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
1966 emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
1967 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec))
1972 ;; {{{ ALU generic case
; Iterators for the generic integer ALU patterns: integer vector modes and
; the code classes (bit ops, shifts, min/max) they are instantiated over.
1974 (define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])
1976 (define_code_iterator bitop [and ior xor])
1977 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
1978 (define_code_iterator minmaxop [smin smax umin umax])
; Unary bit operation (bitunop — declared elsewhere in this file) on
; single-register integer vectors; VOP1 encoding.
1980 (define_insn "<expander><mode>2<exec>"
1981 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
1982 (bitunop:VEC_1REG_INT_MODE
1983 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
1985 "v_<mnemonic>0\t%0, %1"
1986 [(set_attr "type" "vop1")
1987 (set_attr "length" "8")])
; Binary and/ior/xor; the second alternative performs the operation
; directly on DS (LDS) memory via ds_<mnemonic>.
1989 (define_insn "<expander><mode>3<exec>"
1990 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
1991 (bitop:VEC_1REG_INT_MODE
1992 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
1994 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
1998 v_<mnemonic>0\t%0, %2, %1
1999 ds_<mnemonic>0\t%A0, %2%O0"
2000 [(set_attr "type" "vop2,ds")
2001 (set_attr "length" "8,8")])
; <expander>v64di3: double-register (V64DI) bitwise op.  For a DS memory
; destination it is a single ds_<mnemonic>; otherwise it is split after
; reload into two independent V64SI bit operations on the lo/hi halves.
2003 (define_insn_and_split "<expander>v64di3"
2004 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2006 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2007 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2011 ds_<mnemonic>0\t%A0, %2%O0"
2012 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2014 (bitop:V64SI (match_dup 5) (match_dup 7)))
2016 (bitop:V64SI (match_dup 6) (match_dup 8)))]
2018 operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
2019 operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
2020 operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
2021 operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
2022 operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
2023 operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
2025 [(set_attr "type" "vmult,ds")
2026 (set_attr "length" "16,8")])
; <expander>v64di3_exec: EXEC-predicated variant; the split keeps the
; vec_merge semantics per half (merge operand parts 11/12).
2028 (define_insn_and_split "<expander>v64di3_exec"
2029 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
2032 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
2033 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2034 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
2036 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2037 "!memory_operand (operands[0], VOIDmode)
2038 || (rtx_equal_p (operands[0], operands[1])
2039 && register_operand (operands[2], VOIDmode))"
2042 ds_<mnemonic>0\t%A0, %2%O0"
2043 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
2046 (bitop:V64SI (match_dup 7) (match_dup 9))
2051 (bitop:V64SI (match_dup 8) (match_dup 10))
2055 operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
2056 operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
2057 operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
2058 operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
2059 operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
2060 operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
2061 operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
2062 operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
2064 [(set_attr "type" "vmult,ds")
2065 (set_attr "length" "16,8")])
; V64SI operation with a scalar second operand duplicated across lanes,
; emitted with the reversed-operand mnemonic (<revmnemonic>) so the scalar
; can be the first assembler source.  (The operation rtx line is missing
; from this chunk; given the shiftop iterator above, presumably a shift —
; TODO confirm against the full file.)
2067 (define_insn "<expander>v64si3<exec>"
2068 [(set (match_operand:V64SI 0 "register_operand" "= v")
2070 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2071 (vec_duplicate:V64SI
2072 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2074 "v_<revmnemonic>0\t%0, %2, %1"
2075 [(set_attr "type" "vop2")
2076 (set_attr "length" "8")])
; Per-lane (vector) second operand form of the pattern above.
2078 (define_insn "v<expander>v64si3<exec>"
2079 [(set (match_operand:V64SI 0 "register_operand" "=v")
2081 (match_operand:V64SI 1 "gcn_alu_operand" " v")
2082 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
2084 "v_<revmnemonic>0\t%0, %2, %1"
2085 [(set_attr "type" "vop2")
2086 (set_attr "length" "8")])
; Integer min/max (minmaxop); the second alternative operates directly on
; DS (LDS) memory.
2088 (define_insn "<expander><mode>3<exec>"
2089 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
2090 (minmaxop:VEC_1REG_INT_MODE
2091 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
2093 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
2097 v_<mnemonic>0\t%0, %2, %1
2098 ds_<mnemonic>0\t%A0, %2%O0"
2099 [(set_attr "type" "vop2,ds")
2100 (set_attr "length" "8,8")])
2103 ;; {{{ FP binops - special cases
2105 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2106 ; adding the negated second operand to the first.
; subv64df3: vector DF subtract implemented as v_add_f64 with a negated
; source modifier on operand 2.
2108 (define_insn "subv64df3<exec>"
2109 [(set (match_operand:V64DF 0 "register_operand" "= v, v")
2111 (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
2112 (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
2115 v_add_f64\t%0, %1, -%2
2116 v_add_f64\t%0, -%2, %1"
2117 [(set_attr "type" "vop3a")
2118 (set_attr "length" "8,8")])
; subdf: scalar DFmode counterpart, using the same add-of-negation trick.
2120 (define_insn "subdf"
2121 [(set (match_operand:DF 0 "register_operand" "= v, v")
2123 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2124 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2127 v_add_f64\t%0, %1, -%2
2128 v_add_f64\t%0, -%2, %1"
2129 [(set_attr "type" "vop3a")
2130 (set_attr "length" "8,8")])
2133 ;; {{{ FP binops - generic
;; Mode/code iterators for the generic FP binop patterns below.
;; VEC_FP_MODE covers 1- and 2-register vector FP modes; the *_1REG_*
;; variants exclude V64DF/DF where a single-reg encoding is required.
2135 (define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
2136 (define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
2137 (define_mode_iterator FP_MODE [HF SF DF])
2138 (define_mode_iterator FP_1REG_MODE [HF SF])
2140 (define_code_iterator comm_fp [plus mult smin smax])
2141 (define_code_iterator nocomm_fp [minus])
2142 (define_code_iterator all_fp [plus mult minus smin smax])
;; Commutative vector FP binops; "%" on operand 1 lets the RA swap operands.
2144 (define_insn "<expander><mode>3<exec>"
2145 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2146 (comm_fp:VEC_FP_MODE
2147 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
2148 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
2150 "v_<mnemonic>0\t%0, %2, %1"
2151 [(set_attr "type" "vop2")
2152 (set_attr "length" "8")])
;; Scalar commutative FP binops; second alternative ("RL"/"0") is the LDS
;; read-modify-write form.
2154 (define_insn "<expander><mode>3"
2155 [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
2157 (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
2158 (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2161 v_<mnemonic>0\t%0, %2, %1
2162 v_<mnemonic>0\t%0, %1%O0"
2163 [(set_attr "type" "vop2,ds")
2164 (set_attr "length" "8")])
;; Non-commutative vector FP binops (minus): the reversed mnemonic handles
;; the alternative where the constant/scalar is the second operand.
2166 (define_insn "<expander><mode>3<exec>"
2167 [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
2168 (nocomm_fp:VEC_FP_1REG_MODE
2169 (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2170 (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2173 v_<mnemonic>0\t%0, %1, %2
2174 v_<revmnemonic>0\t%0, %2, %1"
2175 [(set_attr "type" "vop2")
2176 (set_attr "length" "8,8")])
;; Scalar counterpart of the non-commutative pattern above.
2178 (define_insn "<expander><mode>3"
2179 [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
2180 (nocomm_fp:FP_1REG_MODE
2181 (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
2182 (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
2185 v_<mnemonic>0\t%0, %1, %2
2186 v_<revmnemonic>0\t%0, %2, %1"
2187 [(set_attr "type" "vop2")
2188 (set_attr "length" "8,8")])
;; FP unary ops.  abs/neg are implemented as "add 0" with the source
;; modifier (|x| or -x) applied by the hardware; sqrt uses the native
;; v_sqrt instruction and is gated on -funsafe-math-optimizations.
2193 (define_insn "abs<mode>2"
2194 [(set (match_operand:FP_MODE 0 "register_operand" "=v")
2195 (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
2197 "v_add%i0\t%0, 0, |%1|"
2198 [(set_attr "type" "vop3a")
2199 (set_attr "length" "8")])
;; Vector abs; the abs: head line is absent from this extract.
2201 (define_insn "abs<mode>2<exec>"
2202 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2204 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2206 "v_add%i0\t%0, 0, |%1|"
2207 [(set_attr "type" "vop3a")
2208 (set_attr "length" "8")])
;; Vector negate via "0 + (-x)" source modifier.
2210 (define_insn "neg<mode>2<exec>"
2211 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
2213 (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
2215 "v_add%i0\t%0, 0, -%1"
2216 [(set_attr "type" "vop3a")
2217 (set_attr "length" "8")])
;; Vector square root (unsafe-math only; output template line not visible).
2219 (define_insn "sqrt<mode>2<exec>"
2220 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2222 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2223 "flag_unsafe_math_optimizations"
2225 [(set_attr "type" "vop1")
2226 (set_attr "length" "8")])
;; Scalar square root, same gating.
2228 (define_insn "sqrt<mode>2"
2229 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2231 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2232 "flag_unsafe_math_optimizations"
2234 [(set_attr "type" "vop1")
2235 (set_attr "length" "8")])
2238 ;; {{{ FP fused multiply and add
;; fma patterns emit the single-instruction v_fma form.  The _negop2
;; variants match fma(a, -b, c), used by the division expanders below to
;; build the Newton-Raphson step without a separate negate.
2240 (define_insn "fma<mode>4<exec>"
2241 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
2243 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2244 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2245 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2247 "v_fma%i0\t%0, %1, %2, %3"
2248 [(set_attr "type" "vop3a")
2249 (set_attr "length" "8")])
;; Vector fma with negated second multiplicand (source-modifier -%2).
2251 (define_insn "fma<mode>4_negop2<exec>"
2252 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
2254 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2256 (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2257 (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2259 "v_fma%i0\t%0, %1, -%2, %3"
2260 [(set_attr "type" "vop3a")
2261 (set_attr "length" "8")])
;; Scalar fma.
2263 (define_insn "fma<mode>4"
2264 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
2266 (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
2267 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
2268 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
2270 "v_fma%i0\t%0, %1, %2, %3"
2271 [(set_attr "type" "vop3a")
2272 (set_attr "length" "8")])
;; Scalar fma with negated second multiplicand.
2274 (define_insn "fma<mode>4_negop2"
2275 [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
2277 (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
2279 (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
2280 (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
2282 "v_fma%i0\t%0, %1, -%2, %3"
2283 [(set_attr "type" "vop3a")
2284 (set_attr "length" "8")])
;; Hardware reciprocal (1/x), matched as a division of constant 1.0 by x.
;; The v_rcp template line itself is not visible in this extract.
2289 (define_insn "recip<mode>2<exec>"
2290 [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
2292 (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
2293 (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2296 [(set_attr "type" "vop1")
2297 (set_attr "length" "8")])
;; Scalar reciprocal.
2299 (define_insn "recip<mode>2"
2300 [(set (match_operand:FP_MODE 0 "register_operand" "= v")
2302 (float:FP_MODE (const_int 1))
2303 (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
2306 [(set_attr "type" "vop1")
2307 (set_attr "length" "8")])
2309 ;; Do division via a = b * 1/c
2310 ;; The v_rcp_* instructions are not sufficiently accurate on their own,
2311 ;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2312 ;; which the ISA manual says is enough to improve the reciprocal accuracy.
2314 ;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
;; Vector division expander (only under -freciprocal-math).  Detects the
;; special case op1 == -1.0 (is_rcp) so a plain negated reciprocal can be
;; used; the branch using is_rcp is among the lines missing from this
;; extract (embedded numbering skips 2332-2335, 2341-2347).
2316 (define_expand "div<mode>3"
2317 [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
2318 (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
2319 (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
2320 "flag_reciprocal_math"
2322 rtx two = gcn_vec_constant (<MODE>mode,
2323 const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
2324 rtx initrcp = gen_reg_rtx (<MODE>mode);
2325 rtx fma = gen_reg_rtx (<MODE>mode);
2328 bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
2330 (CONST_DOUBLE_REAL_VALUE
2331 (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
2336 rcp = gen_reg_rtx (<MODE>mode);
;; Newton-Raphson step: rcp = initrcp * (2 - initrcp * c).
2338 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2339 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2340 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2343 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
;; Scalar division expander, same refinement scheme with a scalar 2.0.
2348 (define_expand "div<mode>3"
2349 [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
2350 (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
2351 (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
2352 "flag_reciprocal_math"
2354 rtx two = const_double_from_real_value (dconst2, <MODE>mode);
2355 rtx initrcp = gen_reg_rtx (<MODE>mode);
2356 rtx fma = gen_reg_rtx (<MODE>mode);
2359 bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
2360 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
2366 rcp = gen_reg_rtx (<MODE>mode);
2368 emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
2369 emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
2370 emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
2373 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
2379 ;; {{{ Int/FP conversions
;; Cross-product of from/to modes for conversions; gcn_valid_cvt_p filters
;; out the combinations the hardware cannot do.  cvt_operands maps each
;; RTL conversion code to the signed/unsigned %i/%u suffix pair used in
;; the v_cvt mnemonic.
2381 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
2382 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
2384 (define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
2385 (define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])
2387 (define_code_iterator cvt_op [fix unsigned_fix
2388 float unsigned_float
2389 float_extend float_truncate])
2390 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
2391 (float "float") (unsigned_float "floatuns")
2392 (float_extend "extend") (float_truncate "trunc")])
2393 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
2394 (float "%i0%i1") (unsigned_float "%i0%u1")
2395 (float_extend "%i0%i1")
2396 (float_truncate "%i0%i1")])
;; Scalar conversion insn.
2398 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
2399 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
2401 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2402 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
2404 "v_cvt<cvt_operands>\t%0, %1"
2405 [(set_attr "type" "vop1")
2406 (set_attr "length" "8")])
;; Vector conversion insn (64-lane), same validity check.
2408 (define_insn "<cvt_name><VCVT_FROM_MODE:mode><VCVT_TO_MODE:mode>2<exec>"
2409 [(set (match_operand:VCVT_TO_MODE 0 "register_operand" "= v")
2410 (cvt_op:VCVT_TO_MODE
2411 (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
2412 "gcn_valid_cvt_p (<VCVT_FROM_MODE:MODE>mode, <VCVT_TO_MODE:MODE>mode,
2414 "v_cvt<cvt_operands>\t%0, %1"
2415 [(set_attr "type" "vop1")
2416 (set_attr "length" "8")])
2419 ;; {{{ Int/int conversions
2421 ;; GCC can already do these for scalar types, but not for vector types.
2422 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2423 ;; so there must be a few tricks here.
;; V64DI -> V64SI truncation: splits to a plain move of the low 32-bit half
;; of each 64-bit lane (gcn_operand_part ... 0).  Alternative 0 ("0") is a
;; no-op in-place truncation (length 0).
2425 (define_insn_and_split "vec_truncatev64div64si"
2426 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
2428 (match_operand:V64DI 1 "register_operand" " 0, v")))]
2432 [(set (match_dup 0) (match_dup 1))]
2434 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
2436 [(set_attr "type" "vop2")
2437 (set_attr "length" "0,4")])
;; EXEC-masked variant: splits to a vec_merge of the low halves under the
;; exec mask in operand 3, preserving masked-off lanes from operand 2.
2439 (define_insn_and_split "vec_truncatev64div64si_exec"
2440 [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
2443 (match_operand:V64DI 1 "register_operand" " 0, v"))
2444 (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
2445 (match_operand:DI 3 "gcn_exec_operand" " e, e")))]
2449 [(parallel [(set (match_dup 0)
2450 (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
2451 (clobber (scratch:V64DI))])]
2453 operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
2455 [(set_attr "type" "vop2")
2456 (set_attr "length" "0,4")])
2459 ;; {{{ Vector comparison/merge
;; Vector compares produce a 64-bit lane mask in VCC ("cV"), EXEC ("e",
;; via v_cmpx which writes EXEC as a side effect) or an SGPR pair ("Sg",
;; VOP3 encoding).  %E1 prints the comparison suffix for operator 1.
2461 (define_insn "vec_cmp<mode>di"
2462 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2463 (match_operator 1 "comparison_operator"
2464 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2465 "vSv, B,vSv, B, v,vA")
2466 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2467 " v, v, v, v,vA, v")]))
2468 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
2471 v_cmp%E1\tvcc, %2, %3
2472 v_cmp%E1\tvcc, %2, %3
2473 v_cmpx%E1\tvcc, %2, %3
2474 v_cmpx%E1\tvcc, %2, %3
2475 v_cmp%E1\t%0, %2, %3
2476 v_cmp%E1\t%0, %2, %3"
2477 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2478 (set_attr "length" "4,8,4,8,8,8")])
;; Unsigned compare expander simply defers to the signed pattern; the
;; unsigned RTL codes make %E1 emit the unsigned mnemonic.
2480 (define_expand "vec_cmpu<mode>di"
2481 [(match_operand:DI 0 "register_operand")
2482 (match_operator 1 "comparison_operator"
2483 [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2484 (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
2487 /* Unsigned comparisons use the same patterns as signed comparisons,
2488 except that they use unsigned operators (e.g. LTU vs LT).
2489 The '%E1' directive then does the Right Thing. */
2490 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
;; EXEC-masked compare: operand 4 is the exec mask the compare runs under.
2495 (define_insn "vec_cmp<mode>di_exec"
2496 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
2498 (match_operator 1 "comparison_operator"
2499 [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2500 "vSv, B,vSv, B, v,vA")
2501 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2502 " v, v, v, v,vA, v")])
2503 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
2504 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
2507 v_cmp%E1\tvcc, %2, %3
2508 v_cmp%E1\tvcc, %2, %3
2509 v_cmpx%E1\tvcc, %2, %3
2510 v_cmpx%E1\tvcc, %2, %3
2511 v_cmp%E1\t%0, %2, %3
2512 v_cmp%E1\t%0, %2, %3"
2513 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2514 (set_attr "length" "4,8,4,8,8,8")])
;; Compare against a scalar broadcast in operand 2 (vec_duplicate form).
2516 (define_insn "vec_cmp<mode>di_dup"
2517 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2518 (match_operator 1 "comparison_operator"
2519 [(vec_duplicate:VEC_1REG_MODE
2520 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2522 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2524 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
2527 v_cmp%E1\tvcc, %2, %3
2528 v_cmp%E1\tvcc, %2, %3
2529 v_cmpx%E1\tvcc, %2, %3
2530 v_cmpx%E1\tvcc, %2, %3
2531 v_cmp%E1\t%0, %2, %3"
2532 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2533 (set_attr "length" "4,8,4,8,8")])
;; Broadcast compare under an exec mask (operand 4).
2535 (define_insn "vec_cmp<mode>di_dup_exec"
2536 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
2538 (match_operator 1 "comparison_operator"
2539 [(vec_duplicate:VEC_1REG_MODE
2540 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2542 (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2544 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
2545 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
2548 v_cmp%E1\tvcc, %2, %3
2549 v_cmp%E1\tvcc, %2, %3
2550 v_cmpx%E1\tvcc, %2, %3
2551 v_cmpx%E1\tvcc, %2, %3
2552 v_cmp%E1\t%0, %2, %3"
2553 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2554 (set_attr "length" "4,8,4,8,8")])
;; vcond_mask: select between operands 1 and 2 per the DImode lane mask in
;; operand 3, via vec_merge (V64DI scratch for the two-register modes).
2556 (define_expand "vcond_mask_<mode>di"
2558 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
2559 (vec_merge:VEC_REG_MODE
2560 (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
2561 (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
2562 (match_operand:DI 3 "register_operand" "")))
2563 (clobber (scratch:V64DI))])]
;; vcond: compare (operands 3-5) into a temporary mask, then vcond_mask.
;; The compare and data modes may differ (VEC_1REG_ALT vs VEC_1REG_MODE).
2567 (define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>"
2568 [(match_operand:VEC_1REG_MODE 0 "register_operand")
2569 (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2570 (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2571 (match_operator 3 "comparison_operator"
2572 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2573 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
2576 rtx tmp = gen_reg_rtx (DImode);
2577 emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
2579 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
;; vcond under an explicit exec mask (operand 6).
2584 (define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
2585 [(match_operand:VEC_1REG_MODE 0 "register_operand")
2586 (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2587 (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2588 (match_operator 3 "comparison_operator"
2589 [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2590 (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
2591 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2594 rtx tmp = gen_reg_rtx (DImode);
2595 emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
2596 operands[5], operands[6]))
2597 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
;; Unsigned vcond variants (integer modes only); same structure as above,
;; relying on unsigned comparison codes.
2602 (define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>"
2603 [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2604 (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2605 (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2606 (match_operator 3 "comparison_operator"
2607 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2608 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
2611 rtx tmp = gen_reg_rtx (DImode);
2612 emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
2614 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2619 (define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
2620 [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2621 (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2622 (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2623 (match_operator 3 "comparison_operator"
2624 [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2625 (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
2626 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2629 rtx tmp = gen_reg_rtx (DImode);
2630 emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
2631 operands[5], operands[6]))
2632 emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2638 ;; {{{ Fully masked loop support
;; while_ult: build a DImode mask of lanes whose index is below the loop
;; bound.  Constant bounds become an immediate bitmask; otherwise lane ids
;; (VGPR1, preloaded 0..63) are offset and compared against operand 2.
2640 (define_expand "while_ultsidi"
2641 [(match_operand:DI 0 "register_operand")
2642 (match_operand:SI 1 "")
2643 (match_operand:SI 2 "")]
2646 if (GET_CODE (operands[1]) != CONST_INT
2647 || GET_CODE (operands[2]) != CONST_INT)
2649 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2651 if (GET_CODE (operands[1]) != CONST_INT
2652 || INTVAL (operands[1]) != 0)
2654 tmp = gen_reg_rtx (V64SImode);
2655 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2657 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2658 gen_rtx_GT (VOIDmode, 0, 0),
;; Constant case: diff lanes active; >= 64 means all lanes (-1).
2663 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2664 HOST_WIDE_INT mask = (diff >= 64 ? -1
2665 : ~((unsigned HOST_WIDE_INT)-1 << diff));
2666 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
;; Masked load: expand to a masked gather from a scalar base address.
2671 (define_expand "maskload<mode>di"
2672 [(match_operand:VEC_REG_MODE 0 "register_operand")
2673 (match_operand:VEC_REG_MODE 1 "memory_operand")
2674 (match_operand 2 "")]
2677 rtx exec = force_reg (DImode, operands[2]);
2678 rtx addr = gcn_expand_scalar_to_vector_address
2679 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
2680 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2681 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2682 rtx undef = gcn_gen_undef (<MODE>mode);
2683 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
;; Masked store: the scatter counterpart of maskload above.
2688 (define_expand "maskstore<mode>di"
2689 [(match_operand:VEC_REG_MODE 0 "memory_operand")
2690 (match_operand:VEC_REG_MODE 1 "register_operand")
2691 (match_operand 2 "")]
2694 rtx exec = force_reg (DImode, operands[2]);
2695 rtx addr = gcn_expand_scalar_to_vector_address
2696 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
2697 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2698 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2699 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
;; Masked gather: V64DI offsets are first truncated to V64SI (hardware
;; offset width) under the same exec mask.
2703 (define_expand "mask_gather_load<mode>"
2704 [(match_operand:VEC_REG_MODE 0 "register_operand")
2705 (match_operand:DI 1 "register_operand")
2706 (match_operand 2 "register_operand")
2707 (match_operand 3 "immediate_operand")
2708 (match_operand:SI 4 "gcn_alu_operand")
2709 (match_operand:DI 5 "")]
2712 rtx exec = force_reg (DImode, operands[5]);
2714 /* TODO: more conversions will be needed when more types are vectorized. */
2715 if (GET_MODE (operands[2]) == V64DImode)
2717 rtx tmp = gen_reg_rtx (V64SImode);
2718 emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
2719 gcn_gen_undef (V64SImode),
2724 emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
2725 operands[3], operands[4], exec));
;; Masked scatter store, mirroring mask_gather_load.
2729 (define_expand "mask_scatter_store<mode>"
2730 [(match_operand:DI 0 "register_operand")
2731 (match_operand 1 "register_operand")
2732 (match_operand 2 "immediate_operand")
2733 (match_operand:SI 3 "gcn_alu_operand")
2734 (match_operand:VEC_REG_MODE 4 "register_operand")
2735 (match_operand:DI 5 "")]
2738 rtx exec = force_reg (DImode, operands[5]);
2740 /* TODO: more conversions will be needed when more types are vectorized. */
2741 if (GET_MODE (operands[1]) == V64DImode)
2743 rtx tmp = gen_reg_rtx (V64SImode);
2744 emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
2745 gcn_gen_undef (V64SImode),
2750 emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
2751 operands[3], operands[4], exec));
2755 ; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
2756 (define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
2757 (define_mode_iterator COND_INT_MODE [V64SI V64DI])
2759 (define_code_iterator cond_op [plus minus])
;; Conditional arithmetic: route to the _exec variant of the underlying
;; binop, with the condition mask (operand 1) used as the exec mask and
;; operand 4 supplying values for inactive lanes.
2761 (define_expand "cond_<expander><mode>"
2762 [(match_operand:COND_MODE 0 "register_operand")
2763 (match_operand:DI 1 "register_operand")
2765 (match_operand:COND_MODE 2 "gcn_alu_operand")
2766 (match_operand:COND_MODE 3 "gcn_alu_operand"))
2767 (match_operand:COND_MODE 4 "register_operand")]
2770 operands[1] = force_reg (DImode, operands[1]);
2771 operands[2] = force_reg (<MODE>mode, operands[2]);
2773 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2774 operands[3], operands[4],
;; Conditional bitwise ops (integer modes only), same routing scheme.
2779 (define_code_iterator cond_bitop [and ior xor])
2781 (define_expand "cond_<expander><mode>"
2782 [(match_operand:COND_INT_MODE 0 "register_operand")
2783 (match_operand:DI 1 "register_operand")
2784 (cond_bitop:COND_INT_MODE
2785 (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
2786 (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
2787 (match_operand:COND_INT_MODE 4 "register_operand")]
2790 operands[1] = force_reg (DImode, operands[1]);
2791 operands[2] = force_reg (<MODE>mode, operands[2]);
2793 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2794 operands[3], operands[4],
2800 ;; {{{ Vector reductions
;; Reductions are implemented with DPP (data-parallel primitive) row/wave
;; shifts combining lanes pairwise; the unspec iterators below name each
;; supported reduction operation, with a reduced set for the two-register
;; (64-bit element) modes.
2802 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
2803 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
2806 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2808 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
2810 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
2812 ; FIXME: Isn't there a better way of doing this?
2813 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
2814 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
2815 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
2816 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
2817 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
2818 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
2819 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
2820 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
2822 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
2823 (UNSPEC_SMAX_DPP_SHR "smax")
2824 (UNSPEC_UMIN_DPP_SHR "umin")
2825 (UNSPEC_UMAX_DPP_SHR "umax")
2826 (UNSPEC_PLUS_DPP_SHR "plus")
2827 (UNSPEC_AND_DPP_SHR "and")
2828 (UNSPEC_IOR_DPP_SHR "ior")
2829 (UNSPEC_XOR_DPP_SHR "xor")])
;; Assembler mnemonic for each reduction, with signed/unsigned/bitwise
;; size-suffix directives (%i0/%u0/%b0).
2831 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
2832 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
2833 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
2834 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
2835 (UNSPEC_PLUS_DPP_SHR "v_add%u0")
2836 (UNSPEC_AND_DPP_SHR "v_and%b0")
2837 (UNSPEC_IOR_DPP_SHR "v_or%b0")
2838 (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
;; reduc_*_scal: log2 tree reduction (gcn_expand_reduc_scalar), result
;; lands in lane 63 and is read out with mov_from_lane63.
2840 (define_expand "reduc_<reduc_op>_scal_<mode>"
2841 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
2842 (unspec:<SCALAR_MODE>
2843 [(match_operand:VEC_1REG_MODE 1 "register_operand")]
2847 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
2850 /* The result of the reduction is in lane 63 of tmp. */
2851 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
;; Two-register (V64DI) reduction, restricted to the REDUC_2REG ops.
2856 (define_expand "reduc_<reduc_op>_scal_v64di"
2857 [(set (match_operand:DI 0 "register_operand")
2859 [(match_operand:V64DI 1 "register_operand")]
2860 REDUC_2REG_UNSPEC))]
2863 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
2866 /* The result of the reduction is in lane 63 of tmp. */
2867 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
;; One DPP-shift reduction step; condition excludes GCN3 integer add,
;; which is handled by the carry-aware patterns further below.
2872 (define_insn "*<reduc_op>_dpp_shr_<mode>"
2873 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
2874 (unspec:VEC_1REG_MODE
2875 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
2876 (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
2877 (match_operand:SI 3 "const_int_operand" "n")]
2879 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
2880 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
2882 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
2883 <reduc_unspec>, INTVAL (operands[3]));
2885 [(set_attr "type" "vop_dpp")
2886 (set_attr "length" "8")])
;; V64DI DPP reduction step: splits into two independent V64SI operations
;; on the low and high register halves (valid for min/max/bitwise ops,
;; not plain addition, which needs the carry patterns below).
2888 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
2889 [(set (match_operand:V64DI 0 "register_operand" "=&v")
2891 [(match_operand:V64DI 1 "register_operand" "v0")
2892 (match_operand:V64DI 2 "register_operand" "v0")
2893 (match_operand:SI 3 "const_int_operand" "n")]
2894 REDUC_2REG_UNSPEC))]
2900 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
2903 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
;; operands 4-9 are the lo/hi halves of operands 0-2 respectively.
2905 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2906 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2907 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2908 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2909 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2910 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2912 [(set_attr "type" "vmult")
2913 (set_attr "length" "16")])
2915 ; Special cases for addition.
;; Addition DPP step that produces a carry in VCC; GCN3 and GCN5 use
;; different mnemonics (v_add vs v_add_co).
2917 (define_insn "*plus_carry_dpp_shr_<mode>"
2918 [(set (match_operand:VEC_1REG_INT_MODE 0 "register_operand" "=v")
2919 (unspec:VEC_1REG_INT_MODE
2920 [(match_operand:VEC_1REG_INT_MODE 1 "register_operand" "v")
2921 (match_operand:VEC_1REG_INT_MODE 2 "register_operand" "v")
2922 (match_operand:SI 3 "const_int_operand" "n")]
2923 UNSPEC_PLUS_CARRY_DPP_SHR))
2924 (clobber (reg:DI VCC_REG))]
2927 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
2928 return gcn_expand_dpp_shr_insn (<MODE>mode, insn,
2929 UNSPEC_PLUS_CARRY_DPP_SHR,
2930 INTVAL (operands[3]));
2932 [(set_attr "type" "vop_dpp")
2933 (set_attr "length" "8")])
;; Add-with-carry-in DPP step: consumes the VCC carry from the low half
;; (operand 4, constraint "cV") and clobbers VCC again.
2935 (define_insn "*plus_carry_in_dpp_shr_v64si"
2936 [(set (match_operand:V64SI 0 "register_operand" "=v")
2938 [(match_operand:V64SI 1 "register_operand" "v")
2939 (match_operand:V64SI 2 "register_operand" "v")
2940 (match_operand:SI 3 "const_int_operand" "n")
2941 (match_operand:DI 4 "register_operand" "cV")]
2942 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2943 (clobber (reg:DI VCC_REG))]
2946 const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
2947 return gcn_expand_dpp_shr_insn (V64SImode, insn,
2948 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
2949 INTVAL (operands[3]));
2951 [(set_attr "type" "vop_dpp")
2952 (set_attr "length" "8")])
;; 64-bit addition DPP step: split into low-half add producing carry in
;; VCC, then high-half add consuming that carry.
2954 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
2955 [(set (match_operand:V64DI 0 "register_operand" "=&v")
2957 [(match_operand:V64DI 1 "register_operand" "v0")
2958 (match_operand:V64DI 2 "register_operand" "v0")
2959 (match_operand:SI 3 "const_int_operand" "n")]
2960 UNSPEC_PLUS_CARRY_DPP_SHR))
2961 (clobber (reg:DI VCC_REG))]
2965 [(parallel [(set (match_dup 4)
2967 [(match_dup 6) (match_dup 8) (match_dup 3)]
2968 UNSPEC_PLUS_CARRY_DPP_SHR))
2969 (clobber (reg:DI VCC_REG))])
2970 (parallel [(set (match_dup 5)
2972 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
2973 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2974 (clobber (reg:DI VCC_REG))])]
;; operands 4-9 are the lo/hi halves of operands 0-2 respectively.
2976 operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2977 operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2978 operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2979 operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2980 operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2981 operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2983 [(set_attr "type" "vmult")
2984 (set_attr "length" "16")])
2986 ; Instructions to move a scalar value from lane 63 of a vector register.
;; Alternative 0 reads lane 63 into an SGPR with v_readlane_b32;
;; alternative 1 rotates the wave by one lane with DPP so lane 63's value
;; lands in lane 0 of a VGPR ("exec" attr "none" = not exec-masked).
2987 (define_insn "mov_from_lane63_<mode>"
2988 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
2989 (unspec:<SCALAR_MODE>
2990 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v,v")]
2991 UNSPEC_MOV_FROM_LANE63))]
2994 v_readlane_b32\t%0, %1, 63
2995 v_mov_b32\t%0, %1 wave_ror:1"
2996 [(set_attr "type" "vop3a,vop_dpp")
2997 (set_attr "exec" "none,*")
2998 (set_attr "length" "8")])
;; DImode variant handles the two 32-bit halves (%L/%H) separately; the
;; DPP alternative orders the two moves by register number to avoid
;; overwriting a half before it is read when source/dest overlap.
3000 (define_insn "mov_from_lane63_v64di"
3001 [(set (match_operand:DI 0 "register_operand" "=Sg,v")
3003 [(match_operand:V64DI 1 "register_operand" "v,v")]
3004 UNSPEC_MOV_FROM_LANE63))]
3007 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3008 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
3009 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
3010 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
3012 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
3013 \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3014 [(set_attr "type" "vop3a,vop_dpp")
3015 (set_attr "exec" "none,*")
3016 (set_attr "length" "8")])
3019 ;; {{{ Miscellaneous
;; vec_series: build {base, base+step, base+2*step, ...} from the
;; preloaded lane-id vector in VGPR1 (0..63): result = v1 * step + base.
3021 (define_expand "vec_seriesv64si"
3022 [(match_operand:V64SI 0 "register_operand")
3023 (match_operand:SI 1 "gcn_alu_operand")
3024 (match_operand:SI 2 "gcn_alu_operand")]
3027 rtx tmp = gen_reg_rtx (V64SImode);
3028 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3030 emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
3031 emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
3035 (define_expand "vec_seriesv64di"
3036 [(match_operand:V64DI 0 "register_operand")
3037 (match_operand:DI 1 "gcn_alu_operand")
3038 (match_operand:DI 2 "gcn_alu_operand")]
3041 rtx tmp = gen_reg_rtx (V64DImode);
3042 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3044 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
3045 emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1]));