gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
61 };
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
67 {
68 return STMT_VINFO_VECTYPE (stmt_info);
69 }
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
75 {
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
87 }
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
103 kind = vector_scatter_store;
104 if (body_cost_vec)
106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107 stmt_info_for_cost si = { count, kind,
108 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
109 misalign };
110 body_cost_vec->safe_push (si);
111 return (unsigned)
112 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 else
115 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
116 count, kind, stmt_info, misalign, where);
119 /* Return a variable of type ELEM_TYPE[NELEMS]. */
121 static tree
122 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
124 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
125 "vect_array");
128 /* ARRAY is an array of vectors created by create_vector_array.
129 Return an SSA_NAME for the vector in index N. The reference
130 is part of the vectorization of STMT and the vector is associated
131 with scalar destination SCALAR_DEST. */
133 static tree
134 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
135 tree array, unsigned HOST_WIDE_INT n)
137 tree vect_type, vect, vect_name, array_ref;
138 gimple *new_stmt;
140 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
141 vect_type = TREE_TYPE (TREE_TYPE (array));
142 vect = vect_create_destination_var (scalar_dest, vect_type);
143 array_ref = build4 (ARRAY_REF, vect_type, array,
144 build_int_cst (size_type_node, n),
145 NULL_TREE, NULL_TREE);
147 new_stmt = gimple_build_assign (vect, array_ref);
148 vect_name = make_ssa_name (vect, new_stmt);
149 gimple_assign_set_lhs (new_stmt, vect_name);
150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
152 return vect_name;
155 /* ARRAY is an array of vectors created by create_vector_array.
156 Emit code to store SSA_NAME VECT in index N of the array.
157 The store is part of the vectorization of STMT. */
159 static void
160 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
161 tree array, unsigned HOST_WIDE_INT n)
163 tree array_ref;
164 gimple *new_stmt;
166 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
167 build_int_cst (size_type_node, n),
168 NULL_TREE, NULL_TREE);
170 new_stmt = gimple_build_assign (array_ref, vect);
171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
174 /* PTR is a pointer to an array of type TYPE. Return a representation
175 of *PTR. The memory reference replaces those in FIRST_DR
176 (and its group). */
178 static tree
179 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
181 tree mem_ref;
183 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
184 /* Arrays have the same alignment as their type. */
185 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
186 return mem_ref;
189 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
191 /* Function vect_mark_relevant.
193 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
195 static void
196 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
197 enum vect_relevant relevant, bool live_p)
199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
200 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
201 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
202 gimple *pattern_stmt;
204 if (dump_enabled_p ())
206 dump_printf_loc (MSG_NOTE, vect_location,
207 "mark relevant %d, live %d: ", relevant, live_p);
208 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
222 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
224 if (dump_enabled_p ())
225 dump_printf_loc (MSG_NOTE, vect_location,
226 "last stmt in pattern. don't mark"
227 " relevant/live.\n");
228 stmt_info = vinfo_for_stmt (pattern_stmt);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232 stmt = pattern_stmt;
235 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
236 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
237 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
240 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "already marked relevant/live.\n");
245 return;
248 worklist->safe_push (stmt);
252 /* Function is_simple_and_all_uses_invariant
254 Return true if STMT is simple and all uses of it are invariant. */
256 bool
257 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
259 tree op;
260 gimple *def_stmt;
261 ssa_op_iter iter;
263 if (!is_gimple_assign (stmt))
264 return false;
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
268 enum vect_def_type dt = vect_uninitialized_def;
270 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
281 return true;
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT in loop that is represented by LOOP_VINFO is
287 "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - it is a control stmt in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
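/* As a rough illustration of the criteria above (the names are only for
   the sake of the example), in a loop like

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + x;    <-- has a vdef (alters memory): relevant
         sum += b[i];        <-- SUM is used after the loop: live
       }

   the store is relevant because it changes memory, and the update of SUM
   is live because its final value escapes the loop through the
   loop-closed PHI in the exit block.  */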
296 static bool
297 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
300 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt)
311 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
312 != loop_exit_ctrl_vec_info_type)
313 *relevant = vect_used_in_scope;
315 /* changing memory. */
316 if (gimple_code (stmt) != GIMPLE_PHI)
317 if (gimple_vdef (stmt)
318 && !gimple_clobber_p (stmt))
320 if (dump_enabled_p ())
321 dump_printf_loc (MSG_NOTE, vect_location,
322 "vec_stmt_relevant_p: stmt has vdefs.\n");
323 *relevant = vect_used_in_scope;
326 /* uses outside the loop. */
327 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
329 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 basic_block bb = gimple_bb (USE_STMT (use_p));
332 if (!flow_bb_inside_loop_p (loop, bb))
334 if (dump_enabled_p ())
335 dump_printf_loc (MSG_NOTE, vect_location,
336 "vec_stmt_relevant_p: used out of loop.\n");
338 if (is_gimple_debug (USE_STMT (use_p)))
339 continue;
341 /* We expect all such uses to be in the loop exit phis
342 (because of loop closed form) */
343 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
344 gcc_assert (bb == single_exit (loop)->dest);
346 *live_p = true;
351 if (*live_p && *relevant == vect_unused_in_scope
352 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt live but not relevant.\n");
357 *relevant = vect_used_only_live;
360 return (*live_p || *relevant);
364 /* Function exist_non_indexing_operands_for_use_p
366 USE is one of the uses attached to STMT. Check if USE is
367 used in STMT for anything other than indexing an array. */
369 static bool
370 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
372 tree operand;
373 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 if (!gimple_assign_copy_p (stmt))
396 if (is_gimple_call (stmt)
397 && gimple_call_internal_p (stmt))
398 switch (gimple_call_internal_fn (stmt))
400 case IFN_MASK_STORE:
401 operand = gimple_call_arg (stmt, 3);
402 if (operand == use)
403 return true;
404 /* FALLTHRU */
405 case IFN_MASK_LOAD:
406 operand = gimple_call_arg (stmt, 2);
407 if (operand == use)
408 return true;
409 break;
410 default:
411 break;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (stmt);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
430 /* Function process_use.
432 Inputs:
433 - a USE in STMT in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
450 skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
452 be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
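/* A small sketch of case 1 above (the statements are illustrative):

     i_1 = i_0 + 1;
     x_2 = a[i_1];

   In the load, i_1 is used only to index the array, so process_use
   leaves the relevance of the statement defining i_1 unchanged; only
   uses that must themselves be vectorized propagate relevance to their
   defining statements.  */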
456 static bool
457 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<gimple *> *worklist,
459 bool force)
461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
462 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
463 stmt_vec_info dstmt_vinfo;
464 basic_block bb, def_bb;
465 gimple *def_stmt;
466 enum vect_def_type dt;
468 /* case 1: we are only interested in uses that need to be vectorized. Uses
469 that are used for address computation are not considered relevant. */
470 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
471 return true;
473 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
477 "not vectorized: unsupported use in stmt.\n");
478 return false;
481 if (!def_stmt || gimple_nop_p (def_stmt))
482 return true;
484 def_bb = gimple_bb (def_stmt);
485 if (!flow_bb_inside_loop_p (loop, def_bb))
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
489 return true;
492 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
493 DEF_STMT must have already been processed, because this should be the
494 only way that STMT, which is a reduction-phi, was put in the worklist,
495 as there should be no other uses for DEF_STMT in the loop. So we just
496 check that everything is as expected, and we are done. */
497 dstmt_vinfo = vinfo_for_stmt (def_stmt);
498 bb = gimple_bb (stmt);
499 if (gimple_code (stmt) == GIMPLE_PHI
500 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
501 && gimple_code (def_stmt) != GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
503 && bb->loop_father == def_bb->loop_father)
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "reduc-stmt defining reduc-phi in the same nest.\n");
508 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
509 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
510 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
511 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
512 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
513 return true;
516 /* case 3a: outer-loop stmt defining an inner-loop stmt:
517 outer-loop-header-bb:
518 d = def_stmt
519 inner-loop:
520 stmt # use (d)
521 outer-loop-tail-bb:
522 ... */
523 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
525 if (dump_enabled_p ())
526 dump_printf_loc (MSG_NOTE, vect_location,
527 "outer-loop def-stmt defining inner-loop stmt.\n");
529 switch (relevant)
531 case vect_unused_in_scope:
532 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
533 vect_used_in_scope : vect_unused_in_scope;
534 break;
536 case vect_used_in_outer_by_reduction:
537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 relevant = vect_used_by_reduction;
539 break;
541 case vect_used_in_outer:
542 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
543 relevant = vect_used_in_scope;
544 break;
546 case vect_used_in_scope:
547 break;
549 default:
550 gcc_unreachable ();
554 /* case 3b: inner-loop stmt defining an outer-loop stmt:
555 outer-loop-header-bb:
557 inner-loop:
558 d = def_stmt
559 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
560 stmt # use (d) */
561 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
563 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE, vect_location,
565 "inner-loop def-stmt defining outer-loop stmt.\n");
567 switch (relevant)
569 case vect_unused_in_scope:
570 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
571 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
572 vect_used_in_outer_by_reduction : vect_unused_in_scope;
573 break;
575 case vect_used_by_reduction:
576 case vect_used_only_live:
577 relevant = vect_used_in_outer_by_reduction;
578 break;
580 case vect_used_in_scope:
581 relevant = vect_used_in_outer;
582 break;
584 default:
585 gcc_unreachable ();
588 /* We are also not interested in uses on loop PHI backedges that are
589 inductions. Otherwise we'll needlessly vectorize the IV increment
590 and cause hybrid SLP for SLP inductions. Unless the PHI is live
591 of course. */
592 else if (gimple_code (stmt) == GIMPLE_PHI
593 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
594 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
595 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
596 == use))
598 if (dump_enabled_p ())
599 dump_printf_loc (MSG_NOTE, vect_location,
600 "induction value on backedge.\n");
601 return true;
605 vect_mark_relevant (worklist, def_stmt, relevant, false);
606 return true;
610 /* Function vect_mark_stmts_to_be_vectorized.
612 Not all stmts in the loop need to be vectorized. For example:
614 for i...
615 for j...
616 1. T0 = i + j
617 2. T1 = a[T0]
619 3. j = j + 1
621 Stmt 1 and 3 do not need to be vectorized, because loop control and
622 addressing of vectorized data-refs are handled differently.
624 This pass detects such stmts. */
626 bool
627 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
629 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
630 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
631 unsigned int nbbs = loop->num_nodes;
632 gimple_stmt_iterator si;
633 gimple *stmt;
634 unsigned int i;
635 stmt_vec_info stmt_vinfo;
636 basic_block bb;
637 gimple *phi;
638 bool live_p;
639 enum vect_relevant relevant;
641 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE, vect_location,
643 "=== vect_mark_stmts_to_be_vectorized ===\n");
645 auto_vec<gimple *, 64> worklist;
647 /* 1. Init worklist. */
648 for (i = 0; i < nbbs; i++)
650 bb = bbs[i];
651 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
653 phi = gsi_stmt (si);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
660 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
661 vect_mark_relevant (&worklist, phi, relevant, live_p);
663 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
665 stmt = gsi_stmt (si);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
672 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
673 vect_mark_relevant (&worklist, stmt, relevant, live_p);
677 /* 2. Process_worklist */
678 while (worklist.length () > 0)
680 use_operand_p use_p;
681 ssa_op_iter iter;
683 stmt = worklist.pop ();
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
687 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
690 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
691 (DEF_STMT) as relevant/irrelevant according to the relevance property
692 of STMT. */
693 stmt_vinfo = vinfo_for_stmt (stmt);
694 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
696 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
697 propagated as is to the DEF_STMTs of its USEs.
699 One exception is when STMT has been identified as defining a reduction
700 variable; in this case we set the relevance to vect_used_by_reduction.
701 This is because we distinguish between two kinds of relevant stmts -
702 those that are used by a reduction computation, and those that are
703 (also) used by a regular computation. This allows us later on to
704 identify stmts that are used solely by a reduction, and therefore the
705 order of the results that they produce does not have to be kept. */
707 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
709 case vect_reduction_def:
710 gcc_assert (relevant != vect_unused_in_scope);
711 if (relevant != vect_unused_in_scope
712 && relevant != vect_used_in_scope
713 && relevant != vect_used_by_reduction
714 && relevant != vect_used_only_live)
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
718 "unsupported use of reduction.\n");
719 return false;
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
728 if (dump_enabled_p ())
729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
730 "unsupported use of nested cycle.\n");
732 return false;
734 break;
736 case vect_double_reduction_def:
737 if (relevant != vect_unused_in_scope
738 && relevant != vect_used_by_reduction
739 && relevant != vect_used_only_live)
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743 "unsupported use of double reduction.\n");
745 return false;
747 break;
749 default:
750 break;
753 if (is_pattern_stmt_p (stmt_vinfo))
755 /* Pattern statements are not inserted into the code, so
756 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
757 have to scan the RHS or function arguments instead. */
758 if (is_gimple_assign (stmt))
760 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
761 tree op = gimple_assign_rhs1 (stmt);
763 i = 1;
764 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
766 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
767 relevant, &worklist, false)
768 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
769 relevant, &worklist, false))
770 return false;
771 i = 2;
773 for (; i < gimple_num_ops (stmt); i++)
775 op = gimple_op (stmt, i);
776 if (TREE_CODE (op) == SSA_NAME
777 && !process_use (stmt, op, loop_vinfo, relevant,
778 &worklist, false))
779 return false;
782 else if (is_gimple_call (stmt))
784 for (i = 0; i < gimple_call_num_args (stmt); i++)
786 tree arg = gimple_call_arg (stmt, i);
787 if (!process_use (stmt, arg, loop_vinfo, relevant,
788 &worklist, false))
789 return false;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 if (!process_use (stmt, op, loop_vinfo, relevant,
798 &worklist, false))
799 return false;
802 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
804 gather_scatter_info gs_info;
805 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
806 gcc_unreachable ();
807 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
808 &worklist, true))
809 return false;
811 } /* while worklist */
813 return true;
817 /* Function vect_model_simple_cost.
819 Models cost for simple operations, i.e. those that only emit ncopies of a
820 single op. Right now, this does not account for multiple insns that could
821 be generated for the single vector op. We will handle that shortly. */
823 void
824 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
825 enum vect_def_type *dt,
826 int ndts,
827 stmt_vector_for_cost *prologue_cost_vec,
828 stmt_vector_for_cost *body_cost_vec)
830 int i;
831 int inside_cost = 0, prologue_cost = 0;
833 /* The SLP costs were already calculated during SLP tree build. */
834 if (PURE_SLP_STMT (stmt_info))
835 return;
837 /* Cost the "broadcast" of a scalar operand into a vector operand.
838 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
839 cost model. */
840 for (i = 0; i < ndts; i++)
841 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
842 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
843 stmt_info, 0, vect_prologue);
845 /* Pass the inside-of-loop statements to the target-specific cost model. */
846 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
847 stmt_info, 0, vect_body);
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE, vect_location,
851 "vect_model_simple_cost: inside_cost = %d, "
852 "prologue_cost = %d .\n", inside_cost, prologue_cost);
856 /* Model cost for type demotion and promotion operations. PWR is normally
857 zero for single-step promotions and demotions. It will be one if
858 two-step promotion/demotion is required, and so on. Each additional
859 step doubles the number of instructions required. */
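/* For example, with PWR == 1 (a two-step conversion) the loop below
   counts vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   statements for a promotion and vect_pow2 (0) + vect_pow2 (1)
   = 1 + 2 = 3 for a demotion, matching the doubling per extra step
   described above.  */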
861 static void
862 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
863 enum vect_def_type *dt, int pwr)
865 int i, tmp;
866 int inside_cost = 0, prologue_cost = 0;
867 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
868 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
869 void *target_cost_data;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info))
873 return;
875 if (loop_vinfo)
876 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
877 else
878 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
880 for (i = 0; i < pwr + 1; i++)
882 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
883 (i + 1) : i;
884 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
885 vec_promote_demote, stmt_info, 0,
886 vect_body);
889 /* FORNOW: Assuming maximum 2 args per stmts. */
890 for (i = 0; i < 2; i++)
891 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
892 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
893 stmt_info, 0, vect_prologue);
895 if (dump_enabled_p ())
896 dump_printf_loc (MSG_NOTE, vect_location,
897 "vect_model_promotion_demotion_cost: inside_cost = %d, "
898 "prologue_cost = %d .\n", inside_cost, prologue_cost);
901 /* Function vect_model_store_cost
903 Models cost for stores. In the case of grouped accesses, one access
904 has the overhead of the grouped access attributed to it. */
906 void
907 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
908 vect_memory_access_type memory_access_type,
909 enum vect_def_type dt, slp_tree slp_node,
910 stmt_vector_for_cost *prologue_cost_vec,
911 stmt_vector_for_cost *body_cost_vec)
913 unsigned int inside_cost = 0, prologue_cost = 0;
914 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
915 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
916 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
918 if (dt == vect_constant_def || dt == vect_external_def)
919 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
920 stmt_info, 0, vect_prologue);
922 /* Grouped stores update all elements in the group at once,
923 so we want the DR for the first statement. */
924 if (!slp_node && grouped_access_p)
926 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
927 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
930 /* True if we should include any once-per-group costs as well as
931 the cost of the statement itself. For SLP we only get called
932 once per group anyhow. */
933 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
935 /* We assume that the cost of a single store-lanes instruction is
936 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
937 access is instead being provided by a permute-and-store operation,
938 include the cost of the permutes. */
939 if (first_stmt_p
940 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 /* Uses high and low interleave or shuffle operations for each
943 needed permute. */
944 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
945 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
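/* E.g. for a single copy of a group of 4 stores this is roughly
   1 * ceil_log2 (4) * 4 = 8 permute statements: log2 (group_size)
   interleaving stages, each producing group_size vectors.  */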
946 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
947 stmt_info, 0, vect_body);
949 if (dump_enabled_p ())
950 dump_printf_loc (MSG_NOTE, vect_location,
951 "vect_model_store_cost: strided group_size = %d .\n",
952 group_size);
955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
956 /* Costs of the stores. */
957 if (memory_access_type == VMAT_ELEMENTWISE
958 || memory_access_type == VMAT_GATHER_SCATTER)
959 /* N scalar stores plus extracting the elements. */
960 inside_cost += record_stmt_cost (body_cost_vec,
961 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
962 scalar_store, stmt_info, 0, vect_body);
963 else
964 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
966 if (memory_access_type == VMAT_ELEMENTWISE
967 || memory_access_type == VMAT_STRIDED_SLP)
968 inside_cost += record_stmt_cost (body_cost_vec,
969 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
970 vec_to_scalar, stmt_info, 0, vect_body);
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE, vect_location,
974 "vect_model_store_cost: inside_cost = %d, "
975 "prologue_cost = %d .\n", inside_cost, prologue_cost);
979 /* Calculate cost of DR's memory access. */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 unsigned int *inside_cost,
983 stmt_vector_for_cost *body_cost_vec)
985 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986 gimple *stmt = DR_STMT (dr);
987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
989 switch (alignment_support_scheme)
991 case dr_aligned:
993 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 vector_store, stmt_info, 0,
995 vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: aligned.\n");
1000 break;
1003 case dr_unaligned_supported:
1005 /* Here, we assign an additional cost for the unaligned store. */
1006 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 unaligned_store, stmt_info,
1008 DR_MISALIGNMENT (dr), vect_body);
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE, vect_location,
1011 "vect_model_store_cost: unaligned supported by "
1012 "hardware.\n");
1013 break;
1016 case dr_unaligned_unsupported:
1018 *inside_cost = VECT_MAX_COST;
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022 "vect_model_store_cost: unsupported access.\n");
1023 break;
1026 default:
1027 gcc_unreachable ();
1032 /* Function vect_model_load_cost
1034 Models cost for loads. In the case of grouped accesses, one access has
1035 the overhead of the grouped access attributed to it. Since unaligned
1036 accesses are supported for loads, we also account for the costs of the
1037 access scheme chosen. */
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 vect_memory_access_type memory_access_type,
1042 slp_tree slp_node,
1043 stmt_vector_for_cost *prologue_cost_vec,
1044 stmt_vector_for_cost *body_cost_vec)
1046 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1047 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1048 unsigned int inside_cost = 0, prologue_cost = 0;
1049 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1051 /* Grouped loads read all elements in the group at once,
1052 so we want the DR for the first statement. */
1053 if (!slp_node && grouped_access_p)
1055 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1059 /* True if we should include any once-per-group costs as well as
1060 the cost of the statement itself. For SLP we only get called
1061 once per group anyhow. */
1062 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1064 /* We assume that the cost of a single load-lanes instruction is
1065 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1066 access is instead being provided by a load-and-permute operation,
1067 include the cost of the permutes. */
1068 if (first_stmt_p
1069 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1071 /* Uses even and odd extract operations or shuffle operations
1072 for each needed permute. */
1073 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1074 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
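/* As for stores above, e.g. a single copy of a group of 4 loads needs
   roughly 1 * ceil_log2 (4) * 4 = 8 permute statements.  */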
1075 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 stmt_info, 0, vect_body);
1078 if (dump_enabled_p ())
1079 dump_printf_loc (MSG_NOTE, vect_location,
1080 "vect_model_load_cost: strided group_size = %d .\n",
1081 group_size);
1084 /* The loads themselves. */
1085 if (memory_access_type == VMAT_ELEMENTWISE
1086 || memory_access_type == VMAT_GATHER_SCATTER)
1088 /* N scalar loads plus gathering them into a vector. */
1089 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090 inside_cost += record_stmt_cost (body_cost_vec,
1091 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1092 scalar_load, stmt_info, 0, vect_body);
1094 else
1095 vect_get_load_cost (dr, ncopies, first_stmt_p,
1096 &inside_cost, &prologue_cost,
1097 prologue_cost_vec, body_cost_vec, true);
1098 if (memory_access_type == VMAT_ELEMENTWISE
1099 || memory_access_type == VMAT_STRIDED_SLP)
1100 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: inside_cost = %d, "
1106 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1110 /* Calculate cost of DR's memory access. */
1111 void
1112 vect_get_load_cost (struct data_reference *dr, int ncopies,
1113 bool add_realign_cost, unsigned int *inside_cost,
1114 unsigned int *prologue_cost,
1115 stmt_vector_for_cost *prologue_cost_vec,
1116 stmt_vector_for_cost *body_cost_vec,
1117 bool record_prologue_costs)
1119 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1120 gimple *stmt = DR_STMT (dr);
1121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 switch (alignment_support_scheme)
1125 case dr_aligned:
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1128 stmt_info, 0, vect_body);
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE, vect_location,
1132 "vect_model_load_cost: aligned.\n");
1134 break;
1136 case dr_unaligned_supported:
1138 /* Here, we assign an additional cost for the unaligned load. */
1139 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1140 unaligned_load, stmt_info,
1141 DR_MISALIGNMENT (dr), vect_body);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "vect_model_load_cost: unaligned supported by "
1146 "hardware.\n");
1148 break;
1150 case dr_explicit_realign:
1152 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1153 vector_load, stmt_info, 0, vect_body);
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1155 vec_perm, stmt_info, 0, vect_body);
1157 /* FIXME: If the misalignment remains fixed across the iterations of
1158 the containing loop, the following cost should be added to the
1159 prologue costs. */
1160 if (targetm.vectorize.builtin_mask_for_load)
1161 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1162 stmt_info, 0, vect_body);
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE, vect_location,
1166 "vect_model_load_cost: explicit realign\n");
1168 break;
1170 case dr_explicit_realign_optimized:
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "vect_model_load_cost: unaligned software "
1175 "pipelined.\n");
1177 /* Unaligned software pipeline has a load of an address, an initial
1178 load, and possibly a mask operation to "prime" the loop. However,
1179 if this is an access in a group of loads, which provide grouped
1180 access, then the above cost should only be considered for one
1181 access in the group. Inside the loop, there is a load op
1182 and a realignment op. */
1184 if (add_realign_cost && record_prologue_costs)
1186 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1187 vector_stmt, stmt_info,
1188 0, vect_prologue);
1189 if (targetm.vectorize.builtin_mask_for_load)
1190 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1191 vector_stmt, stmt_info,
1192 0, vect_prologue);
1195 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1196 stmt_info, 0, vect_body);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1198 stmt_info, 0, vect_body);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: explicit realign optimized"
1203 "\n");
1205 break;
1208 case dr_unaligned_unsupported:
1210 *inside_cost = VECT_MAX_COST;
1212 if (dump_enabled_p ())
1213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1214 "vect_model_load_cost: unsupported access.\n");
1215 break;
1218 default:
1219 gcc_unreachable ();
1223 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1224 the loop preheader for the vectorized stmt STMT. */
1226 static void
1227 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229 if (gsi)
1230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1231 else
1233 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1234 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236 if (loop_vinfo)
1238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1239 basic_block new_bb;
1240 edge pe;
1242 if (nested_in_vect_loop_p (loop, stmt))
1243 loop = loop->inner;
1245 pe = loop_preheader_edge (loop);
1246 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1247 gcc_assert (!new_bb);
1249 else
1251 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1252 basic_block bb;
1253 gimple_stmt_iterator gsi_bb_start;
1255 gcc_assert (bb_vinfo);
1256 bb = BB_VINFO_BB (bb_vinfo);
1257 gsi_bb_start = gsi_after_labels (bb);
1258 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1262 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location,
1265 "created new init_stmt: ");
1266 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1270 /* Function vect_init_vector.
1272 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1273 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1274 vector type a vector with all elements equal to VAL is created first.
1275 Place the initialization at BSI if it is not NULL. Otherwise, place the
1276 initialization at the loop preheader.
1277 Return the DEF of INIT_STMT.
1278 It will be used in the vectorization of STMT. */
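/* A sketched use (OP and VECTYPE stand for a loop-invariant operand and
   the vector type chosen for STMT):

     tree vec_cst = vect_init_vector (stmt, op, vectype, NULL);

   With GSI == NULL the initialization is emitted in the loop preheader
   and the returned SSA name holds { op, op, ..., op }.  */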
1280 tree
1281 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283 gimple *init_stmt;
1284 tree new_temp;
1286 /* We abuse this function to create an SSA name initialized to 'val'. */
1287 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1290 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 /* Scalar boolean value should be transformed into
1293 all zeros or all ones value before building a vector. */
1294 if (VECTOR_BOOLEAN_TYPE_P (type))
1296 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1297 tree false_val = build_zero_cst (TREE_TYPE (type));
1299 if (CONSTANT_CLASS_P (val))
1300 val = integer_zerop (val) ? false_val : true_val;
1301 else
1303 new_temp = make_ssa_name (TREE_TYPE (type));
1304 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1305 val, true_val, false_val);
1306 vect_init_vector_1 (stmt, init_stmt, gsi);
1307 val = new_temp;
1310 else if (CONSTANT_CLASS_P (val))
1311 val = fold_convert (TREE_TYPE (type), val);
1312 else
1314 new_temp = make_ssa_name (TREE_TYPE (type));
1315 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1316 init_stmt = gimple_build_assign (new_temp,
1317 fold_build1 (VIEW_CONVERT_EXPR,
1318 TREE_TYPE (type),
1319 val));
1320 else
1321 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1322 vect_init_vector_1 (stmt, init_stmt, gsi);
1323 val = new_temp;
1326 val = build_vector_from_val (type, val);
1329 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1330 init_stmt = gimple_build_assign (new_temp, val);
1331 vect_init_vector_1 (stmt, init_stmt, gsi);
1332 return new_temp;
1335 /* Function vect_get_vec_def_for_operand_1.
1337 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1338 DT that will be used in the vectorized stmt. */
1340 tree
1341 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343 tree vec_oprnd;
1344 gimple *vec_stmt;
1345 stmt_vec_info def_stmt_info = NULL;
1347 switch (dt)
1349 /* operand is a constant or a loop invariant. */
1350 case vect_constant_def:
1351 case vect_external_def:
1352 /* Code should use vect_get_vec_def_for_operand. */
1353 gcc_unreachable ();
1355 /* operand is defined inside the loop. */
1356 case vect_internal_def:
1358 /* Get the def from the vectorized stmt. */
1359 def_stmt_info = vinfo_for_stmt (def_stmt);
1361 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1362 /* Get vectorized pattern statement. */
1363 if (!vec_stmt
1364 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1365 && !STMT_VINFO_RELEVANT (def_stmt_info))
1366 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1367 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1368 gcc_assert (vec_stmt);
1369 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1370 vec_oprnd = PHI_RESULT (vec_stmt);
1371 else if (is_gimple_call (vec_stmt))
1372 vec_oprnd = gimple_call_lhs (vec_stmt);
1373 else
1374 vec_oprnd = gimple_assign_lhs (vec_stmt);
1375 return vec_oprnd;
1378 /* operand is defined by a loop header phi. */
1379 case vect_reduction_def:
1380 case vect_double_reduction_def:
1381 case vect_nested_cycle:
1382 case vect_induction_def:
1384 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386 /* Get the def from the vectorized stmt. */
1387 def_stmt_info = vinfo_for_stmt (def_stmt);
1388 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1389 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1390 vec_oprnd = PHI_RESULT (vec_stmt);
1391 else
1392 vec_oprnd = gimple_get_lhs (vec_stmt);
1393 return vec_oprnd;
1396 default:
1397 gcc_unreachable ();
1402 /* Function vect_get_vec_def_for_operand.
1404 OP is an operand in STMT. This function returns a (vector) def that will be
1405 used in the vectorized stmt for STMT.
1407 In the case that OP is an SSA_NAME which is defined in the loop, then
1408 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410 In case OP is an invariant or constant, a new stmt that creates a vector def
1411 needs to be introduced. VECTYPE may be used to specify a required type for
1412 vector invariant. */
1414 tree
1415 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417 gimple *def_stmt;
1418 enum vect_def_type dt;
1419 bool is_simple_use;
1420 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1421 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423 if (dump_enabled_p ())
1425 dump_printf_loc (MSG_NOTE, vect_location,
1426 "vect_get_vec_def_for_operand: ");
1427 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1428 dump_printf (MSG_NOTE, "\n");
1431 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1432 gcc_assert (is_simple_use);
1433 if (def_stmt && dump_enabled_p ())
1435 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1436 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1439 if (dt == vect_constant_def || dt == vect_external_def)
1441 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1442 tree vector_type;
1444 if (vectype)
1445 vector_type = vectype;
1446 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1447 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1448 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1449 else
1450 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452 gcc_assert (vector_type);
1453 return vect_init_vector (stmt, op, vector_type, NULL);
1455 else
1456 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1460 /* Function vect_get_vec_def_for_stmt_copy
1462 Return a vector-def for an operand. This function is used when the
1463 vectorized stmt to be created (by the caller to this function) is a "copy"
1464 created in case the vectorized result cannot fit in one vector, and several
1465 copies of the vector-stmt are required. In this case the vector-def is
1466 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1467 of the stmt that defines VEC_OPRND.
1468 DT is the type of the vector def VEC_OPRND.
1470 Context:
1471 In case the vectorization factor (VF) is bigger than the number
1472 of elements that can fit in a vectype (nunits), we have to generate
1473 more than one vector stmt to vectorize the scalar stmt. This situation
1474 arises when there are multiple data-types operated upon in the loop; the
1475 smallest data-type determines the VF, and as a result, when vectorizing
1476 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1477 vector stmt (each computing a vector of 'nunits' results, and together
1478 computing 'VF' results in each iteration). This function is called when
1479 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1480 which VF=16 and nunits=4, so the number of copies required is 4):
1482 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1484 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1485 VS1.1: vx.1 = memref1 VS1.2
1486 VS1.2: vx.2 = memref2 VS1.3
1487 VS1.3: vx.3 = memref3
1489 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1490 VSnew.1: vz1 = vx.1 + ... VSnew.2
1491 VSnew.2: vz2 = vx.2 + ... VSnew.3
1492 VSnew.3: vz3 = vx.3 + ...
1494 The vectorization of S1 is explained in vectorizable_load.
1495 The vectorization of S2:
1496 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1497 the function 'vect_get_vec_def_for_operand' is called to
1498 get the relevant vector-def for each operand of S2. For operand x it
1499 returns the vector-def 'vx.0'.
1501 To create the remaining copies of the vector-stmt (VSnew.j), this
1502 function is called to get the relevant vector-def for each operand. It is
1503 obtained from the respective VS1.j stmt, which is recorded in the
1504 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506 For example, to obtain the vector-def 'vx.1' in order to create the
1507 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1508 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1509 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1510 and return its def ('vx.1').
1511 Overall, to create the above sequence this function will be called 3 times:
1512 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1513 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1514 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1516 tree
1517 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519 gimple *vec_stmt_for_operand;
1520 stmt_vec_info def_stmt_info;
1522 /* Do nothing; can reuse same def. */
1523 if (dt == vect_external_def || dt == vect_constant_def )
1524 return vec_oprnd;
1526 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1527 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1528 gcc_assert (def_stmt_info);
1529 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1530 gcc_assert (vec_stmt_for_operand);
1531 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1532 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1533 else
1534 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1535 return vec_oprnd;
1539 /* Get vectorized definitions for the operands to create a copy of an original
1540 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1542 void
1543 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1544 vec<tree> *vec_oprnds0,
1545 vec<tree> *vec_oprnds1)
1547 tree vec_oprnd = vec_oprnds0->pop ();
1549 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1550 vec_oprnds0->quick_push (vec_oprnd);
1552 if (vec_oprnds1 && vec_oprnds1->length ())
1554 vec_oprnd = vec_oprnds1->pop ();
1555 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1556 vec_oprnds1->quick_push (vec_oprnd);
1561 /* Get vectorized definitions for OP0 and OP1. */
1563 void
1564 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1565 vec<tree> *vec_oprnds0,
1566 vec<tree> *vec_oprnds1,
1567 slp_tree slp_node)
1569 if (slp_node)
1571 int nops = (op1 == NULL_TREE) ? 1 : 2;
1572 auto_vec<tree> ops (nops);
1573 auto_vec<vec<tree> > vec_defs (nops);
1575 ops.quick_push (op0);
1576 if (op1)
1577 ops.quick_push (op1);
1579 vect_get_slp_defs (ops, slp_node, &vec_defs);
1581 *vec_oprnds0 = vec_defs[0];
1582 if (op1)
1583 *vec_oprnds1 = vec_defs[1];
1585 else
1587 tree vec_oprnd;
1589 vec_oprnds0->create (1);
1590 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1591 vec_oprnds0->quick_push (vec_oprnd);
1593 if (op1)
1595 vec_oprnds1->create (1);
1596 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1597 vec_oprnds1->quick_push (vec_oprnd);
1603 /* Function vect_finish_stmt_generation.
1605 Insert a new stmt. */
1607 void
1608 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1609 gimple_stmt_iterator *gsi)
1611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1612 vec_info *vinfo = stmt_info->vinfo;
1614 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1616 if (!gsi_end_p (*gsi)
1617 && gimple_has_mem_ops (vec_stmt))
1619 gimple *at_stmt = gsi_stmt (*gsi);
1620 tree vuse = gimple_vuse (at_stmt);
1621 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1623 tree vdef = gimple_vdef (at_stmt);
1624 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1625 /* If we have an SSA vuse and insert a store, update virtual
1626 SSA form to avoid triggering the renamer. Do so only
1627 if we can easily see all uses - which is what almost always
1628 happens with the way vectorized stmts are inserted. */
1629 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1630 && ((is_gimple_assign (vec_stmt)
1631 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1632 || (is_gimple_call (vec_stmt)
1633 && !(gimple_call_flags (vec_stmt)
1634 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1636 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1637 gimple_set_vdef (vec_stmt, new_vdef);
1638 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1642 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1644 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1646 if (dump_enabled_p ())
1648 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1649 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1652 gimple_set_location (vec_stmt, gimple_location (stmt));
1654 /* While EH edges will generally prevent vectorization, stmt might
1655 e.g. be in a must-not-throw region. Ensure newly created stmts
1656 that could throw are part of the same region. */
1657 int lp_nr = lookup_stmt_eh_lp (stmt);
1658 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1659 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1662 /* We want to vectorize a call to combined function CFN with function
1663 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1664 as the types of all inputs. Check whether this is possible using
1665 an internal function, returning its code if so or IFN_LAST if not. */
1667 static internal_fn
1668 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1669 tree vectype_out, tree vectype_in)
1671 internal_fn ifn;
1672 if (internal_fn_p (cfn))
1673 ifn = as_internal_fn (cfn);
1674 else
1675 ifn = associated_internal_fn (fndecl);
1676 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1678 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1679 if (info.vectorizable)
1681 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1682 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1683 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1684 OPTIMIZE_FOR_SPEED))
1685 return ifn;
1688 return IFN_LAST;
1692 static tree permute_vec_elements (tree, tree, tree, gimple *,
1693 gimple_stmt_iterator *);
1695 /* STMT is a non-strided load or store, meaning that it accesses
1696 elements with a known constant step. Return -1 if that step
1697 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1699 static int
1700 compare_step_with_zero (gimple *stmt)
1702 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1703 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1704 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1705 size_zero_node);
1708 /* If the target supports a permute mask that reverses the elements in
1709 a vector of type VECTYPE, return that mask, otherwise return null. */
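/* For a 4-element vector, for instance, the selector built below is
   { 3, 2, 1, 0 }, i.e. a full reversal of the lanes.  */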
1711 static tree
1712 perm_mask_for_reverse (tree vectype)
1714 int i, nunits;
1716 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1718 auto_vec_perm_indices sel (nunits);
1719 for (i = 0; i < nunits; ++i)
1720 sel.quick_push (nunits - 1 - i);
1722 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
1723 return NULL_TREE;
1724 return vect_gen_perm_mask_checked (vectype, sel);
1727 /* A subroutine of get_load_store_type, with a subset of the same
1728 arguments. Handle the case where STMT is part of a grouped load
1729 or store.
1731 For stores, the statements in the group are all consecutive
1732 and there is no gap at the end. For loads, the statements in the
1733 group might not be consecutive; there can be gaps between statements
1734 as well as at the end. */
1736 static bool
1737 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1738 vec_load_store_type vls_type,
1739 vect_memory_access_type *memory_access_type)
1741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1742 vec_info *vinfo = stmt_info->vinfo;
1743 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1744 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1745 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1746 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1747 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1748 bool single_element_p = (stmt == first_stmt
1749 && !GROUP_NEXT_ELEMENT (stmt_info));
1750 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1751 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1753 /* True if the vectorized statements would access beyond the last
1754 statement in the group. */
1755 bool overrun_p = false;
1757 /* True if we can cope with such overrun by peeling for gaps, so that
1758 there is at least one final scalar iteration after the vector loop. */
1759 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1761 /* There can only be a gap at the end of the group if the stride is
1762 known at compile time. */
1763 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1765 /* Stores can't yet have gaps. */
1766 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1768 if (slp)
1770 if (STMT_VINFO_STRIDED_P (stmt_info))
1772 /* Try to use consecutive accesses of GROUP_SIZE elements,
1773 separated by the stride, until we have a complete vector.
1774 Fall back to scalar accesses if that isn't possible. */
1775 if (nunits % group_size == 0)
1776 *memory_access_type = VMAT_STRIDED_SLP;
1777 else
1778 *memory_access_type = VMAT_ELEMENTWISE;
1780 else
1782 overrun_p = loop_vinfo && gap != 0;
1783 if (overrun_p && vls_type != VLS_LOAD)
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1786 "Grouped store with gaps requires"
1787 " non-consecutive accesses\n");
1788 return false;
1790 /* An overrun is fine if the trailing elements are smaller
1791 than the alignment boundary B. Every vector access will
1792 be a multiple of B and so we are guaranteed to access a
1793 non-gap element in the same B-sized block. */
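	  /* For example, with a known 16-byte alignment and 4-byte elements,
	     a trailing gap of up to three elements keeps every vector access
	     inside a 16-byte block that already contains a real group
	     element.  */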
1794 if (overrun_p
1795 && gap < (vect_known_alignment_in_bytes (first_dr)
1796 / vect_get_scalar_dr_size (first_dr)))
1797 overrun_p = false;
1798 if (overrun_p && !can_overrun_p)
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1802 "Peeling for outer loop is not supported\n");
1803 return false;
1805 *memory_access_type = VMAT_CONTIGUOUS;
1808 else
1810 /* We can always handle this case using elementwise accesses,
1811 but see if something more efficient is available. */
1812 *memory_access_type = VMAT_ELEMENTWISE;
1814 /* If there is a gap at the end of the group then these optimizations
1815 would access excess elements in the last iteration. */
1816 bool would_overrun_p = (gap != 0);
1817 /* An overrun is fine if the trailing elements are smaller than the
1818 alignment boundary B. Every vector access will be a multiple of B
1819 and so we are guaranteed to access a non-gap element in the
1820 same B-sized block. */
1821 if (would_overrun_p
1822 && gap < (vect_known_alignment_in_bytes (first_dr)
1823 / vect_get_scalar_dr_size (first_dr)))
1824 would_overrun_p = false;
1826 if (!STMT_VINFO_STRIDED_P (stmt_info)
1827 && (can_overrun_p || !would_overrun_p)
1828 && compare_step_with_zero (stmt) > 0)
1830 /* First try using LOAD/STORE_LANES. */
1831 if (vls_type == VLS_LOAD
1832 ? vect_load_lanes_supported (vectype, group_size)
1833 : vect_store_lanes_supported (vectype, group_size))
1835 *memory_access_type = VMAT_LOAD_STORE_LANES;
1836 overrun_p = would_overrun_p;
1839 /* If that fails, try using permuting loads. */
1840 if (*memory_access_type == VMAT_ELEMENTWISE
1841 && (vls_type == VLS_LOAD
1842 ? vect_grouped_load_supported (vectype, single_element_p,
1843 group_size)
1844 : vect_grouped_store_supported (vectype, group_size)))
1846 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1847 overrun_p = would_overrun_p;
1852 if (vls_type != VLS_LOAD && first_stmt == stmt)
1854 /* STMT is the leader of the group. Check the operands of all the
1855 stmts of the group. */
1856 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1857 while (next_stmt)
1859 gcc_assert (gimple_assign_single_p (next_stmt));
1860 tree op = gimple_assign_rhs1 (next_stmt);
1861 gimple *def_stmt;
1862 enum vect_def_type dt;
1863 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1865 if (dump_enabled_p ())
1866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1867 "use not simple.\n");
1868 return false;
1870 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1874 if (overrun_p)
1876 gcc_assert (can_overrun_p);
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1879 "Data access with gaps requires scalar "
1880 "epilogue loop\n");
1881 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1884 return true;
1887 /* A subroutine of get_load_store_type, with a subset of the same
1888 arguments. Handle the case where STMT is a load or store that
1889 accesses consecutive elements with a negative step. */
1891 static vect_memory_access_type
1892 get_negative_load_store_type (gimple *stmt, tree vectype,
1893 vec_load_store_type vls_type,
1894 unsigned int ncopies)
1896 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1897 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1898 dr_alignment_support alignment_support_scheme;
1900 if (ncopies > 1)
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1904 "multiple types with negative step.\n");
1905 return VMAT_ELEMENTWISE;
1908 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1909 if (alignment_support_scheme != dr_aligned
1910 && alignment_support_scheme != dr_unaligned_supported)
1912 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1914 "negative step but alignment required.\n");
1915 return VMAT_ELEMENTWISE;
1918 if (vls_type == VLS_STORE_INVARIANT)
1920 if (dump_enabled_p ())
1921 dump_printf_loc (MSG_NOTE, vect_location,
1922 "negative step with invariant source;"
1923 " no permute needed.\n");
1924 return VMAT_CONTIGUOUS_DOWN;
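  /* Otherwise the loaded or stored vector has to be reversed element by
     element, which requires the target to support the reverse permutation
     for this vector type.  */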
1927 if (!perm_mask_for_reverse (vectype))
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1931 "negative step and reversing not supported.\n");
1932 return VMAT_ELEMENTWISE;
1935 return VMAT_CONTIGUOUS_REVERSE;
1938 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1939 if there is a memory access type that the vectorized form can use,
1940 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1941 or scatters, fill in GS_INFO accordingly.
1943 SLP says whether we're performing SLP rather than loop vectorization.
1944 VECTYPE is the vector type that the vectorized statements will use.
1945 NCOPIES is the number of vector statements that will be needed. */
1947 static bool
1948 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1949 vec_load_store_type vls_type, unsigned int ncopies,
1950 vect_memory_access_type *memory_access_type,
1951 gather_scatter_info *gs_info)
1953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1954 vec_info *vinfo = stmt_info->vinfo;
1955 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1956 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1958 *memory_access_type = VMAT_GATHER_SCATTER;
1959 gimple *def_stmt;
1960 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1961 gcc_unreachable ();
1962 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1963 &gs_info->offset_dt,
1964 &gs_info->offset_vectype))
1966 if (dump_enabled_p ())
1967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1968 "%s index use not simple.\n",
1969 vls_type == VLS_LOAD ? "gather" : "scatter");
1970 return false;
1973 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1975 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1976 memory_access_type))
1977 return false;
1979 else if (STMT_VINFO_STRIDED_P (stmt_info))
1981 gcc_assert (!slp);
1982 *memory_access_type = VMAT_ELEMENTWISE;
1984 else
1986 int cmp = compare_step_with_zero (stmt);
1987 if (cmp < 0)
1988 *memory_access_type = get_negative_load_store_type
1989 (stmt, vectype, vls_type, ncopies);
1990 else if (cmp == 0)
1992 gcc_assert (vls_type == VLS_LOAD);
1993 *memory_access_type = VMAT_INVARIANT;
1995 else
1996 *memory_access_type = VMAT_CONTIGUOUS;
1999 /* FIXME: At the moment the cost model seems to underestimate the
2000 cost of using elementwise accesses. This check preserves the
2001 traditional behavior until that can be fixed. */
2002 if (*memory_access_type == VMAT_ELEMENTWISE
2003 && !STMT_VINFO_STRIDED_P (stmt_info))
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2007 "not falling back to elementwise accesses\n");
2008 return false;
2010 return true;
2013 /* Function vectorizable_mask_load_store.
2015 Check if STMT performs a conditional load or store that can be vectorized.
2016 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2018 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2020 static bool
2021 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2022 gimple **vec_stmt, slp_tree slp_node)
2024 tree vec_dest = NULL;
2025 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2026 stmt_vec_info prev_stmt_info;
2027 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2028 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2029 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2030 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2031 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2032 tree rhs_vectype = NULL_TREE;
2033 tree mask_vectype;
2034 tree elem_type;
2035 gimple *new_stmt;
2036 tree dummy;
2037 tree dataref_ptr = NULL_TREE;
2038 gimple *ptr_incr;
2039 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2040 int ncopies;
2041 int i, j;
2042 bool inv_p;
2043 gather_scatter_info gs_info;
2044 vec_load_store_type vls_type;
2045 tree mask;
2046 gimple *def_stmt;
2047 enum vect_def_type dt;
2049 if (slp_node != NULL)
2050 return false;
2052 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2053 gcc_assert (ncopies >= 1);
2055 mask = gimple_call_arg (stmt, 2);
2057 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2058 return false;
2060 /* FORNOW. This restriction should be relaxed. */
2061 if (nested_in_vect_loop && ncopies > 1)
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2065 "multiple types in nested loop.");
2066 return false;
2069 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2070 return false;
2072 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2073 && ! vec_stmt)
2074 return false;
2076 if (!STMT_VINFO_DATA_REF (stmt_info))
2077 return false;
2079 elem_type = TREE_TYPE (vectype);
2081 if (TREE_CODE (mask) != SSA_NAME)
2082 return false;
2084 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2085 return false;
2087 if (!mask_vectype)
2088 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2090 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2091 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2092 return false;
2094 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2096 tree rhs = gimple_call_arg (stmt, 3);
2097 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2098 return false;
2099 if (dt == vect_constant_def || dt == vect_external_def)
2100 vls_type = VLS_STORE_INVARIANT;
2101 else
2102 vls_type = VLS_STORE;
2104 else
2105 vls_type = VLS_LOAD;
2107 vect_memory_access_type memory_access_type;
2108 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2109 &memory_access_type, &gs_info))
2110 return false;
2112 if (memory_access_type == VMAT_GATHER_SCATTER)
2114 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
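      /* The mask is the fourth argument of the gather builtin, so step
	 over the first three argument types to find the mask type.  */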
2115 tree masktype
2116 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2117 if (TREE_CODE (masktype) == INTEGER_TYPE)
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121 "masked gather with integer mask not supported.");
2122 return false;
2125 else if (memory_access_type != VMAT_CONTIGUOUS)
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2129 "unsupported access type for masked %s.\n",
2130 vls_type == VLS_LOAD ? "load" : "store");
2131 return false;
2133 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2134 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2135 TYPE_MODE (mask_vectype),
2136 vls_type == VLS_LOAD)
2137 || (rhs_vectype
2138 && !useless_type_conversion_p (vectype, rhs_vectype)))
2139 return false;
2141 if (!vec_stmt) /* transformation not required. */
2143 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2144 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2145 if (vls_type == VLS_LOAD)
2146 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2147 NULL, NULL, NULL);
2148 else
2149 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2150 dt, NULL, NULL, NULL);
2151 return true;
2153 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2155 /* Transform. */
2157 if (memory_access_type == VMAT_GATHER_SCATTER)
2159 tree vec_oprnd0 = NULL_TREE, op;
2160 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2161 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2162 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2163 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2164 tree mask_perm_mask = NULL_TREE;
2165 edge pe = loop_preheader_edge (loop);
2166 gimple_seq seq;
2167 basic_block new_bb;
2168 enum { NARROW, NONE, WIDEN } modifier;
2169 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2171 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2172 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2173 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2174 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2175 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2176 scaletype = TREE_VALUE (arglist);
2177 gcc_checking_assert (types_compatible_p (srctype, rettype)
2178 && types_compatible_p (srctype, masktype));
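      /* The builtin may use a different number of offset elements than
	 there are data elements per vector.  If the offset vector has
	 twice as many elements (WIDEN), each offset vector serves two
	 consecutive copies, with a permutation selecting its upper half
	 for the odd copy.  If it has half as many (NARROW), two builtin
	 results are combined into each full-width vector, so NCOPIES is
	 doubled.  */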
2180 if (nunits == gather_off_nunits)
2181 modifier = NONE;
2182 else if (nunits == gather_off_nunits / 2)
2184 modifier = WIDEN;
2186 auto_vec_perm_indices sel (gather_off_nunits);
2187 for (i = 0; i < gather_off_nunits; ++i)
2188 sel.quick_push (i | nunits);
2190 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2192 else if (nunits == gather_off_nunits * 2)
2194 modifier = NARROW;
2196 auto_vec_perm_indices sel (nunits);
2197 sel.quick_grow (nunits);
2198 for (i = 0; i < nunits; ++i)
2199 sel[i] = i < gather_off_nunits
2200 ? i : i + nunits - gather_off_nunits;
2202 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2203 ncopies *= 2;
2204 for (i = 0; i < nunits; ++i)
2205 sel[i] = i | gather_off_nunits;
2206 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2208 else
2209 gcc_unreachable ();
2211 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2213 ptr = fold_convert (ptrtype, gs_info.base);
2214 if (!is_gimple_min_invariant (ptr))
2216 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2217 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2218 gcc_assert (!new_bb);
2221 scale = build_int_cst (scaletype, gs_info.scale);
2223 prev_stmt_info = NULL;
2224 for (j = 0; j < ncopies; ++j)
2226 if (modifier == WIDEN && (j & 1))
2227 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2228 perm_mask, stmt, gsi);
2229 else if (j == 0)
2230 op = vec_oprnd0
2231 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2232 else
2233 op = vec_oprnd0
2234 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2236 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2238 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2239 == TYPE_VECTOR_SUBPARTS (idxtype));
2240 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2241 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2242 new_stmt
2243 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2244 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2245 op = var;
2248 if (mask_perm_mask && (j & 1))
2249 mask_op = permute_vec_elements (mask_op, mask_op,
2250 mask_perm_mask, stmt, gsi);
2251 else
2253 if (j == 0)
2254 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2255 else
2257 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2258 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2261 mask_op = vec_mask;
2262 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2264 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2265 == TYPE_VECTOR_SUBPARTS (masktype));
2266 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2267 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2268 new_stmt
2269 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2271 mask_op = var;
2275 new_stmt
2276 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2277 scale);
2279 if (!useless_type_conversion_p (vectype, rettype))
2281 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2282 == TYPE_VECTOR_SUBPARTS (rettype));
2283 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2284 gimple_call_set_lhs (new_stmt, op);
2285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2286 var = make_ssa_name (vec_dest);
2287 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2288 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2290 else
2292 var = make_ssa_name (vec_dest, new_stmt);
2293 gimple_call_set_lhs (new_stmt, var);
2296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2298 if (modifier == NARROW)
2300 if ((j & 1) == 0)
2302 prev_res = var;
2303 continue;
2305 var = permute_vec_elements (prev_res, var,
2306 perm_mask, stmt, gsi);
2307 new_stmt = SSA_NAME_DEF_STMT (var);
2310 if (prev_stmt_info == NULL)
2311 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2312 else
2313 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2314 prev_stmt_info = vinfo_for_stmt (new_stmt);
2317 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2318 from the IL. */
2319 if (STMT_VINFO_RELATED_STMT (stmt_info))
2321 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2322 stmt_info = vinfo_for_stmt (stmt);
2324 tree lhs = gimple_call_lhs (stmt);
2325 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2326 set_vinfo_for_stmt (new_stmt, stmt_info);
2327 set_vinfo_for_stmt (stmt, NULL);
2328 STMT_VINFO_STMT (stmt_info) = new_stmt;
2329 gsi_replace (gsi, new_stmt, true);
2330 return true;
2332 else if (vls_type != VLS_LOAD)
2334 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2335 prev_stmt_info = NULL;
2336 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2337 for (i = 0; i < ncopies; i++)
2339 unsigned align, misalign;
2341 if (i == 0)
2343 tree rhs = gimple_call_arg (stmt, 3);
2344 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2345 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2346 mask_vectype);
 2347 /* We should have caught mismatched types earlier.  */
2348 gcc_assert (useless_type_conversion_p (vectype,
2349 TREE_TYPE (vec_rhs)));
2350 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2351 NULL_TREE, &dummy, gsi,
2352 &ptr_incr, false, &inv_p);
2353 gcc_assert (!inv_p);
2355 else
2357 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2358 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2359 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2360 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2361 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2362 TYPE_SIZE_UNIT (vectype));
2365 align = DR_TARGET_ALIGNMENT (dr);
2366 if (aligned_access_p (dr))
2367 misalign = 0;
2368 else if (DR_MISALIGNMENT (dr) == -1)
2370 align = TYPE_ALIGN_UNIT (elem_type);
2371 misalign = 0;
2373 else
2374 misalign = DR_MISALIGNMENT (dr);
2375 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2376 misalign);
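	  /* The second argument of the IFN_MASK_STORE call is an alignment
	     hint: pass the full target alignment when the access is
	     aligned, otherwise the largest power of two that is known to
	     divide the misaligned offset.  */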
2377 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2378 misalign ? least_bit_hwi (misalign) : align);
2379 gcall *call
2380 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2381 ptr, vec_mask, vec_rhs);
2382 gimple_call_set_nothrow (call, true);
2383 new_stmt = call;
2384 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2385 if (i == 0)
2386 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2387 else
2388 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2389 prev_stmt_info = vinfo_for_stmt (new_stmt);
2392 else
2394 tree vec_mask = NULL_TREE;
2395 prev_stmt_info = NULL;
2396 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2397 for (i = 0; i < ncopies; i++)
2399 unsigned align, misalign;
2401 if (i == 0)
2403 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2404 mask_vectype);
2405 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2406 NULL_TREE, &dummy, gsi,
2407 &ptr_incr, false, &inv_p);
2408 gcc_assert (!inv_p);
2410 else
2412 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2413 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2414 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2415 TYPE_SIZE_UNIT (vectype));
2418 align = DR_TARGET_ALIGNMENT (dr);
2419 if (aligned_access_p (dr))
2420 misalign = 0;
2421 else if (DR_MISALIGNMENT (dr) == -1)
2423 align = TYPE_ALIGN_UNIT (elem_type);
2424 misalign = 0;
2426 else
2427 misalign = DR_MISALIGNMENT (dr);
2428 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2429 misalign);
2430 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2431 misalign ? least_bit_hwi (misalign) : align);
2432 gcall *call
2433 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2434 ptr, vec_mask);
2435 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2436 gimple_call_set_nothrow (call, true);
2437 vect_finish_stmt_generation (stmt, call, gsi);
2438 if (i == 0)
2439 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2440 else
2441 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2442 prev_stmt_info = vinfo_for_stmt (call);
2446 if (vls_type == VLS_LOAD)
2448 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2449 from the IL. */
2450 if (STMT_VINFO_RELATED_STMT (stmt_info))
2452 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2453 stmt_info = vinfo_for_stmt (stmt);
2455 tree lhs = gimple_call_lhs (stmt);
2456 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2457 set_vinfo_for_stmt (new_stmt, stmt_info);
2458 set_vinfo_for_stmt (stmt, NULL);
2459 STMT_VINFO_STMT (stmt_info) = new_stmt;
2460 gsi_replace (gsi, new_stmt, true);
2463 return true;
2466 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2468 static bool
2469 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2470 gimple **vec_stmt, slp_tree slp_node,
2471 tree vectype_in, enum vect_def_type *dt)
2473 tree op, vectype;
2474 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2476 unsigned ncopies, nunits;
2478 op = gimple_call_arg (stmt, 0);
2479 vectype = STMT_VINFO_VECTYPE (stmt_info);
2480 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2482 /* Multiple types in SLP are handled by creating the appropriate number of
2483 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2484 case of SLP. */
2485 if (slp_node)
2486 ncopies = 1;
2487 else
2488 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2490 gcc_assert (ncopies >= 1);
2492 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2493 if (! char_vectype)
2494 return false;
2496 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2497 unsigned word_bytes = num_bytes / nunits;
2499 auto_vec_perm_indices elts (num_bytes);
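  /* Build the byte permutation that reverses the bytes within each
     WORD_BYTES-sized word; e.g. for bswap32 on a V4SI vector it is
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */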
2500 for (unsigned i = 0; i < nunits; ++i)
2501 for (unsigned j = 0; j < word_bytes; ++j)
2502 elts.quick_push ((i + 1) * word_bytes - j - 1);
2504 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
2505 return false;
2507 if (! vec_stmt)
2509 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2512 "\n");
2513 if (! PURE_SLP_STMT (stmt_info))
2515 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2516 1, vector_stmt, stmt_info, 0, vect_prologue);
2517 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2518 ncopies, vec_perm, stmt_info, 0, vect_body);
2520 return true;
2523 auto_vec<tree, 32> telts (num_bytes);
2524 for (unsigned i = 0; i < num_bytes; ++i)
2525 telts.quick_push (build_int_cst (char_type_node, elts[i]));
2526 tree bswap_vconst = build_vector (char_vectype, telts);
2528 /* Transform. */
2529 vec<tree> vec_oprnds = vNULL;
2530 gimple *new_stmt = NULL;
2531 stmt_vec_info prev_stmt_info = NULL;
2532 for (unsigned j = 0; j < ncopies; j++)
2534 /* Handle uses. */
2535 if (j == 0)
2536 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2537 else
2538 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
 2540 /* Arguments are ready.  Create the new vector stmt.  */
2541 unsigned i;
2542 tree vop;
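      /* Each vectorized bswap is emitted as three statements: view the
	 operand as a vector of bytes, permute the bytes within each word
	 using BSWAP_VCONST, and view the result back in the original
	 vector type.  */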
2543 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2545 tree tem = make_ssa_name (char_vectype);
2546 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2547 char_vectype, vop));
2548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2549 tree tem2 = make_ssa_name (char_vectype);
2550 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2551 tem, tem, bswap_vconst);
2552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2553 tem = make_ssa_name (vectype);
2554 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2555 vectype, tem2));
2556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2557 if (slp_node)
2558 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2561 if (slp_node)
2562 continue;
2564 if (j == 0)
2565 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2566 else
2567 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2569 prev_stmt_info = vinfo_for_stmt (new_stmt);
2572 vec_oprnds.release ();
2573 return true;
2576 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2577 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2578 in a single step. On success, store the binary pack code in
2579 *CONVERT_CODE. */
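/* For example, narrowing a V4SI input to a V8HI output can be done with
   a single pack operation that combines two V4SI vectors into one V8HI
   vector; conversions that would need more than one step are
   rejected.  */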
2581 static bool
2582 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2583 tree_code *convert_code)
2585 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2586 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2587 return false;
2589 tree_code code;
2590 int multi_step_cvt = 0;
2591 auto_vec <tree, 8> interm_types;
2592 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2593 &code, &multi_step_cvt,
2594 &interm_types)
2595 || multi_step_cvt)
2596 return false;
2598 *convert_code = code;
2599 return true;
2602 /* Function vectorizable_call.
2604 Check if GS performs a function call that can be vectorized.
2605 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 2606 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2607 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2609 static bool
2610 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2611 slp_tree slp_node)
2613 gcall *stmt;
2614 tree vec_dest;
2615 tree scalar_dest;
2616 tree op, type;
2617 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2618 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2619 tree vectype_out, vectype_in;
2620 int nunits_in;
2621 int nunits_out;
2622 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2623 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2624 vec_info *vinfo = stmt_info->vinfo;
2625 tree fndecl, new_temp, rhs_type;
2626 gimple *def_stmt;
2627 enum vect_def_type dt[3]
2628 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2629 int ndts = 3;
2630 gimple *new_stmt = NULL;
2631 int ncopies, j;
2632 vec<tree> vargs = vNULL;
2633 enum { NARROW, NONE, WIDEN } modifier;
2634 size_t i, nargs;
2635 tree lhs;
2637 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2638 return false;
2640 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2641 && ! vec_stmt)
2642 return false;
2644 /* Is GS a vectorizable call? */
2645 stmt = dyn_cast <gcall *> (gs);
2646 if (!stmt)
2647 return false;
2649 if (gimple_call_internal_p (stmt)
2650 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2651 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2652 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2653 slp_node);
2655 if (gimple_call_lhs (stmt) == NULL_TREE
2656 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2657 return false;
2659 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2661 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2663 /* Process function arguments. */
2664 rhs_type = NULL_TREE;
2665 vectype_in = NULL_TREE;
2666 nargs = gimple_call_num_args (stmt);
 2668 /* Bail out if the function has more than three arguments; we do not have
 2669 interesting builtin functions to vectorize with more than two arguments
 2670 except for fma.  A call with no arguments is not handled either.  */
2671 if (nargs == 0 || nargs > 3)
2672 return false;
 2674 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
2675 if (gimple_call_internal_p (stmt)
2676 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2678 nargs = 0;
2679 rhs_type = unsigned_type_node;
2682 for (i = 0; i < nargs; i++)
2684 tree opvectype;
2686 op = gimple_call_arg (stmt, i);
2688 /* We can only handle calls with arguments of the same type. */
2689 if (rhs_type
2690 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2692 if (dump_enabled_p ())
2693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2694 "argument types differ.\n");
2695 return false;
2697 if (!rhs_type)
2698 rhs_type = TREE_TYPE (op);
2700 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2702 if (dump_enabled_p ())
2703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2704 "use not simple.\n");
2705 return false;
2708 if (!vectype_in)
2709 vectype_in = opvectype;
2710 else if (opvectype
2711 && opvectype != vectype_in)
2713 if (dump_enabled_p ())
2714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2715 "argument vector types differ.\n");
2716 return false;
 2719 /* If all arguments are external or constant defs, use a vector type with
 2720 the same size as the output vector type.  */
2721 if (!vectype_in)
2722 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2723 if (vec_stmt)
2724 gcc_assert (vectype_in);
2725 if (!vectype_in)
2727 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2730 "no vectype for scalar type ");
2731 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2732 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2735 return false;
2738 /* FORNOW */
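  /* Classify the call by the ratio of input to output lanes: e.g. V8HI
     arguments with V4SI results (half as many lanes per output vector)
     is a WIDEN case, V4SI arguments with V8HI results is a NARROW case,
     and any other ratio is not handled.  */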
2739 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2740 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2741 if (nunits_in == nunits_out / 2)
2742 modifier = NARROW;
2743 else if (nunits_out == nunits_in)
2744 modifier = NONE;
2745 else if (nunits_out == nunits_in / 2)
2746 modifier = WIDEN;
2747 else
2748 return false;
2750 /* We only handle functions that do not read or clobber memory. */
2751 if (gimple_vuse (stmt))
2753 if (dump_enabled_p ())
2754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2755 "function reads from or writes to memory.\n");
2756 return false;
2759 /* For now, we only vectorize functions if a target specific builtin
2760 is available. TODO -- in some cases, it might be profitable to
2761 insert the calls for pieces of the vector, in order to be able
2762 to vectorize other operations in the loop. */
2763 fndecl = NULL_TREE;
2764 internal_fn ifn = IFN_LAST;
2765 combined_fn cfn = gimple_call_combined_fn (stmt);
2766 tree callee = gimple_call_fndecl (stmt);
2768 /* First try using an internal function. */
2769 tree_code convert_code = ERROR_MARK;
2770 if (cfn != CFN_LAST
2771 && (modifier == NONE
2772 || (modifier == NARROW
2773 && simple_integer_narrowing (vectype_out, vectype_in,
2774 &convert_code))))
2775 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2776 vectype_in);
2778 /* If that fails, try asking for a target-specific built-in function. */
2779 if (ifn == IFN_LAST)
2781 if (cfn != CFN_LAST)
2782 fndecl = targetm.vectorize.builtin_vectorized_function
2783 (cfn, vectype_out, vectype_in);
2784 else
2785 fndecl = targetm.vectorize.builtin_md_vectorized_function
2786 (callee, vectype_out, vectype_in);
2789 if (ifn == IFN_LAST && !fndecl)
2791 if (cfn == CFN_GOMP_SIMD_LANE
2792 && !slp_node
2793 && loop_vinfo
2794 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2795 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2796 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2797 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2799 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2800 { 0, 1, 2, ... vf - 1 } vector. */
2801 gcc_assert (nargs == 0);
2803 else if (modifier == NONE
2804 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2805 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2806 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2807 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2808 vectype_in, dt);
2809 else
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2813 "function is not vectorizable.\n");
2814 return false;
2818 if (slp_node)
2819 ncopies = 1;
2820 else if (modifier == NARROW && ifn == IFN_LAST)
2821 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2822 else
2823 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2825 /* Sanity check: make sure that at least one copy of the vectorized stmt
2826 needs to be generated. */
2827 gcc_assert (ncopies >= 1);
2829 if (!vec_stmt) /* transformation not required. */
2831 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2832 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2834 "\n");
2835 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2836 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2837 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2838 vec_promote_demote, stmt_info, 0, vect_body);
2840 return true;
2843 /* Transform. */
2845 if (dump_enabled_p ())
2846 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2848 /* Handle def. */
2849 scalar_dest = gimple_call_lhs (stmt);
2850 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2852 prev_stmt_info = NULL;
2853 if (modifier == NONE || ifn != IFN_LAST)
2855 tree prev_res = NULL_TREE;
2856 for (j = 0; j < ncopies; ++j)
2858 /* Build argument list for the vectorized call. */
2859 if (j == 0)
2860 vargs.create (nargs);
2861 else
2862 vargs.truncate (0);
2864 if (slp_node)
2866 auto_vec<vec<tree> > vec_defs (nargs);
2867 vec<tree> vec_oprnds0;
2869 for (i = 0; i < nargs; i++)
2870 vargs.quick_push (gimple_call_arg (stmt, i));
2871 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2872 vec_oprnds0 = vec_defs[0];
2874 /* Arguments are ready. Create the new vector stmt. */
2875 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2877 size_t k;
2878 for (k = 0; k < nargs; k++)
2880 vec<tree> vec_oprndsk = vec_defs[k];
2881 vargs[k] = vec_oprndsk[i];
2883 if (modifier == NARROW)
2885 tree half_res = make_ssa_name (vectype_in);
2886 gcall *call
2887 = gimple_build_call_internal_vec (ifn, vargs);
2888 gimple_call_set_lhs (call, half_res);
2889 gimple_call_set_nothrow (call, true);
2890 new_stmt = call;
2891 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2892 if ((i & 1) == 0)
2894 prev_res = half_res;
2895 continue;
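		  /* Odd halves: combine the saved even half with this one
		     using the pack code from simple_integer_narrowing.  */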
2897 new_temp = make_ssa_name (vec_dest);
2898 new_stmt = gimple_build_assign (new_temp, convert_code,
2899 prev_res, half_res);
2901 else
2903 gcall *call;
2904 if (ifn != IFN_LAST)
2905 call = gimple_build_call_internal_vec (ifn, vargs);
2906 else
2907 call = gimple_build_call_vec (fndecl, vargs);
2908 new_temp = make_ssa_name (vec_dest, call);
2909 gimple_call_set_lhs (call, new_temp);
2910 gimple_call_set_nothrow (call, true);
2911 new_stmt = call;
2913 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2914 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2917 for (i = 0; i < nargs; i++)
2919 vec<tree> vec_oprndsi = vec_defs[i];
2920 vec_oprndsi.release ();
2922 continue;
2925 for (i = 0; i < nargs; i++)
2927 op = gimple_call_arg (stmt, i);
2928 if (j == 0)
2929 vec_oprnd0
2930 = vect_get_vec_def_for_operand (op, stmt);
2931 else
2933 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2934 vec_oprnd0
2935 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2938 vargs.quick_push (vec_oprnd0);
2941 if (gimple_call_internal_p (stmt)
2942 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2944 auto_vec<tree, 32> v (nunits_out);
2945 for (int k = 0; k < nunits_out; ++k)
2946 v.quick_push (build_int_cst (unsigned_type_node,
2947 j * nunits_out + k));
2948 tree cst = build_vector (vectype_out, v);
2949 tree new_var
2950 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2951 gimple *init_stmt = gimple_build_assign (new_var, cst);
2952 vect_init_vector_1 (stmt, init_stmt, NULL);
2953 new_temp = make_ssa_name (vec_dest);
2954 new_stmt = gimple_build_assign (new_temp, new_var);
2956 else if (modifier == NARROW)
2958 tree half_res = make_ssa_name (vectype_in);
2959 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2960 gimple_call_set_lhs (call, half_res);
2961 gimple_call_set_nothrow (call, true);
2962 new_stmt = call;
2963 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2964 if ((j & 1) == 0)
2966 prev_res = half_res;
2967 continue;
2969 new_temp = make_ssa_name (vec_dest);
2970 new_stmt = gimple_build_assign (new_temp, convert_code,
2971 prev_res, half_res);
2973 else
2975 gcall *call;
2976 if (ifn != IFN_LAST)
2977 call = gimple_build_call_internal_vec (ifn, vargs);
2978 else
2979 call = gimple_build_call_vec (fndecl, vargs);
2980 new_temp = make_ssa_name (vec_dest, new_stmt);
2981 gimple_call_set_lhs (call, new_temp);
2982 gimple_call_set_nothrow (call, true);
2983 new_stmt = call;
2985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987 if (j == (modifier == NARROW ? 1 : 0))
2988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2989 else
2990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2992 prev_stmt_info = vinfo_for_stmt (new_stmt);
2995 else if (modifier == NARROW)
2997 for (j = 0; j < ncopies; ++j)
2999 /* Build argument list for the vectorized call. */
3000 if (j == 0)
3001 vargs.create (nargs * 2);
3002 else
3003 vargs.truncate (0);
3005 if (slp_node)
3007 auto_vec<vec<tree> > vec_defs (nargs);
3008 vec<tree> vec_oprnds0;
3010 for (i = 0; i < nargs; i++)
3011 vargs.quick_push (gimple_call_arg (stmt, i));
3012 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3013 vec_oprnds0 = vec_defs[0];
3015 /* Arguments are ready. Create the new vector stmt. */
3016 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3018 size_t k;
3019 vargs.truncate (0);
3020 for (k = 0; k < nargs; k++)
3022 vec<tree> vec_oprndsk = vec_defs[k];
3023 vargs.quick_push (vec_oprndsk[i]);
3024 vargs.quick_push (vec_oprndsk[i + 1]);
3026 gcall *call;
3027 if (ifn != IFN_LAST)
3028 call = gimple_build_call_internal_vec (ifn, vargs);
3029 else
3030 call = gimple_build_call_vec (fndecl, vargs);
3031 new_temp = make_ssa_name (vec_dest, call);
3032 gimple_call_set_lhs (call, new_temp);
3033 gimple_call_set_nothrow (call, true);
3034 new_stmt = call;
3035 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3036 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3039 for (i = 0; i < nargs; i++)
3041 vec<tree> vec_oprndsi = vec_defs[i];
3042 vec_oprndsi.release ();
3044 continue;
3047 for (i = 0; i < nargs; i++)
3049 op = gimple_call_arg (stmt, i);
3050 if (j == 0)
3052 vec_oprnd0
3053 = vect_get_vec_def_for_operand (op, stmt);
3054 vec_oprnd1
3055 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3057 else
3059 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3060 vec_oprnd0
3061 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3062 vec_oprnd1
3063 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3066 vargs.quick_push (vec_oprnd0);
3067 vargs.quick_push (vec_oprnd1);
3070 new_stmt = gimple_build_call_vec (fndecl, vargs);
3071 new_temp = make_ssa_name (vec_dest, new_stmt);
3072 gimple_call_set_lhs (new_stmt, new_temp);
3073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3075 if (j == 0)
3076 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3077 else
3078 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3080 prev_stmt_info = vinfo_for_stmt (new_stmt);
3083 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3085 else
3086 /* No current target implements this case. */
3087 return false;
3089 vargs.release ();
 3091 /* The call in STMT might prevent it from being removed in dce.
 3092 However, we cannot remove it here because the ssa name
 3093 it defines is mapped to the new definition.  So just replace
 3094 the rhs of the statement with something harmless.  */
3096 if (slp_node)
3097 return true;
3099 type = TREE_TYPE (scalar_dest);
3100 if (is_pattern_stmt_p (stmt_info))
3101 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3102 else
3103 lhs = gimple_call_lhs (stmt);
3105 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3106 set_vinfo_for_stmt (new_stmt, stmt_info);
3107 set_vinfo_for_stmt (stmt, NULL);
3108 STMT_VINFO_STMT (stmt_info) = new_stmt;
3109 gsi_replace (gsi, new_stmt, false);
3111 return true;
3115 struct simd_call_arg_info
3117 tree vectype;
3118 tree op;
3119 HOST_WIDE_INT linear_step;
3120 enum vect_def_type dt;
3121 unsigned int align;
3122 bool simd_lane_linear;
 3125 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
 3126 is linear within the simd lane (but not within the whole loop), note it in
 3127 *ARGINFO.  */
3129 static void
3130 vect_simd_lane_linear (tree op, struct loop *loop,
3131 struct simd_call_arg_info *arginfo)
3133 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3135 if (!is_gimple_assign (def_stmt)
3136 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3137 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3138 return;
3140 tree base = gimple_assign_rhs1 (def_stmt);
3141 HOST_WIDE_INT linear_step = 0;
3142 tree v = gimple_assign_rhs2 (def_stmt);
3143 while (TREE_CODE (v) == SSA_NAME)
3145 tree t;
3146 def_stmt = SSA_NAME_DEF_STMT (v);
3147 if (is_gimple_assign (def_stmt))
3148 switch (gimple_assign_rhs_code (def_stmt))
3150 case PLUS_EXPR:
3151 t = gimple_assign_rhs2 (def_stmt);
3152 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3153 return;
3154 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3155 v = gimple_assign_rhs1 (def_stmt);
3156 continue;
3157 case MULT_EXPR:
3158 t = gimple_assign_rhs2 (def_stmt);
3159 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3160 return;
3161 linear_step = tree_to_shwi (t);
3162 v = gimple_assign_rhs1 (def_stmt);
3163 continue;
3164 CASE_CONVERT:
3165 t = gimple_assign_rhs1 (def_stmt);
3166 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3167 || (TYPE_PRECISION (TREE_TYPE (v))
3168 < TYPE_PRECISION (TREE_TYPE (t))))
3169 return;
3170 if (!linear_step)
3171 linear_step = 1;
3172 v = t;
3173 continue;
3174 default:
3175 return;
3177 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3178 && loop->simduid
3179 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3180 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3181 == loop->simduid))
3183 if (!linear_step)
3184 linear_step = 1;
3185 arginfo->linear_step = linear_step;
3186 arginfo->op = base;
3187 arginfo->simd_lane_linear = true;
3188 return;
3193 /* Function vectorizable_simd_clone_call.
3195 Check if STMT performs a function call that can be vectorized
3196 by calling a simd clone of the function.
3197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 3198 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3201 static bool
3202 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3203 gimple **vec_stmt, slp_tree slp_node)
3205 tree vec_dest;
3206 tree scalar_dest;
3207 tree op, type;
3208 tree vec_oprnd0 = NULL_TREE;
3209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3210 tree vectype;
3211 unsigned int nunits;
3212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3213 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3214 vec_info *vinfo = stmt_info->vinfo;
3215 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3216 tree fndecl, new_temp;
3217 gimple *def_stmt;
3218 gimple *new_stmt = NULL;
3219 int ncopies, j;
3220 auto_vec<simd_call_arg_info> arginfo;
3221 vec<tree> vargs = vNULL;
3222 size_t i, nargs;
3223 tree lhs, rtype, ratype;
3224 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3226 /* Is STMT a vectorizable call? */
3227 if (!is_gimple_call (stmt))
3228 return false;
3230 fndecl = gimple_call_fndecl (stmt);
3231 if (fndecl == NULL_TREE)
3232 return false;
3234 struct cgraph_node *node = cgraph_node::get (fndecl);
3235 if (node == NULL || node->simd_clones == NULL)
3236 return false;
3238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3239 return false;
3241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3242 && ! vec_stmt)
3243 return false;
3245 if (gimple_call_lhs (stmt)
3246 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3247 return false;
3249 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3251 vectype = STMT_VINFO_VECTYPE (stmt_info);
3253 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3254 return false;
3256 /* FORNOW */
3257 if (slp_node)
3258 return false;
3260 /* Process function arguments. */
3261 nargs = gimple_call_num_args (stmt);
3263 /* Bail out if the function has zero arguments. */
3264 if (nargs == 0)
3265 return false;
3267 arginfo.reserve (nargs, true);
3269 for (i = 0; i < nargs; i++)
3271 simd_call_arg_info thisarginfo;
3272 affine_iv iv;
3274 thisarginfo.linear_step = 0;
3275 thisarginfo.align = 0;
3276 thisarginfo.op = NULL_TREE;
3277 thisarginfo.simd_lane_linear = false;
3279 op = gimple_call_arg (stmt, i);
3280 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3281 &thisarginfo.vectype)
3282 || thisarginfo.dt == vect_uninitialized_def)
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3286 "use not simple.\n");
3287 return false;
3290 if (thisarginfo.dt == vect_constant_def
3291 || thisarginfo.dt == vect_external_def)
3292 gcc_assert (thisarginfo.vectype == NULL_TREE);
3293 else
3294 gcc_assert (thisarginfo.vectype != NULL_TREE);
3296 /* For linear arguments, the analyze phase should have saved
3297 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3298 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3299 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3301 gcc_assert (vec_stmt);
3302 thisarginfo.linear_step
3303 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3304 thisarginfo.op
3305 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3306 thisarginfo.simd_lane_linear
3307 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3308 == boolean_true_node);
 3309 /* If the loop has been peeled for alignment, adjust the base accordingly.  */
3310 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3311 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3312 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3314 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3315 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3316 tree opt = TREE_TYPE (thisarginfo.op);
3317 bias = fold_convert (TREE_TYPE (step), bias);
3318 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3319 thisarginfo.op
3320 = fold_build2 (POINTER_TYPE_P (opt)
3321 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3322 thisarginfo.op, bias);
3325 else if (!vec_stmt
3326 && thisarginfo.dt != vect_constant_def
3327 && thisarginfo.dt != vect_external_def
3328 && loop_vinfo
3329 && TREE_CODE (op) == SSA_NAME
3330 && simple_iv (loop, loop_containing_stmt (stmt), op,
3331 &iv, false)
3332 && tree_fits_shwi_p (iv.step))
3334 thisarginfo.linear_step = tree_to_shwi (iv.step);
3335 thisarginfo.op = iv.base;
3337 else if ((thisarginfo.dt == vect_constant_def
3338 || thisarginfo.dt == vect_external_def)
3339 && POINTER_TYPE_P (TREE_TYPE (op)))
3340 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3341 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3342 linear too. */
3343 if (POINTER_TYPE_P (TREE_TYPE (op))
3344 && !thisarginfo.linear_step
3345 && !vec_stmt
3346 && thisarginfo.dt != vect_constant_def
3347 && thisarginfo.dt != vect_external_def
3348 && loop_vinfo
3349 && !slp_node
3350 && TREE_CODE (op) == SSA_NAME)
3351 vect_simd_lane_linear (op, loop, &thisarginfo);
3353 arginfo.quick_push (thisarginfo);
3356 unsigned int badness = 0;
3357 struct cgraph_node *bestn = NULL;
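  /* During analysis, walk all simd clones of the callee and pick the one
     with the lowest badness: clones shorter than the vectorization
     factor, target-penalized clones and argument kinds that need extra
     setup all add weighted penalties (in-branch clones are skipped for
     now).  The chosen clone is recorded in STMT_VINFO_SIMD_CLONE_INFO so
     the transform phase reuses it.  */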
3358 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3359 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3360 else
3361 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3362 n = n->simdclone->next_clone)
3364 unsigned int this_badness = 0;
3365 if (n->simdclone->simdlen
3366 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3367 || n->simdclone->nargs != nargs)
3368 continue;
3369 if (n->simdclone->simdlen
3370 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3371 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3372 - exact_log2 (n->simdclone->simdlen)) * 1024;
3373 if (n->simdclone->inbranch)
3374 this_badness += 2048;
3375 int target_badness = targetm.simd_clone.usable (n);
3376 if (target_badness < 0)
3377 continue;
3378 this_badness += target_badness * 512;
3379 /* FORNOW: Have to add code to add the mask argument. */
3380 if (n->simdclone->inbranch)
3381 continue;
3382 for (i = 0; i < nargs; i++)
3384 switch (n->simdclone->args[i].arg_type)
3386 case SIMD_CLONE_ARG_TYPE_VECTOR:
3387 if (!useless_type_conversion_p
3388 (n->simdclone->args[i].orig_type,
3389 TREE_TYPE (gimple_call_arg (stmt, i))))
3390 i = -1;
3391 else if (arginfo[i].dt == vect_constant_def
3392 || arginfo[i].dt == vect_external_def
3393 || arginfo[i].linear_step)
3394 this_badness += 64;
3395 break;
3396 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3397 if (arginfo[i].dt != vect_constant_def
3398 && arginfo[i].dt != vect_external_def)
3399 i = -1;
3400 break;
3401 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3402 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3403 if (arginfo[i].dt == vect_constant_def
3404 || arginfo[i].dt == vect_external_def
3405 || (arginfo[i].linear_step
3406 != n->simdclone->args[i].linear_step))
3407 i = -1;
3408 break;
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3411 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3412 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3413 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3414 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3415 /* FORNOW */
3416 i = -1;
3417 break;
3418 case SIMD_CLONE_ARG_TYPE_MASK:
3419 gcc_unreachable ();
3421 if (i == (size_t) -1)
3422 break;
3423 if (n->simdclone->args[i].alignment > arginfo[i].align)
3425 i = -1;
3426 break;
3428 if (arginfo[i].align)
3429 this_badness += (exact_log2 (arginfo[i].align)
3430 - exact_log2 (n->simdclone->args[i].alignment));
3432 if (i == (size_t) -1)
3433 continue;
3434 if (bestn == NULL || this_badness < badness)
3436 bestn = n;
3437 badness = this_badness;
3441 if (bestn == NULL)
3442 return false;
3444 for (i = 0; i < nargs; i++)
3445 if ((arginfo[i].dt == vect_constant_def
3446 || arginfo[i].dt == vect_external_def)
3447 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3449 arginfo[i].vectype
3450 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3451 i)));
3452 if (arginfo[i].vectype == NULL
3453 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3454 > bestn->simdclone->simdlen))
3455 return false;
3458 fndecl = bestn->decl;
3459 nunits = bestn->simdclone->simdlen;
3460 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
 3462 /* If the function isn't const, only allow it in simd loops where the user
 3463 has asserted that at least nunits consecutive iterations can be
 3464 performed using SIMD instructions.  */
3465 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3466 && gimple_vuse (stmt))
3467 return false;
3469 /* Sanity check: make sure that at least one copy of the vectorized stmt
3470 needs to be generated. */
3471 gcc_assert (ncopies >= 1);
3473 if (!vec_stmt) /* transformation not required. */
3475 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3476 for (i = 0; i < nargs; i++)
3477 if ((bestn->simdclone->args[i].arg_type
3478 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3479 || (bestn->simdclone->args[i].arg_type
3480 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3482 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3483 + 1);
3484 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3485 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3486 ? size_type_node : TREE_TYPE (arginfo[i].op);
3487 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3488 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3489 tree sll = arginfo[i].simd_lane_linear
3490 ? boolean_true_node : boolean_false_node;
3491 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3493 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3494 if (dump_enabled_p ())
3495 dump_printf_loc (MSG_NOTE, vect_location,
3496 "=== vectorizable_simd_clone_call ===\n");
3497 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3498 return true;
3501 /* Transform. */
3503 if (dump_enabled_p ())
3504 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3506 /* Handle def. */
3507 scalar_dest = gimple_call_lhs (stmt);
3508 vec_dest = NULL_TREE;
3509 rtype = NULL_TREE;
3510 ratype = NULL_TREE;
3511 if (scalar_dest)
3513 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3514 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3515 if (TREE_CODE (rtype) == ARRAY_TYPE)
3517 ratype = rtype;
3518 rtype = TREE_TYPE (ratype);
3522 prev_stmt_info = NULL;
3523 for (j = 0; j < ncopies; ++j)
3525 /* Build argument list for the vectorized call. */
3526 if (j == 0)
3527 vargs.create (nargs);
3528 else
3529 vargs.truncate (0);
3531 for (i = 0; i < nargs; i++)
3533 unsigned int k, l, m, o;
3534 tree atype;
3535 op = gimple_call_arg (stmt, i);
3536 switch (bestn->simdclone->args[i].arg_type)
3538 case SIMD_CLONE_ARG_TYPE_VECTOR:
3539 atype = bestn->simdclone->args[i].vector_type;
3540 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
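	    /* The clone may take its vector arguments in a different
	       width than the loop's vector type: extract sub-vectors
	       with BIT_FIELD_REF when the clone's argument type is
	       narrower, or combine several vector defs with a
	       CONSTRUCTOR when it is wider.  */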
3541 for (m = j * o; m < (j + 1) * o; m++)
3543 if (TYPE_VECTOR_SUBPARTS (atype)
3544 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3546 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3547 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3548 / TYPE_VECTOR_SUBPARTS (atype));
3549 gcc_assert ((k & (k - 1)) == 0);
3550 if (m == 0)
3551 vec_oprnd0
3552 = vect_get_vec_def_for_operand (op, stmt);
3553 else
3555 vec_oprnd0 = arginfo[i].op;
3556 if ((m & (k - 1)) == 0)
3557 vec_oprnd0
3558 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3559 vec_oprnd0);
3561 arginfo[i].op = vec_oprnd0;
3562 vec_oprnd0
3563 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3564 bitsize_int (prec),
3565 bitsize_int ((m & (k - 1)) * prec));
3566 new_stmt
3567 = gimple_build_assign (make_ssa_name (atype),
3568 vec_oprnd0);
3569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3570 vargs.safe_push (gimple_assign_lhs (new_stmt));
3572 else
3574 k = (TYPE_VECTOR_SUBPARTS (atype)
3575 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3576 gcc_assert ((k & (k - 1)) == 0);
3577 vec<constructor_elt, va_gc> *ctor_elts;
3578 if (k != 1)
3579 vec_alloc (ctor_elts, k);
3580 else
3581 ctor_elts = NULL;
3582 for (l = 0; l < k; l++)
3584 if (m == 0 && l == 0)
3585 vec_oprnd0
3586 = vect_get_vec_def_for_operand (op, stmt);
3587 else
3588 vec_oprnd0
3589 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3590 arginfo[i].op);
3591 arginfo[i].op = vec_oprnd0;
3592 if (k == 1)
3593 break;
3594 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3595 vec_oprnd0);
3597 if (k == 1)
3598 vargs.safe_push (vec_oprnd0);
3599 else
3601 vec_oprnd0 = build_constructor (atype, ctor_elts);
3602 new_stmt
3603 = gimple_build_assign (make_ssa_name (atype),
3604 vec_oprnd0);
3605 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3606 vargs.safe_push (gimple_assign_lhs (new_stmt));
3610 break;
3611 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3612 vargs.safe_push (op);
3613 break;
3614 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3615 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3616 if (j == 0)
3618 gimple_seq stmts;
3619 arginfo[i].op
3620 = force_gimple_operand (arginfo[i].op, &stmts, true,
3621 NULL_TREE);
3622 if (stmts != NULL)
3624 basic_block new_bb;
3625 edge pe = loop_preheader_edge (loop);
3626 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3627 gcc_assert (!new_bb);
3629 if (arginfo[i].simd_lane_linear)
3631 vargs.safe_push (arginfo[i].op);
3632 break;
3634 tree phi_res = copy_ssa_name (op);
3635 gphi *new_phi = create_phi_node (phi_res, loop->header);
3636 set_vinfo_for_stmt (new_phi,
3637 new_stmt_vec_info (new_phi, loop_vinfo));
3638 add_phi_arg (new_phi, arginfo[i].op,
3639 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3640 enum tree_code code
3641 = POINTER_TYPE_P (TREE_TYPE (op))
3642 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3643 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3644 ? sizetype : TREE_TYPE (op);
3645 widest_int cst
3646 = wi::mul (bestn->simdclone->args[i].linear_step,
3647 ncopies * nunits);
3648 tree tcst = wide_int_to_tree (type, cst);
3649 tree phi_arg = copy_ssa_name (op);
3650 new_stmt
3651 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3652 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3653 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3654 set_vinfo_for_stmt (new_stmt,
3655 new_stmt_vec_info (new_stmt, loop_vinfo));
3656 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3657 UNKNOWN_LOCATION);
3658 arginfo[i].op = phi_res;
3659 vargs.safe_push (phi_res);
3661 else
3663 enum tree_code code
3664 = POINTER_TYPE_P (TREE_TYPE (op))
3665 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3666 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3667 ? sizetype : TREE_TYPE (op);
3668 widest_int cst
3669 = wi::mul (bestn->simdclone->args[i].linear_step,
3670 j * nunits);
3671 tree tcst = wide_int_to_tree (type, cst);
3672 new_temp = make_ssa_name (TREE_TYPE (op));
3673 new_stmt = gimple_build_assign (new_temp, code,
3674 arginfo[i].op, tcst);
3675 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3676 vargs.safe_push (new_temp);
3678 break;
3679 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3680 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3681 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3682 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3683 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3684 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3685 default:
3686 gcc_unreachable ();
3690 new_stmt = gimple_build_call_vec (fndecl, vargs);
3691 if (vec_dest)
3693 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3694 if (ratype)
3695 new_temp = create_tmp_var (ratype);
3696 else if (TYPE_VECTOR_SUBPARTS (vectype)
3697 == TYPE_VECTOR_SUBPARTS (rtype))
3698 new_temp = make_ssa_name (vec_dest, new_stmt);
3699 else
3700 new_temp = make_ssa_name (rtype, new_stmt);
3701 gimple_call_set_lhs (new_stmt, new_temp);
3703 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3705 if (vec_dest)
3707 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3709 unsigned int k, l;
3710 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3711 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3712 gcc_assert ((k & (k - 1)) == 0);
3713 for (l = 0; l < k; l++)
3715 tree t;
3716 if (ratype)
3718 t = build_fold_addr_expr (new_temp);
3719 t = build2 (MEM_REF, vectype, t,
3720 build_int_cst (TREE_TYPE (t),
3721 l * prec / BITS_PER_UNIT));
3723 else
3724 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3725 bitsize_int (prec), bitsize_int (l * prec));
3726 new_stmt
3727 = gimple_build_assign (make_ssa_name (vectype), t);
3728 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3729 if (j == 0 && l == 0)
3730 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3731 else
3732 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3734 prev_stmt_info = vinfo_for_stmt (new_stmt);
3737 if (ratype)
3739 tree clobber = build_constructor (ratype, NULL);
3740 TREE_THIS_VOLATILE (clobber) = 1;
3741 new_stmt = gimple_build_assign (new_temp, clobber);
3742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3744 continue;
3746 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3748 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3749 / TYPE_VECTOR_SUBPARTS (rtype));
3750 gcc_assert ((k & (k - 1)) == 0);
3751 if ((j & (k - 1)) == 0)
3752 vec_alloc (ret_ctor_elts, k);
3753 if (ratype)
3755 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3756 for (m = 0; m < o; m++)
3758 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3759 size_int (m), NULL_TREE, NULL_TREE);
3760 new_stmt
3761 = gimple_build_assign (make_ssa_name (rtype), tem);
3762 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3763 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3764 gimple_assign_lhs (new_stmt));
3766 tree clobber = build_constructor (ratype, NULL);
3767 TREE_THIS_VOLATILE (clobber) = 1;
3768 new_stmt = gimple_build_assign (new_temp, clobber);
3769 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3771 else
3772 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3773 if ((j & (k - 1)) != k - 1)
3774 continue;
3775 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3776 new_stmt
3777 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3778 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3780 if ((unsigned) j == k - 1)
3781 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3782 else
3783 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3785 prev_stmt_info = vinfo_for_stmt (new_stmt);
3786 continue;
3788 else if (ratype)
3790 tree t = build_fold_addr_expr (new_temp);
3791 t = build2 (MEM_REF, vectype, t,
3792 build_int_cst (TREE_TYPE (t), 0));
3793 new_stmt
3794 = gimple_build_assign (make_ssa_name (vec_dest), t);
3795 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3796 tree clobber = build_constructor (ratype, NULL);
3797 TREE_THIS_VOLATILE (clobber) = 1;
3798 vect_finish_stmt_generation (stmt,
3799 gimple_build_assign (new_temp,
3800 clobber), gsi);
3804 if (j == 0)
3805 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3806 else
3807 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3809 prev_stmt_info = vinfo_for_stmt (new_stmt);
3812 vargs.release ();
3814 /* The call in STMT might prevent it from being removed in dce.
3815 We however cannot remove it here, due to the way the ssa name
3816 it defines is mapped to the new definition. So just replace the
3817 rhs of the statement with something harmless. */
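/* Illustration only (names hypothetical): if the original scalar call was
     x_1 = foo (a_2);
   it is rewritten here as the harmless
     x_1 = 0;
   (or a GIMPLE_NOP when there is no scalar lhs), leaving it to a later
   DCE pass to delete the statement once nothing uses it anymore. */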
3819 if (slp_node)
3820 return true;
3822 if (scalar_dest)
3824 type = TREE_TYPE (scalar_dest);
3825 if (is_pattern_stmt_p (stmt_info))
3826 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3827 else
3828 lhs = gimple_call_lhs (stmt);
3829 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3831 else
3832 new_stmt = gimple_build_nop ();
3833 set_vinfo_for_stmt (new_stmt, stmt_info);
3834 set_vinfo_for_stmt (stmt, NULL);
3835 STMT_VINFO_STMT (stmt_info) = new_stmt;
3836 gsi_replace (gsi, new_stmt, true);
3837 unlink_stmt_vdef (stmt);
3839 return true;
3843 /* Function vect_gen_widened_results_half
3845 Create a vector stmt whose code, operand type, and result variable are
3846 CODE, OP_TYPE, and VEC_DEST, and whose arguments are VEC_OPRND0 and
3847 VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3848 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3849 needs to be created (DECL is a function-decl of a target-builtin).
3850 STMT is the original scalar stmt that we are vectorizing. */
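/* A rough sketch of how this is used (illustration only): for a widening
   conversion the caller invokes this function twice, once with the "lo"
   code/decl and once with the "hi" one, e.g.
     vlo = VEC_UNPACK_LO_EXPR <vx>;
     vhi = VEC_UNPACK_HI_EXPR <vx>;
   each call producing one half of the widened result. */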
3852 static gimple *
3853 vect_gen_widened_results_half (enum tree_code code,
3854 tree decl,
3855 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3856 tree vec_dest, gimple_stmt_iterator *gsi,
3857 gimple *stmt)
3859 gimple *new_stmt;
3860 tree new_temp;
3862 /* Generate half of the widened result: */
3863 if (code == CALL_EXPR)
3865 /* Target specific support */
3866 if (op_type == binary_op)
3867 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3868 else
3869 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3870 new_temp = make_ssa_name (vec_dest, new_stmt);
3871 gimple_call_set_lhs (new_stmt, new_temp);
3873 else
3875 /* Generic support */
3876 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3877 if (op_type != binary_op)
3878 vec_oprnd1 = NULL;
3879 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3880 new_temp = make_ssa_name (vec_dest, new_stmt);
3881 gimple_assign_set_lhs (new_stmt, new_temp);
3883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3885 return new_stmt;
3889 /* Get vectorized definitions for loop-based vectorization. For the first
3890 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3891 scalar operand), and for the rest we get a copy with
3892 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3893 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3894 The vectors are collected into VEC_OPRNDS. */
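/* Note: every invocation pushes two vector defs and then recurses
   MULTI_STEP_CVT more times, so a caller passing
   vect_pow2 (multi_step_cvt) - 1 ends up with
   2 * vect_pow2 (multi_step_cvt) defs in VEC_OPRNDS (e.g. 4 defs when
   there is a single intermediate type). */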
3896 static void
3897 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3898 vec<tree> *vec_oprnds, int multi_step_cvt)
3900 tree vec_oprnd;
3902 /* Get first vector operand. */
3903 /* All the vector operands except the very first one (which is the scalar
3904 operand) are stmt copies. */
3905 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3906 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3907 else
3908 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3910 vec_oprnds->quick_push (vec_oprnd);
3912 /* Get second vector operand. */
3913 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3914 vec_oprnds->quick_push (vec_oprnd);
3916 *oprnd = vec_oprnd;
3918 /* For conversion in multiple steps, continue to get operands
3919 recursively. */
3920 if (multi_step_cvt)
3921 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3925 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3926 For multi-step conversions store the resulting vectors and call the function
3927 recursively. */
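/* Illustration only, assuming e.g. V4SI operands being demoted to V8HI:
   each step packs adjacent operand pairs,
     vtmp_0 = VEC_PACK_TRUNC_EXPR <vop_0, vop_1>;
     vtmp_1 = VEC_PACK_TRUNC_EXPR <vop_2, vop_3>;
   and for a multi-step conversion the vtmp_* vectors are fed back into a
   recursive call that packs them again. */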
3929 static void
3930 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3931 int multi_step_cvt, gimple *stmt,
3932 vec<tree> vec_dsts,
3933 gimple_stmt_iterator *gsi,
3934 slp_tree slp_node, enum tree_code code,
3935 stmt_vec_info *prev_stmt_info)
3937 unsigned int i;
3938 tree vop0, vop1, new_tmp, vec_dest;
3939 gimple *new_stmt;
3940 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3942 vec_dest = vec_dsts.pop ();
3944 for (i = 0; i < vec_oprnds->length (); i += 2)
3946 /* Create demotion operation. */
3947 vop0 = (*vec_oprnds)[i];
3948 vop1 = (*vec_oprnds)[i + 1];
3949 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3950 new_tmp = make_ssa_name (vec_dest, new_stmt);
3951 gimple_assign_set_lhs (new_stmt, new_tmp);
3952 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3954 if (multi_step_cvt)
3955 /* Store the resulting vector for the next recursive call. */
3956 (*vec_oprnds)[i/2] = new_tmp;
3957 else
3959 /* This is the last step of the conversion sequence. Store the
3960 vectors in SLP_NODE or in vector info of the scalar statement
3961 (or in STMT_VINFO_RELATED_STMT chain). */
3962 if (slp_node)
3963 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3964 else
3966 if (!*prev_stmt_info)
3967 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3968 else
3969 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3971 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3976 /* For multi-step demotion operations we first generate demotion operations
3977 from the source type to the intermediate types, and then combine the
3978 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3979 type. */
3980 if (multi_step_cvt)
3982 /* At each level of recursion we have half of the operands we had at the
3983 previous level. */
3984 vec_oprnds->truncate ((i+1)/2);
3985 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3986 stmt, vec_dsts, gsi, slp_node,
3987 VEC_PACK_TRUNC_EXPR,
3988 prev_stmt_info);
3991 vec_dsts.quick_push (vec_dest);
3995 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3996 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3997 the resulting vectors and call the function recursively. */
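/* Illustration only: each input vector yields two result vectors, one
   covering the low and one the high elements, e.g. with CODE1/CODE2
   being VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR:
     vlo_i = VEC_UNPACK_LO_EXPR <vop_i>;
     vhi_i = VEC_UNPACK_HI_EXPR <vop_i>;
   so on return VEC_OPRNDS0 holds twice as many defs as before. */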
3999 static void
4000 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4001 vec<tree> *vec_oprnds1,
4002 gimple *stmt, tree vec_dest,
4003 gimple_stmt_iterator *gsi,
4004 enum tree_code code1,
4005 enum tree_code code2, tree decl1,
4006 tree decl2, int op_type)
4008 int i;
4009 tree vop0, vop1, new_tmp1, new_tmp2;
4010 gimple *new_stmt1, *new_stmt2;
4011 vec<tree> vec_tmp = vNULL;
4013 vec_tmp.create (vec_oprnds0->length () * 2);
4014 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4016 if (op_type == binary_op)
4017 vop1 = (*vec_oprnds1)[i];
4018 else
4019 vop1 = NULL_TREE;
4021 /* Generate the two halves of promotion operation. */
4022 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4023 op_type, vec_dest, gsi, stmt);
4024 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4025 op_type, vec_dest, gsi, stmt);
4026 if (is_gimple_call (new_stmt1))
4028 new_tmp1 = gimple_call_lhs (new_stmt1);
4029 new_tmp2 = gimple_call_lhs (new_stmt2);
4031 else
4033 new_tmp1 = gimple_assign_lhs (new_stmt1);
4034 new_tmp2 = gimple_assign_lhs (new_stmt2);
4037 /* Store the results for the next step. */
4038 vec_tmp.quick_push (new_tmp1);
4039 vec_tmp.quick_push (new_tmp2);
4042 vec_oprnds0->release ();
4043 *vec_oprnds0 = vec_tmp;
4047 /* Check if STMT performs a conversion operation, that can be vectorized.
4048 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4049 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4050 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
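/* Illustrative example (target-dependent, sketch only): for
     short s; double d = (double) s;
   the conversion is a WIDEN case that usually needs one intermediate
   type: the shorts are first widened to ints and the ints are then
   float-converted to doubles, which is the multi-step path set up
   below via cvt_type and interm_types. */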
4052 static bool
4053 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4054 gimple **vec_stmt, slp_tree slp_node)
4056 tree vec_dest;
4057 tree scalar_dest;
4058 tree op0, op1 = NULL_TREE;
4059 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4060 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4061 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4062 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4063 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4064 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4065 tree new_temp;
4066 gimple *def_stmt;
4067 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4068 int ndts = 2;
4069 gimple *new_stmt = NULL;
4070 stmt_vec_info prev_stmt_info;
4071 int nunits_in;
4072 int nunits_out;
4073 tree vectype_out, vectype_in;
4074 int ncopies, i, j;
4075 tree lhs_type, rhs_type;
4076 enum { NARROW, NONE, WIDEN } modifier;
4077 vec<tree> vec_oprnds0 = vNULL;
4078 vec<tree> vec_oprnds1 = vNULL;
4079 tree vop0;
4080 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4081 vec_info *vinfo = stmt_info->vinfo;
4082 int multi_step_cvt = 0;
4083 vec<tree> interm_types = vNULL;
4084 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4085 int op_type;
4086 unsigned short fltsz;
4088 /* Is STMT a vectorizable conversion? */
4090 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4091 return false;
4093 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4094 && ! vec_stmt)
4095 return false;
4097 if (!is_gimple_assign (stmt))
4098 return false;
4100 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4101 return false;
4103 code = gimple_assign_rhs_code (stmt);
4104 if (!CONVERT_EXPR_CODE_P (code)
4105 && code != FIX_TRUNC_EXPR
4106 && code != FLOAT_EXPR
4107 && code != WIDEN_MULT_EXPR
4108 && code != WIDEN_LSHIFT_EXPR)
4109 return false;
4111 op_type = TREE_CODE_LENGTH (code);
4113 /* Check types of lhs and rhs. */
4114 scalar_dest = gimple_assign_lhs (stmt);
4115 lhs_type = TREE_TYPE (scalar_dest);
4116 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4118 op0 = gimple_assign_rhs1 (stmt);
4119 rhs_type = TREE_TYPE (op0);
4121 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4122 && !((INTEGRAL_TYPE_P (lhs_type)
4123 && INTEGRAL_TYPE_P (rhs_type))
4124 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4125 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4126 return false;
4128 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4129 && ((INTEGRAL_TYPE_P (lhs_type)
4130 && !type_has_mode_precision_p (lhs_type))
4131 || (INTEGRAL_TYPE_P (rhs_type)
4132 && !type_has_mode_precision_p (rhs_type))))
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136 "type conversion to/from bit-precision unsupported."
4137 "\n");
4138 return false;
4141 /* Check the operands of the operation. */
4142 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4144 if (dump_enabled_p ())
4145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4146 "use not simple.\n");
4147 return false;
4149 if (op_type == binary_op)
4151 bool ok;
4153 op1 = gimple_assign_rhs2 (stmt);
4154 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4155 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4156 OP1. */
4157 if (CONSTANT_CLASS_P (op0))
4158 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4159 else
4160 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4162 if (!ok)
4164 if (dump_enabled_p ())
4165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4166 "use not simple.\n");
4167 return false;
4171 /* If op0 is an external or constant def, use a vector type of
4172 the same size as the output vector type. */
4173 if (!vectype_in)
4174 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4175 if (vec_stmt)
4176 gcc_assert (vectype_in);
4177 if (!vectype_in)
4179 if (dump_enabled_p ())
4181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4182 "no vectype for scalar type ");
4183 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4184 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4187 return false;
4190 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4191 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4193 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4196 "can't convert between boolean and non "
4197 "boolean vectors");
4198 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4199 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4202 return false;
4205 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4206 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4207 if (nunits_in < nunits_out)
4208 modifier = NARROW;
4209 else if (nunits_out == nunits_in)
4210 modifier = NONE;
4211 else
4212 modifier = WIDEN;
4214 /* Multiple types in SLP are handled by creating the appropriate number of
4215 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4216 case of SLP. */
4217 if (slp_node)
4218 ncopies = 1;
4219 else if (modifier == NARROW)
4220 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4221 else
4222 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4224 /* Sanity check: make sure that at least one copy of the vectorized stmt
4225 needs to be generated. */
4226 gcc_assert (ncopies >= 1);
4228 bool found_mode = false;
4229 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4230 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4231 opt_scalar_mode rhs_mode_iter;
4233 /* Supportable by target? */
4234 switch (modifier)
4236 case NONE:
4237 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4238 return false;
4239 if (supportable_convert_operation (code, vectype_out, vectype_in,
4240 &decl1, &code1))
4241 break;
4242 /* FALLTHRU */
4243 unsupported:
4244 if (dump_enabled_p ())
4245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4246 "conversion not supported by target.\n");
4247 return false;
4249 case WIDEN:
4250 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4251 &code1, &code2, &multi_step_cvt,
4252 &interm_types))
4254 /* Binary widening operation can only be supported directly by the
4255 architecture. */
4256 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4257 break;
4260 if (code != FLOAT_EXPR
4261 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4262 goto unsupported;
4264 fltsz = GET_MODE_SIZE (lhs_mode);
4265 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4267 rhs_mode = rhs_mode_iter.require ();
4268 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4269 break;
4271 cvt_type
4272 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4273 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4274 if (cvt_type == NULL_TREE)
4275 goto unsupported;
4277 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4279 if (!supportable_convert_operation (code, vectype_out,
4280 cvt_type, &decl1, &codecvt1))
4281 goto unsupported;
4283 else if (!supportable_widening_operation (code, stmt, vectype_out,
4284 cvt_type, &codecvt1,
4285 &codecvt2, &multi_step_cvt,
4286 &interm_types))
4287 continue;
4288 else
4289 gcc_assert (multi_step_cvt == 0);
4291 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4292 vectype_in, &code1, &code2,
4293 &multi_step_cvt, &interm_types))
4295 found_mode = true;
4296 break;
4300 if (!found_mode)
4301 goto unsupported;
4303 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4304 codecvt2 = ERROR_MARK;
4305 else
4307 multi_step_cvt++;
4308 interm_types.safe_push (cvt_type);
4309 cvt_type = NULL_TREE;
4311 break;
4313 case NARROW:
4314 gcc_assert (op_type == unary_op);
4315 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4316 &code1, &multi_step_cvt,
4317 &interm_types))
4318 break;
4320 if (code != FIX_TRUNC_EXPR
4321 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4322 goto unsupported;
4324 cvt_type
4325 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4326 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4327 if (cvt_type == NULL_TREE)
4328 goto unsupported;
4329 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4330 &decl1, &codecvt1))
4331 goto unsupported;
4332 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4333 &code1, &multi_step_cvt,
4334 &interm_types))
4335 break;
4336 goto unsupported;
4338 default:
4339 gcc_unreachable ();
4342 if (!vec_stmt) /* transformation not required. */
4344 if (dump_enabled_p ())
4345 dump_printf_loc (MSG_NOTE, vect_location,
4346 "=== vectorizable_conversion ===\n");
4347 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4349 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4350 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4352 else if (modifier == NARROW)
4354 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4355 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4357 else
4359 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4360 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4362 interm_types.release ();
4363 return true;
4366 /* Transform. */
4367 if (dump_enabled_p ())
4368 dump_printf_loc (MSG_NOTE, vect_location,
4369 "transform conversion. ncopies = %d.\n", ncopies);
4371 if (op_type == binary_op)
4373 if (CONSTANT_CLASS_P (op0))
4374 op0 = fold_convert (TREE_TYPE (op1), op0);
4375 else if (CONSTANT_CLASS_P (op1))
4376 op1 = fold_convert (TREE_TYPE (op0), op1);
4379 /* In case of multi-step conversion, we first generate conversion operations
4380 to the intermediate types, and then from those types to the final one.
4381 We create vector destinations for the intermediate type (TYPES) received
4382 from supportable_*_operation, and store them in the correct order
4383 for future use in vect_create_vectorized_*_stmts (). */
4384 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4385 vec_dest = vect_create_destination_var (scalar_dest,
4386 (cvt_type && modifier == WIDEN)
4387 ? cvt_type : vectype_out);
4388 vec_dsts.quick_push (vec_dest);
4390 if (multi_step_cvt)
4392 for (i = interm_types.length () - 1;
4393 interm_types.iterate (i, &intermediate_type); i--)
4395 vec_dest = vect_create_destination_var (scalar_dest,
4396 intermediate_type);
4397 vec_dsts.quick_push (vec_dest);
4401 if (cvt_type)
4402 vec_dest = vect_create_destination_var (scalar_dest,
4403 modifier == WIDEN
4404 ? vectype_out : cvt_type);
4406 if (!slp_node)
4408 if (modifier == WIDEN)
4410 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4411 if (op_type == binary_op)
4412 vec_oprnds1.create (1);
4414 else if (modifier == NARROW)
4415 vec_oprnds0.create (
4416 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4418 else if (code == WIDEN_LSHIFT_EXPR)
4419 vec_oprnds1.create (slp_node->vec_stmts_size);
4421 last_oprnd = op0;
4422 prev_stmt_info = NULL;
4423 switch (modifier)
4425 case NONE:
4426 for (j = 0; j < ncopies; j++)
4428 if (j == 0)
4429 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4430 else
4431 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4433 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4435 /* Arguments are ready, create the new vector stmt. */
4436 if (code1 == CALL_EXPR)
4438 new_stmt = gimple_build_call (decl1, 1, vop0);
4439 new_temp = make_ssa_name (vec_dest, new_stmt);
4440 gimple_call_set_lhs (new_stmt, new_temp);
4442 else
4444 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4445 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4446 new_temp = make_ssa_name (vec_dest, new_stmt);
4447 gimple_assign_set_lhs (new_stmt, new_temp);
4450 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4451 if (slp_node)
4452 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4453 else
4455 if (!prev_stmt_info)
4456 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4457 else
4458 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4459 prev_stmt_info = vinfo_for_stmt (new_stmt);
4463 break;
4465 case WIDEN:
4466 /* In case the vectorization factor (VF) is bigger than the number
4467 of elements that we can fit in a vectype (nunits), we have to
4468 generate more than one vector stmt, i.e. we need to "unroll"
4469 the vector stmt by a factor VF/nunits. */
4470 for (j = 0; j < ncopies; j++)
4472 /* Handle uses. */
4473 if (j == 0)
4475 if (slp_node)
4477 if (code == WIDEN_LSHIFT_EXPR)
4479 unsigned int k;
4481 vec_oprnd1 = op1;
4482 /* Store vec_oprnd1 for every vector stmt to be created
4483 for SLP_NODE. We check during the analysis that all
4484 the shift arguments are the same. */
4485 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4486 vec_oprnds1.quick_push (vec_oprnd1);
4488 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4489 slp_node);
4491 else
4492 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4493 &vec_oprnds1, slp_node);
4495 else
4497 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4498 vec_oprnds0.quick_push (vec_oprnd0);
4499 if (op_type == binary_op)
4501 if (code == WIDEN_LSHIFT_EXPR)
4502 vec_oprnd1 = op1;
4503 else
4504 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4505 vec_oprnds1.quick_push (vec_oprnd1);
4509 else
4511 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4512 vec_oprnds0.truncate (0);
4513 vec_oprnds0.quick_push (vec_oprnd0);
4514 if (op_type == binary_op)
4516 if (code == WIDEN_LSHIFT_EXPR)
4517 vec_oprnd1 = op1;
4518 else
4519 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4520 vec_oprnd1);
4521 vec_oprnds1.truncate (0);
4522 vec_oprnds1.quick_push (vec_oprnd1);
4526 /* Arguments are ready. Create the new vector stmts. */
4527 for (i = multi_step_cvt; i >= 0; i--)
4529 tree this_dest = vec_dsts[i];
4530 enum tree_code c1 = code1, c2 = code2;
4531 if (i == 0 && codecvt2 != ERROR_MARK)
4533 c1 = codecvt1;
4534 c2 = codecvt2;
4536 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4537 &vec_oprnds1,
4538 stmt, this_dest, gsi,
4539 c1, c2, decl1, decl2,
4540 op_type);
4543 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4545 if (cvt_type)
4547 if (codecvt1 == CALL_EXPR)
4549 new_stmt = gimple_build_call (decl1, 1, vop0);
4550 new_temp = make_ssa_name (vec_dest, new_stmt);
4551 gimple_call_set_lhs (new_stmt, new_temp);
4553 else
4555 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4556 new_temp = make_ssa_name (vec_dest);
4557 new_stmt = gimple_build_assign (new_temp, codecvt1,
4558 vop0);
4561 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4563 else
4564 new_stmt = SSA_NAME_DEF_STMT (vop0);
4566 if (slp_node)
4567 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4568 else
4570 if (!prev_stmt_info)
4571 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4572 else
4573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4574 prev_stmt_info = vinfo_for_stmt (new_stmt);
4579 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4580 break;
4582 case NARROW:
4583 /* In case the vectorization factor (VF) is bigger than the number
4584 of elements that we can fit in a vectype (nunits), we have to
4585 generate more than one vector stmt, i.e. we need to "unroll"
4586 the vector stmt by a factor VF/nunits. */
4587 for (j = 0; j < ncopies; j++)
4589 /* Handle uses. */
4590 if (slp_node)
4591 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4592 slp_node);
4593 else
4595 vec_oprnds0.truncate (0);
4596 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4597 vect_pow2 (multi_step_cvt) - 1);
4600 /* Arguments are ready. Create the new vector stmts. */
4601 if (cvt_type)
4602 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4604 if (codecvt1 == CALL_EXPR)
4606 new_stmt = gimple_build_call (decl1, 1, vop0);
4607 new_temp = make_ssa_name (vec_dest, new_stmt);
4608 gimple_call_set_lhs (new_stmt, new_temp);
4610 else
4612 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4613 new_temp = make_ssa_name (vec_dest);
4614 new_stmt = gimple_build_assign (new_temp, codecvt1,
4615 vop0);
4618 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4619 vec_oprnds0[i] = new_temp;
4622 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4623 stmt, vec_dsts, gsi,
4624 slp_node, code1,
4625 &prev_stmt_info);
4628 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4629 break;
4632 vec_oprnds0.release ();
4633 vec_oprnds1.release ();
4634 interm_types.release ();
4636 return true;
4640 /* Function vectorizable_assignment.
4642 Check if STMT performs an assignment (copy) that can be vectorized.
4643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4644 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
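/* Illustration only: copies and conversions that keep both the element
   count and the vector size, e.g.
     int i; unsigned u = (unsigned) i;
   are handled here by emitting one VIEW_CONVERT_EXPR (or plain copy)
   per vector statement. */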
4647 static bool
4648 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4649 gimple **vec_stmt, slp_tree slp_node)
4651 tree vec_dest;
4652 tree scalar_dest;
4653 tree op;
4654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4655 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4656 tree new_temp;
4657 gimple *def_stmt;
4658 enum vect_def_type dt[1] = {vect_unknown_def_type};
4659 int ndts = 1;
4660 int ncopies;
4661 int i, j;
4662 vec<tree> vec_oprnds = vNULL;
4663 tree vop;
4664 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4665 vec_info *vinfo = stmt_info->vinfo;
4666 gimple *new_stmt = NULL;
4667 stmt_vec_info prev_stmt_info = NULL;
4668 enum tree_code code;
4669 tree vectype_in;
4671 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4672 return false;
4674 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4675 && ! vec_stmt)
4676 return false;
4678 /* Is vectorizable assignment? */
4679 if (!is_gimple_assign (stmt))
4680 return false;
4682 scalar_dest = gimple_assign_lhs (stmt);
4683 if (TREE_CODE (scalar_dest) != SSA_NAME)
4684 return false;
4686 code = gimple_assign_rhs_code (stmt);
4687 if (gimple_assign_single_p (stmt)
4688 || code == PAREN_EXPR
4689 || CONVERT_EXPR_CODE_P (code))
4690 op = gimple_assign_rhs1 (stmt);
4691 else
4692 return false;
4694 if (code == VIEW_CONVERT_EXPR)
4695 op = TREE_OPERAND (op, 0);
4697 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4698 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4700 /* Multiple types in SLP are handled by creating the appropriate number of
4701 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4702 case of SLP. */
4703 if (slp_node)
4704 ncopies = 1;
4705 else
4706 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4708 gcc_assert (ncopies >= 1);
4710 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4714 "use not simple.\n");
4715 return false;
4718 /* We can handle NOP_EXPR conversions that do not change the number
4719 of elements or the vector size. */
4720 if ((CONVERT_EXPR_CODE_P (code)
4721 || code == VIEW_CONVERT_EXPR)
4722 && (!vectype_in
4723 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4724 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4725 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4726 return false;
4728 /* We do not handle bit-precision changes. */
4729 if ((CONVERT_EXPR_CODE_P (code)
4730 || code == VIEW_CONVERT_EXPR)
4731 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4732 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4733 || !type_has_mode_precision_p (TREE_TYPE (op)))
4734 /* But a conversion that does not change the bit-pattern is ok. */
4735 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4736 > TYPE_PRECISION (TREE_TYPE (op)))
4737 && TYPE_UNSIGNED (TREE_TYPE (op)))
4738 /* Conversion between boolean types of different sizes is
4739 a simple assignment in case their vectypes are the same
4740 boolean vectors. */
4741 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4742 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4746 "type conversion to/from bit-precision "
4747 "unsupported.\n");
4748 return false;
4751 if (!vec_stmt) /* transformation not required. */
4753 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_NOTE, vect_location,
4756 "=== vectorizable_assignment ===\n");
4757 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4758 return true;
4761 /* Transform. */
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4765 /* Handle def. */
4766 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4768 /* Handle use. */
4769 for (j = 0; j < ncopies; j++)
4771 /* Handle uses. */
4772 if (j == 0)
4773 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4774 else
4775 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4777 /* Arguments are ready. Create the new vector stmt. */
4778 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4780 if (CONVERT_EXPR_CODE_P (code)
4781 || code == VIEW_CONVERT_EXPR)
4782 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4783 new_stmt = gimple_build_assign (vec_dest, vop);
4784 new_temp = make_ssa_name (vec_dest, new_stmt);
4785 gimple_assign_set_lhs (new_stmt, new_temp);
4786 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4787 if (slp_node)
4788 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4791 if (slp_node)
4792 continue;
4794 if (j == 0)
4795 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4796 else
4797 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4799 prev_stmt_info = vinfo_for_stmt (new_stmt);
4802 vec_oprnds.release ();
4803 return true;
4807 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4808 either as shift by a scalar or by a vector. */
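/* Note: this only checks that some shift optab (vector-by-scalar or
   vector-by-vector form) exists for the corresponding vector mode; the
   actual choice between the two forms is made later in
   vectorizable_shift. */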
4810 bool
4811 vect_supportable_shift (enum tree_code code, tree scalar_type)
4814 machine_mode vec_mode;
4815 optab optab;
4816 int icode;
4817 tree vectype;
4819 vectype = get_vectype_for_scalar_type (scalar_type);
4820 if (!vectype)
4821 return false;
4823 optab = optab_for_tree_code (code, vectype, optab_scalar);
4824 if (!optab
4825 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4827 optab = optab_for_tree_code (code, vectype, optab_vector);
4828 if (!optab
4829 || (optab_handler (optab, TYPE_MODE (vectype))
4830 == CODE_FOR_nothing))
4831 return false;
4834 vec_mode = TYPE_MODE (vectype);
4835 icode = (int) optab_handler (optab, vec_mode);
4836 if (icode == CODE_FOR_nothing)
4837 return false;
4839 return true;
4843 /* Function vectorizable_shift.
4845 Check if STMT performs a shift operation that can be vectorized.
4846 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4847 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4848 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4850 static bool
4851 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4852 gimple **vec_stmt, slp_tree slp_node)
4854 tree vec_dest;
4855 tree scalar_dest;
4856 tree op0, op1 = NULL;
4857 tree vec_oprnd1 = NULL_TREE;
4858 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4859 tree vectype;
4860 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4861 enum tree_code code;
4862 machine_mode vec_mode;
4863 tree new_temp;
4864 optab optab;
4865 int icode;
4866 machine_mode optab_op2_mode;
4867 gimple *def_stmt;
4868 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4869 int ndts = 2;
4870 gimple *new_stmt = NULL;
4871 stmt_vec_info prev_stmt_info;
4872 int nunits_in;
4873 int nunits_out;
4874 tree vectype_out;
4875 tree op1_vectype;
4876 int ncopies;
4877 int j, i;
4878 vec<tree> vec_oprnds0 = vNULL;
4879 vec<tree> vec_oprnds1 = vNULL;
4880 tree vop0, vop1;
4881 unsigned int k;
4882 bool scalar_shift_arg = true;
4883 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4884 vec_info *vinfo = stmt_info->vinfo;
4886 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4887 return false;
4889 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4890 && ! vec_stmt)
4891 return false;
4893 /* Is STMT a vectorizable binary/unary operation? */
4894 if (!is_gimple_assign (stmt))
4895 return false;
4897 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4898 return false;
4900 code = gimple_assign_rhs_code (stmt);
4902 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4903 || code == RROTATE_EXPR))
4904 return false;
4906 scalar_dest = gimple_assign_lhs (stmt);
4907 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4908 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4910 if (dump_enabled_p ())
4911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4912 "bit-precision shifts not supported.\n");
4913 return false;
4916 op0 = gimple_assign_rhs1 (stmt);
4917 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4919 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921 "use not simple.\n");
4922 return false;
4924 /* If op0 is an external or constant def use a vector type with
4925 the same size as the output vector type. */
4926 if (!vectype)
4927 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4928 if (vec_stmt)
4929 gcc_assert (vectype);
4930 if (!vectype)
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4934 "no vectype for scalar type\n");
4935 return false;
4938 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4939 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4940 if (nunits_out != nunits_in)
4941 return false;
4943 op1 = gimple_assign_rhs2 (stmt);
4944 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4946 if (dump_enabled_p ())
4947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4948 "use not simple.\n");
4949 return false;
4952 /* Multiple types in SLP are handled by creating the appropriate number of
4953 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4954 case of SLP. */
4955 if (slp_node)
4956 ncopies = 1;
4957 else
4958 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4960 gcc_assert (ncopies >= 1);
4962 /* Determine whether the shift amount is a vector or a scalar. If the
4963 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4965 if ((dt[1] == vect_internal_def
4966 || dt[1] == vect_induction_def)
4967 && !slp_node)
4968 scalar_shift_arg = false;
4969 else if (dt[1] == vect_constant_def
4970 || dt[1] == vect_external_def
4971 || dt[1] == vect_internal_def)
4973 /* In SLP, we need to check whether the shift count is the same for all
4974 statements; in loops, if it is a constant or invariant, it is
4975 always a scalar shift. */
4976 if (slp_node)
4978 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4979 gimple *slpstmt;
4981 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4982 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4983 scalar_shift_arg = false;
4986 /* If the shift amount is computed by a pattern stmt we cannot
4987 use the scalar amount directly, so give up and use a vector
4988 shift. */
4989 if (dt[1] == vect_internal_def)
4991 gimple *def = SSA_NAME_DEF_STMT (op1);
4992 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4993 scalar_shift_arg = false;
4996 else
4998 if (dump_enabled_p ())
4999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5000 "operand mode requires invariant argument.\n");
5001 return false;
5004 /* Vector shifted by vector. */
5005 if (!scalar_shift_arg)
5007 optab = optab_for_tree_code (code, vectype, optab_vector);
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE, vect_location,
5010 "vector/vector shift/rotate found.\n");
5012 if (!op1_vectype)
5013 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5014 if (op1_vectype == NULL_TREE
5015 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5019 "unusable type for last operand in"
5020 " vector/vector shift/rotate.\n");
5021 return false;
5024 /* See if the machine has a vector shifted by scalar insn and if not
5025 then see if it has a vector shifted by vector insn. */
5026 else
5028 optab = optab_for_tree_code (code, vectype, optab_scalar);
5029 if (optab
5030 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5032 if (dump_enabled_p ())
5033 dump_printf_loc (MSG_NOTE, vect_location,
5034 "vector/scalar shift/rotate found.\n");
5036 else
5038 optab = optab_for_tree_code (code, vectype, optab_vector);
5039 if (optab
5040 && (optab_handler (optab, TYPE_MODE (vectype))
5041 != CODE_FOR_nothing))
5043 scalar_shift_arg = false;
5045 if (dump_enabled_p ())
5046 dump_printf_loc (MSG_NOTE, vect_location,
5047 "vector/vector shift/rotate found.\n");
5049 /* Unlike the other binary operators, shifts/rotates take an int
5050 rhs instead of one of the same type as the lhs, so make sure
5051 the scalar is the right type if we are dealing with vectors
5052 of long long/long/short/char.
5053 if (dt[1] == vect_constant_def)
5054 op1 = fold_convert (TREE_TYPE (vectype), op1);
5055 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5056 TREE_TYPE (op1)))
5058 if (slp_node
5059 && TYPE_MODE (TREE_TYPE (vectype))
5060 != TYPE_MODE (TREE_TYPE (op1)))
5062 if (dump_enabled_p ())
5063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5064 "unusable type for last operand in"
5065 " vector/vector shift/rotate.\n");
5066 return false;
5068 if (vec_stmt && !slp_node)
5070 op1 = fold_convert (TREE_TYPE (vectype), op1);
5071 op1 = vect_init_vector (stmt, op1,
5072 TREE_TYPE (vectype), NULL);
5079 /* Supportable by target? */
5080 if (!optab)
5082 if (dump_enabled_p ())
5083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5084 "no optab.\n");
5085 return false;
5087 vec_mode = TYPE_MODE (vectype);
5088 icode = (int) optab_handler (optab, vec_mode);
5089 if (icode == CODE_FOR_nothing)
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5093 "op not supported by target.\n");
5094 /* Check only during analysis. */
5095 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5096 || (!vec_stmt
5097 && !vect_worthwhile_without_simd_p (vinfo, code)))
5098 return false;
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_NOTE, vect_location,
5101 "proceeding using word mode.\n");
5104 /* Worthwhile without SIMD support? Check only during analysis. */
5105 if (!vec_stmt
5106 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5107 && !vect_worthwhile_without_simd_p (vinfo, code))
5109 if (dump_enabled_p ())
5110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5111 "not worthwhile without SIMD support.\n");
5112 return false;
5115 if (!vec_stmt) /* transformation not required. */
5117 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_NOTE, vect_location,
5120 "=== vectorizable_shift ===\n");
5121 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5122 return true;
5125 /* Transform. */
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_NOTE, vect_location,
5129 "transform binary/unary operation.\n");
5131 /* Handle def. */
5132 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5134 prev_stmt_info = NULL;
5135 for (j = 0; j < ncopies; j++)
5137 /* Handle uses. */
5138 if (j == 0)
5140 if (scalar_shift_arg)
5142 /* Vector shl and shr insn patterns can be defined with scalar
5143 operand 2 (shift operand). In this case, use constant or loop
5144 invariant op1 directly, without extending it to vector mode
5145 first. */
5146 optab_op2_mode = insn_data[icode].operand[2].mode;
5147 if (!VECTOR_MODE_P (optab_op2_mode))
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_NOTE, vect_location,
5151 "operand 1 using scalar mode.\n");
5152 vec_oprnd1 = op1;
5153 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5154 vec_oprnds1.quick_push (vec_oprnd1);
5155 if (slp_node)
5157 /* Store vec_oprnd1 for every vector stmt to be created
5158 for SLP_NODE. We check during the analysis that all
5159 the shift arguments are the same.
5160 TODO: Allow different constants for different vector
5161 stmts generated for an SLP instance. */
5162 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5163 vec_oprnds1.quick_push (vec_oprnd1);
5168 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5169 (a special case for certain kinds of vector shifts); otherwise,
5170 operand 1 should be of a vector type (the usual case). */
5171 if (vec_oprnd1)
5172 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5173 slp_node);
5174 else
5175 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5176 slp_node);
5178 else
5179 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5181 /* Arguments are ready. Create the new vector stmt. */
5182 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5184 vop1 = vec_oprnds1[i];
5185 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5186 new_temp = make_ssa_name (vec_dest, new_stmt);
5187 gimple_assign_set_lhs (new_stmt, new_temp);
5188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5189 if (slp_node)
5190 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5193 if (slp_node)
5194 continue;
5196 if (j == 0)
5197 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5198 else
5199 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5200 prev_stmt_info = vinfo_for_stmt (new_stmt);
5203 vec_oprnds0.release ();
5204 vec_oprnds1.release ();
5206 return true;
5210 /* Function vectorizable_operation.
5212 Check if STMT performs a binary, unary or ternary operation that can
5213 be vectorized.
5214 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5215 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5216 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5218 static bool
5219 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5220 gimple **vec_stmt, slp_tree slp_node)
5222 tree vec_dest;
5223 tree scalar_dest;
5224 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5226 tree vectype;
5227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5228 enum tree_code code;
5229 machine_mode vec_mode;
5230 tree new_temp;
5231 int op_type;
5232 optab optab;
5233 bool target_support_p;
5234 gimple *def_stmt;
5235 enum vect_def_type dt[3]
5236 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5237 int ndts = 3;
5238 gimple *new_stmt = NULL;
5239 stmt_vec_info prev_stmt_info;
5240 int nunits_in;
5241 int nunits_out;
5242 tree vectype_out;
5243 int ncopies;
5244 int j, i;
5245 vec<tree> vec_oprnds0 = vNULL;
5246 vec<tree> vec_oprnds1 = vNULL;
5247 vec<tree> vec_oprnds2 = vNULL;
5248 tree vop0, vop1, vop2;
5249 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5250 vec_info *vinfo = stmt_info->vinfo;
5252 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5253 return false;
5255 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5256 && ! vec_stmt)
5257 return false;
5259 /* Is STMT a vectorizable binary/unary operation? */
5260 if (!is_gimple_assign (stmt))
5261 return false;
5263 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5264 return false;
5266 code = gimple_assign_rhs_code (stmt);
5268 /* For pointer addition and subtraction, we should use the normal
5269 plus and minus for the vector operation. */
5270 if (code == POINTER_PLUS_EXPR)
5271 code = PLUS_EXPR;
5272 if (code == POINTER_DIFF_EXPR)
5273 code = MINUS_EXPR;
5275 /* Support only unary, binary or ternary operations. */
5276 op_type = TREE_CODE_LENGTH (code);
5277 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5281 "num. args = %d (not unary/binary/ternary op).\n",
5282 op_type);
5283 return false;
5286 scalar_dest = gimple_assign_lhs (stmt);
5287 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5289 /* Most operations cannot handle bit-precision types without extra
5290 truncations. */
5291 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5292 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5293 /* Exceptions are bitwise binary operations. */
5294 && code != BIT_IOR_EXPR
5295 && code != BIT_XOR_EXPR
5296 && code != BIT_AND_EXPR)
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5300 "bit-precision arithmetic not supported.\n");
5301 return false;
5304 op0 = gimple_assign_rhs1 (stmt);
5305 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5307 if (dump_enabled_p ())
5308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5309 "use not simple.\n");
5310 return false;
5312 /* If op0 is an external or constant def use a vector type with
5313 the same size as the output vector type. */
5314 if (!vectype)
5316 /* For a boolean type we cannot determine the vectype from an
5317 invariant value (we don't know whether it is a vector of
5318 booleans or a vector of integers). We use the output
5319 vectype because operations on booleans don't change the
5320 type. */
5321 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5323 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5325 if (dump_enabled_p ())
5326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5327 "not supported operation on bool value.\n");
5328 return false;
5330 vectype = vectype_out;
5332 else
5333 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5335 if (vec_stmt)
5336 gcc_assert (vectype);
5337 if (!vectype)
5339 if (dump_enabled_p ())
5341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5342 "no vectype for scalar type ");
5343 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5344 TREE_TYPE (op0));
5345 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5348 return false;
5351 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5352 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5353 if (nunits_out != nunits_in)
5354 return false;
5356 if (op_type == binary_op || op_type == ternary_op)
5358 op1 = gimple_assign_rhs2 (stmt);
5359 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5361 if (dump_enabled_p ())
5362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5363 "use not simple.\n");
5364 return false;
5367 if (op_type == ternary_op)
5369 op2 = gimple_assign_rhs3 (stmt);
5370 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5374 "use not simple.\n");
5375 return false;
5379 /* Multiple types in SLP are handled by creating the appropriate number of
5380 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5381 case of SLP. */
5382 if (slp_node)
5383 ncopies = 1;
5384 else
5385 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5387 gcc_assert (ncopies >= 1);
5389 /* Shifts are handled in vectorizable_shift (). */
5390 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5391 || code == RROTATE_EXPR)
5392 return false;
5394 /* Supportable by target? */
5396 vec_mode = TYPE_MODE (vectype);
5397 if (code == MULT_HIGHPART_EXPR)
5398 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5399 else
5401 optab = optab_for_tree_code (code, vectype, optab_default);
5402 if (!optab)
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "no optab.\n");
5407 return false;
5409 target_support_p = (optab_handler (optab, vec_mode)
5410 != CODE_FOR_nothing);
5413 if (!target_support_p)
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "op not supported by target.\n");
5418 /* Check only during analysis. */
5419 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5420 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5421 return false;
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_NOTE, vect_location,
5424 "proceeding using word mode.\n");
5427 /* Worthwhile without SIMD support? Check only during analysis. */
5428 if (!VECTOR_MODE_P (vec_mode)
5429 && !vec_stmt
5430 && !vect_worthwhile_without_simd_p (vinfo, code))
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5434 "not worthwhile without SIMD support.\n");
5435 return false;
5438 if (!vec_stmt) /* transformation not required. */
5440 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_NOTE, vect_location,
5443 "=== vectorizable_operation ===\n");
5444 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5445 return true;
5448 /* Transform. */
5450 if (dump_enabled_p ())
5451 dump_printf_loc (MSG_NOTE, vect_location,
5452 "transform binary/unary operation.\n");
5454 /* Handle def. */
5455 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5457 /* In case the vectorization factor (VF) is bigger than the number
5458 of elements that we can fit in a vectype (nunits), we have to generate
5459 more than one vector stmt, i.e. we need to "unroll" the
5460 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5461 from one copy of the vector stmt to the next, in the field
5462 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5463 stages to find the correct vector defs to be used when vectorizing
5464 stmts that use the defs of the current stmt. The example below
5465 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5466 we need to create 4 vectorized stmts):
5468 before vectorization:
5469 RELATED_STMT VEC_STMT
5470 S1: x = memref - -
5471 S2: z = x + 1 - -
5473 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5474 there):
5475 RELATED_STMT VEC_STMT
5476 VS1_0: vx0 = memref0 VS1_1 -
5477 VS1_1: vx1 = memref1 VS1_2 -
5478 VS1_2: vx2 = memref2 VS1_3 -
5479 VS1_3: vx3 = memref3 - -
5480 S1: x = load - VS1_0
5481 S2: z = x + 1 - -
5483 step 2: vectorize stmt S2 (done here):
5484 To vectorize stmt S2 we first need to find the relevant vector
5485 def for the first operand 'x'. This is, as usual, obtained from
5486 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5487 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5488 relevant vector def 'vx0'. Having found 'vx0' we can generate
5489 the vector stmt VS2_0, and as usual, record it in the
5490 STMT_VINFO_VEC_STMT of stmt S2.
5491 When creating the second copy (VS2_1), we obtain the relevant vector
5492 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5493 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5494 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5495 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5496 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5497 chain of stmts and pointers:
5498 RELATED_STMT VEC_STMT
5499 VS1_0: vx0 = memref0 VS1_1 -
5500 VS1_1: vx1 = memref1 VS1_2 -
5501 VS1_2: vx2 = memref2 VS1_3 -
5502 VS1_3: vx3 = memref3 - -
5503 S1: x = load - VS1_0
5504 VS2_0: vz0 = vx0 + v1 VS2_1 -
5505 VS2_1: vz1 = vx1 + v1 VS2_2 -
5506 VS2_2: vz2 = vx2 + v1 VS2_3 -
5507 VS2_3: vz3 = vx3 + v1 - -
5508 S2: z = x + 1 - VS2_0 */
5510 prev_stmt_info = NULL;
5511 for (j = 0; j < ncopies; j++)
5513 /* Handle uses. */
5514 if (j == 0)
5516 if (op_type == binary_op || op_type == ternary_op)
5517 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5518 slp_node);
5519 else
5520 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5521 slp_node);
5522 if (op_type == ternary_op)
5523 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5524 slp_node);
5526 else
5528 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5529 if (op_type == ternary_op)
5531 tree vec_oprnd = vec_oprnds2.pop ();
5532 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5533 vec_oprnd));
5537 /* Arguments are ready. Create the new vector stmt. */
5538 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5540 vop1 = ((op_type == binary_op || op_type == ternary_op)
5541 ? vec_oprnds1[i] : NULL_TREE);
5542 vop2 = ((op_type == ternary_op)
5543 ? vec_oprnds2[i] : NULL_TREE);
5544 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5545 new_temp = make_ssa_name (vec_dest, new_stmt);
5546 gimple_assign_set_lhs (new_stmt, new_temp);
5547 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5548 if (slp_node)
5549 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5552 if (slp_node)
5553 continue;
5555 if (j == 0)
5556 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5557 else
5558 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5559 prev_stmt_info = vinfo_for_stmt (new_stmt);
5562 vec_oprnds0.release ();
5563 vec_oprnds1.release ();
5564 vec_oprnds2.release ();
5566 return true;
5569 /* A helper function to ensure data reference DR's base alignment. */
5571 static void
5572 ensure_base_align (struct data_reference *dr)
5574 if (!dr->aux)
5575 return;
5577 if (DR_VECT_AUX (dr)->base_misaligned)
5579 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5581 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5583 if (decl_in_symtab_p (base_decl))
5584 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5585 else
5587 SET_DECL_ALIGN (base_decl, align_base_to);
5588 DECL_USER_ALIGN (base_decl) = 1;
5590 DR_VECT_AUX (dr)->base_misaligned = false;
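/* For example (a hypothetical decl, not from this file): if the base object
   is a file-scope array

       static double a[256];

   whose default alignment is smaller than DR_TARGET_ALIGNMENT, the effect of
   the code above is equivalent to having declared

       static double a[256] __attribute__ ((aligned (32)));

   (assuming a 32-byte target alignment), applied either through the symtab
   node or by setting DECL_ALIGN/DECL_USER_ALIGN directly.  */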
5595 /* Function get_group_alias_ptr_type.
5597 Return the alias type for the group starting at FIRST_STMT. */
5599 static tree
5600 get_group_alias_ptr_type (gimple *first_stmt)
5602 struct data_reference *first_dr, *next_dr;
5603 gimple *next_stmt;
5605 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5606 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5607 while (next_stmt)
5609 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5610 if (get_alias_set (DR_REF (first_dr))
5611 != get_alias_set (DR_REF (next_dr)))
5613 if (dump_enabled_p ())
5614 dump_printf_loc (MSG_NOTE, vect_location,
5615 "conflicting alias set types.\n");
5616 return ptr_type_node;
5618 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5620 return reference_alias_ptr_type (DR_REF (first_dr));
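/* A hypothetical case (not from this file) in which the group members can
   end up with conflicting alias sets, triggering the ptr_type_node fallback
   above:

       struct S { int i; float f; };

       void
       init (struct S *p, int n)
       {
         for (int j = 0; j < n; j++)
           {
             p[j].i = 0;
             p[j].f = 0.0f;
           }
       }

   If both stores are placed in one interleaving group, their DR_REFs carry
   different alias sets (int vs. float) and ptr_type_node is used.  */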
5624 /* Function vectorizable_store.
5626 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5627 can be vectorized.
5628 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5629 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5630 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5632 static bool
5633 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5634 slp_tree slp_node)
5636 tree scalar_dest;
5637 tree data_ref;
5638 tree op;
5639 tree vec_oprnd = NULL_TREE;
5640 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5641 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5642 tree elem_type;
5643 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5644 struct loop *loop = NULL;
5645 machine_mode vec_mode;
5646 tree dummy;
5647 enum dr_alignment_support alignment_support_scheme;
5648 gimple *def_stmt;
5649 enum vect_def_type dt;
5650 stmt_vec_info prev_stmt_info = NULL;
5651 tree dataref_ptr = NULL_TREE;
5652 tree dataref_offset = NULL_TREE;
5653 gimple *ptr_incr = NULL;
5654 int ncopies;
5655 int j;
5656 gimple *next_stmt, *first_stmt;
5657 bool grouped_store;
5658 unsigned int group_size, i;
5659 vec<tree> oprnds = vNULL;
5660 vec<tree> result_chain = vNULL;
5661 bool inv_p;
5662 tree offset = NULL_TREE;
5663 vec<tree> vec_oprnds = vNULL;
5664 bool slp = (slp_node != NULL);
5665 unsigned int vec_num;
5666 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5667 vec_info *vinfo = stmt_info->vinfo;
5668 tree aggr_type;
5669 gather_scatter_info gs_info;
5670 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5671 gimple *new_stmt;
5672 int vf;
5673 vec_load_store_type vls_type;
5674 tree ref_type;
5676 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5677 return false;
5679 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5680 && ! vec_stmt)
5681 return false;
5683 /* Is vectorizable store? */
5685 if (!is_gimple_assign (stmt))
5686 return false;
5688 scalar_dest = gimple_assign_lhs (stmt);
5689 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5690 && is_pattern_stmt_p (stmt_info))
5691 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5692 if (TREE_CODE (scalar_dest) != ARRAY_REF
5693 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5694 && TREE_CODE (scalar_dest) != INDIRECT_REF
5695 && TREE_CODE (scalar_dest) != COMPONENT_REF
5696 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5697 && TREE_CODE (scalar_dest) != REALPART_EXPR
5698 && TREE_CODE (scalar_dest) != MEM_REF)
5699 return false;
5701 /* Cannot have hybrid store SLP -- that would mean storing to the
5702 same location twice. */
5703 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5705 gcc_assert (gimple_assign_single_p (stmt));
5707 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5708 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5710 if (loop_vinfo)
5712 loop = LOOP_VINFO_LOOP (loop_vinfo);
5713 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5715 else
5716 vf = 1;
5718 /* Multiple types in SLP are handled by creating the appropriate number of
5719 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5720 case of SLP. */
5721 if (slp)
5722 ncopies = 1;
5723 else
5724 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5726 gcc_assert (ncopies >= 1);
5728 /* FORNOW. This restriction should be relaxed. */
5729 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733 "multiple types in nested loop.\n");
5734 return false;
5737 op = gimple_assign_rhs1 (stmt);
5739 /* In case this is a store from a constant, make sure
5740 native_encode_expr can handle it. */
5741 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
5742 return false;
5744 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5748 "use not simple.\n");
5749 return false;
5752 if (dt == vect_constant_def || dt == vect_external_def)
5753 vls_type = VLS_STORE_INVARIANT;
5754 else
5755 vls_type = VLS_STORE;
5757 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5758 return false;
5760 elem_type = TREE_TYPE (vectype);
5761 vec_mode = TYPE_MODE (vectype);
5763 /* FORNOW. In some cases we can vectorize even if the data type is not
5764 supported (e.g. array initialization with 0). */
5765 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5766 return false;
5768 if (!STMT_VINFO_DATA_REF (stmt_info))
5769 return false;
5771 vect_memory_access_type memory_access_type;
5772 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5773 &memory_access_type, &gs_info))
5774 return false;
5776 if (!vec_stmt) /* transformation not required. */
5778 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5779 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5780 /* The SLP costs are calculated during SLP analysis. */
5781 if (!PURE_SLP_STMT (stmt_info))
5782 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5783 NULL, NULL, NULL);
5784 return true;
5786 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5788 /* Transform. */
5790 ensure_base_align (dr);
5792 if (memory_access_type == VMAT_GATHER_SCATTER)
5794 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5795 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5796 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5797 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5798 edge pe = loop_preheader_edge (loop);
5799 gimple_seq seq;
5800 basic_block new_bb;
5801 enum { NARROW, NONE, WIDEN } modifier;
5802 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5804 if (nunits == (unsigned int) scatter_off_nunits)
5805 modifier = NONE;
5806 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5808 modifier = WIDEN;
5810 auto_vec_perm_indices sel (scatter_off_nunits);
5811 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5812 sel.quick_push (i | nunits);
5814 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5815 gcc_assert (perm_mask != NULL_TREE);
5817 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5819 modifier = NARROW;
5821 auto_vec_perm_indices sel (nunits);
5822 for (i = 0; i < (unsigned int) nunits; ++i)
5823 sel.quick_push (i | scatter_off_nunits);
5825 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5826 gcc_assert (perm_mask != NULL_TREE);
5827 ncopies *= 2;
5829 else
5830 gcc_unreachable ();
5832 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5833 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5834 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5835 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5836 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5837 scaletype = TREE_VALUE (arglist);
5839 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5840 && TREE_CODE (rettype) == VOID_TYPE);
5842 ptr = fold_convert (ptrtype, gs_info.base);
5843 if (!is_gimple_min_invariant (ptr))
5845 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5846 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5847 gcc_assert (!new_bb);
5850 /* Currently we support only unconditional scatter stores,
5851 so mask should be all ones. */
5852 mask = build_int_cst (masktype, -1);
5853 mask = vect_init_vector (stmt, mask, masktype, NULL);
5855 scale = build_int_cst (scaletype, gs_info.scale);
5857 prev_stmt_info = NULL;
5858 for (j = 0; j < ncopies; ++j)
5860 if (j == 0)
5862 src = vec_oprnd1
5863 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5864 op = vec_oprnd0
5865 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5867 else if (modifier != NONE && (j & 1))
5869 if (modifier == WIDEN)
5871 src = vec_oprnd1
5872 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5873 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5874 stmt, gsi);
5876 else if (modifier == NARROW)
5878 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5879 stmt, gsi);
5880 op = vec_oprnd0
5881 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5882 vec_oprnd0);
5884 else
5885 gcc_unreachable ();
5887 else
5889 src = vec_oprnd1
5890 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5891 op = vec_oprnd0
5892 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5893 vec_oprnd0);
5896 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5898 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5899 == TYPE_VECTOR_SUBPARTS (srctype));
5900 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5901 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5902 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5903 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5904 src = var;
5907 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5909 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5910 == TYPE_VECTOR_SUBPARTS (idxtype));
5911 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5912 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5913 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5914 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5915 op = var;
5918 new_stmt
5919 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5923 if (prev_stmt_info == NULL)
5924 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5925 else
5926 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5927 prev_stmt_info = vinfo_for_stmt (new_stmt);
5929 return true;
5932 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5933 if (grouped_store)
5935 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5936 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5937 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5939 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5941 /* FORNOW */
5942 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5944 /* We vectorize all the stmts of the interleaving group when we
5945 reach the last stmt in the group. */
5946 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5947 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5948 && !slp)
5950 *vec_stmt = NULL;
5951 return true;
5954 if (slp)
5956 grouped_store = false;
5957 /* VEC_NUM is the number of vect stmts to be created for this
5958 group. */
5959 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5960 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5961 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5962 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5963 op = gimple_assign_rhs1 (first_stmt);
5965 else
5966 /* VEC_NUM is the number of vect stmts to be created for this
5967 group. */
5968 vec_num = group_size;
5970 ref_type = get_group_alias_ptr_type (first_stmt);
5972 else
5974 first_stmt = stmt;
5975 first_dr = dr;
5976 group_size = vec_num = 1;
5977 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5980 if (dump_enabled_p ())
5981 dump_printf_loc (MSG_NOTE, vect_location,
5982 "transform store. ncopies = %d\n", ncopies);
5984 if (memory_access_type == VMAT_ELEMENTWISE
5985 || memory_access_type == VMAT_STRIDED_SLP)
5987 gimple_stmt_iterator incr_gsi;
5988 bool insert_after;
5989 gimple *incr;
5990 tree offvar;
5991 tree ivstep;
5992 tree running_off;
5993 gimple_seq stmts = NULL;
5994 tree stride_base, stride_step, alias_off;
5995 tree vec_oprnd;
5996 unsigned int g;
5998 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6000 stride_base
6001 = fold_build_pointer_plus
6002 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6003 size_binop (PLUS_EXPR,
6004 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6005 convert_to_ptrofftype (DR_INIT (first_dr))));
6006 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6008 /* For a store with loop-invariant (but other than power-of-2)
6009 stride (i.e. not a grouped access) like so:
6011 for (i = 0; i < n; i += stride)
6012 array[i] = ...;
6014 we generate a new induction variable and new stores from
6015 the components of the (vectorized) rhs:
6017 for (j = 0; ; j += VF*stride)
6018 vectemp = ...;
6019 tmp1 = vectemp[0];
6020 array[j] = tmp1;
6021 tmp2 = vectemp[1];
6022 array[j + stride] = tmp2;
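/* A minimal sketch of the lane-extraction stores generated below for the
   VMAT_ELEMENTWISE case (hypothetical types, GNU C vector extensions):

       typedef int v4si __attribute__ ((vector_size (16)));

       void
       store_strided (int *array, v4si vectemp, long j, long stride)
       {
         array[j]              = vectemp[0];
         array[j + stride]     = vectemp[1];
         array[j + 2 * stride] = vectemp[2];
         array[j + 3 * stride] = vectemp[3];
       }

   Each right-hand side corresponds to a BIT_FIELD_REF extract of one lane
   of the vectorized rhs.  */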
6026 unsigned nstores = nunits;
6027 unsigned lnel = 1;
6028 tree ltype = elem_type;
6029 tree lvectype = vectype;
6030 if (slp)
6032 if (group_size < nunits
6033 && nunits % group_size == 0)
6035 nstores = nunits / group_size;
6036 lnel = group_size;
6037 ltype = build_vector_type (elem_type, group_size);
6038 lvectype = vectype;
6040 /* First check whether the vec_extract optab supports extraction
6041 of vector elts directly; if not, try the fallbacks below. */
6042 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6043 machine_mode vmode;
6044 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6045 || !VECTOR_MODE_P (vmode)
6046 || (convert_optab_handler (vec_extract_optab,
6047 TYPE_MODE (vectype), vmode)
6048 == CODE_FOR_nothing))
6050 /* Try to avoid emitting an extract of vector elements
6051 by performing the extracts using an integer type of the
6052 same size, extracting from a vector of those and then
6053 re-interpreting it as the original vector type if
6054 supported. */
6055 unsigned lsize
6056 = group_size * GET_MODE_BITSIZE (elmode);
6057 elmode = int_mode_for_size (lsize, 0).require ();
6058 /* If we can't construct such a vector, fall back to
6059 element extracts from the original vector type and
6060 element size stores. */
6061 if (mode_for_vector (elmode,
6062 nunits / group_size).exists (&vmode)
6063 && VECTOR_MODE_P (vmode)
6064 && (convert_optab_handler (vec_extract_optab,
6065 vmode, elmode)
6066 != CODE_FOR_nothing))
6068 nstores = nunits / group_size;
6069 lnel = group_size;
6070 ltype = build_nonstandard_integer_type (lsize, 1);
6071 lvectype = build_vector_type (ltype, nstores);
6073 /* Else fall back to vector extraction anyway.
6074 Fewer stores are more important than avoiding spilling
6075 of the vector we extract from. Compared to the
6076 construction case in vectorizable_load no store-forwarding
6077 issue exists here for reasonable archs. */
6080 else if (group_size >= nunits
6081 && group_size % nunits == 0)
6083 nstores = 1;
6084 lnel = nunits;
6085 ltype = vectype;
6086 lvectype = vectype;
6088 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6089 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
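/* Sketch of the integer-mode fallback set up above (hypothetical example
   with group_size == 2 and V4SF): two float lanes are stored as one 64-bit
   integer instead of two separate float extracts:

       typedef float v4sf __attribute__ ((vector_size (16)));
       typedef unsigned long long v2di __attribute__ ((vector_size (16)));

       void
       store_pairs (float *dst, long stride, v4sf v)
       {
         v2di tmp = (v2di) v;          (the VIEW_CONVERT_EXPR pun)
         unsigned long long lo = tmp[0];
         unsigned long long hi = tmp[1];
         __builtin_memcpy (&dst[0], &lo, 8);
         __builtin_memcpy (&dst[stride], &hi, 8);
       }

   i.e. nstores == 2, lnel == 2, ltype is a 64-bit integer type and
   lvectype a 2-element integer vector.  */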
6092 ivstep = stride_step;
6093 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6094 build_int_cst (TREE_TYPE (ivstep), vf));
6096 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6098 create_iv (stride_base, ivstep, NULL,
6099 loop, &incr_gsi, insert_after,
6100 &offvar, NULL);
6101 incr = gsi_stmt (incr_gsi);
6102 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6104 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6105 if (stmts)
6106 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6108 prev_stmt_info = NULL;
6109 alias_off = build_int_cst (ref_type, 0);
6110 next_stmt = first_stmt;
6111 for (g = 0; g < group_size; g++)
6113 running_off = offvar;
6114 if (g)
6116 tree size = TYPE_SIZE_UNIT (ltype);
6117 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6118 size);
6119 tree newoff = copy_ssa_name (running_off, NULL);
6120 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6121 running_off, pos);
6122 vect_finish_stmt_generation (stmt, incr, gsi);
6123 running_off = newoff;
6125 unsigned int group_el = 0;
6126 unsigned HOST_WIDE_INT
6127 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6128 for (j = 0; j < ncopies; j++)
6130 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6131 and first_stmt == stmt. */
6132 if (j == 0)
6134 if (slp)
6136 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6137 slp_node);
6138 vec_oprnd = vec_oprnds[0];
6140 else
6142 gcc_assert (gimple_assign_single_p (next_stmt));
6143 op = gimple_assign_rhs1 (next_stmt);
6144 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6147 else
6149 if (slp)
6150 vec_oprnd = vec_oprnds[j];
6151 else
6153 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6154 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6157 /* Pun the vector to extract from if necessary. */
6158 if (lvectype != vectype)
6160 tree tem = make_ssa_name (lvectype);
6161 gimple *pun
6162 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6163 lvectype, vec_oprnd));
6164 vect_finish_stmt_generation (stmt, pun, gsi);
6165 vec_oprnd = tem;
6167 for (i = 0; i < nstores; i++)
6169 tree newref, newoff;
6170 gimple *incr, *assign;
6171 tree size = TYPE_SIZE (ltype);
6172 /* Extract the i'th component. */
6173 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6174 bitsize_int (i), size);
6175 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6176 size, pos);
6178 elem = force_gimple_operand_gsi (gsi, elem, true,
6179 NULL_TREE, true,
6180 GSI_SAME_STMT);
6182 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6183 group_el * elsz);
6184 newref = build2 (MEM_REF, ltype,
6185 running_off, this_off);
6187 /* And store it to *running_off. */
6188 assign = gimple_build_assign (newref, elem);
6189 vect_finish_stmt_generation (stmt, assign, gsi);
6191 group_el += lnel;
6192 if (! slp
6193 || group_el == group_size)
6195 newoff = copy_ssa_name (running_off, NULL);
6196 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6197 running_off, stride_step);
6198 vect_finish_stmt_generation (stmt, incr, gsi);
6200 running_off = newoff;
6201 group_el = 0;
6203 if (g == group_size - 1
6204 && !slp)
6206 if (j == 0 && i == 0)
6207 STMT_VINFO_VEC_STMT (stmt_info)
6208 = *vec_stmt = assign;
6209 else
6210 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6211 prev_stmt_info = vinfo_for_stmt (assign);
6215 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6216 if (slp)
6217 break;
6220 vec_oprnds.release ();
6221 return true;
6224 auto_vec<tree> dr_chain (group_size);
6225 oprnds.create (group_size);
6227 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6228 gcc_assert (alignment_support_scheme);
6229 /* Targets with store-lane instructions must not require explicit
6230 realignment. */
6231 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6232 || alignment_support_scheme == dr_aligned
6233 || alignment_support_scheme == dr_unaligned_supported);
6235 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6236 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6237 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6239 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6240 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6241 else
6242 aggr_type = vectype;
6244 /* In case the vectorization factor (VF) is bigger than the number
6245 of elements that we can fit in a vectype (nunits), we have to generate
6246 more than one vector stmt - i.e. - we need to "unroll" the
6247 vector stmt by a factor VF/nunits. For more details see documentation
6248 in vect_get_vec_def_for_stmt_copy. */
6250 /* In case of interleaving (non-unit grouped access):
6252 S1: &base + 2 = x2
6253 S2: &base = x0
6254 S3: &base + 1 = x1
6255 S4: &base + 3 = x3
6257 We create vectorized stores starting from base address (the access of the
6258 first stmt in the chain (S2 in the above example), when the last store stmt
6259 of the chain (S4) is reached:
6261 VS1: &base = vx2
6262 VS2: &base + vec_size*1 = vx0
6263 VS3: &base + vec_size*2 = vx1
6264 VS4: &base + vec_size*3 = vx3
6266 Then permutation statements are generated:
6268 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6269 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6272 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6273 (the order of the data-refs in the output of vect_permute_store_chain
6274 corresponds to the order of scalar stmts in the interleaving chain - see
6275 the documentation of vect_permute_store_chain()).
6277 In case of both multiple types and interleaving, above vector stores and
6278 permutation stmts are created for every copy. The result vector stmts are
6279 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6280 STMT_VINFO_RELATED_STMT for the next copies.
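/* A hypothetical source-level instance of such an interleaved store with
   group_size == 2 (the generic scheme above uses group_size == 4):

       void
       pack (int *out, const int *a, const int *b, int n)
       {
         for (int i = 0; i < n; i++)
           {
             out[2 * i]     = a[i];
             out[2 * i + 1] = b[i];
           }
       }

   The vectorized defs of both stores are interleaved lane-wise by
   vect_permute_store_chain before being written out.  */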
6283 prev_stmt_info = NULL;
6284 for (j = 0; j < ncopies; j++)
6287 if (j == 0)
6289 if (slp)
6291 /* Get vectorized arguments for SLP_NODE. */
6292 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6293 NULL, slp_node);
6295 vec_oprnd = vec_oprnds[0];
6297 else
6299 /* For interleaved stores we collect vectorized defs for all the
6300 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6301 used as an input to vect_permute_store_chain(), and OPRNDS as
6302 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6304 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6305 OPRNDS are of size 1. */
6306 next_stmt = first_stmt;
6307 for (i = 0; i < group_size; i++)
6309 /* Since gaps are not supported for interleaved stores,
6310 GROUP_SIZE is the exact number of stmts in the chain.
6311 Therefore, NEXT_STMT can't be NULL. If there is
6312 no interleaving, GROUP_SIZE is 1, and only one
6313 iteration of the loop will be executed. */
6314 gcc_assert (next_stmt
6315 && gimple_assign_single_p (next_stmt));
6316 op = gimple_assign_rhs1 (next_stmt);
6318 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6319 dr_chain.quick_push (vec_oprnd);
6320 oprnds.quick_push (vec_oprnd);
6321 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6325 /* We should have caught mismatched types earlier. */
6326 gcc_assert (useless_type_conversion_p (vectype,
6327 TREE_TYPE (vec_oprnd)));
6328 bool simd_lane_access_p
6329 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6330 if (simd_lane_access_p
6331 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6332 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6333 && integer_zerop (DR_OFFSET (first_dr))
6334 && integer_zerop (DR_INIT (first_dr))
6335 && alias_sets_conflict_p (get_alias_set (aggr_type),
6336 get_alias_set (TREE_TYPE (ref_type))))
6338 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6339 dataref_offset = build_int_cst (ref_type, 0);
6340 inv_p = false;
6342 else
6343 dataref_ptr
6344 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6345 simd_lane_access_p ? loop : NULL,
6346 offset, &dummy, gsi, &ptr_incr,
6347 simd_lane_access_p, &inv_p);
6348 gcc_assert (bb_vinfo || !inv_p);
6350 else
6352 /* For interleaved stores we created vectorized defs for all the
6353 defs stored in OPRNDS in the previous iteration (previous copy).
6354 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6355 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6356 next copy.
6357 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6358 OPRNDS are of size 1. */
6359 for (i = 0; i < group_size; i++)
6361 op = oprnds[i];
6362 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6363 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6364 dr_chain[i] = vec_oprnd;
6365 oprnds[i] = vec_oprnd;
6367 if (dataref_offset)
6368 dataref_offset
6369 = int_const_binop (PLUS_EXPR, dataref_offset,
6370 TYPE_SIZE_UNIT (aggr_type));
6371 else
6372 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6373 TYPE_SIZE_UNIT (aggr_type));
6376 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6378 tree vec_array;
6380 /* Combine all the vectors into an array. */
6381 vec_array = create_vector_array (vectype, vec_num);
6382 for (i = 0; i < vec_num; i++)
6384 vec_oprnd = dr_chain[i];
6385 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6388 /* Emit:
6389 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6390 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6391 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6392 vec_array);
6393 gimple_call_set_lhs (call, data_ref);
6394 gimple_call_set_nothrow (call, true);
6395 new_stmt = call;
6396 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6398 else
6400 new_stmt = NULL;
6401 if (grouped_store)
6403 if (j == 0)
6404 result_chain.create (group_size);
6405 /* Permute. */
6406 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6407 &result_chain);
6410 next_stmt = first_stmt;
6411 for (i = 0; i < vec_num; i++)
6413 unsigned align, misalign;
6415 if (i > 0)
6416 /* Bump the vector pointer. */
6417 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6418 stmt, NULL_TREE);
6420 if (slp)
6421 vec_oprnd = vec_oprnds[i];
6422 else if (grouped_store)
6423 /* For grouped stores vectorized defs are interleaved in
6424 vect_permute_store_chain(). */
6425 vec_oprnd = result_chain[i];
6427 data_ref = fold_build2 (MEM_REF, vectype,
6428 dataref_ptr,
6429 dataref_offset
6430 ? dataref_offset
6431 : build_int_cst (ref_type, 0));
6432 align = DR_TARGET_ALIGNMENT (first_dr);
6433 if (aligned_access_p (first_dr))
6434 misalign = 0;
6435 else if (DR_MISALIGNMENT (first_dr) == -1)
6437 align = dr_alignment (vect_dr_behavior (first_dr));
6438 misalign = 0;
6439 TREE_TYPE (data_ref)
6440 = build_aligned_type (TREE_TYPE (data_ref),
6441 align * BITS_PER_UNIT);
6443 else
6445 TREE_TYPE (data_ref)
6446 = build_aligned_type (TREE_TYPE (data_ref),
6447 TYPE_ALIGN (elem_type));
6448 misalign = DR_MISALIGNMENT (first_dr);
6450 if (dataref_offset == NULL_TREE
6451 && TREE_CODE (dataref_ptr) == SSA_NAME)
6452 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6453 misalign);
6455 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6457 tree perm_mask = perm_mask_for_reverse (vectype);
6458 tree perm_dest
6459 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6460 vectype);
6461 tree new_temp = make_ssa_name (perm_dest);
6463 /* Generate the permute statement. */
6464 gimple *perm_stmt
6465 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6466 vec_oprnd, perm_mask);
6467 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6469 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6470 vec_oprnd = new_temp;
6473 /* Arguments are ready. Create the new vector stmt. */
6474 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6475 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6477 if (slp)
6478 continue;
6480 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6481 if (!next_stmt)
6482 break;
6485 if (!slp)
6487 if (j == 0)
6488 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6489 else
6490 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6491 prev_stmt_info = vinfo_for_stmt (new_stmt);
6495 oprnds.release ();
6496 result_chain.release ();
6497 vec_oprnds.release ();
6499 return true;
6502 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6503 VECTOR_CST mask. No checks are made that the target platform supports the
6504 mask, so callers may wish to test can_vec_perm_p separately, or use
6505 vect_gen_perm_mask_checked. */
6507 tree
6508 vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel)
6510 tree mask_elt_type, mask_type, mask_vec;
6512 unsigned int nunits = sel.length ();
6513 gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6515 mask_elt_type = lang_hooks.types.type_for_mode
6516 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
6517 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6519 auto_vec<tree, 32> mask_elts (nunits);
6520 for (unsigned int i = 0; i < nunits; ++i)
6521 mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
6522 mask_vec = build_vector (mask_type, mask_elts);
6524 return mask_vec;
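/* For reference, a mask like the one built above corresponds to GNU C's
   __builtin_shuffle (a hypothetical example, not produced here): reversing
   a 4-lane vector uses the VECTOR_CST mask { 3, 2, 1, 0 }:

       typedef int v4si __attribute__ ((vector_size (16)));

       v4si
       reverse (v4si x)
       {
         v4si mask = { 3, 2, 1, 0 };
         return __builtin_shuffle (x, mask);
       }

   which the compiler lowers to VEC_PERM_EXPR <x, x, mask>.  */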
6527 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6528 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6530 tree
6531 vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel)
6533 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
6534 return vect_gen_perm_mask_any (vectype, sel);
6537 /* Given vector variables X and Y that were generated for the scalar
6538 STMT, generate instructions to permute the vector elements of X and Y
6539 using the permutation mask MASK_VEC, insert them at *GSI, and return
6540 the permuted vector variable. */
6542 static tree
6543 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6544 gimple_stmt_iterator *gsi)
6546 tree vectype = TREE_TYPE (x);
6547 tree perm_dest, data_ref;
6548 gimple *perm_stmt;
6550 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6551 data_ref = make_ssa_name (perm_dest);
6553 /* Generate the permute statement. */
6554 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6555 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6557 return data_ref;
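/* The two-operand form used here, in GNU C terms (illustrative only):
   interleaving the low halves of X and Y is VEC_PERM_EXPR <x, y,
   { 0, 4, 1, 5 }>, i.e.

       typedef int v4si __attribute__ ((vector_size (16)));

       v4si
       interleave_lo (v4si x, v4si y)
       {
         v4si mask = { 0, 4, 1, 5 };
         return __builtin_shuffle (x, y, mask);
       }

   where mask indices 4..7 select elements from the second input vector.  */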
6560 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6561 inserting them on the loop's preheader edge. Returns true if we
6562 were successful in doing so (and thus STMT can then be moved),
6563 otherwise returns false. */
6565 static bool
6566 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6568 ssa_op_iter i;
6569 tree op;
6570 bool any = false;
6572 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6574 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6575 if (!gimple_nop_p (def_stmt)
6576 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6578 /* Make sure we don't need to recurse. While we could do
6579 so in simple cases, for more complex use webs we don't
6580 have an easy way to preserve stmt order to fulfil
6581 dependencies within them. */
6582 tree op2;
6583 ssa_op_iter i2;
6584 if (gimple_code (def_stmt) == GIMPLE_PHI)
6585 return false;
6586 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6588 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6589 if (!gimple_nop_p (def_stmt2)
6590 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6591 return false;
6593 any = true;
6597 if (!any)
6598 return true;
6600 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6602 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6603 if (!gimple_nop_p (def_stmt)
6604 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6606 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6607 gsi_remove (&gsi, false);
6608 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6612 return true;
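/* Illustrative effect on a hypothetical invariant load (this helper is used
   when hoisting such loads out of the vectorized loop):

       before:                     after:
         loop:                       k = n * 4;       (moved to preheader)
           k = n * 4;                loop:
           x = a[k];                   x = a[k];
           ...                         ...

   The definition of K is moved to the preheader so that STMT itself no
   longer depends on anything defined inside the loop and can subsequently
   be hoisted by the caller.  */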
6615 /* vectorizable_load.
6617 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6618 can be vectorized.
6619 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6620 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6621 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6623 static bool
6624 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6625 slp_tree slp_node, slp_instance slp_node_instance)
6627 tree scalar_dest;
6628 tree vec_dest = NULL;
6629 tree data_ref = NULL;
6630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6631 stmt_vec_info prev_stmt_info;
6632 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6633 struct loop *loop = NULL;
6634 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6635 bool nested_in_vect_loop = false;
6636 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6637 tree elem_type;
6638 tree new_temp;
6639 machine_mode mode;
6640 gimple *new_stmt = NULL;
6641 tree dummy;
6642 enum dr_alignment_support alignment_support_scheme;
6643 tree dataref_ptr = NULL_TREE;
6644 tree dataref_offset = NULL_TREE;
6645 gimple *ptr_incr = NULL;
6646 int ncopies;
6647 int i, j, group_size, group_gap_adj;
6648 tree msq = NULL_TREE, lsq;
6649 tree offset = NULL_TREE;
6650 tree byte_offset = NULL_TREE;
6651 tree realignment_token = NULL_TREE;
6652 gphi *phi = NULL;
6653 vec<tree> dr_chain = vNULL;
6654 bool grouped_load = false;
6655 gimple *first_stmt;
6656 gimple *first_stmt_for_drptr = NULL;
6657 bool inv_p;
6658 bool compute_in_loop = false;
6659 struct loop *at_loop;
6660 int vec_num;
6661 bool slp = (slp_node != NULL);
6662 bool slp_perm = false;
6663 enum tree_code code;
6664 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6665 int vf;
6666 tree aggr_type;
6667 gather_scatter_info gs_info;
6668 vec_info *vinfo = stmt_info->vinfo;
6669 tree ref_type;
6671 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6672 return false;
6674 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6675 && ! vec_stmt)
6676 return false;
6678 /* Is vectorizable load? */
6679 if (!is_gimple_assign (stmt))
6680 return false;
6682 scalar_dest = gimple_assign_lhs (stmt);
6683 if (TREE_CODE (scalar_dest) != SSA_NAME)
6684 return false;
6686 code = gimple_assign_rhs_code (stmt);
6687 if (code != ARRAY_REF
6688 && code != BIT_FIELD_REF
6689 && code != INDIRECT_REF
6690 && code != COMPONENT_REF
6691 && code != IMAGPART_EXPR
6692 && code != REALPART_EXPR
6693 && code != MEM_REF
6694 && TREE_CODE_CLASS (code) != tcc_declaration)
6695 return false;
6697 if (!STMT_VINFO_DATA_REF (stmt_info))
6698 return false;
6700 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6701 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6703 if (loop_vinfo)
6705 loop = LOOP_VINFO_LOOP (loop_vinfo);
6706 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6707 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6709 else
6710 vf = 1;
6712 /* Multiple types in SLP are handled by creating the appropriate number of
6713 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6714 case of SLP. */
6715 if (slp)
6716 ncopies = 1;
6717 else
6718 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6720 gcc_assert (ncopies >= 1);
6722 /* FORNOW. This restriction should be relaxed. */
6723 if (nested_in_vect_loop && ncopies > 1)
6725 if (dump_enabled_p ())
6726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6727 "multiple types in nested loop.\n");
6728 return false;
6731 /* Invalidate assumptions made by dependence analysis when vectorization
6732 on the unrolled body effectively re-orders stmts. */
6733 if (ncopies > 1
6734 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6735 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6736 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6738 if (dump_enabled_p ())
6739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6740 "cannot perform implicit CSE when unrolling "
6741 "with negative dependence distance\n");
6742 return false;
6745 elem_type = TREE_TYPE (vectype);
6746 mode = TYPE_MODE (vectype);
6748 /* FORNOW. In some cases we can vectorize even if the data type is not
6749 supported (e.g. data copies). */
6750 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6752 if (dump_enabled_p ())
6753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6754 "Aligned load, but unsupported type.\n");
6755 return false;
6758 /* Check if the load is a part of an interleaving chain. */
6759 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6761 grouped_load = true;
6762 /* FORNOW */
6763 gcc_assert (!nested_in_vect_loop);
6764 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6766 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6767 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6769 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6770 slp_perm = true;
6772 /* Invalidate assumptions made by dependence analysis when vectorization
6773 on the unrolled body effectively re-orders stmts. */
6774 if (!PURE_SLP_STMT (stmt_info)
6775 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6776 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6777 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6779 if (dump_enabled_p ())
6780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6781 "cannot perform implicit CSE when performing "
6782 "group loads with negative dependence distance\n");
6783 return false;
6786 /* Similarly when the stmt is a load that is both part of a SLP
6787 instance and a loop vectorized stmt via the same-dr mechanism
6788 we have to give up. */
6789 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6790 && (STMT_SLP_TYPE (stmt_info)
6791 != STMT_SLP_TYPE (vinfo_for_stmt
6792 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6794 if (dump_enabled_p ())
6795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6796 "conflicting SLP types for CSEd load\n");
6797 return false;
6801 vect_memory_access_type memory_access_type;
6802 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6803 &memory_access_type, &gs_info))
6804 return false;
6806 if (!vec_stmt) /* transformation not required. */
6808 if (!slp)
6809 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6810 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6811 /* The SLP costs are calculated during SLP analysis. */
6812 if (!PURE_SLP_STMT (stmt_info))
6813 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6814 NULL, NULL, NULL);
6815 return true;
6818 if (!slp)
6819 gcc_assert (memory_access_type
6820 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6822 if (dump_enabled_p ())
6823 dump_printf_loc (MSG_NOTE, vect_location,
6824 "transform load. ncopies = %d\n", ncopies);
6826 /* Transform. */
6828 ensure_base_align (dr);
6830 if (memory_access_type == VMAT_GATHER_SCATTER)
6832 tree vec_oprnd0 = NULL_TREE, op;
6833 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6834 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6835 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6836 edge pe = loop_preheader_edge (loop);
6837 gimple_seq seq;
6838 basic_block new_bb;
6839 enum { NARROW, NONE, WIDEN } modifier;
6840 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6842 if (nunits == gather_off_nunits)
6843 modifier = NONE;
6844 else if (nunits == gather_off_nunits / 2)
6846 modifier = WIDEN;
6848 auto_vec_perm_indices sel (gather_off_nunits);
6849 for (i = 0; i < gather_off_nunits; ++i)
6850 sel.quick_push (i | nunits);
6852 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6854 else if (nunits == gather_off_nunits * 2)
6856 modifier = NARROW;
6858 auto_vec_perm_indices sel (nunits);
6859 for (i = 0; i < nunits; ++i)
6860 sel.quick_push (i < gather_off_nunits
6861 ? i : i + nunits - gather_off_nunits);
6863 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6864 ncopies *= 2;
6866 else
6867 gcc_unreachable ();
6869 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6870 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6871 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6872 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6873 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6874 scaletype = TREE_VALUE (arglist);
6875 gcc_checking_assert (types_compatible_p (srctype, rettype));
6877 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6879 ptr = fold_convert (ptrtype, gs_info.base);
6880 if (!is_gimple_min_invariant (ptr))
6882 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6883 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6884 gcc_assert (!new_bb);
6887 /* Currently we support only unconditional gather loads,
6888 so mask should be all ones. */
6889 if (TREE_CODE (masktype) == INTEGER_TYPE)
6890 mask = build_int_cst (masktype, -1);
6891 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6893 mask = build_int_cst (TREE_TYPE (masktype), -1);
6894 mask = build_vector_from_val (masktype, mask);
6895 mask = vect_init_vector (stmt, mask, masktype, NULL);
6897 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6899 REAL_VALUE_TYPE r;
6900 long tmp[6];
6901 for (j = 0; j < 6; ++j)
6902 tmp[j] = -1;
6903 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6904 mask = build_real (TREE_TYPE (masktype), r);
6905 mask = build_vector_from_val (masktype, mask);
6906 mask = vect_init_vector (stmt, mask, masktype, NULL);
6908 else
6909 gcc_unreachable ();
6911 scale = build_int_cst (scaletype, gs_info.scale);
6913 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6914 merge = build_int_cst (TREE_TYPE (rettype), 0);
6915 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6917 REAL_VALUE_TYPE r;
6918 long tmp[6];
6919 for (j = 0; j < 6; ++j)
6920 tmp[j] = 0;
6921 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6922 merge = build_real (TREE_TYPE (rettype), r);
6924 else
6925 gcc_unreachable ();
6926 merge = build_vector_from_val (rettype, merge);
6927 merge = vect_init_vector (stmt, merge, rettype, NULL);
6929 prev_stmt_info = NULL;
6930 for (j = 0; j < ncopies; ++j)
6932 if (modifier == WIDEN && (j & 1))
6933 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6934 perm_mask, stmt, gsi);
6935 else if (j == 0)
6936 op = vec_oprnd0
6937 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6938 else
6939 op = vec_oprnd0
6940 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6942 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6944 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6945 == TYPE_VECTOR_SUBPARTS (idxtype));
6946 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6947 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6948 new_stmt
6949 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6950 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6951 op = var;
6954 new_stmt
6955 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6957 if (!useless_type_conversion_p (vectype, rettype))
6959 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6960 == TYPE_VECTOR_SUBPARTS (rettype));
6961 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6962 gimple_call_set_lhs (new_stmt, op);
6963 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6964 var = make_ssa_name (vec_dest);
6965 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6966 new_stmt
6967 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6969 else
6971 var = make_ssa_name (vec_dest, new_stmt);
6972 gimple_call_set_lhs (new_stmt, var);
6975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6977 if (modifier == NARROW)
6979 if ((j & 1) == 0)
6981 prev_res = var;
6982 continue;
6984 var = permute_vec_elements (prev_res, var,
6985 perm_mask, stmt, gsi);
6986 new_stmt = SSA_NAME_DEF_STMT (var);
6989 if (prev_stmt_info == NULL)
6990 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6991 else
6992 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6993 prev_stmt_info = vinfo_for_stmt (new_stmt);
6995 return true;
6998 if (memory_access_type == VMAT_ELEMENTWISE
6999 || memory_access_type == VMAT_STRIDED_SLP)
7001 gimple_stmt_iterator incr_gsi;
7002 bool insert_after;
7003 gimple *incr;
7004 tree offvar;
7005 tree ivstep;
7006 tree running_off;
7007 vec<constructor_elt, va_gc> *v = NULL;
7008 gimple_seq stmts = NULL;
7009 tree stride_base, stride_step, alias_off;
7011 gcc_assert (!nested_in_vect_loop);
7013 if (slp && grouped_load)
7015 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7016 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7017 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7018 ref_type = get_group_alias_ptr_type (first_stmt);
7020 else
7022 first_stmt = stmt;
7023 first_dr = dr;
7024 group_size = 1;
7025 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7028 stride_base
7029 = fold_build_pointer_plus
7030 (DR_BASE_ADDRESS (first_dr),
7031 size_binop (PLUS_EXPR,
7032 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7033 convert_to_ptrofftype (DR_INIT (first_dr))));
7034 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7036 /* For a load with loop-invariant (but other than power-of-2)
7037 stride (i.e. not a grouped access) like so:
7039 for (i = 0; i < n; i += stride)
7040 ... = array[i];
7042 we generate a new induction variable and new accesses to
7043 form a new vector (or vectors, depending on ncopies):
7045 for (j = 0; ; j += VF*stride)
7046 tmp1 = array[j];
7047 tmp2 = array[j + stride];
7049 vectemp = {tmp1, tmp2, ...}
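/* A concrete sketch of the constructor-based form above (hypothetical,
   4-element vectors, GNU C vector extensions):

       typedef int v4si __attribute__ ((vector_size (16)));

       v4si
       load_strided (const int *array, long j, long stride)
       {
         return (v4si) { array[j], array[j + stride],
                         array[j + 2 * stride], array[j + 3 * stride] };
       }

   i.e. NLOADS scalar loads feeding one CONSTRUCTOR of the vector type.  */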
7052 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7053 build_int_cst (TREE_TYPE (stride_step), vf));
7055 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7057 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7058 loop, &incr_gsi, insert_after,
7059 &offvar, NULL);
7060 incr = gsi_stmt (incr_gsi);
7061 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7063 stride_step = force_gimple_operand (unshare_expr (stride_step),
7064 &stmts, true, NULL_TREE);
7065 if (stmts)
7066 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7068 prev_stmt_info = NULL;
7069 running_off = offvar;
7070 alias_off = build_int_cst (ref_type, 0);
7071 int nloads = nunits;
7072 int lnel = 1;
7073 tree ltype = TREE_TYPE (vectype);
7074 tree lvectype = vectype;
7075 auto_vec<tree> dr_chain;
7076 if (memory_access_type == VMAT_STRIDED_SLP)
7078 if (group_size < nunits)
7080 /* First check if vec_init optab supports construction from
7081 vector elts directly. */
7082 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7083 machine_mode vmode;
7084 if (mode_for_vector (elmode, group_size).exists (&vmode)
7085 && VECTOR_MODE_P (vmode)
7086 && (convert_optab_handler (vec_init_optab,
7087 TYPE_MODE (vectype), vmode)
7088 != CODE_FOR_nothing))
7090 nloads = nunits / group_size;
7091 lnel = group_size;
7092 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7094 else
7096 /* Otherwise avoid emitting a constructor of vector elements
7097 by performing the loads using an integer type of the same
7098 size, constructing a vector of those and then
7099 re-interpreting it as the original vector type.
7100 This avoids a huge runtime penalty due to the general
7101 inability to perform store forwarding from smaller stores
7102 to a larger load. */
7103 unsigned lsize
7104 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7105 elmode = int_mode_for_size (lsize, 0).require ();
7106 /* If we can't construct such a vector, fall back to
7107 element loads of the original vector type. */
7108 if (mode_for_vector (elmode,
7109 nunits / group_size).exists (&vmode)
7110 && VECTOR_MODE_P (vmode)
7111 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7112 != CODE_FOR_nothing))
7114 nloads = nunits / group_size;
7115 lnel = group_size;
7116 ltype = build_nonstandard_integer_type (lsize, 1);
7117 lvectype = build_vector_type (ltype, nloads);
7121 else
7123 nloads = 1;
7124 lnel = nunits;
7125 ltype = vectype;
7127 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
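/* Sketch of the integer-mode fallback above (hypothetical example with
   group_size == 2 and V4SF): each group of two floats is loaded as a single
   64-bit integer, two such integers build an integer vector, and the result
   is punned back to the float vector type:

       typedef float v4sf __attribute__ ((vector_size (16)));
       typedef unsigned long long v2di __attribute__ ((vector_size (16)));

       v4sf
       load_pairs (const float *p, long stride)
       {
         unsigned long long lo, hi;
         __builtin_memcpy (&lo, &p[0], 8);
         __builtin_memcpy (&hi, &p[stride], 8);
         v2di tmp = { lo, hi };
         return (v4sf) tmp;        (the final VIEW_CONVERT_EXPR)
       }

   i.e. nloads == 2, lnel == 2, ltype is a 64-bit integer type and
   lvectype a 2-element integer vector.  */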
7129 if (slp)
7131 /* For SLP permutation support we need to load the whole group,
7132 not only the number of vector stmts the permutation result
7133 fits in. */
7134 if (slp_perm)
7136 ncopies = (group_size * vf + nunits - 1) / nunits;
7137 dr_chain.create (ncopies);
7139 else
7140 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7142 int group_el = 0;
7143 unsigned HOST_WIDE_INT
7144 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7145 for (j = 0; j < ncopies; j++)
7147 if (nloads > 1)
7148 vec_alloc (v, nloads);
7149 for (i = 0; i < nloads; i++)
7151 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7152 group_el * elsz);
7153 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7154 build2 (MEM_REF, ltype,
7155 running_off, this_off));
7156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7157 if (nloads > 1)
7158 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7159 gimple_assign_lhs (new_stmt));
7161 group_el += lnel;
7162 if (! slp
7163 || group_el == group_size)
7165 tree newoff = copy_ssa_name (running_off);
7166 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7167 running_off, stride_step);
7168 vect_finish_stmt_generation (stmt, incr, gsi);
7170 running_off = newoff;
7171 group_el = 0;
7174 if (nloads > 1)
7176 tree vec_inv = build_constructor (lvectype, v);
7177 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7178 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7179 if (lvectype != vectype)
7181 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7182 VIEW_CONVERT_EXPR,
7183 build1 (VIEW_CONVERT_EXPR,
7184 vectype, new_temp));
7185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7189 if (slp)
7191 if (slp_perm)
7192 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7193 else
7194 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7196 else
7198 if (j == 0)
7199 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7200 else
7201 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7202 prev_stmt_info = vinfo_for_stmt (new_stmt);
7205 if (slp_perm)
7207 unsigned n_perms;
7208 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7209 slp_node_instance, false, &n_perms);
7211 return true;
7214 if (grouped_load)
7216 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7217 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7218 /* For SLP vectorization we directly vectorize a subchain
7219 without permutation. */
7220 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7221 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7222 /* For BB vectorization always use the first stmt to base
7223 the data ref pointer on. */
7224 if (bb_vinfo)
7225 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7227 /* Check if the chain of loads is already vectorized. */
7228 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7229 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7230 ??? But we can only do so if there is exactly one
7231 as we have no way to get at the rest. Leave the CSE
7232 opportunity alone.
7233 ??? With the group load eventually participating
7234 in multiple different permutations (having multiple
7235 slp nodes which refer to the same group) the CSE
7236 is even wrong code. See PR56270. */
7237 && !slp)
7239 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7240 return true;
7242 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7243 group_gap_adj = 0;
7245 /* VEC_NUM is the number of vect stmts to be created for this group. */
7246 if (slp)
7248 grouped_load = false;
7249 /* For SLP permutation support we need to load the whole group,
7250 not only the number of vector stmts the permutation result
7251 fits in. */
7252 if (slp_perm)
7254 vec_num = (group_size * vf + nunits - 1) / nunits;
7255 group_gap_adj = vf * group_size - nunits * vec_num;
7257 else
7259 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7260 group_gap_adj
7261 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7264 else
7265 vec_num = group_size;
7267 ref_type = get_group_alias_ptr_type (first_stmt);
7269 else
7271 first_stmt = stmt;
7272 first_dr = dr;
7273 group_size = vec_num = 1;
7274 group_gap_adj = 0;
7275 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7278 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7279 gcc_assert (alignment_support_scheme);
7280 /* Targets with load-lane instructions must not require explicit
7281 realignment. */
7282 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7283 || alignment_support_scheme == dr_aligned
7284 || alignment_support_scheme == dr_unaligned_supported);
7286 /* In case the vectorization factor (VF) is bigger than the number
7287 of elements that we can fit in a vectype (nunits), we have to generate
7288 more than one vector stmt - i.e. - we need to "unroll" the
7289 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7290 from one copy of the vector stmt to the next, in the field
7291 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7292 stages to find the correct vector defs to be used when vectorizing
7293 stmts that use the defs of the current stmt. The example below
7294 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7295 need to create 4 vectorized stmts):
7297 before vectorization:
7298 RELATED_STMT VEC_STMT
7299 S1: x = memref - -
7300 S2: z = x + 1 - -
7302 step 1: vectorize stmt S1:
7303 We first create the vector stmt VS1_0, and, as usual, record a
7304 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7305 Next, we create the vector stmt VS1_1, and record a pointer to
7306 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7307 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7308 stmts and pointers:
7309 RELATED_STMT VEC_STMT
7310 VS1_0: vx0 = memref0 VS1_1 -
7311 VS1_1: vx1 = memref1 VS1_2 -
7312 VS1_2: vx2 = memref2 VS1_3 -
7313 VS1_3: vx3 = memref3 - -
7314 S1: x = load - VS1_0
7315 S2: z = x + 1 - -
7317 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7318 information we recorded in the RELATED_STMT field is used to vectorize
7319 stmt S2. */
7321 /* In case of interleaving (non-unit grouped access):
7323 S1: x2 = &base + 2
7324 S2: x0 = &base
7325 S3: x1 = &base + 1
7326 S4: x3 = &base + 3
7328 Vectorized loads are created in the order of memory accesses
7329 starting from the access of the first stmt of the chain:
7331 VS1: vx0 = &base
7332 VS2: vx1 = &base + vec_size*1
7333 VS3: vx3 = &base + vec_size*2
7334 VS4: vx4 = &base + vec_size*3
7336 Then permutation statements are generated:
7338 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7339 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7342 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7343 (the order of the data-refs in the output of vect_permute_load_chain
7344 corresponds to the order of scalar stmts in the interleaving chain - see
7345 the documentation of vect_permute_load_chain()).
7346 The generation of permutation stmts and recording them in
7347 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7349 In case of both multiple types and interleaving, the vector loads and
7350 permutation stmts above are created for every copy. The result vector
7351 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7352 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
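/* A hypothetical source-level instance of such an interleaved load with
   group_size == 2; the even/odd extraction corresponds to the
   VEC_PERM_EXPRs shown above:

       void
       unpack (const int *in, int *a, int *b, int n)
       {
         for (int i = 0; i < n; i++)
           {
             a[i] = in[2 * i];
             b[i] = in[2 * i + 1];
           }
       }

   The wide contiguous loads are generated first; the per-stmt results are
   then produced by the permutation stmts recorded via
   vect_transform_grouped_load.  */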
7354 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7355 on a target that supports unaligned accesses (dr_unaligned_supported)
7356 we generate the following code:
7357 p = initial_addr;
7358 indx = 0;
7359 loop {
7360 p = p + indx * vectype_size;
7361 vec_dest = *(p);
7362 indx = indx + 1;
7365 Otherwise, the data reference is potentially unaligned on a target that
7366 does not support unaligned accesses (dr_explicit_realign_optimized);
7367 in that case we generate the following code, in which the data in each
7368 iteration is obtained by two vector loads, one from the previous
7369 iteration and one from the current iteration:
7370 p1 = initial_addr;
7371 msq_init = *(floor(p1))
7372 p2 = initial_addr + VS - 1;
7373 realignment_token = call target_builtin;
7374 indx = 0;
7375 loop {
7376 p2 = p2 + indx * vectype_size
7377 lsq = *(floor(p2))
7378 vec_dest = realign_load (msq, lsq, realignment_token)
7379 indx = indx + 1;
7380 msq = lsq;
7381 } */
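/* A sketch of what realign_load computes in the scheme above: msq and
   lsq are the two aligned vectors surrounding the (possibly unaligned)
   address, and realign_load combines them, guided by the target-computed
   realignment_token (e.g. a permute control derived from the address),
   to extract the vector that actually starts at the unaligned address:

        memory:  ... | m0 m1 m2 m3 | m4 m5 m6 m7 | ...
                              ^-- access starts at m2
        msq = { m0, m1, m2, m3 },  lsq = { m4, m5, m6, m7 }
        realign_load (msq, lsq, token) = { m2, m3, m4, m5 }  */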
7383 /* If the misalignment remains the same throughout the execution of the
7384 loop, we can create the init_addr and permutation mask at the loop
7385 preheader. Otherwise, it needs to be created inside the loop.
7386 This can only occur when vectorizing memory accesses in the inner-loop
7387 nested within an outer-loop that is being vectorized. */
7389 if (nested_in_vect_loop
7390 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7392 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7393 compute_in_loop = true;
7396 if ((alignment_support_scheme == dr_explicit_realign_optimized
7397 || alignment_support_scheme == dr_explicit_realign)
7398 && !compute_in_loop)
7400 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7401 alignment_support_scheme, NULL_TREE,
7402 &at_loop);
7403 if (alignment_support_scheme == dr_explicit_realign_optimized)
7405 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7406 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7407 size_one_node);
7410 else
7411 at_loop = loop;
7413 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7414 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7416 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7417 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7418 else
7419 aggr_type = vectype;
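/* As a purely illustrative example of the choice above: for
   VMAT_LOAD_STORE_LANES with vec_num == 3 and a V4SI vector type,
   aggr_type becomes int[12], so the single IFN_LOAD_LANES call emitted
   below can fill all three vectors from one array-typed memory
   reference; otherwise the pointer is simply typed as the vector
   type.  */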
7421 prev_stmt_info = NULL;
7422 int group_elt = 0;
7423 for (j = 0; j < ncopies; j++)
7425 /* 1. Create the vector or array pointer update chain. */
7426 if (j == 0)
7428 bool simd_lane_access_p
7429 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7430 if (simd_lane_access_p
7431 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7432 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7433 && integer_zerop (DR_OFFSET (first_dr))
7434 && integer_zerop (DR_INIT (first_dr))
7435 && alias_sets_conflict_p (get_alias_set (aggr_type),
7436 get_alias_set (TREE_TYPE (ref_type)))
7437 && (alignment_support_scheme == dr_aligned
7438 || alignment_support_scheme == dr_unaligned_supported))
7440 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7441 dataref_offset = build_int_cst (ref_type, 0);
7442 inv_p = false;
7444 else if (first_stmt_for_drptr
7445 && first_stmt != first_stmt_for_drptr)
7447 dataref_ptr
7448 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7449 at_loop, offset, &dummy, gsi,
7450 &ptr_incr, simd_lane_access_p,
7451 &inv_p, byte_offset);
7452 /* Adjust the pointer by the difference to first_stmt. */
7453 data_reference_p ptrdr
7454 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7455 tree diff = fold_convert (sizetype,
7456 size_binop (MINUS_EXPR,
7457 DR_INIT (first_dr),
7458 DR_INIT (ptrdr)));
7459 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7460 stmt, diff);
7462 else
7463 dataref_ptr
7464 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7465 offset, &dummy, gsi, &ptr_incr,
7466 simd_lane_access_p, &inv_p,
7467 byte_offset);
7469 else if (dataref_offset)
7470 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7471 TYPE_SIZE_UNIT (aggr_type));
7472 else
7473 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7474 TYPE_SIZE_UNIT (aggr_type));
7476 if (grouped_load || slp_perm)
7477 dr_chain.create (vec_num);
7479 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7481 tree vec_array;
7483 vec_array = create_vector_array (vectype, vec_num);
7485 /* Emit:
7486 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7487 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7488 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7489 data_ref);
7490 gimple_call_set_lhs (call, vec_array);
7491 gimple_call_set_nothrow (call, true);
7492 new_stmt = call;
7493 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7495 /* Extract each vector into an SSA_NAME. */
7496 for (i = 0; i < vec_num; i++)
7498 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7499 vec_array, i);
7500 dr_chain.quick_push (new_temp);
7503 /* Record the mapping between SSA_NAMEs and statements. */
7504 vect_record_grouped_load_vectors (stmt, dr_chain);
7506 else
7508 for (i = 0; i < vec_num; i++)
7510 if (i > 0)
7511 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7512 stmt, NULL_TREE);
7514 /* 2. Create the vector-load in the loop. */
7515 switch (alignment_support_scheme)
7517 case dr_aligned:
7518 case dr_unaligned_supported:
7520 unsigned int align, misalign;
7522 data_ref
7523 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7524 dataref_offset
7525 ? dataref_offset
7526 : build_int_cst (ref_type, 0));
7527 align = DR_TARGET_ALIGNMENT (dr);
7528 if (alignment_support_scheme == dr_aligned)
7530 gcc_assert (aligned_access_p (first_dr));
7531 misalign = 0;
7533 else if (DR_MISALIGNMENT (first_dr) == -1)
7535 align = dr_alignment (vect_dr_behavior (first_dr));
7536 misalign = 0;
7537 TREE_TYPE (data_ref)
7538 = build_aligned_type (TREE_TYPE (data_ref),
7539 align * BITS_PER_UNIT);
7541 else
7543 TREE_TYPE (data_ref)
7544 = build_aligned_type (TREE_TYPE (data_ref),
7545 TYPE_ALIGN (elem_type));
7546 misalign = DR_MISALIGNMENT (first_dr);
7548 if (dataref_offset == NULL_TREE
7549 && TREE_CODE (dataref_ptr) == SSA_NAME)
7550 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7551 align, misalign);
7552 break;
7554 case dr_explicit_realign:
7556 tree ptr, bump;
7558 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7560 if (compute_in_loop)
7561 msq = vect_setup_realignment (first_stmt, gsi,
7562 &realignment_token,
7563 dr_explicit_realign,
7564 dataref_ptr, NULL);
7566 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7567 ptr = copy_ssa_name (dataref_ptr);
7568 else
7569 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7570 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7571 new_stmt = gimple_build_assign
7572 (ptr, BIT_AND_EXPR, dataref_ptr,
7573 build_int_cst
7574 (TREE_TYPE (dataref_ptr),
7575 -(HOST_WIDE_INT) align));
7576 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7577 data_ref
7578 = build2 (MEM_REF, vectype, ptr,
7579 build_int_cst (ref_type, 0));
7580 vec_dest = vect_create_destination_var (scalar_dest,
7581 vectype);
7582 new_stmt = gimple_build_assign (vec_dest, data_ref);
7583 new_temp = make_ssa_name (vec_dest, new_stmt);
7584 gimple_assign_set_lhs (new_stmt, new_temp);
7585 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7586 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7587 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7588 msq = new_temp;
7590 bump = size_binop (MULT_EXPR, vs,
7591 TYPE_SIZE_UNIT (elem_type));
7592 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7593 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7594 new_stmt = gimple_build_assign
7595 (NULL_TREE, BIT_AND_EXPR, ptr,
7596 build_int_cst
7597 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7598 ptr = copy_ssa_name (ptr, new_stmt);
7599 gimple_assign_set_lhs (new_stmt, ptr);
7600 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7601 data_ref
7602 = build2 (MEM_REF, vectype, ptr,
7603 build_int_cst (ref_type, 0));
7604 break;
7606 case dr_explicit_realign_optimized:
7608 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7609 new_temp = copy_ssa_name (dataref_ptr);
7610 else
7611 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7612 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7613 new_stmt = gimple_build_assign
7614 (new_temp, BIT_AND_EXPR, dataref_ptr,
7615 build_int_cst (TREE_TYPE (dataref_ptr),
7616 -(HOST_WIDE_INT) align));
7617 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7618 data_ref
7619 = build2 (MEM_REF, vectype, new_temp,
7620 build_int_cst (ref_type, 0));
7621 break;
7623 default:
7624 gcc_unreachable ();
7626 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7627 new_stmt = gimple_build_assign (vec_dest, data_ref);
7628 new_temp = make_ssa_name (vec_dest, new_stmt);
7629 gimple_assign_set_lhs (new_stmt, new_temp);
7630 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7632 /* 3. Handle explicit realignment if necessary/supported.
7633 Create in loop:
7634 vec_dest = realign_load (msq, lsq, realignment_token) */
7635 if (alignment_support_scheme == dr_explicit_realign_optimized
7636 || alignment_support_scheme == dr_explicit_realign)
7638 lsq = gimple_assign_lhs (new_stmt);
7639 if (!realignment_token)
7640 realignment_token = dataref_ptr;
7641 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7642 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7643 msq, lsq, realignment_token);
7644 new_temp = make_ssa_name (vec_dest, new_stmt);
7645 gimple_assign_set_lhs (new_stmt, new_temp);
7646 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7648 if (alignment_support_scheme == dr_explicit_realign_optimized)
7650 gcc_assert (phi);
7651 if (i == vec_num - 1 && j == ncopies - 1)
7652 add_phi_arg (phi, lsq,
7653 loop_latch_edge (containing_loop),
7654 UNKNOWN_LOCATION);
7655 msq = lsq;
7659 /* 4. Handle invariant-load. */
7660 if (inv_p && !bb_vinfo)
7662 gcc_assert (!grouped_load);
7663 /* If we have versioned for aliasing or the loop doesn't
7664 have any data dependencies that would preclude this,
7665 then we are sure this is a loop invariant load and
7666 thus we can insert it on the preheader edge. */
7667 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7668 && !nested_in_vect_loop
7669 && hoist_defs_of_uses (stmt, loop))
7671 if (dump_enabled_p ())
7673 dump_printf_loc (MSG_NOTE, vect_location,
7674 "hoisting out of the vectorized "
7675 "loop: ");
7676 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7678 tree tem = copy_ssa_name (scalar_dest);
7679 gsi_insert_on_edge_immediate
7680 (loop_preheader_edge (loop),
7681 gimple_build_assign (tem,
7682 unshare_expr
7683 (gimple_assign_rhs1 (stmt))));
7684 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7685 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7686 set_vinfo_for_stmt (new_stmt,
7687 new_stmt_vec_info (new_stmt, vinfo));
7689 else
7691 gimple_stmt_iterator gsi2 = *gsi;
7692 gsi_next (&gsi2);
7693 new_temp = vect_init_vector (stmt, scalar_dest,
7694 vectype, &gsi2);
7695 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7699 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7701 tree perm_mask = perm_mask_for_reverse (vectype);
7702 new_temp = permute_vec_elements (new_temp, new_temp,
7703 perm_mask, stmt, gsi);
7704 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7707 /* Collect vector loads and later create their permutation in
7708 vect_transform_grouped_load (). */
7709 if (grouped_load || slp_perm)
7710 dr_chain.quick_push (new_temp);
7712 /* Store vector loads in the corresponding SLP_NODE. */
7713 if (slp && !slp_perm)
7714 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7716 /* With SLP permutation we load the gaps as well; without it
7717 we need to skip the gaps once we have fully loaded all the
7718 elements of a group.  group_gap_adj is GROUP_SIZE here. */
7719 group_elt += nunits;
7720 if (group_gap_adj != 0 && ! slp_perm
7721 && group_elt == group_size - group_gap_adj)
7723 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7724 * group_gap_adj);
7725 tree bump = wide_int_to_tree (sizetype, bump_val);
7726 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7727 stmt, bump);
7728 group_elt = 0;
7731 /* Bump the vector pointer to account for a gap or for excess
7732 elements loaded for a permuted SLP load. */
7733 if (group_gap_adj != 0 && slp_perm)
7735 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7736 * group_gap_adj);
7737 tree bump = wide_int_to_tree (sizetype, bump_val);
7738 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7739 stmt, bump);
7743 if (slp && !slp_perm)
7744 continue;
7746 if (slp_perm)
7748 unsigned n_perms;
7749 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7750 slp_node_instance, false,
7751 &n_perms))
7753 dr_chain.release ();
7754 return false;
7757 else
7759 if (grouped_load)
7761 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7762 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7763 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7765 else
7767 if (j == 0)
7768 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7769 else
7770 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7771 prev_stmt_info = vinfo_for_stmt (new_stmt);
7774 dr_chain.release ();
7777 return true;
7780 /* Function vect_is_simple_cond.
7782 Input:
7783 LOOP - the loop that is being vectorized.
7784 COND - Condition that is checked for simple use.
7786 Output:
7787 *COMP_VECTYPE - the vector type for the comparison.
7788 *DTS - The def types for the arguments of the comparison
7790 Returns whether a COND can be vectorized.  Checks whether the
7791 condition operands are supportable using vect_is_simple_use. */
7793 static bool
7794 vect_is_simple_cond (tree cond, vec_info *vinfo,
7795 tree *comp_vectype, enum vect_def_type *dts)
7797 tree lhs, rhs;
7798 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7800 /* Mask case. */
7801 if (TREE_CODE (cond) == SSA_NAME
7802 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7804 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7805 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7806 &dts[0], comp_vectype)
7807 || !*comp_vectype
7808 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7809 return false;
7810 return true;
7813 if (!COMPARISON_CLASS_P (cond))
7814 return false;
7816 lhs = TREE_OPERAND (cond, 0);
7817 rhs = TREE_OPERAND (cond, 1);
7819 if (TREE_CODE (lhs) == SSA_NAME)
7821 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7822 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7823 return false;
7825 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7826 || TREE_CODE (lhs) == FIXED_CST)
7827 dts[0] = vect_constant_def;
7828 else
7829 return false;
7831 if (TREE_CODE (rhs) == SSA_NAME)
7833 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7834 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7835 return false;
7837 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7838 || TREE_CODE (rhs) == FIXED_CST)
7839 dts[1] = vect_constant_def;
7840 else
7841 return false;
7843 if (vectype1 && vectype2
7844 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7845 return false;
7847 *comp_vectype = vectype1 ? vectype1 : vectype2;
7848 return true;
7851 /* vectorizable_condition.
7853 Check if STMT is a conditional modify expression that can be vectorized.
7854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7855 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7856 at GSI.
7858 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7859 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7860 the else clause if it is 2).
7862 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
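/* As an illustrative sketch, a scalar statement such as

        z = (a < b) ? x : y;

   is vectorized here into a VEC_COND_EXPR whose first operand is the
   vectorized comparison, roughly

        vz = VEC_COND_EXPR <va < vb, vx, vy>;

   replicated ncopies times when multiple vector copies are needed.  */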
7864 bool
7865 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7866 gimple **vec_stmt, tree reduc_def, int reduc_index,
7867 slp_tree slp_node)
7869 tree scalar_dest = NULL_TREE;
7870 tree vec_dest = NULL_TREE;
7871 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7872 tree then_clause, else_clause;
7873 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7874 tree comp_vectype = NULL_TREE;
7875 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7876 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7877 tree vec_compare;
7878 tree new_temp;
7879 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7880 enum vect_def_type dts[4]
7881 = {vect_unknown_def_type, vect_unknown_def_type,
7882 vect_unknown_def_type, vect_unknown_def_type};
7883 int ndts = 4;
7884 int ncopies;
7885 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7886 stmt_vec_info prev_stmt_info = NULL;
7887 int i, j;
7888 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7889 vec<tree> vec_oprnds0 = vNULL;
7890 vec<tree> vec_oprnds1 = vNULL;
7891 vec<tree> vec_oprnds2 = vNULL;
7892 vec<tree> vec_oprnds3 = vNULL;
7893 tree vec_cmp_type;
7894 bool masked = false;
7896 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7897 return false;
7899 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7901 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7902 return false;
7904 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7905 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7906 && reduc_def))
7907 return false;
7909 /* FORNOW: not yet supported. */
7910 if (STMT_VINFO_LIVE_P (stmt_info))
7912 if (dump_enabled_p ())
7913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7914 "value used after loop.\n");
7915 return false;
7919 /* Is vectorizable conditional operation? */
7920 if (!is_gimple_assign (stmt))
7921 return false;
7923 code = gimple_assign_rhs_code (stmt);
7925 if (code != COND_EXPR)
7926 return false;
7928 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7929 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7931 if (slp_node)
7932 ncopies = 1;
7933 else
7934 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7936 gcc_assert (ncopies >= 1);
7937 if (reduc_index && ncopies > 1)
7938 return false; /* FORNOW */
7940 cond_expr = gimple_assign_rhs1 (stmt);
7941 then_clause = gimple_assign_rhs2 (stmt);
7942 else_clause = gimple_assign_rhs3 (stmt);
7944 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7945 &comp_vectype, &dts[0])
7946 || !comp_vectype)
7947 return false;
7949 gimple *def_stmt;
7950 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7951 &vectype1))
7952 return false;
7953 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7954 &vectype2))
7955 return false;
7957 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7958 return false;
7960 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7961 return false;
7963 masked = !COMPARISON_CLASS_P (cond_expr);
7964 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7966 if (vec_cmp_type == NULL_TREE)
7967 return false;
7969 cond_code = TREE_CODE (cond_expr);
7970 if (!masked)
7972 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7973 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7976 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7978 /* Boolean values may have another representation in vectors
7979 and therefore we prefer bit operations over comparison for
7980 them (which also works for scalar masks). We store opcodes
7981 to use in bitop1 and bitop2. Statement is vectorized as
7982 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7983 depending on bitop1 and bitop2 arity. */
7984 switch (cond_code)
7986 case GT_EXPR:
7987 bitop1 = BIT_NOT_EXPR;
7988 bitop2 = BIT_AND_EXPR;
7989 break;
7990 case GE_EXPR:
7991 bitop1 = BIT_NOT_EXPR;
7992 bitop2 = BIT_IOR_EXPR;
7993 break;
7994 case LT_EXPR:
7995 bitop1 = BIT_NOT_EXPR;
7996 bitop2 = BIT_AND_EXPR;
7997 std::swap (cond_expr0, cond_expr1);
7998 break;
7999 case LE_EXPR:
8000 bitop1 = BIT_NOT_EXPR;
8001 bitop2 = BIT_IOR_EXPR;
8002 std::swap (cond_expr0, cond_expr1);
8003 break;
8004 case NE_EXPR:
8005 bitop1 = BIT_XOR_EXPR;
8006 break;
8007 case EQ_EXPR:
8008 bitop1 = BIT_XOR_EXPR;
8009 bitop2 = BIT_NOT_EXPR;
8010 break;
8011 default:
8012 return false;
8014 cond_code = SSA_NAME;
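/* As an illustration of the lowering above (for boolean elements,
   where each element is either 0 or all-ones): a > b becomes a & ~b,
   a >= b becomes a | ~b, a != b is simply a ^ b, and a == b is
   ~(a ^ b); the < and <= cases reuse the > and >= forms with swapped
   operands, and the final negation for == is avoided below by swapping
   the then/else arms instead.  */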
8017 if (!vec_stmt)
8019 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8020 if (bitop1 != NOP_EXPR)
8022 machine_mode mode = TYPE_MODE (comp_vectype);
8023 optab optab;
8025 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8026 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8027 return false;
8029 if (bitop2 != NOP_EXPR)
8031 optab = optab_for_tree_code (bitop2, comp_vectype,
8032 optab_default);
8033 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8034 return false;
8037 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8038 cond_code))
8040 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8041 return true;
8043 return false;
8046 /* Transform. */
8048 if (!slp_node)
8050 vec_oprnds0.create (1);
8051 vec_oprnds1.create (1);
8052 vec_oprnds2.create (1);
8053 vec_oprnds3.create (1);
8056 /* Handle def. */
8057 scalar_dest = gimple_assign_lhs (stmt);
8058 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8060 /* Handle cond expr. */
8061 for (j = 0; j < ncopies; j++)
8063 gassign *new_stmt = NULL;
8064 if (j == 0)
8066 if (slp_node)
8068 auto_vec<tree, 4> ops;
8069 auto_vec<vec<tree>, 4> vec_defs;
8071 if (masked)
8072 ops.safe_push (cond_expr);
8073 else
8075 ops.safe_push (cond_expr0);
8076 ops.safe_push (cond_expr1);
8078 ops.safe_push (then_clause);
8079 ops.safe_push (else_clause);
8080 vect_get_slp_defs (ops, slp_node, &vec_defs);
8081 vec_oprnds3 = vec_defs.pop ();
8082 vec_oprnds2 = vec_defs.pop ();
8083 if (!masked)
8084 vec_oprnds1 = vec_defs.pop ();
8085 vec_oprnds0 = vec_defs.pop ();
8087 else
8089 gimple *gtemp;
8090 if (masked)
8092 vec_cond_lhs
8093 = vect_get_vec_def_for_operand (cond_expr, stmt,
8094 comp_vectype);
8095 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8096 &gtemp, &dts[0]);
8098 else
8100 vec_cond_lhs
8101 = vect_get_vec_def_for_operand (cond_expr0,
8102 stmt, comp_vectype);
8103 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8105 vec_cond_rhs
8106 = vect_get_vec_def_for_operand (cond_expr1,
8107 stmt, comp_vectype);
8108 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8110 if (reduc_index == 1)
8111 vec_then_clause = reduc_def;
8112 else
8114 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8115 stmt);
8116 vect_is_simple_use (then_clause, loop_vinfo,
8117 &gtemp, &dts[2]);
8119 if (reduc_index == 2)
8120 vec_else_clause = reduc_def;
8121 else
8123 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8124 stmt);
8125 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8129 else
8131 vec_cond_lhs
8132 = vect_get_vec_def_for_stmt_copy (dts[0],
8133 vec_oprnds0.pop ());
8134 if (!masked)
8135 vec_cond_rhs
8136 = vect_get_vec_def_for_stmt_copy (dts[1],
8137 vec_oprnds1.pop ());
8139 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8140 vec_oprnds2.pop ());
8141 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8142 vec_oprnds3.pop ());
8145 if (!slp_node)
8147 vec_oprnds0.quick_push (vec_cond_lhs);
8148 if (!masked)
8149 vec_oprnds1.quick_push (vec_cond_rhs);
8150 vec_oprnds2.quick_push (vec_then_clause);
8151 vec_oprnds3.quick_push (vec_else_clause);
8154 /* Arguments are ready. Create the new vector stmt. */
8155 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8157 vec_then_clause = vec_oprnds2[i];
8158 vec_else_clause = vec_oprnds3[i];
8160 if (masked)
8161 vec_compare = vec_cond_lhs;
8162 else
8164 vec_cond_rhs = vec_oprnds1[i];
8165 if (bitop1 == NOP_EXPR)
8166 vec_compare = build2 (cond_code, vec_cmp_type,
8167 vec_cond_lhs, vec_cond_rhs);
8168 else
8170 new_temp = make_ssa_name (vec_cmp_type);
8171 if (bitop1 == BIT_NOT_EXPR)
8172 new_stmt = gimple_build_assign (new_temp, bitop1,
8173 vec_cond_rhs);
8174 else
8175 new_stmt
8176 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8177 vec_cond_rhs);
8178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8179 if (bitop2 == NOP_EXPR)
8180 vec_compare = new_temp;
8181 else if (bitop2 == BIT_NOT_EXPR)
8183 /* Instead of doing ~x ? y : z do x ? z : y. */
8184 vec_compare = new_temp;
8185 std::swap (vec_then_clause, vec_else_clause);
8187 else
8189 vec_compare = make_ssa_name (vec_cmp_type);
8190 new_stmt
8191 = gimple_build_assign (vec_compare, bitop2,
8192 vec_cond_lhs, new_temp);
8193 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8197 new_temp = make_ssa_name (vec_dest);
8198 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8199 vec_compare, vec_then_clause,
8200 vec_else_clause);
8201 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8202 if (slp_node)
8203 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8206 if (slp_node)
8207 continue;
8209 if (j == 0)
8210 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8211 else
8212 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8214 prev_stmt_info = vinfo_for_stmt (new_stmt);
8217 vec_oprnds0.release ();
8218 vec_oprnds1.release ();
8219 vec_oprnds2.release ();
8220 vec_oprnds3.release ();
8222 return true;
8225 /* vectorizable_comparison.
8227 Check if STMT is a comparison expression that can be vectorized.
8228 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8229 comparison, put it in VEC_STMT, and insert it at GSI.
8231 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
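/* An illustrative sketch: a scalar mask definition such as

        flag = (a < b);

   is vectorized here into a single vector comparison

        vflag = va < vb;

   whose result has a vector boolean (mask) type; when both operands
   are themselves boolean vectors, the comparison is lowered to the
   bit operations selected via bitop1/bitop2 below.  */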
8233 static bool
8234 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8235 gimple **vec_stmt, tree reduc_def,
8236 slp_tree slp_node)
8238 tree lhs, rhs1, rhs2;
8239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8240 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8241 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8242 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8243 tree new_temp;
8244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8245 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8246 int ndts = 2;
8247 unsigned nunits;
8248 int ncopies;
8249 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8250 stmt_vec_info prev_stmt_info = NULL;
8251 int i, j;
8252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8253 vec<tree> vec_oprnds0 = vNULL;
8254 vec<tree> vec_oprnds1 = vNULL;
8255 gimple *def_stmt;
8256 tree mask_type;
8257 tree mask;
8259 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8260 return false;
8262 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8263 return false;
8265 mask_type = vectype;
8266 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8268 if (slp_node)
8269 ncopies = 1;
8270 else
8271 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8273 gcc_assert (ncopies >= 1);
8274 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8275 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8276 && reduc_def))
8277 return false;
8279 if (STMT_VINFO_LIVE_P (stmt_info))
8281 if (dump_enabled_p ())
8282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8283 "value used after loop.\n");
8284 return false;
8287 if (!is_gimple_assign (stmt))
8288 return false;
8290 code = gimple_assign_rhs_code (stmt);
8292 if (TREE_CODE_CLASS (code) != tcc_comparison)
8293 return false;
8295 rhs1 = gimple_assign_rhs1 (stmt);
8296 rhs2 = gimple_assign_rhs2 (stmt);
8298 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8299 &dts[0], &vectype1))
8300 return false;
8302 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8303 &dts[1], &vectype2))
8304 return false;
8306 if (vectype1 && vectype2
8307 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8308 return false;
8310 vectype = vectype1 ? vectype1 : vectype2;
8312 /* Invariant comparison. */
8313 if (!vectype)
8315 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8316 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8317 return false;
8319 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8320 return false;
8322 /* Can't compare mask and non-mask types. */
8323 if (vectype1 && vectype2
8324 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8325 return false;
8327 /* Boolean values may have another representation in vectors
8328 and therefore we prefer bit operations over comparison for
8329 them (which also works for scalar masks). We store opcodes
8330 to use in bitop1 and bitop2. Statement is vectorized as
8331 BITOP2 (rhs1 BITOP1 rhs2) or
8332 rhs1 BITOP2 (BITOP1 rhs2)
8333 depending on bitop1 and bitop2 arity. */
8334 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8336 if (code == GT_EXPR)
8338 bitop1 = BIT_NOT_EXPR;
8339 bitop2 = BIT_AND_EXPR;
8341 else if (code == GE_EXPR)
8343 bitop1 = BIT_NOT_EXPR;
8344 bitop2 = BIT_IOR_EXPR;
8346 else if (code == LT_EXPR)
8348 bitop1 = BIT_NOT_EXPR;
8349 bitop2 = BIT_AND_EXPR;
8350 std::swap (rhs1, rhs2);
8351 std::swap (dts[0], dts[1]);
8353 else if (code == LE_EXPR)
8355 bitop1 = BIT_NOT_EXPR;
8356 bitop2 = BIT_IOR_EXPR;
8357 std::swap (rhs1, rhs2);
8358 std::swap (dts[0], dts[1]);
8360 else
8362 bitop1 = BIT_XOR_EXPR;
8363 if (code == EQ_EXPR)
8364 bitop2 = BIT_NOT_EXPR;
8368 if (!vec_stmt)
8370 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8371 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8372 dts, ndts, NULL, NULL);
8373 if (bitop1 == NOP_EXPR)
8374 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8375 else
8377 machine_mode mode = TYPE_MODE (vectype);
8378 optab optab;
8380 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8381 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8382 return false;
8384 if (bitop2 != NOP_EXPR)
8386 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8387 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8388 return false;
8390 return true;
8394 /* Transform. */
8395 if (!slp_node)
8397 vec_oprnds0.create (1);
8398 vec_oprnds1.create (1);
8401 /* Handle def. */
8402 lhs = gimple_assign_lhs (stmt);
8403 mask = vect_create_destination_var (lhs, mask_type);
8405 /* Handle cmp expr. */
8406 for (j = 0; j < ncopies; j++)
8408 gassign *new_stmt = NULL;
8409 if (j == 0)
8411 if (slp_node)
8413 auto_vec<tree, 2> ops;
8414 auto_vec<vec<tree>, 2> vec_defs;
8416 ops.safe_push (rhs1);
8417 ops.safe_push (rhs2);
8418 vect_get_slp_defs (ops, slp_node, &vec_defs);
8419 vec_oprnds1 = vec_defs.pop ();
8420 vec_oprnds0 = vec_defs.pop ();
8422 else
8424 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8425 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8428 else
8430 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8431 vec_oprnds0.pop ());
8432 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8433 vec_oprnds1.pop ());
8436 if (!slp_node)
8438 vec_oprnds0.quick_push (vec_rhs1);
8439 vec_oprnds1.quick_push (vec_rhs2);
8442 /* Arguments are ready. Create the new vector stmt. */
8443 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8445 vec_rhs2 = vec_oprnds1[i];
8447 new_temp = make_ssa_name (mask);
8448 if (bitop1 == NOP_EXPR)
8450 new_stmt = gimple_build_assign (new_temp, code,
8451 vec_rhs1, vec_rhs2);
8452 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8454 else
8456 if (bitop1 == BIT_NOT_EXPR)
8457 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8458 else
8459 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8460 vec_rhs2);
8461 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8462 if (bitop2 != NOP_EXPR)
8464 tree res = make_ssa_name (mask);
8465 if (bitop2 == BIT_NOT_EXPR)
8466 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8467 else
8468 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8469 new_temp);
8470 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8473 if (slp_node)
8474 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8477 if (slp_node)
8478 continue;
8480 if (j == 0)
8481 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8482 else
8483 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8485 prev_stmt_info = vinfo_for_stmt (new_stmt);
8488 vec_oprnds0.release ();
8489 vec_oprnds1.release ();
8491 return true;
8494 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8495 can handle all live statements in the node. Otherwise return true
8496 if STMT is not live or if vectorizable_live_operation can handle it.
8497 GSI and VEC_STMT are as for vectorizable_live_operation. */
8499 static bool
8500 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8501 slp_tree slp_node, gimple **vec_stmt)
8503 if (slp_node)
8505 gimple *slp_stmt;
8506 unsigned int i;
8507 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8509 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8510 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8511 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8512 vec_stmt))
8513 return false;
8516 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8517 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8518 return false;
8520 return true;
8523 /* Make sure the statement is vectorizable. */
8525 bool
8526 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8527 slp_instance node_instance)
8529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8530 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8531 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8532 bool ok;
8533 gimple *pattern_stmt;
8534 gimple_seq pattern_def_seq;
8536 if (dump_enabled_p ())
8538 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8539 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8542 if (gimple_has_volatile_ops (stmt))
8544 if (dump_enabled_p ())
8545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8546 "not vectorized: stmt has volatile operands\n");
8548 return false;
8551 /* Skip stmts that do not need to be vectorized. In loops this is expected
8552 to include:
8553 - the COND_EXPR which is the loop exit condition
8554 - any LABEL_EXPRs in the loop
8555 - computations that are used only for array indexing or loop control.
8556 In basic blocks we only analyze statements that are a part of some SLP
8557 instance, therefore, all the statements are relevant.
8559 A pattern statement needs to be analyzed instead of the original statement
8560 if the original statement is not relevant.  Otherwise, we analyze both
8561 statements.  In basic blocks we are called from some SLP instance
8562 traversal; we don't analyze pattern stmts instead, since the pattern stmts
8563 will already be part of an SLP instance. */
8565 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8566 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8567 && !STMT_VINFO_LIVE_P (stmt_info))
8569 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8570 && pattern_stmt
8571 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8572 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8574 /* Analyze PATTERN_STMT instead of the original stmt. */
8575 stmt = pattern_stmt;
8576 stmt_info = vinfo_for_stmt (pattern_stmt);
8577 if (dump_enabled_p ())
8579 dump_printf_loc (MSG_NOTE, vect_location,
8580 "==> examining pattern statement: ");
8581 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8584 else
8586 if (dump_enabled_p ())
8587 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8589 return true;
8592 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8593 && node == NULL
8594 && pattern_stmt
8595 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8596 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8598 /* Analyze PATTERN_STMT too. */
8599 if (dump_enabled_p ())
8601 dump_printf_loc (MSG_NOTE, vect_location,
8602 "==> examining pattern statement: ");
8603 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8606 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8607 node_instance))
8608 return false;
8611 if (is_pattern_stmt_p (stmt_info)
8612 && node == NULL
8613 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8615 gimple_stmt_iterator si;
8617 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8619 gimple *pattern_def_stmt = gsi_stmt (si);
8620 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8621 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8623 /* Analyze def stmt of STMT if it's a pattern stmt. */
8624 if (dump_enabled_p ())
8626 dump_printf_loc (MSG_NOTE, vect_location,
8627 "==> examining pattern def statement: ");
8628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8631 if (!vect_analyze_stmt (pattern_def_stmt,
8632 need_to_vectorize, node, node_instance))
8633 return false;
8638 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8640 case vect_internal_def:
8641 break;
8643 case vect_reduction_def:
8644 case vect_nested_cycle:
8645 gcc_assert (!bb_vinfo
8646 && (relevance == vect_used_in_outer
8647 || relevance == vect_used_in_outer_by_reduction
8648 || relevance == vect_used_by_reduction
8649 || relevance == vect_unused_in_scope
8650 || relevance == vect_used_only_live));
8651 break;
8653 case vect_induction_def:
8654 gcc_assert (!bb_vinfo);
8655 break;
8657 case vect_constant_def:
8658 case vect_external_def:
8659 case vect_unknown_def_type:
8660 default:
8661 gcc_unreachable ();
8664 if (STMT_VINFO_RELEVANT_P (stmt_info))
8666 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8667 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8668 || (is_gimple_call (stmt)
8669 && gimple_call_lhs (stmt) == NULL_TREE));
8670 *need_to_vectorize = true;
8673 if (PURE_SLP_STMT (stmt_info) && !node)
8675 dump_printf_loc (MSG_NOTE, vect_location,
8676 "handled only by SLP analysis\n");
8677 return true;
8680 ok = true;
8681 if (!bb_vinfo
8682 && (STMT_VINFO_RELEVANT_P (stmt_info)
8683 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8684 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8685 || vectorizable_conversion (stmt, NULL, NULL, node)
8686 || vectorizable_shift (stmt, NULL, NULL, node)
8687 || vectorizable_operation (stmt, NULL, NULL, node)
8688 || vectorizable_assignment (stmt, NULL, NULL, node)
8689 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8690 || vectorizable_call (stmt, NULL, NULL, node)
8691 || vectorizable_store (stmt, NULL, NULL, node)
8692 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8693 || vectorizable_induction (stmt, NULL, NULL, node)
8694 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8695 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8696 else
8698 if (bb_vinfo)
8699 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8700 || vectorizable_conversion (stmt, NULL, NULL, node)
8701 || vectorizable_shift (stmt, NULL, NULL, node)
8702 || vectorizable_operation (stmt, NULL, NULL, node)
8703 || vectorizable_assignment (stmt, NULL, NULL, node)
8704 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8705 || vectorizable_call (stmt, NULL, NULL, node)
8706 || vectorizable_store (stmt, NULL, NULL, node)
8707 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8708 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8711 if (!ok)
8713 if (dump_enabled_p ())
8715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8716 "not vectorized: relevant stmt not ");
8717 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8718 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8721 return false;
8724 if (bb_vinfo)
8725 return true;
8727 /* Stmts that are (also) "live" (i.e., used outside of the loop)
8728 need extra handling, except for vectorizable reductions. */
8729 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8730 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8732 if (dump_enabled_p ())
8734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8735 "not vectorized: live stmt not supported: ");
8736 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8739 return false;
8742 return true;
8746 /* Function vect_transform_stmt.
8748 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8750 bool
8751 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8752 bool *grouped_store, slp_tree slp_node,
8753 slp_instance slp_node_instance)
8755 bool is_store = false;
8756 gimple *vec_stmt = NULL;
8757 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8758 bool done;
8760 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8761 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8763 switch (STMT_VINFO_TYPE (stmt_info))
8765 case type_demotion_vec_info_type:
8766 case type_promotion_vec_info_type:
8767 case type_conversion_vec_info_type:
8768 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8769 gcc_assert (done);
8770 break;
8772 case induc_vec_info_type:
8773 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8774 gcc_assert (done);
8775 break;
8777 case shift_vec_info_type:
8778 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8779 gcc_assert (done);
8780 break;
8782 case op_vec_info_type:
8783 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8784 gcc_assert (done);
8785 break;
8787 case assignment_vec_info_type:
8788 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8789 gcc_assert (done);
8790 break;
8792 case load_vec_info_type:
8793 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8794 slp_node_instance);
8795 gcc_assert (done);
8796 break;
8798 case store_vec_info_type:
8799 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8800 gcc_assert (done);
8801 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8803 /* In case of interleaving, the whole chain is vectorized when the
8804 last store in the chain is reached. Store stmts before the last
8805 one are skipped, and their stmt_vec_info shouldn't be freed
8806 in the meantime. */
8807 *grouped_store = true;
8808 if (STMT_VINFO_VEC_STMT (stmt_info))
8809 is_store = true;
8811 else
8812 is_store = true;
8813 break;
8815 case condition_vec_info_type:
8816 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8817 gcc_assert (done);
8818 break;
8820 case comparison_vec_info_type:
8821 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8822 gcc_assert (done);
8823 break;
8825 case call_vec_info_type:
8826 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8827 stmt = gsi_stmt (*gsi);
8828 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8829 is_store = true;
8830 break;
8832 case call_simd_clone_vec_info_type:
8833 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8834 stmt = gsi_stmt (*gsi);
8835 break;
8837 case reduc_vec_info_type:
8838 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8839 slp_node_instance);
8840 gcc_assert (done);
8841 break;
8843 default:
8844 if (!STMT_VINFO_LIVE_P (stmt_info))
8846 if (dump_enabled_p ())
8847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8848 "stmt not supported.\n");
8849 gcc_unreachable ();
8853 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8854 This would break hybrid SLP vectorization. */
8855 if (slp_node)
8856 gcc_assert (!vec_stmt
8857 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8859 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8860 is being vectorized, but outside the immediately enclosing loop. */
8861 if (vec_stmt
8862 && STMT_VINFO_LOOP_VINFO (stmt_info)
8863 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8864 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8865 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8866 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8867 || STMT_VINFO_RELEVANT (stmt_info) ==
8868 vect_used_in_outer_by_reduction))
8870 struct loop *innerloop = LOOP_VINFO_LOOP (
8871 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8872 imm_use_iterator imm_iter;
8873 use_operand_p use_p;
8874 tree scalar_dest;
8875 gimple *exit_phi;
8877 if (dump_enabled_p ())
8878 dump_printf_loc (MSG_NOTE, vect_location,
8879 "Record the vdef for outer-loop vectorization.\n");
8881 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8882 (to be used when vectorizing outer-loop stmts that use the DEF of
8883 STMT). */
8884 if (gimple_code (stmt) == GIMPLE_PHI)
8885 scalar_dest = PHI_RESULT (stmt);
8886 else
8887 scalar_dest = gimple_assign_lhs (stmt);
8889 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8891 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8893 exit_phi = USE_STMT (use_p);
8894 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8899 /* Handle stmts whose DEF is used outside the loop-nest that is
8900 being vectorized. */
8901 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8903 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8904 gcc_assert (done);
8907 if (vec_stmt)
8908 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8910 return is_store;
8914 /* Remove a group of stores (for SLP or interleaving), free their
8915 stmt_vec_info. */
8917 void
8918 vect_remove_stores (gimple *first_stmt)
8920 gimple *next = first_stmt;
8921 gimple *tmp;
8922 gimple_stmt_iterator next_si;
8924 while (next)
8926 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8928 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8929 if (is_pattern_stmt_p (stmt_info))
8930 next = STMT_VINFO_RELATED_STMT (stmt_info);
8931 /* Free the attached stmt_vec_info and remove the stmt. */
8932 next_si = gsi_for_stmt (next);
8933 unlink_stmt_vdef (next);
8934 gsi_remove (&next_si, true);
8935 release_defs (next);
8936 free_stmt_vec_info (next);
8937 next = tmp;
8942 /* Function new_stmt_vec_info.
8944 Create and initialize a new stmt_vec_info struct for STMT. */
8946 stmt_vec_info
8947 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8949 stmt_vec_info res;
8950 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8952 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8953 STMT_VINFO_STMT (res) = stmt;
8954 res->vinfo = vinfo;
8955 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8956 STMT_VINFO_LIVE_P (res) = false;
8957 STMT_VINFO_VECTYPE (res) = NULL;
8958 STMT_VINFO_VEC_STMT (res) = NULL;
8959 STMT_VINFO_VECTORIZABLE (res) = true;
8960 STMT_VINFO_IN_PATTERN_P (res) = false;
8961 STMT_VINFO_RELATED_STMT (res) = NULL;
8962 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8963 STMT_VINFO_DATA_REF (res) = NULL;
8964 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8965 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8967 if (gimple_code (stmt) == GIMPLE_PHI
8968 && is_loop_header_bb_p (gimple_bb (stmt)))
8969 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8970 else
8971 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8973 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8974 STMT_SLP_TYPE (res) = loop_vect;
8975 STMT_VINFO_NUM_SLP_USES (res) = 0;
8977 GROUP_FIRST_ELEMENT (res) = NULL;
8978 GROUP_NEXT_ELEMENT (res) = NULL;
8979 GROUP_SIZE (res) = 0;
8980 GROUP_STORE_COUNT (res) = 0;
8981 GROUP_GAP (res) = 0;
8982 GROUP_SAME_DR_STMT (res) = NULL;
8984 return res;
8988 /* Create a hash table for stmt_vec_info. */
8990 void
8991 init_stmt_vec_info_vec (void)
8993 gcc_assert (!stmt_vec_info_vec.exists ());
8994 stmt_vec_info_vec.create (50);
8998 /* Free hash table for stmt_vec_info. */
9000 void
9001 free_stmt_vec_info_vec (void)
9003 unsigned int i;
9004 stmt_vec_info info;
9005 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9006 if (info != NULL)
9007 free_stmt_vec_info (STMT_VINFO_STMT (info));
9008 gcc_assert (stmt_vec_info_vec.exists ());
9009 stmt_vec_info_vec.release ();
9013 /* Free stmt vectorization related info. */
9015 void
9016 free_stmt_vec_info (gimple *stmt)
9018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9020 if (!stmt_info)
9021 return;
9023 /* Check if this statement has a related "pattern stmt"
9024 (introduced by the vectorizer during the pattern recognition
9025 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9026 too. */
9027 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9029 stmt_vec_info patt_info
9030 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9031 if (patt_info)
9033 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9034 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9035 gimple_set_bb (patt_stmt, NULL);
9036 tree lhs = gimple_get_lhs (patt_stmt);
9037 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9038 release_ssa_name (lhs);
9039 if (seq)
9041 gimple_stmt_iterator si;
9042 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9044 gimple *seq_stmt = gsi_stmt (si);
9045 gimple_set_bb (seq_stmt, NULL);
9046 lhs = gimple_get_lhs (seq_stmt);
9047 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9048 release_ssa_name (lhs);
9049 free_stmt_vec_info (seq_stmt);
9052 free_stmt_vec_info (patt_stmt);
9056 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9057 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9058 set_vinfo_for_stmt (stmt, NULL);
9059 free (stmt_info);
9063 /* Function get_vectype_for_scalar_type_and_size.
9065 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9066 by the target. */
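/* A purely illustrative, target-dependent example: on a target whose
   vectors are 16 bytes wide, SCALAR_TYPE == int and SIZE == 16 would
   yield a 4-element integer vector type (V4SImode), while SIZE == 0
   lets the target pick its preferred vector size.  */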
9068 static tree
9069 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9071 tree orig_scalar_type = scalar_type;
9072 scalar_mode inner_mode;
9073 machine_mode simd_mode;
9074 int nunits;
9075 tree vectype;
9077 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9078 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9079 return NULL_TREE;
9081 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9083 /* For vector types of elements whose mode precision doesn't
9084 match their type's precision we use an element type of mode
9085 precision.  The vectorization routines will have to make sure
9086 they support the proper result truncation/extension.
9087 We also make sure to build vector types with INTEGER_TYPE
9088 component type only. */
9089 if (INTEGRAL_TYPE_P (scalar_type)
9090 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9091 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9092 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9093 TYPE_UNSIGNED (scalar_type));
9095 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9096 When the component mode passes the above test simply use a type
9097 corresponding to that mode. The theory is that any use that
9098 would cause problems with this will disable vectorization anyway. */
9099 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9100 && !INTEGRAL_TYPE_P (scalar_type))
9101 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9103 /* We can't build a vector type of elements with alignment bigger than
9104 their size. */
9105 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9106 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9107 TYPE_UNSIGNED (scalar_type));
9109 /* If we fell back to using the mode, fail if there was
9110 no scalar type for it. */
9111 if (scalar_type == NULL_TREE)
9112 return NULL_TREE;
9114 /* If no size was supplied, use the mode the target prefers.  Otherwise
9115 look up a vector mode of the specified size. */
9116 if (size == 0)
9117 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9118 else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
9119 return NULL_TREE;
9120 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9121 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9122 if (nunits < 1)
9123 return NULL_TREE;
9125 vectype = build_vector_type (scalar_type, nunits);
9127 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9128 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9129 return NULL_TREE;
9131 /* Re-attach the address-space qualifier if we canonicalized the scalar
9132 type. */
9133 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9134 return build_qualified_type
9135 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9137 return vectype;
9140 unsigned int current_vector_size;
9142 /* Function get_vectype_for_scalar_type.
9144 Returns the vector type corresponding to SCALAR_TYPE as supported
9145 by the target. */
9147 tree
9148 get_vectype_for_scalar_type (tree scalar_type)
9150 tree vectype;
9151 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9152 current_vector_size);
9153 if (vectype
9154 && current_vector_size == 0)
9155 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9156 return vectype;
9159 /* Function get_mask_type_for_scalar_type.
9161 Returns the mask type corresponding to a result of comparison
9162 of vectors of specified SCALAR_TYPE as supported by target. */
9164 tree
9165 get_mask_type_for_scalar_type (tree scalar_type)
9167 tree vectype = get_vectype_for_scalar_type (scalar_type);
9169 if (!vectype)
9170 return NULL;
9172 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9173 current_vector_size);
9176 /* Function get_same_sized_vectype
9178 Returns a vector type corresponding to SCALAR_TYPE of size
9179 VECTOR_TYPE if supported by the target. */
9181 tree
9182 get_same_sized_vectype (tree scalar_type, tree vector_type)
9184 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9185 return build_same_sized_truth_vector_type (vector_type);
9187 return get_vectype_for_scalar_type_and_size
9188 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9191 /* Function vect_is_simple_use.
9193 Input:
9194 VINFO - the vect info of the loop or basic block that is being vectorized.
9195 OPERAND - operand in the loop or bb.
9196 Output:
9197 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9198 DT - the type of definition
9200 Returns whether a stmt with OPERAND can be vectorized.
9201 For loops, supportable operands are constants, loop invariants, and operands
9202 that are defined by the current iteration of the loop. Unsupportable
9203 operands are those that are defined by a previous iteration of the loop (as
9204 is the case in reduction/induction computations).
9205 For basic blocks, supportable operands are constants and bb invariants.
9206 For now, operands defined outside the basic block are not supported. */
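/* An illustrative sketch of the classification: in a loop such as

        for (i = 0; i < n; i++)
          a[i] = b[i] * c + 7;

   the constant 7 is vect_constant_def, the loop-invariant c is
   vect_external_def, the value loaded from b[i] (defined inside the
   loop) is vect_internal_def, and the index i itself is defined by an
   induction (vect_induction_def).  */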
9208 bool
9209 vect_is_simple_use (tree operand, vec_info *vinfo,
9210 gimple **def_stmt, enum vect_def_type *dt)
9212 *def_stmt = NULL;
9213 *dt = vect_unknown_def_type;
9215 if (dump_enabled_p ())
9217 dump_printf_loc (MSG_NOTE, vect_location,
9218 "vect_is_simple_use: operand ");
9219 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9220 dump_printf (MSG_NOTE, "\n");
9223 if (CONSTANT_CLASS_P (operand))
9225 *dt = vect_constant_def;
9226 return true;
9229 if (is_gimple_min_invariant (operand))
9231 *dt = vect_external_def;
9232 return true;
9235 if (TREE_CODE (operand) != SSA_NAME)
9237 if (dump_enabled_p ())
9238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9239 "not ssa-name.\n");
9240 return false;
9243 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9245 *dt = vect_external_def;
9246 return true;
9249 *def_stmt = SSA_NAME_DEF_STMT (operand);
9250 if (dump_enabled_p ())
9252 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9253 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9256 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9257 *dt = vect_external_def;
9258 else
9260 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9261 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9264 if (dump_enabled_p ())
9266 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9267 switch (*dt)
9269 case vect_uninitialized_def:
9270 dump_printf (MSG_NOTE, "uninitialized\n");
9271 break;
9272 case vect_constant_def:
9273 dump_printf (MSG_NOTE, "constant\n");
9274 break;
9275 case vect_external_def:
9276 dump_printf (MSG_NOTE, "external\n");
9277 break;
9278 case vect_internal_def:
9279 dump_printf (MSG_NOTE, "internal\n");
9280 break;
9281 case vect_induction_def:
9282 dump_printf (MSG_NOTE, "induction\n");
9283 break;
9284 case vect_reduction_def:
9285 dump_printf (MSG_NOTE, "reduction\n");
9286 break;
9287 case vect_double_reduction_def:
9288 dump_printf (MSG_NOTE, "double reduction\n");
9289 break;
9290 case vect_nested_cycle:
9291 dump_printf (MSG_NOTE, "nested cycle\n");
9292 break;
9293 case vect_unknown_def_type:
9294 dump_printf (MSG_NOTE, "unknown\n");
9295 break;
9299 if (*dt == vect_unknown_def_type)
9301 if (dump_enabled_p ())
9302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9303 "Unsupported pattern.\n");
9304 return false;
9307 switch (gimple_code (*def_stmt))
9309 case GIMPLE_PHI:
9310 case GIMPLE_ASSIGN:
9311 case GIMPLE_CALL:
9312 break;
9313 default:
9314 if (dump_enabled_p ())
9315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9316 "unsupported defining stmt:\n");
9317 return false;
9320 return true;
9323 /* Function vect_is_simple_use.
9325 Same as vect_is_simple_use but also determines the vector operand
9326 type of OPERAND and stores it to *VECTYPE. If the definition of
9327 OPERAND is vect_uninitialized_def, vect_constant_def or
9328 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9329 is responsible for computing the best suited vector type for the
9330 scalar operand. */
9332 bool
9333 vect_is_simple_use (tree operand, vec_info *vinfo,
9334 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9336 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9337 return false;
9339 /* Now get a vector type if the def is internal, otherwise supply
9340 NULL_TREE and leave it up to the caller to figure out a proper
9341 type for the use stmt. */
9342 if (*dt == vect_internal_def
9343 || *dt == vect_induction_def
9344 || *dt == vect_reduction_def
9345 || *dt == vect_double_reduction_def
9346 || *dt == vect_nested_cycle)
9348 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9350 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9351 && !STMT_VINFO_RELEVANT (stmt_info)
9352 && !STMT_VINFO_LIVE_P (stmt_info))
9353 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9355 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9356 gcc_assert (*vectype != NULL_TREE);
9358 else if (*dt == vect_uninitialized_def
9359 || *dt == vect_constant_def
9360 || *dt == vect_external_def)
9361 *vectype = NULL_TREE;
9362 else
9363 gcc_unreachable ();
9365 return true;
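/* Editor's illustrative sketch, not part of the original file: how a caller
   might obtain a vector type for an operand via the overload above.  The
   helper name example_operand_vectype is hypothetical.  For constant and
   external defs *VECTYPE comes back as NULL_TREE, so the caller derives a
   type from the operand's scalar type instead.  */

static tree ATTRIBUTE_UNUSED
example_operand_vectype (tree op, vec_info *vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree vectype;

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &vectype))
    return NULL_TREE;

  /* Internal/induction/reduction defs carry the vector type of their
     defining statement; constants and externals do not.  */
  if (!vectype)
    vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  return vectype;
}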
9369 /* Function supportable_widening_operation
9371 Check whether an operation represented by the code CODE is a
9372 widening operation that is supported by the target platform in
9373 vector form (i.e., when operating on arguments of type VECTYPE_IN
9374 producing a result of type VECTYPE_OUT).
9376 Widening operations we currently support are NOP (CONVERT), FLOAT,
9377 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if
9378 these operations are supported by the target platform either directly
9379 (via vector tree-codes) or via target builtins.
9381 Output:
9382 - CODE1 and CODE2 are codes of vector operations to be used when
9383 vectorizing the operation, if available.
9384 - MULTI_STEP_CVT determines the number of required intermediate steps in
9385 case of multi-step conversion (like char->short->int - in that case
9386 MULTI_STEP_CVT will be 1).
9387 - INTERM_TYPES contains the intermediate type required to perform the
9388 widening operation (short in the above example). */
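/* Editor's illustrative note, not part of the original file: on a target
   with the corresponding unpack optabs, a V16QI -> V8HI conversion needs a
   single step, with CODE1/CODE2 set to VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR
   and MULTI_STEP_CVT set to 0, roughly expanding to

     vect_lo = [vec_unpack_lo_expr] vect_in;
     vect_hi = [vec_unpack_hi_expr] vect_in;

   while a V16QI -> V4SI conversion would additionally record the V8HI
   intermediate type in INTERM_TYPES and set MULTI_STEP_CVT to 1.  */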
9390 bool
9391 supportable_widening_operation (enum tree_code code, gimple *stmt,
9392 tree vectype_out, tree vectype_in,
9393 enum tree_code *code1, enum tree_code *code2,
9394 int *multi_step_cvt,
9395 vec<tree> *interm_types)
9397 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9398 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9399 struct loop *vect_loop = NULL;
9400 machine_mode vec_mode;
9401 enum insn_code icode1, icode2;
9402 optab optab1, optab2;
9403 tree vectype = vectype_in;
9404 tree wide_vectype = vectype_out;
9405 enum tree_code c1, c2;
9406 int i;
9407 tree prev_type, intermediate_type;
9408 machine_mode intermediate_mode, prev_mode;
9409 optab optab3, optab4;
9411 *multi_step_cvt = 0;
9412 if (loop_info)
9413 vect_loop = LOOP_VINFO_LOOP (loop_info);
9415 switch (code)
9417 case WIDEN_MULT_EXPR:
9418 /* The result of a vectorized widening operation usually requires
9419 two vectors (because the widened results do not fit into one vector).
9420 The generated vector results would normally be expected to be
9421 generated in the same order as in the original scalar computation,
9422 i.e. if 8 results are generated in each vector iteration, they are
9423 to be organized as follows:
9424 vect1: [res1,res2,res3,res4],
9425 vect2: [res5,res6,res7,res8].
9427 However, in the special case that the result of the widening
9428 operation is used in a reduction computation only, the order doesn't
9429 matter (because when vectorizing a reduction we change the order of
9430 the computation). Some targets can take advantage of this and
9431 generate more efficient code. For example, targets like Altivec,
9432 which support widen_mult using a sequence of {mult_even,mult_odd},
9433 generate the following vectors:
9434 vect1: [res1,res3,res5,res7],
9435 vect2: [res2,res4,res6,res8].
9437 When vectorizing outer-loops, we execute the inner-loop sequentially
9438 (each vectorized inner-loop iteration contributes to VF outer-loop
9439 iterations in parallel). We therefore don't allow changing the
9440 order of the computation in the inner loop during outer-loop
9441 vectorization. */
9442 /* TODO: Another case in which order doesn't *really* matter is when we
9443 widen and then contract again, e.g. (short)((int)x * y >> 8).
9444 Normally, pack_trunc performs an even/odd permute, whereas the
9445 repack from an even/odd expansion would be an interleave, which
9446 would be significantly simpler for e.g. AVX2. */
9447 /* In any case, in order to avoid duplicating the code below, recurse
9448 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9449 are properly set up for the caller. If we fail, we'll continue with
9450 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9451 if (vect_loop
9452 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9453 && !nested_in_vect_loop_p (vect_loop, stmt)
9454 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9455 stmt, vectype_out, vectype_in,
9456 code1, code2, multi_step_cvt,
9457 interm_types))
9459 /* Elements in a vector with the vect_used_by_reduction property cannot
9460 be reordered if the use chain with this property does not have the
9461 same operation. One such example is s += a * b, where elements
9462 in a and b cannot be reordered. Here we check if the vector defined
9463 by STMT is only directly used in the reduction statement. */
9464 tree lhs = gimple_assign_lhs (stmt);
9465 use_operand_p dummy;
9466 gimple *use_stmt;
9467 stmt_vec_info use_stmt_info = NULL;
9468 if (single_imm_use (lhs, &dummy, &use_stmt)
9469 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9470 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9471 return true;
9473 c1 = VEC_WIDEN_MULT_LO_EXPR;
9474 c2 = VEC_WIDEN_MULT_HI_EXPR;
9475 break;
9477 case DOT_PROD_EXPR:
9478 c1 = DOT_PROD_EXPR;
9479 c2 = DOT_PROD_EXPR;
9480 break;
9482 case SAD_EXPR:
9483 c1 = SAD_EXPR;
9484 c2 = SAD_EXPR;
9485 break;
9487 case VEC_WIDEN_MULT_EVEN_EXPR:
9488 /* Support the recursion induced just above. */
9489 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9490 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9491 break;
9493 case WIDEN_LSHIFT_EXPR:
9494 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9495 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9496 break;
9498 CASE_CONVERT:
9499 c1 = VEC_UNPACK_LO_EXPR;
9500 c2 = VEC_UNPACK_HI_EXPR;
9501 break;
9503 case FLOAT_EXPR:
9504 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9505 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9506 break;
9508 case FIX_TRUNC_EXPR:
9509 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9510 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9511 computing the operation. */
9512 return false;
9514 default:
9515 gcc_unreachable ();
9518 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9519 std::swap (c1, c2);
9521 if (code == FIX_TRUNC_EXPR)
9523 /* The signedness is determined from the output operand. */
9524 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9525 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9527 else
9529 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9530 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9533 if (!optab1 || !optab2)
9534 return false;
9536 vec_mode = TYPE_MODE (vectype);
9537 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9538 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9539 return false;
9541 *code1 = c1;
9542 *code2 = c2;
9544 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9545 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9546 /* For scalar masks we may have different boolean
9547 vector types having the same QImode. Thus we
9548 add an additional check on the number of elements. */
9549 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9550 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9551 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9553 /* Check if it's a multi-step conversion that can be done using intermediate
9554 types. */
9556 prev_type = vectype;
9557 prev_mode = vec_mode;
9559 if (!CONVERT_EXPR_CODE_P (code))
9560 return false;
9562 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9563 intermediate steps in the promotion sequence. We try
9564 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9565 not. */
9566 interm_types->create (MAX_INTERM_CVT_STEPS);
9567 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9569 intermediate_mode = insn_data[icode1].operand[0].mode;
9570 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9572 intermediate_type
9573 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9574 current_vector_size);
9575 if (intermediate_mode != TYPE_MODE (intermediate_type))
9576 return false;
9578 else
9579 intermediate_type
9580 = lang_hooks.types.type_for_mode (intermediate_mode,
9581 TYPE_UNSIGNED (prev_type));
9583 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9584 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9586 if (!optab3 || !optab4
9587 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9588 || insn_data[icode1].operand[0].mode != intermediate_mode
9589 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9590 || insn_data[icode2].operand[0].mode != intermediate_mode
9591 || ((icode1 = optab_handler (optab3, intermediate_mode))
9592 == CODE_FOR_nothing)
9593 || ((icode2 = optab_handler (optab4, intermediate_mode))
9594 == CODE_FOR_nothing))
9595 break;
9597 interm_types->quick_push (intermediate_type);
9598 (*multi_step_cvt)++;
9600 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9601 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9602 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9603 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9604 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9606 prev_type = intermediate_type;
9607 prev_mode = intermediate_mode;
9610 interm_types->release ();
9611 return false;
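/* Editor's illustrative sketch, not part of the original file: querying
   widening support for a NOP conversion, assuming the hypothetical helper
   name example_widening_supported_p.  On success the caller owns
   INTERM_TYPES and must release it.  */

static bool ATTRIBUTE_UNUSED
example_widening_supported_p (gimple *stmt, tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                            vectype_in, &code1, &code2,
                                            &multi_step_cvt, &interm_types);
  if (ok && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "widening needs %d intermediate step(s)\n",
                     multi_step_cvt);
  interm_types.release ();
  return ok;
}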
9615 /* Function supportable_narrowing_operation
9617 Check whether an operation represented by the code CODE is a
9618 narrowing operation that is supported by the target platform in
9619 vector form (i.e., when operating on arguments of type VECTYPE_IN
9620 and producing a result of type VECTYPE_OUT).
9622 Narrowing operations we currently support are NOP (CONVERT) and
9623 FIX_TRUNC. This function checks if these operations are supported by
9624 the target platform directly via vector tree-codes.
9626 Output:
9627 - CODE1 is the code of a vector operation to be used when
9628 vectorizing the operation, if available.
9629 - MULTI_STEP_CVT determines the number of required intermediate steps in
9630 case of multi-step conversion (like int->short->char - in that case
9631 MULTI_STEP_CVT will be 1).
9632 - INTERM_TYPES contains the intermediate type required to perform the
9633 narrowing operation (short in the above example). */
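/* Editor's illustrative note, not part of the original file: narrowing packs
   two input vectors into one result, so a V8HI -> V16QI conversion is a
   single VEC_PACK_TRUNC_EXPR step, roughly

     vect_out = VEC_PACK_TRUNC_EXPR <vect_in0, vect_in1>;

   while a V4SI -> V16QI conversion goes through a V8HI intermediate type,
   recorded in INTERM_TYPES with MULTI_STEP_CVT set to 1.  */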
9635 bool
9636 supportable_narrowing_operation (enum tree_code code,
9637 tree vectype_out, tree vectype_in,
9638 enum tree_code *code1, int *multi_step_cvt,
9639 vec<tree> *interm_types)
9641 machine_mode vec_mode;
9642 enum insn_code icode1;
9643 optab optab1, interm_optab;
9644 tree vectype = vectype_in;
9645 tree narrow_vectype = vectype_out;
9646 enum tree_code c1;
9647 tree intermediate_type, prev_type;
9648 machine_mode intermediate_mode, prev_mode;
9649 int i;
9650 bool uns;
9652 *multi_step_cvt = 0;
9653 switch (code)
9655 CASE_CONVERT:
9656 c1 = VEC_PACK_TRUNC_EXPR;
9657 break;
9659 case FIX_TRUNC_EXPR:
9660 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9661 break;
9663 case FLOAT_EXPR:
9664 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9665 tree code and optabs used for computing the operation. */
9666 return false;
9668 default:
9669 gcc_unreachable ();
9672 if (code == FIX_TRUNC_EXPR)
9673 /* The signedness is determined from the output operand. */
9674 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9675 else
9676 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9678 if (!optab1)
9679 return false;
9681 vec_mode = TYPE_MODE (vectype);
9682 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9683 return false;
9685 *code1 = c1;
9687 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9688 /* For scalar masks we may have different boolean
9689 vector types having the same QImode. Thus we
9690 add an additional check on the number of elements. */
9691 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9692 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9693 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9695 /* Check if it's a multi-step conversion that can be done using intermediate
9696 types. */
9697 prev_mode = vec_mode;
9698 prev_type = vectype;
9699 if (code == FIX_TRUNC_EXPR)
9700 uns = TYPE_UNSIGNED (vectype_out);
9701 else
9702 uns = TYPE_UNSIGNED (vectype);
9704 /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
9705 integer conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR
9706 is often more costly than signed. */
9707 if (code == FIX_TRUNC_EXPR && uns)
9709 enum insn_code icode2;
9711 intermediate_type
9712 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9713 interm_optab
9714 = optab_for_tree_code (c1, intermediate_type, optab_default);
9715 if (interm_optab != unknown_optab
9716 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9717 && insn_data[icode1].operand[0].mode
9718 == insn_data[icode2].operand[0].mode)
9720 uns = false;
9721 optab1 = interm_optab;
9722 icode1 = icode2;
9726 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9727 intermediate steps in the narrowing sequence. We try
9728 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9729 interm_types->create (MAX_INTERM_CVT_STEPS);
9730 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9732 intermediate_mode = insn_data[icode1].operand[0].mode;
9733 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9735 intermediate_type
9736 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9737 current_vector_size);
9738 if (intermediate_mode != TYPE_MODE (intermediate_type))
9739 return false;
9741 else
9742 intermediate_type
9743 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9744 interm_optab
9745 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9746 optab_default);
9747 if (!interm_optab
9748 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9749 || insn_data[icode1].operand[0].mode != intermediate_mode
9750 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9751 == CODE_FOR_nothing))
9752 break;
9754 interm_types->quick_push (intermediate_type);
9755 (*multi_step_cvt)++;
9757 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9758 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9759 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9760 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9762 prev_mode = intermediate_mode;
9763 prev_type = intermediate_type;
9764 optab1 = interm_optab;
9767 interm_types->release ();
9768 return false;
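/* Editor's illustrative sketch, not part of the original file: querying
   narrowing support, assuming the hypothetical helper name
   example_narrowing_supported_p.  With MULTI_STEP_CVT intermediate steps the
   caller would emit MULTI_STEP_CVT + 1 rounds of pack statements, walking
   through the types recorded in INTERM_TYPES.  */

static bool ATTRIBUTE_UNUSED
example_narrowing_supported_p (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_narrowing_operation (NOP_EXPR, vectype_out,
                                             vectype_in, &code1,
                                             &multi_step_cvt, &interm_types);
  if (ok && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "narrowing uses %d intermediate type(s)\n",
                     multi_step_cvt);
  interm_types.release ();
  return ok;
}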