[official-gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
103 kind = vector_scatter_store;
104 if (body_cost_vec)
106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107 stmt_info_for_cost si = { count, kind,
108 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
109 misalign };
110 body_cost_vec->safe_push (si);
111 return (unsigned)
112 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 else
115 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
116 count, kind, stmt_info, misalign, where);
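/* Illustrative use (a hypothetical sketch, not a caller in this file):
   given a caller-owned stmt_vector_for_cost *body_cost_vec and the
   statement's stmt_info and data reference dr, queueing the body cost
   of two unaligned vector loads for later processing would look like

       unsigned estimate
         = record_stmt_cost (body_cost_vec, 2, unaligned_load,
                             stmt_info, DR_MISALIGNMENT (dr), vect_body);

   With a non-NULL cost vector the entry is only saved and the return
   value is a preliminary estimate; with a NULL vector the cost is
   passed straight to the target via add_stmt_cost.  */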
119 /* Return a variable of type ELEM_TYPE[NELEMS]. */
121 static tree
122 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
124 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
125 "vect_array");
128 /* ARRAY is an array of vectors created by create_vector_array.
129 Return an SSA_NAME for the vector in index N. The reference
130 is part of the vectorization of STMT and the vector is associated
131 with scalar destination SCALAR_DEST. */
133 static tree
134 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
135 tree array, unsigned HOST_WIDE_INT n)
137 tree vect_type, vect, vect_name, array_ref;
138 gimple *new_stmt;
140 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
141 vect_type = TREE_TYPE (TREE_TYPE (array));
142 vect = vect_create_destination_var (scalar_dest, vect_type);
143 array_ref = build4 (ARRAY_REF, vect_type, array,
144 build_int_cst (size_type_node, n),
145 NULL_TREE, NULL_TREE);
147 new_stmt = gimple_build_assign (vect, array_ref);
148 vect_name = make_ssa_name (vect, new_stmt);
149 gimple_assign_set_lhs (new_stmt, vect_name);
150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
152 return vect_name;
155 /* ARRAY is an array of vectors created by create_vector_array.
156 Emit code to store SSA_NAME VECT in index N of the array.
157 The store is part of the vectorization of STMT. */
159 static void
160 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
161 tree array, unsigned HOST_WIDE_INT n)
163 tree array_ref;
164 gimple *new_stmt;
166 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
167 build_int_cst (size_type_node, n),
168 NULL_TREE, NULL_TREE);
170 new_stmt = gimple_build_assign (array_ref, vect);
171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
174 /* PTR is a pointer to an array of type TYPE. Return a representation
175 of *PTR. The memory reference replaces those in FIRST_DR
176 (and its group). */
178 static tree
179 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
181 tree mem_ref;
183 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
184 /* Arrays have the same alignment as their type. */
185 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
186 return mem_ref;
189 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
191 /* Function vect_mark_relevant.
193 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
195 static void
196 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
197 enum vect_relevant relevant, bool live_p)
199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
200 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
201 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
202 gimple *pattern_stmt;
204 if (dump_enabled_p ())
206 dump_printf_loc (MSG_NOTE, vect_location,
207 "mark relevant %d, live %d: ", relevant, live_p);
208 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
222 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
224 if (dump_enabled_p ())
225 dump_printf_loc (MSG_NOTE, vect_location,
226 "last stmt in pattern. don't mark"
227 " relevant/live.\n");
228 stmt_info = vinfo_for_stmt (pattern_stmt);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232 stmt = pattern_stmt;
235 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
236 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
237 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
240 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "already marked relevant/live.\n");
245 return;
248 worklist->safe_push (stmt);
252 /* Function is_simple_and_all_uses_invariant
254 Return true if STMT is simple and all uses of it are invariant. */
256 bool
257 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
259 tree op;
260 gimple *def_stmt;
261 ssa_op_iter iter;
263 if (!is_gimple_assign (stmt))
264 return false;
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
268 enum vect_def_type dt = vect_uninitialized_def;
270 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
281 return true;
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT in loop that is represented by LOOP_VINFO is
287 "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
 292    - it is a control stmt in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
296 static bool
297 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
300 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt)
311 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
312 != loop_exit_ctrl_vec_info_type)
313 *relevant = vect_used_in_scope;
315 /* changing memory. */
316 if (gimple_code (stmt) != GIMPLE_PHI)
317 if (gimple_vdef (stmt)
318 && !gimple_clobber_p (stmt))
320 if (dump_enabled_p ())
321 dump_printf_loc (MSG_NOTE, vect_location,
322 "vec_stmt_relevant_p: stmt has vdefs.\n");
323 *relevant = vect_used_in_scope;
326 /* uses outside the loop. */
327 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
329 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 basic_block bb = gimple_bb (USE_STMT (use_p));
332 if (!flow_bb_inside_loop_p (loop, bb))
334 if (dump_enabled_p ())
335 dump_printf_loc (MSG_NOTE, vect_location,
336 "vec_stmt_relevant_p: used out of loop.\n");
338 if (is_gimple_debug (USE_STMT (use_p)))
339 continue;
341 /* We expect all such uses to be in the loop exit phis
342 (because of loop closed form) */
343 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
344 gcc_assert (bb == single_exit (loop)->dest);
346 *live_p = true;
351 if (*live_p && *relevant == vect_unused_in_scope
352 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt live but not relevant.\n");
357 *relevant = vect_used_only_live;
360 return (*live_p || *relevant);
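/* Illustration only (not taken from the compiler): given

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;   <-- alters memory, so *relevant is set
           last = b[i];       <-- only used after the loop, so *live_p
         }
       ... = last;

   the store is marked vect_used_in_scope because it has a vdef, while
   the copy into 'last' is merely live; since b[i] is not loop
   invariant, that copy is then given vect_used_only_live rather than
   staying vect_unused_in_scope.  */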
364 /* Function exist_non_indexing_operands_for_use_p
366 USE is one of the uses attached to STMT. Check if USE is
367 used in STMT for anything other than indexing an array. */
369 static bool
370 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
372 tree operand;
373 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
 381   /* STMT has a data_ref.  FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 if (!gimple_assign_copy_p (stmt))
396 if (is_gimple_call (stmt)
397 && gimple_call_internal_p (stmt))
398 switch (gimple_call_internal_fn (stmt))
400 case IFN_MASK_STORE:
401 operand = gimple_call_arg (stmt, 3);
402 if (operand == use)
403 return true;
404 /* FALLTHRU */
405 case IFN_MASK_LOAD:
406 operand = gimple_call_arg (stmt, 2);
407 if (operand == use)
408 return true;
409 break;
410 default:
411 break;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (stmt);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
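/* Example (illustration only): for the store "a[i_4] = x_7" the use of
   x_7 is the stored value, so the function returns true for it, while
   i_4 only feeds the address computation and gets false; for the load
   "x_7 = a[i_4]" every use is part of the address, so false is
   returned for all of them.  */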
 430 /* Function process_use.
432 Inputs:
433 - a USE in STMT in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 450    skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
452 be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static bool
457 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<gimple *> *worklist,
459 bool force)
461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
462 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
463 stmt_vec_info dstmt_vinfo;
464 basic_block bb, def_bb;
465 gimple *def_stmt;
466 enum vect_def_type dt;
468 /* case 1: we are only interested in uses that need to be vectorized. Uses
469 that are used for address computation are not considered relevant. */
470 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
471 return true;
473 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
477 "not vectorized: unsupported use in stmt.\n");
478 return false;
481 if (!def_stmt || gimple_nop_p (def_stmt))
482 return true;
484 def_bb = gimple_bb (def_stmt);
485 if (!flow_bb_inside_loop_p (loop, def_bb))
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
489 return true;
492 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
493 DEF_STMT must have already been processed, because this should be the
494 only way that STMT, which is a reduction-phi, was put in the worklist,
495 as there should be no other uses for DEF_STMT in the loop. So we just
496 check that everything is as expected, and we are done. */
497 dstmt_vinfo = vinfo_for_stmt (def_stmt);
498 bb = gimple_bb (stmt);
499 if (gimple_code (stmt) == GIMPLE_PHI
500 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
501 && gimple_code (def_stmt) != GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
503 && bb->loop_father == def_bb->loop_father)
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "reduc-stmt defining reduc-phi in the same nest.\n");
508 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
509 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
510 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
511 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
512 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
513 return true;
516 /* case 3a: outer-loop stmt defining an inner-loop stmt:
517 outer-loop-header-bb:
518 d = def_stmt
519 inner-loop:
520 stmt # use (d)
521 outer-loop-tail-bb:
522 ... */
523 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
525 if (dump_enabled_p ())
526 dump_printf_loc (MSG_NOTE, vect_location,
527 "outer-loop def-stmt defining inner-loop stmt.\n");
529 switch (relevant)
531 case vect_unused_in_scope:
532 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
533 vect_used_in_scope : vect_unused_in_scope;
534 break;
536 case vect_used_in_outer_by_reduction:
537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 relevant = vect_used_by_reduction;
539 break;
541 case vect_used_in_outer:
542 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
543 relevant = vect_used_in_scope;
544 break;
546 case vect_used_in_scope:
547 break;
549 default:
550 gcc_unreachable ();
554 /* case 3b: inner-loop stmt defining an outer-loop stmt:
555 outer-loop-header-bb:
557 inner-loop:
558 d = def_stmt
559 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
560 stmt # use (d) */
561 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
563 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE, vect_location,
565 "inner-loop def-stmt defining outer-loop stmt.\n");
567 switch (relevant)
569 case vect_unused_in_scope:
570 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
571 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
572 vect_used_in_outer_by_reduction : vect_unused_in_scope;
573 break;
575 case vect_used_by_reduction:
576 case vect_used_only_live:
577 relevant = vect_used_in_outer_by_reduction;
578 break;
580 case vect_used_in_scope:
581 relevant = vect_used_in_outer;
582 break;
584 default:
585 gcc_unreachable ();
588 /* We are also not interested in uses on loop PHI backedges that are
589 inductions. Otherwise we'll needlessly vectorize the IV increment
590 and cause hybrid SLP for SLP inductions. Unless the PHI is live
591 of course. */
592 else if (gimple_code (stmt) == GIMPLE_PHI
593 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
594 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
595 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
596 == use))
598 if (dump_enabled_p ())
599 dump_printf_loc (MSG_NOTE, vect_location,
600 "induction value on backedge.\n");
601 return true;
605 vect_mark_relevant (worklist, def_stmt, relevant, false);
606 return true;
610 /* Function vect_mark_stmts_to_be_vectorized.
612 Not all stmts in the loop need to be vectorized. For example:
614 for i...
615 for j...
616 1. T0 = i + j
617 2. T1 = a[T0]
619 3. j = j + 1
621 Stmt 1 and 3 do not need to be vectorized, because loop control and
622 addressing of vectorized data-refs are handled differently.
624 This pass detects such stmts. */
626 bool
627 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
629 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
630 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
631 unsigned int nbbs = loop->num_nodes;
632 gimple_stmt_iterator si;
633 gimple *stmt;
634 unsigned int i;
635 stmt_vec_info stmt_vinfo;
636 basic_block bb;
637 gimple *phi;
638 bool live_p;
639 enum vect_relevant relevant;
641 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE, vect_location,
643 "=== vect_mark_stmts_to_be_vectorized ===\n");
645 auto_vec<gimple *, 64> worklist;
647 /* 1. Init worklist. */
648 for (i = 0; i < nbbs; i++)
650 bb = bbs[i];
651 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
653 phi = gsi_stmt (si);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
660 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
661 vect_mark_relevant (&worklist, phi, relevant, live_p);
663 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
665 stmt = gsi_stmt (si);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
672 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
673 vect_mark_relevant (&worklist, stmt, relevant, live_p);
677 /* 2. Process_worklist */
678 while (worklist.length () > 0)
680 use_operand_p use_p;
681 ssa_op_iter iter;
683 stmt = worklist.pop ();
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
687 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
690 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
691 (DEF_STMT) as relevant/irrelevant according to the relevance property
692 of STMT. */
693 stmt_vinfo = vinfo_for_stmt (stmt);
694 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
696 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
697 propagated as is to the DEF_STMTs of its USEs.
699 One exception is when STMT has been identified as defining a reduction
700 variable; in this case we set the relevance to vect_used_by_reduction.
701 This is because we distinguish between two kinds of relevant stmts -
702 those that are used by a reduction computation, and those that are
703 (also) used by a regular computation. This allows us later on to
704 identify stmts that are used solely by a reduction, and therefore the
705 order of the results that they produce does not have to be kept. */
707 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
709 case vect_reduction_def:
710 gcc_assert (relevant != vect_unused_in_scope);
711 if (relevant != vect_unused_in_scope
712 && relevant != vect_used_in_scope
713 && relevant != vect_used_by_reduction
714 && relevant != vect_used_only_live)
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
718 "unsupported use of reduction.\n");
719 return false;
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
728 if (dump_enabled_p ())
729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
730 "unsupported use of nested cycle.\n");
732 return false;
734 break;
736 case vect_double_reduction_def:
737 if (relevant != vect_unused_in_scope
738 && relevant != vect_used_by_reduction
739 && relevant != vect_used_only_live)
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743 "unsupported use of double reduction.\n");
745 return false;
747 break;
749 default:
750 break;
753 if (is_pattern_stmt_p (stmt_vinfo))
755 /* Pattern statements are not inserted into the code, so
756 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
757 have to scan the RHS or function arguments instead. */
758 if (is_gimple_assign (stmt))
760 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
761 tree op = gimple_assign_rhs1 (stmt);
763 i = 1;
764 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
766 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
767 relevant, &worklist, false)
768 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
769 relevant, &worklist, false))
770 return false;
771 i = 2;
773 for (; i < gimple_num_ops (stmt); i++)
775 op = gimple_op (stmt, i);
776 if (TREE_CODE (op) == SSA_NAME
777 && !process_use (stmt, op, loop_vinfo, relevant,
778 &worklist, false))
779 return false;
782 else if (is_gimple_call (stmt))
784 for (i = 0; i < gimple_call_num_args (stmt); i++)
786 tree arg = gimple_call_arg (stmt, i);
787 if (!process_use (stmt, arg, loop_vinfo, relevant,
788 &worklist, false))
789 return false;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 if (!process_use (stmt, op, loop_vinfo, relevant,
798 &worklist, false))
799 return false;
802 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
804 gather_scatter_info gs_info;
805 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
806 gcc_unreachable ();
807 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
808 &worklist, true))
809 return false;
811 } /* while worklist */
813 return true;
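/* Worked through on the example at the head of this function (sketch):
   stmt 2 (the load) may become relevant, for instance when T1 feeds a
   result that is stored back to memory.  Processing its uses never
   marks stmt 1, because T0 appears only as the array index and
   exist_non_indexing_operands_for_use_p returns false for it; stmt 3
   is the induction increment, whose backedge use on the induction PHI
   is likewise skipped, so it stays out of the worklist.  */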
817 /* Function vect_model_simple_cost.
819 Models cost for simple operations, i.e. those that only emit ncopies of a
820 single op. Right now, this does not account for multiple insns that could
821 be generated for the single vector op. We will handle that shortly. */
823 void
824 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
825 enum vect_def_type *dt,
826 int ndts,
827 stmt_vector_for_cost *prologue_cost_vec,
828 stmt_vector_for_cost *body_cost_vec)
830 int i;
831 int inside_cost = 0, prologue_cost = 0;
833 /* The SLP costs were already calculated during SLP tree build. */
834 if (PURE_SLP_STMT (stmt_info))
835 return;
 837   /* Cost the "broadcast" of a scalar operand into a vector operand.
838 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
839 cost model. */
840 for (i = 0; i < ndts; i++)
841 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
842 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
843 stmt_info, 0, vect_prologue);
845 /* Pass the inside-of-loop statements to the target-specific cost model. */
846 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
847 stmt_info, 0, vect_body);
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE, vect_location,
851 "vect_model_simple_cost: inside_cost = %d, "
852 "prologue_cost = %d .\n", inside_cost, prologue_cost);
856 /* Model cost for type demotion and promotion operations. PWR is normally
857 zero for single-step promotions and demotions. It will be one if
858 two-step promotion/demotion is required, and so on. Each additional
859 step doubles the number of instructions required. */
861 static void
862 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
863 enum vect_def_type *dt, int pwr)
865 int i, tmp;
866 int inside_cost = 0, prologue_cost = 0;
867 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
868 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
869 void *target_cost_data;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info))
873 return;
875 if (loop_vinfo)
876 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
877 else
878 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
880 for (i = 0; i < pwr + 1; i++)
882 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
883 (i + 1) : i;
884 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
885 vec_promote_demote, stmt_info, 0,
886 vect_body);
889 /* FORNOW: Assuming maximum 2 args per stmts. */
890 for (i = 0; i < 2; i++)
891 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
892 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
893 stmt_info, 0, vect_prologue);
895 if (dump_enabled_p ())
896 dump_printf_loc (MSG_NOTE, vect_location,
897 "vect_model_promotion_demotion_cost: inside_cost = %d, "
898 "prologue_cost = %d .\n", inside_cost, prologue_cost);
901 /* Function vect_model_store_cost
903 Models cost for stores. In the case of grouped accesses, one access
904 has the overhead of the grouped access attributed to it. */
906 void
907 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
908 vect_memory_access_type memory_access_type,
909 enum vect_def_type dt, slp_tree slp_node,
910 stmt_vector_for_cost *prologue_cost_vec,
911 stmt_vector_for_cost *body_cost_vec)
913 unsigned int inside_cost = 0, prologue_cost = 0;
914 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
915 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
916 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
918 if (dt == vect_constant_def || dt == vect_external_def)
919 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
920 stmt_info, 0, vect_prologue);
922 /* Grouped stores update all elements in the group at once,
923 so we want the DR for the first statement. */
924 if (!slp_node && grouped_access_p)
926 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
927 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
930 /* True if we should include any once-per-group costs as well as
931 the cost of the statement itself. For SLP we only get called
932 once per group anyhow. */
933 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
935 /* We assume that the cost of a single store-lanes instruction is
936 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
937 access is instead being provided by a permute-and-store operation,
938 include the cost of the permutes. */
939 if (first_stmt_p
940 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 942       /* Uses high and low interleave or shuffle operations for each
943 needed permute. */
944 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
945 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
946 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
947 stmt_info, 0, vect_body);
949 if (dump_enabled_p ())
950 dump_printf_loc (MSG_NOTE, vect_location,
951 "vect_model_store_cost: strided group_size = %d .\n",
952 group_size);
955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
956 /* Costs of the stores. */
957 if (memory_access_type == VMAT_ELEMENTWISE
958 || memory_access_type == VMAT_GATHER_SCATTER)
959 /* N scalar stores plus extracting the elements. */
960 inside_cost += record_stmt_cost (body_cost_vec,
961 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
962 scalar_store, stmt_info, 0, vect_body);
963 else
964 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
966 if (memory_access_type == VMAT_ELEMENTWISE
967 || memory_access_type == VMAT_STRIDED_SLP)
968 inside_cost += record_stmt_cost (body_cost_vec,
969 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
970 vec_to_scalar, stmt_info, 0, vect_body);
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE, vect_location,
974 "vect_model_store_cost: inside_cost = %d, "
975 "prologue_cost = %d .\n", inside_cost, prologue_cost);
979 /* Calculate cost of DR's memory access. */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 unsigned int *inside_cost,
983 stmt_vector_for_cost *body_cost_vec)
985 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986 gimple *stmt = DR_STMT (dr);
987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
989 switch (alignment_support_scheme)
991 case dr_aligned:
993 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 vector_store, stmt_info, 0,
995 vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: aligned.\n");
1000 break;
1003 case dr_unaligned_supported:
1005 /* Here, we assign an additional cost for the unaligned store. */
1006 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 unaligned_store, stmt_info,
1008 DR_MISALIGNMENT (dr), vect_body);
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE, vect_location,
1011 "vect_model_store_cost: unaligned supported by "
1012 "hardware.\n");
1013 break;
1016 case dr_unaligned_unsupported:
1018 *inside_cost = VECT_MAX_COST;
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022 "vect_model_store_cost: unsupported access.\n");
1023 break;
1026 default:
1027 gcc_unreachable ();
1032 /* Function vect_model_load_cost
1034 Models cost for loads. In the case of grouped accesses, one access has
1035 the overhead of the grouped access attributed to it. Since unaligned
1036 accesses are supported for loads, we also account for the costs of the
1037 access scheme chosen. */
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 vect_memory_access_type memory_access_type,
1042 slp_tree slp_node,
1043 stmt_vector_for_cost *prologue_cost_vec,
1044 stmt_vector_for_cost *body_cost_vec)
1046 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1047 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1048 unsigned int inside_cost = 0, prologue_cost = 0;
1049 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1051 /* Grouped loads read all elements in the group at once,
1052 so we want the DR for the first statement. */
1053 if (!slp_node && grouped_access_p)
1055 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1059 /* True if we should include any once-per-group costs as well as
1060 the cost of the statement itself. For SLP we only get called
1061 once per group anyhow. */
1062 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1064 /* We assume that the cost of a single load-lanes instruction is
1065 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1066 access is instead being provided by a load-and-permute operation,
1067 include the cost of the permutes. */
1068 if (first_stmt_p
1069 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1071       /* Uses even and odd extract operations or shuffle operations
1072 for each needed permute. */
1073 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1074 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1075 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 stmt_info, 0, vect_body);
1078 if (dump_enabled_p ())
1079 dump_printf_loc (MSG_NOTE, vect_location,
1080 "vect_model_load_cost: strided group_size = %d .\n",
1081 group_size);
1084 /* The loads themselves. */
1085 if (memory_access_type == VMAT_ELEMENTWISE
1086 || memory_access_type == VMAT_GATHER_SCATTER)
1088 /* N scalar loads plus gathering them into a vector. */
1089 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090 inside_cost += record_stmt_cost (body_cost_vec,
1091 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1092 scalar_load, stmt_info, 0, vect_body);
1094 else
1095 vect_get_load_cost (dr, ncopies, first_stmt_p,
1096 &inside_cost, &prologue_cost,
1097 prologue_cost_vec, body_cost_vec, true);
1098 if (memory_access_type == VMAT_ELEMENTWISE
1099 || memory_access_type == VMAT_STRIDED_SLP)
1100 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: inside_cost = %d, "
1106 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1110 /* Calculate cost of DR's memory access. */
1111 void
1112 vect_get_load_cost (struct data_reference *dr, int ncopies,
1113 bool add_realign_cost, unsigned int *inside_cost,
1114 unsigned int *prologue_cost,
1115 stmt_vector_for_cost *prologue_cost_vec,
1116 stmt_vector_for_cost *body_cost_vec,
1117 bool record_prologue_costs)
1119 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1120 gimple *stmt = DR_STMT (dr);
1121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 switch (alignment_support_scheme)
1125 case dr_aligned:
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1128 stmt_info, 0, vect_body);
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE, vect_location,
1132 "vect_model_load_cost: aligned.\n");
1134 break;
1136 case dr_unaligned_supported:
1138 /* Here, we assign an additional cost for the unaligned load. */
1139 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1140 unaligned_load, stmt_info,
1141 DR_MISALIGNMENT (dr), vect_body);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "vect_model_load_cost: unaligned supported by "
1146 "hardware.\n");
1148 break;
1150 case dr_explicit_realign:
1152 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1153 vector_load, stmt_info, 0, vect_body);
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1155 vec_perm, stmt_info, 0, vect_body);
1157 /* FIXME: If the misalignment remains fixed across the iterations of
1158 the containing loop, the following cost should be added to the
1159 prologue costs. */
1160 if (targetm.vectorize.builtin_mask_for_load)
1161 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1162 stmt_info, 0, vect_body);
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE, vect_location,
1166 "vect_model_load_cost: explicit realign\n");
1168 break;
1170 case dr_explicit_realign_optimized:
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "vect_model_load_cost: unaligned software "
1175 "pipelined.\n");
1177         /* An unaligned software-pipelined access has a load of an address, an initial
1178 load, and possibly a mask operation to "prime" the loop. However,
1179 if this is an access in a group of loads, which provide grouped
1180 access, then the above cost should only be considered for one
1181 access in the group. Inside the loop, there is a load op
1182 and a realignment op. */
1184 if (add_realign_cost && record_prologue_costs)
1186 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1187 vector_stmt, stmt_info,
1188 0, vect_prologue);
1189 if (targetm.vectorize.builtin_mask_for_load)
1190 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1191 vector_stmt, stmt_info,
1192 0, vect_prologue);
1195 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1196 stmt_info, 0, vect_body);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1198 stmt_info, 0, vect_body);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: explicit realign optimized"
1203 "\n");
1205 break;
1208 case dr_unaligned_unsupported:
1210 *inside_cost = VECT_MAX_COST;
1212 if (dump_enabled_p ())
1213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1214 "vect_model_load_cost: unsupported access.\n");
1215 break;
1218 default:
1219 gcc_unreachable ();
1223 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1224 the loop preheader for the vectorized stmt STMT. */
1226 static void
1227 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229 if (gsi)
1230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1231 else
1233 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1234 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236 if (loop_vinfo)
1238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1239 basic_block new_bb;
1240 edge pe;
1242 if (nested_in_vect_loop_p (loop, stmt))
1243 loop = loop->inner;
1245 pe = loop_preheader_edge (loop);
1246 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1247 gcc_assert (!new_bb);
1249 else
1251 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1252 basic_block bb;
1253 gimple_stmt_iterator gsi_bb_start;
1255 gcc_assert (bb_vinfo);
1256 bb = BB_VINFO_BB (bb_vinfo);
1257 gsi_bb_start = gsi_after_labels (bb);
1258 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1262 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location,
1265 "created new init_stmt: ");
1266 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1270 /* Function vect_init_vector.
1272 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1273 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1274    a vector type, a vector with all elements equal to VAL is created first.
1275    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1276 initialization at the loop preheader.
1277 Return the DEF of INIT_STMT.
1278 It will be used in the vectorization of STMT. */
1280 tree
1281 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283 gimple *init_stmt;
1284 tree new_temp;
1286   /* We also abuse this function simply to copy 'val' into a fresh SSA name,
        in which case TYPE need not be a vector type.  */
1287 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1290 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 /* Scalar boolean value should be transformed into
1293 all zeros or all ones value before building a vector. */
1294 if (VECTOR_BOOLEAN_TYPE_P (type))
1296 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1297 tree false_val = build_zero_cst (TREE_TYPE (type));
1299 if (CONSTANT_CLASS_P (val))
1300 val = integer_zerop (val) ? false_val : true_val;
1301 else
1303 new_temp = make_ssa_name (TREE_TYPE (type));
1304 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1305 val, true_val, false_val);
1306 vect_init_vector_1 (stmt, init_stmt, gsi);
1307 val = new_temp;
1310 else if (CONSTANT_CLASS_P (val))
1311 val = fold_convert (TREE_TYPE (type), val);
1312 else
1314 new_temp = make_ssa_name (TREE_TYPE (type));
1315 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1316 init_stmt = gimple_build_assign (new_temp,
1317 fold_build1 (VIEW_CONVERT_EXPR,
1318 TREE_TYPE (type),
1319 val));
1320 else
1321 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1322 vect_init_vector_1 (stmt, init_stmt, gsi);
1323 val = new_temp;
1326 val = build_vector_from_val (type, val);
1329 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1330 init_stmt = gimple_build_assign (new_temp, val);
1331 vect_init_vector_1 (stmt, init_stmt, gsi);
1332 return new_temp;
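/* For example (hypothetical call): with TYPE a four-element integer
   vector type and VAL the scalar constant 3,

       vect_init_vector (stmt, val, type, NULL)

   builds the vector constant { 3, 3, 3, 3 }, emits an assignment of it
   to a new "cst_" SSA name in the loop preheader (GSI being NULL) and
   returns that SSA name.  */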
1335 /* Function vect_get_vec_def_for_operand_1.
1337 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1338 DT that will be used in the vectorized stmt. */
1340 tree
1341 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343 tree vec_oprnd;
1344 gimple *vec_stmt;
1345 stmt_vec_info def_stmt_info = NULL;
1347 switch (dt)
1349 /* operand is a constant or a loop invariant. */
1350 case vect_constant_def:
1351 case vect_external_def:
1352 /* Code should use vect_get_vec_def_for_operand. */
1353 gcc_unreachable ();
1355 /* operand is defined inside the loop. */
1356 case vect_internal_def:
1358 /* Get the def from the vectorized stmt. */
1359 def_stmt_info = vinfo_for_stmt (def_stmt);
1361 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1362 /* Get vectorized pattern statement. */
1363 if (!vec_stmt
1364 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1365 && !STMT_VINFO_RELEVANT (def_stmt_info))
1366 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1367 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1368 gcc_assert (vec_stmt);
1369 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1370 vec_oprnd = PHI_RESULT (vec_stmt);
1371 else if (is_gimple_call (vec_stmt))
1372 vec_oprnd = gimple_call_lhs (vec_stmt);
1373 else
1374 vec_oprnd = gimple_assign_lhs (vec_stmt);
1375 return vec_oprnd;
1378 /* operand is defined by a loop header phi. */
1379 case vect_reduction_def:
1380 case vect_double_reduction_def:
1381 case vect_nested_cycle:
1382 case vect_induction_def:
1384 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386 /* Get the def from the vectorized stmt. */
1387 def_stmt_info = vinfo_for_stmt (def_stmt);
1388 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1389 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1390 vec_oprnd = PHI_RESULT (vec_stmt);
1391 else
1392 vec_oprnd = gimple_get_lhs (vec_stmt);
1393 return vec_oprnd;
1396 default:
1397 gcc_unreachable ();
1402 /* Function vect_get_vec_def_for_operand.
1404 OP is an operand in STMT. This function returns a (vector) def that will be
1405 used in the vectorized stmt for STMT.
1407 In the case that OP is an SSA_NAME which is defined in the loop, then
1408 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410 In case OP is an invariant or constant, a new stmt that creates a vector def
1411 needs to be introduced. VECTYPE may be used to specify a required type for
1412 vector invariant. */
1414 tree
1415 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417 gimple *def_stmt;
1418 enum vect_def_type dt;
1419 bool is_simple_use;
1420 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1421 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423 if (dump_enabled_p ())
1425 dump_printf_loc (MSG_NOTE, vect_location,
1426 "vect_get_vec_def_for_operand: ");
1427 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1428 dump_printf (MSG_NOTE, "\n");
1431 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1432 gcc_assert (is_simple_use);
1433 if (def_stmt && dump_enabled_p ())
1435 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1436 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1439 if (dt == vect_constant_def || dt == vect_external_def)
1441 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1442 tree vector_type;
1444 if (vectype)
1445 vector_type = vectype;
1446 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1447 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1448 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1449 else
1450 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452 gcc_assert (vector_type);
1453 return vect_init_vector (stmt, op, vector_type, NULL);
1455 else
1456 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1460 /* Function vect_get_vec_def_for_stmt_copy
1462 Return a vector-def for an operand. This function is used when the
1463 vectorized stmt to be created (by the caller to this function) is a "copy"
1464 created in case the vectorized result cannot fit in one vector, and several
1465 copies of the vector-stmt are required. In this case the vector-def is
1466 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1467 of the stmt that defines VEC_OPRND.
1468 DT is the type of the vector def VEC_OPRND.
1470 Context:
1471 In case the vectorization factor (VF) is bigger than the number
1472 of elements that can fit in a vectype (nunits), we have to generate
1473 more than one vector stmt to vectorize the scalar stmt. This situation
1474 arises when there are multiple data-types operated upon in the loop; the
1475 smallest data-type determines the VF, and as a result, when vectorizing
1476 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1477 vector stmt (each computing a vector of 'nunits' results, and together
1478 computing 'VF' results in each iteration). This function is called when
1479 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1480 which VF=16 and nunits=4, so the number of copies required is 4):
1482 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1484 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1485 VS1.1: vx.1 = memref1 VS1.2
1486 VS1.2: vx.2 = memref2 VS1.3
1487 VS1.3: vx.3 = memref3
1489 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1490 VSnew.1: vz1 = vx.1 + ... VSnew.2
1491 VSnew.2: vz2 = vx.2 + ... VSnew.3
1492 VSnew.3: vz3 = vx.3 + ...
1494 The vectorization of S1 is explained in vectorizable_load.
1495 The vectorization of S2:
1496 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1497 the function 'vect_get_vec_def_for_operand' is called to
1498 get the relevant vector-def for each operand of S2. For operand x it
1499 returns the vector-def 'vx.0'.
1501 To create the remaining copies of the vector-stmt (VSnew.j), this
1502 function is called to get the relevant vector-def for each operand. It is
1503 obtained from the respective VS1.j stmt, which is recorded in the
1504 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506 For example, to obtain the vector-def 'vx.1' in order to create the
1507 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1508 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1509 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1510 and return its def ('vx.1').
1511 Overall, to create the above sequence this function will be called 3 times:
1512 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1513 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1514 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1516 tree
1517 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519 gimple *vec_stmt_for_operand;
1520 stmt_vec_info def_stmt_info;
1522 /* Do nothing; can reuse same def. */
1523 if (dt == vect_external_def || dt == vect_constant_def )
1524 return vec_oprnd;
1526 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1527 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1528 gcc_assert (def_stmt_info);
1529 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1530 gcc_assert (vec_stmt_for_operand);
1531 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1532 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1533 else
1534 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1535 return vec_oprnd;
1539 /* Get vectorized definitions for the operands to create a copy of an original
1540 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1542 void
1543 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1544 vec<tree> *vec_oprnds0,
1545 vec<tree> *vec_oprnds1)
1547 tree vec_oprnd = vec_oprnds0->pop ();
1549 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1550 vec_oprnds0->quick_push (vec_oprnd);
1552 if (vec_oprnds1 && vec_oprnds1->length ())
1554 vec_oprnd = vec_oprnds1->pop ();
1555 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1556 vec_oprnds1->quick_push (vec_oprnd);
1561 /* Get vectorized definitions for OP0 and OP1. */
1563 void
1564 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1565 vec<tree> *vec_oprnds0,
1566 vec<tree> *vec_oprnds1,
1567 slp_tree slp_node)
1569 if (slp_node)
1571 int nops = (op1 == NULL_TREE) ? 1 : 2;
1572 auto_vec<tree> ops (nops);
1573 auto_vec<vec<tree> > vec_defs (nops);
1575 ops.quick_push (op0);
1576 if (op1)
1577 ops.quick_push (op1);
1579 vect_get_slp_defs (ops, slp_node, &vec_defs);
1581 *vec_oprnds0 = vec_defs[0];
1582 if (op1)
1583 *vec_oprnds1 = vec_defs[1];
1585 else
1587 tree vec_oprnd;
1589 vec_oprnds0->create (1);
1590 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1591 vec_oprnds0->quick_push (vec_oprnd);
1593 if (op1)
1595 vec_oprnds1->create (1);
1596 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1597 vec_oprnds1->quick_push (vec_oprnd);
1603 /* Function vect_finish_stmt_generation.
1605 Insert a new stmt. */
1607 void
1608 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1609 gimple_stmt_iterator *gsi)
1611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1612 vec_info *vinfo = stmt_info->vinfo;
1614 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1616 if (!gsi_end_p (*gsi)
1617 && gimple_has_mem_ops (vec_stmt))
1619 gimple *at_stmt = gsi_stmt (*gsi);
1620 tree vuse = gimple_vuse (at_stmt);
1621 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1623 tree vdef = gimple_vdef (at_stmt);
1624 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1625 /* If we have an SSA vuse and insert a store, update virtual
1626 SSA form to avoid triggering the renamer. Do so only
1627 if we can easily see all uses - which is what almost always
1628 happens with the way vectorized stmts are inserted. */
1629 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1630 && ((is_gimple_assign (vec_stmt)
1631 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1632 || (is_gimple_call (vec_stmt)
1633 && !(gimple_call_flags (vec_stmt)
1634 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1636 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1637 gimple_set_vdef (vec_stmt, new_vdef);
1638 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1642 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1644 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1646 if (dump_enabled_p ())
1648 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1649 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1652 gimple_set_location (vec_stmt, gimple_location (stmt));
1654 /* While EH edges will generally prevent vectorization, stmt might
1655 e.g. be in a must-not-throw region. Ensure newly created stmts
1656 that could throw are part of the same region. */
1657 int lp_nr = lookup_stmt_eh_lp (stmt);
1658 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1659 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1662 /* We want to vectorize a call to combined function CFN with function
1663 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1664 as the types of all inputs. Check whether this is possible using
1665 an internal function, returning its code if so or IFN_LAST if not. */
1667 static internal_fn
1668 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1669 tree vectype_out, tree vectype_in)
1671 internal_fn ifn;
1672 if (internal_fn_p (cfn))
1673 ifn = as_internal_fn (cfn);
1674 else
1675 ifn = associated_internal_fn (fndecl);
1676 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1678 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1679 if (info.vectorizable)
1681 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1682 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1683 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1684 OPTIMIZE_FOR_SPEED))
1685 return ifn;
1688 return IFN_LAST;
1692 static tree permute_vec_elements (tree, tree, tree, gimple *,
1693 gimple_stmt_iterator *);
1695 /* STMT is a non-strided load or store, meaning that it accesses
1696 elements with a known constant step. Return -1 if that step
1697 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1699 static int
1700 compare_step_with_zero (gimple *stmt)
1702 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1703 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1704 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1705 size_zero_node);
1708 /* If the target supports a permute mask that reverses the elements in
1709 a vector of type VECTYPE, return that mask, otherwise return null. */
1711 static tree
1712 perm_mask_for_reverse (tree vectype)
1714 int i, nunits;
1716 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1718 auto_vec_perm_indices sel (nunits);
1719 for (i = 0; i < nunits; ++i)
1720 sel.quick_push (nunits - 1 - i);
1722 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
1723 return NULL_TREE;
1724 return vect_gen_perm_mask_checked (vectype, sel);
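/* For instance, for a four-element vector type the selector built
   above is { 3, 2, 1, 0 }, i.e. the mask that reverses the element
   order, returned only if can_vec_perm_p says the target supports it.  */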
1727 /* A subroutine of get_load_store_type, with a subset of the same
1728 arguments. Handle the case where STMT is part of a grouped load
1729 or store.
1731 For stores, the statements in the group are all consecutive
1732 and there is no gap at the end. For loads, the statements in the
1733 group might not be consecutive; there can be gaps between statements
1734 as well as at the end. */
1736 static bool
1737 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1738 vec_load_store_type vls_type,
1739 vect_memory_access_type *memory_access_type)
1741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1742 vec_info *vinfo = stmt_info->vinfo;
1743 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1744 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1745 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1746 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1747 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1748 bool single_element_p = (stmt == first_stmt
1749 && !GROUP_NEXT_ELEMENT (stmt_info));
1750 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1751 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1753 /* True if the vectorized statements would access beyond the last
1754 statement in the group. */
1755 bool overrun_p = false;
1757 /* True if we can cope with such overrun by peeling for gaps, so that
1758 there is at least one final scalar iteration after the vector loop. */
1759 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1761 /* There can only be a gap at the end of the group if the stride is
1762 known at compile time. */
1763 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1765 /* Stores can't yet have gaps. */
1766 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1768 if (slp)
1770 if (STMT_VINFO_STRIDED_P (stmt_info))
1772 /* Try to use consecutive accesses of GROUP_SIZE elements,
1773 separated by the stride, until we have a complete vector.
1774 Fall back to scalar accesses if that isn't possible. */
1775 if (nunits % group_size == 0)
1776 *memory_access_type = VMAT_STRIDED_SLP;
1777 else
1778 *memory_access_type = VMAT_ELEMENTWISE;
1780 else
1782 overrun_p = loop_vinfo && gap != 0;
1783 if (overrun_p && vls_type != VLS_LOAD)
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1786 "Grouped store with gaps requires"
1787 " non-consecutive accesses\n");
1788 return false;
1790 /* An overrun is fine if the trailing elements are smaller
1791 than the alignment boundary B. Every vector access will
1792 be a multiple of B and so we are guaranteed to access a
1793 non-gap element in the same B-sized block. */
1794 if (overrun_p
1795 && gap < (vect_known_alignment_in_bytes (first_dr)
1796 / vect_get_scalar_dr_size (first_dr)))
1797 overrun_p = false;
1798 if (overrun_p && !can_overrun_p)
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1802 "Peeling for outer loop is not supported\n");
1803 return false;
1805 *memory_access_type = VMAT_CONTIGUOUS;
1808 else
1810 /* We can always handle this case using elementwise accesses,
1811 but see if something more efficient is available. */
1812 *memory_access_type = VMAT_ELEMENTWISE;
1814 /* If there is a gap at the end of the group then these optimizations
1815 would access excess elements in the last iteration. */
1816 bool would_overrun_p = (gap != 0);
1817 /* An overrun is fine if the trailing elements are smaller than the
1818 alignment boundary B. Every vector access will be a multiple of B
1819 and so we are guaranteed to access a non-gap element in the
1820 same B-sized block. */
1821 if (would_overrun_p
1822 && gap < (vect_known_alignment_in_bytes (first_dr)
1823 / vect_get_scalar_dr_size (first_dr)))
1824 would_overrun_p = false;
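/* Worked example, assuming a first access known to be 32-byte aligned
   and 4-byte scalar elements: the boundary B is 32 / 4 = 8 elements, so
   a trailing gap of up to 7 elements is tolerated, because the
   overrunning part of the last vector access stays inside an 8-element
   block that also contains real group members.  */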
1826 if (!STMT_VINFO_STRIDED_P (stmt_info)
1827 && (can_overrun_p || !would_overrun_p)
1828 && compare_step_with_zero (stmt) > 0)
1830 /* First try using LOAD/STORE_LANES. */
1831 if (vls_type == VLS_LOAD
1832 ? vect_load_lanes_supported (vectype, group_size)
1833 : vect_store_lanes_supported (vectype, group_size))
1835 *memory_access_type = VMAT_LOAD_STORE_LANES;
1836 overrun_p = would_overrun_p;
1839 /* If that fails, try using permuting loads/stores. */
1840 if (*memory_access_type == VMAT_ELEMENTWISE
1841 && (vls_type == VLS_LOAD
1842 ? vect_grouped_load_supported (vectype, single_element_p,
1843 group_size)
1844 : vect_grouped_store_supported (vectype, group_size)))
1846 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1847 overrun_p = would_overrun_p;
1852 if (vls_type != VLS_LOAD && first_stmt == stmt)
1854 /* STMT is the leader of the group. Check the operands of all the
1855 stmts of the group. */
1856 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1857 while (next_stmt)
1859 gcc_assert (gimple_assign_single_p (next_stmt));
1860 tree op = gimple_assign_rhs1 (next_stmt);
1861 gimple *def_stmt;
1862 enum vect_def_type dt;
1863 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1865 if (dump_enabled_p ())
1866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1867 "use not simple.\n");
1868 return false;
1870 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1874 if (overrun_p)
1876 gcc_assert (can_overrun_p);
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1879 "Data access with gaps requires scalar "
1880 "epilogue loop\n");
1881 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1884 return true;
1887 /* A subroutine of get_load_store_type, with a subset of the same
1888 arguments. Handle the case where STMT is a load or store that
1889 accesses consecutive elements with a negative step. */
1891 static vect_memory_access_type
1892 get_negative_load_store_type (gimple *stmt, tree vectype,
1893 vec_load_store_type vls_type,
1894 unsigned int ncopies)
1896 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1897 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1898 dr_alignment_support alignment_support_scheme;
1900 if (ncopies > 1)
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1904 "multiple types with negative step.\n");
1905 return VMAT_ELEMENTWISE;
1908 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1909 if (alignment_support_scheme != dr_aligned
1910 && alignment_support_scheme != dr_unaligned_supported)
1912 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1914 "negative step but alignment required.\n");
1915 return VMAT_ELEMENTWISE;
1918 if (vls_type == VLS_STORE_INVARIANT)
1920 if (dump_enabled_p ())
1921 dump_printf_loc (MSG_NOTE, vect_location,
1922 "negative step with invariant source;"
1923 " no permute needed.\n");
1924 return VMAT_CONTIGUOUS_DOWN;
1927 if (!perm_mask_for_reverse (vectype))
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1931 "negative step and reversing not supported.\n");
1932 return VMAT_ELEMENTWISE;
1935 return VMAT_CONTIGUOUS_REVERSE;
1938 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1939 if there is a memory access type that the vectorized form can use,
1940 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1941 or scatters, fill in GS_INFO accordingly.
1943 SLP says whether we're performing SLP rather than loop vectorization.
1944 VECTYPE is the vector type that the vectorized statements will use.
1945 NCOPIES is the number of vector statements that will be needed. */
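/* Illustrative summary of the classification below (the examples assume
   a simple counted loop; a, b and n are placeholders):

     a[b[i]]  gather/scatter data-ref    -> VMAT_GATHER_SCATTER
     grouped accesses                    -> get_group_load_store_type
     strided (non-SLP) accesses          -> VMAT_ELEMENTWISE
     a[i]     constant step > 0          -> VMAT_CONTIGUOUS
     a[0]     step == 0 (loads only)     -> VMAT_INVARIANT
     a[n - i] constant step < 0          -> get_negative_load_store_type
                                            (VMAT_CONTIGUOUS_REVERSE,
                                             VMAT_CONTIGUOUS_DOWN or
                                             VMAT_ELEMENTWISE).  */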
1947 static bool
1948 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1949 vec_load_store_type vls_type, unsigned int ncopies,
1950 vect_memory_access_type *memory_access_type,
1951 gather_scatter_info *gs_info)
1953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1954 vec_info *vinfo = stmt_info->vinfo;
1955 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1956 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1958 *memory_access_type = VMAT_GATHER_SCATTER;
1959 gimple *def_stmt;
1960 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1961 gcc_unreachable ();
1962 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1963 &gs_info->offset_dt,
1964 &gs_info->offset_vectype))
1966 if (dump_enabled_p ())
1967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1968 "%s index use not simple.\n",
1969 vls_type == VLS_LOAD ? "gather" : "scatter");
1970 return false;
1973 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1975 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1976 memory_access_type))
1977 return false;
1979 else if (STMT_VINFO_STRIDED_P (stmt_info))
1981 gcc_assert (!slp);
1982 *memory_access_type = VMAT_ELEMENTWISE;
1984 else
1986 int cmp = compare_step_with_zero (stmt);
1987 if (cmp < 0)
1988 *memory_access_type = get_negative_load_store_type
1989 (stmt, vectype, vls_type, ncopies);
1990 else if (cmp == 0)
1992 gcc_assert (vls_type == VLS_LOAD);
1993 *memory_access_type = VMAT_INVARIANT;
1995 else
1996 *memory_access_type = VMAT_CONTIGUOUS;
1999 /* FIXME: At the moment the cost model seems to underestimate the
2000 cost of using elementwise accesses. This check preserves the
2001 traditional behavior until that can be fixed. */
2002 if (*memory_access_type == VMAT_ELEMENTWISE
2003 && !STMT_VINFO_STRIDED_P (stmt_info))
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2007 "not falling back to elementwise accesses\n");
2008 return false;
2010 return true;
2013 /* Function vectorizable_mask_load_store.
2015 Check if STMT performs a conditional load or store that can be vectorized.
2016 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2018 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
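/* Schematically (argument order as used later in this function), the
   scalar statements handled here are internal calls of the form

     lhs = MASK_LOAD (addr, align, mask);
     MASK_STORE (addr, align, mask, rhs);

   where MASK is a scalar boolean before vectorization and becomes a
   vector mask selecting the active lanes afterwards.  */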
2020 static bool
2021 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2022 gimple **vec_stmt, slp_tree slp_node)
2024 tree vec_dest = NULL;
2025 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2026 stmt_vec_info prev_stmt_info;
2027 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2028 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2029 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2030 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2031 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2032 tree rhs_vectype = NULL_TREE;
2033 tree mask_vectype;
2034 tree elem_type;
2035 gimple *new_stmt;
2036 tree dummy;
2037 tree dataref_ptr = NULL_TREE;
2038 gimple *ptr_incr;
2039 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2040 int ncopies;
2041 int i, j;
2042 bool inv_p;
2043 gather_scatter_info gs_info;
2044 vec_load_store_type vls_type;
2045 tree mask;
2046 gimple *def_stmt;
2047 enum vect_def_type dt;
2049 if (slp_node != NULL)
2050 return false;
2052 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2053 gcc_assert (ncopies >= 1);
2055 mask = gimple_call_arg (stmt, 2);
2057 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2058 return false;
2060 /* FORNOW. This restriction should be relaxed. */
2061 if (nested_in_vect_loop && ncopies > 1)
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2065 "multiple types in nested loop.");
2066 return false;
2069 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2070 return false;
2072 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2073 && ! vec_stmt)
2074 return false;
2076 if (!STMT_VINFO_DATA_REF (stmt_info))
2077 return false;
2079 elem_type = TREE_TYPE (vectype);
2081 if (TREE_CODE (mask) != SSA_NAME)
2082 return false;
2084 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2085 return false;
2087 if (!mask_vectype)
2088 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2090 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2091 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2092 return false;
2094 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2096 tree rhs = gimple_call_arg (stmt, 3);
2097 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2098 return false;
2099 if (dt == vect_constant_def || dt == vect_external_def)
2100 vls_type = VLS_STORE_INVARIANT;
2101 else
2102 vls_type = VLS_STORE;
2104 else
2105 vls_type = VLS_LOAD;
2107 vect_memory_access_type memory_access_type;
2108 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2109 &memory_access_type, &gs_info))
2110 return false;
2112 if (memory_access_type == VMAT_GATHER_SCATTER)
2114 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2115 tree masktype
2116 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2117 if (TREE_CODE (masktype) == INTEGER_TYPE)
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121 "masked gather with integer mask not supported.");
2122 return false;
2125 else if (memory_access_type != VMAT_CONTIGUOUS)
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2129 "unsupported access type for masked %s.\n",
2130 vls_type == VLS_LOAD ? "load" : "store");
2131 return false;
2133 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2134 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2135 TYPE_MODE (mask_vectype),
2136 vls_type == VLS_LOAD)
2137 || (rhs_vectype
2138 && !useless_type_conversion_p (vectype, rhs_vectype)))
2139 return false;
2141 if (!vec_stmt) /* transformation not required. */
2143 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2144 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2145 if (vls_type == VLS_LOAD)
2146 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2147 NULL, NULL, NULL);
2148 else
2149 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2150 dt, NULL, NULL, NULL);
2151 return true;
2153 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2155 /* Transform. */
2157 if (memory_access_type == VMAT_GATHER_SCATTER)
2159 tree vec_oprnd0 = NULL_TREE, op;
2160 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2161 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2162 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2163 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2164 tree mask_perm_mask = NULL_TREE;
2165 edge pe = loop_preheader_edge (loop);
2166 gimple_seq seq;
2167 basic_block new_bb;
2168 enum { NARROW, NONE, WIDEN } modifier;
2169 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2171 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2172 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2173 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2174 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2175 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2176 scaletype = TREE_VALUE (arglist);
2177 gcc_checking_assert (types_compatible_p (srctype, rettype)
2178 && types_compatible_p (srctype, masktype));
2180 if (nunits == gather_off_nunits)
2181 modifier = NONE;
2182 else if (nunits == gather_off_nunits / 2)
2184 modifier = WIDEN;
2186 auto_vec_perm_indices sel (gather_off_nunits);
2187 for (i = 0; i < gather_off_nunits; ++i)
2188 sel.quick_push (i | nunits);
2190 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2192 else if (nunits == gather_off_nunits * 2)
2194 modifier = NARROW;
2196 auto_vec_perm_indices sel (nunits);
2197 sel.quick_grow (nunits);
2198 for (i = 0; i < nunits; ++i)
2199 sel[i] = i < gather_off_nunits
2200 ? i : i + nunits - gather_off_nunits;
2202 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2203 ncopies *= 2;
2204 for (i = 0; i < nunits; ++i)
2205 sel[i] = i | gather_off_nunits;
2206 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2208 else
2209 gcc_unreachable ();
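/* Worked example of the selectors above: with nunits == 4 and
   gather_off_nunits == 8 (WIDEN), sel becomes { 4, 5, 6, 7, 4, 5, 6, 7 }.
   With nunits == 8 and gather_off_nunits == 4 (NARROW), the first
   selector is { 0, 1, 2, 3, 8, 9, 10, 11 } and the mask selector is
   { 4, 5, 6, 7, 4, 5, 6, 7 }.  */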
2211 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2213 ptr = fold_convert (ptrtype, gs_info.base);
2214 if (!is_gimple_min_invariant (ptr))
2216 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2217 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2218 gcc_assert (!new_bb);
2221 scale = build_int_cst (scaletype, gs_info.scale);
2223 prev_stmt_info = NULL;
2224 for (j = 0; j < ncopies; ++j)
2226 if (modifier == WIDEN && (j & 1))
2227 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2228 perm_mask, stmt, gsi);
2229 else if (j == 0)
2230 op = vec_oprnd0
2231 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2232 else
2233 op = vec_oprnd0
2234 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2236 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2238 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2239 == TYPE_VECTOR_SUBPARTS (idxtype));
2240 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2241 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2242 new_stmt
2243 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2244 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2245 op = var;
2248 if (mask_perm_mask && (j & 1))
2249 mask_op = permute_vec_elements (mask_op, mask_op,
2250 mask_perm_mask, stmt, gsi);
2251 else
2253 if (j == 0)
2254 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2255 else
2257 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2258 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2261 mask_op = vec_mask;
2262 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2264 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2265 == TYPE_VECTOR_SUBPARTS (masktype));
2266 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2267 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2268 new_stmt
2269 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2271 mask_op = var;
2275 new_stmt
2276 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2277 scale);
2279 if (!useless_type_conversion_p (vectype, rettype))
2281 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2282 == TYPE_VECTOR_SUBPARTS (rettype));
2283 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2284 gimple_call_set_lhs (new_stmt, op);
2285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2286 var = make_ssa_name (vec_dest);
2287 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2288 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2290 else
2292 var = make_ssa_name (vec_dest, new_stmt);
2293 gimple_call_set_lhs (new_stmt, var);
2296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2298 if (modifier == NARROW)
2300 if ((j & 1) == 0)
2302 prev_res = var;
2303 continue;
2305 var = permute_vec_elements (prev_res, var,
2306 perm_mask, stmt, gsi);
2307 new_stmt = SSA_NAME_DEF_STMT (var);
2310 if (prev_stmt_info == NULL)
2311 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2312 else
2313 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2314 prev_stmt_info = vinfo_for_stmt (new_stmt);
2317 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2318 from the IL. */
2319 if (STMT_VINFO_RELATED_STMT (stmt_info))
2321 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2322 stmt_info = vinfo_for_stmt (stmt);
2324 tree lhs = gimple_call_lhs (stmt);
2325 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2326 set_vinfo_for_stmt (new_stmt, stmt_info);
2327 set_vinfo_for_stmt (stmt, NULL);
2328 STMT_VINFO_STMT (stmt_info) = new_stmt;
2329 gsi_replace (gsi, new_stmt, true);
2330 return true;
2332 else if (vls_type != VLS_LOAD)
2334 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2335 prev_stmt_info = NULL;
2336 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2337 for (i = 0; i < ncopies; i++)
2339 unsigned align, misalign;
2341 if (i == 0)
2343 tree rhs = gimple_call_arg (stmt, 3);
2344 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2345 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2346 mask_vectype);
2347 /* We should have caught mismatched types earlier. */
2348 gcc_assert (useless_type_conversion_p (vectype,
2349 TREE_TYPE (vec_rhs)));
2350 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2351 NULL_TREE, &dummy, gsi,
2352 &ptr_incr, false, &inv_p);
2353 gcc_assert (!inv_p);
2355 else
2357 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2358 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2359 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2360 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2361 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2362 TYPE_SIZE_UNIT (vectype));
2365 align = DR_TARGET_ALIGNMENT (dr);
2366 if (aligned_access_p (dr))
2367 misalign = 0;
2368 else if (DR_MISALIGNMENT (dr) == -1)
2370 align = TYPE_ALIGN_UNIT (elem_type);
2371 misalign = 0;
2373 else
2374 misalign = DR_MISALIGNMENT (dr);
2375 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2376 misalign);
2377 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2378 misalign ? least_bit_hwi (misalign) : align);
2379 gcall *call
2380 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2381 ptr, vec_mask, vec_rhs);
2382 gimple_call_set_nothrow (call, true);
2383 new_stmt = call;
2384 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2385 if (i == 0)
2386 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2387 else
2388 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2389 prev_stmt_info = vinfo_for_stmt (new_stmt);
2392 else
2394 tree vec_mask = NULL_TREE;
2395 prev_stmt_info = NULL;
2396 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2397 for (i = 0; i < ncopies; i++)
2399 unsigned align, misalign;
2401 if (i == 0)
2403 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2404 mask_vectype);
2405 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2406 NULL_TREE, &dummy, gsi,
2407 &ptr_incr, false, &inv_p);
2408 gcc_assert (!inv_p);
2410 else
2412 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2413 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2414 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2415 TYPE_SIZE_UNIT (vectype));
2418 align = DR_TARGET_ALIGNMENT (dr);
2419 if (aligned_access_p (dr))
2420 misalign = 0;
2421 else if (DR_MISALIGNMENT (dr) == -1)
2423 align = TYPE_ALIGN_UNIT (elem_type);
2424 misalign = 0;
2426 else
2427 misalign = DR_MISALIGNMENT (dr);
2428 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2429 misalign);
2430 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2431 misalign ? least_bit_hwi (misalign) : align);
2432 gcall *call
2433 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2434 ptr, vec_mask);
2435 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2436 gimple_call_set_nothrow (call, true);
2437 vect_finish_stmt_generation (stmt, call, gsi);
2438 if (i == 0)
2439 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2440 else
2441 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2442 prev_stmt_info = vinfo_for_stmt (call);
2446 if (vls_type == VLS_LOAD)
2448 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2449 from the IL. */
2450 if (STMT_VINFO_RELATED_STMT (stmt_info))
2452 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2453 stmt_info = vinfo_for_stmt (stmt);
2455 tree lhs = gimple_call_lhs (stmt);
2456 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2457 set_vinfo_for_stmt (new_stmt, stmt_info);
2458 set_vinfo_for_stmt (stmt, NULL);
2459 STMT_VINFO_STMT (stmt_info) = new_stmt;
2460 gsi_replace (gsi, new_stmt, true);
2463 return true;
2466 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2468 static bool
2469 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2470 gimple **vec_stmt, slp_tree slp_node,
2471 tree vectype_in, enum vect_def_type *dt)
2473 tree op, vectype;
2474 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2476 unsigned ncopies, nunits;
2478 op = gimple_call_arg (stmt, 0);
2479 vectype = STMT_VINFO_VECTYPE (stmt_info);
2480 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2482 /* Multiple types in SLP are handled by creating the appropriate number of
2483 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2484 case of SLP. */
2485 if (slp_node)
2486 ncopies = 1;
2487 else
2488 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2490 gcc_assert (ncopies >= 1);
2492 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2493 if (! char_vectype)
2494 return false;
2496 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2497 unsigned word_bytes = num_bytes / nunits;
2499 auto_vec_perm_indices elts (num_bytes);
2500 for (unsigned i = 0; i < nunits; ++i)
2501 for (unsigned j = 0; j < word_bytes; ++j)
2502 elts.quick_push ((i + 1) * word_bytes - j - 1);
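/* Worked example: for BUILT_IN_BSWAP32 on a four-word vector,
   num_bytes == 16 and word_bytes == 4, so the loop above builds
   { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }, i.e. a
   byte permutation that reverses the bytes within each 32-bit word
   while keeping the words in place.  */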
2504 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
2505 return false;
2507 if (! vec_stmt)
2509 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2512 "\n");
2513 if (! PURE_SLP_STMT (stmt_info))
2515 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2516 1, vector_stmt, stmt_info, 0, vect_prologue);
2517 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2518 ncopies, vec_perm, stmt_info, 0, vect_body);
2520 return true;
2523 auto_vec<tree, 32> telts (num_bytes);
2524 for (unsigned i = 0; i < num_bytes; ++i)
2525 telts.quick_push (build_int_cst (char_type_node, elts[i]));
2526 tree bswap_vconst = build_vector (char_vectype, telts);
2528 /* Transform. */
2529 vec<tree> vec_oprnds = vNULL;
2530 gimple *new_stmt = NULL;
2531 stmt_vec_info prev_stmt_info = NULL;
2532 for (unsigned j = 0; j < ncopies; j++)
2534 /* Handle uses. */
2535 if (j == 0)
2536 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2537 else
2538 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2540 /* Arguments are ready. Create the new vector stmt. */
2541 unsigned i;
2542 tree vop;
2543 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2545 tree tem = make_ssa_name (char_vectype);
2546 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2547 char_vectype, vop));
2548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2549 tree tem2 = make_ssa_name (char_vectype);
2550 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2551 tem, tem, bswap_vconst);
2552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2553 tem = make_ssa_name (vectype);
2554 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2555 vectype, tem2));
2556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2557 if (slp_node)
2558 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2561 if (slp_node)
2562 continue;
2564 if (j == 0)
2565 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2566 else
2567 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2569 prev_stmt_info = vinfo_for_stmt (new_stmt);
2572 vec_oprnds.release ();
2573 return true;
2576 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2577 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2578 in a single step. On success, store the binary pack code in
2579 *CONVERT_CODE. */
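/* Example, target permitting: narrowing V4SI to V8HI is a single vector
   pack, so *CONVERT_CODE is set and true is returned; narrowing V2DI to
   V16QI would need more than one step and is rejected.  */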
2581 static bool
2582 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2583 tree_code *convert_code)
2585 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2586 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2587 return false;
2589 tree_code code;
2590 int multi_step_cvt = 0;
2591 auto_vec <tree, 8> interm_types;
2592 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2593 &code, &multi_step_cvt,
2594 &interm_types)
2595 || multi_step_cvt)
2596 return false;
2598 *convert_code = code;
2599 return true;
2602 /* Function vectorizable_call.
2604 Check if GS performs a function call that can be vectorized.
2605 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2606 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2607 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2609 static bool
2610 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2611 slp_tree slp_node)
2613 gcall *stmt;
2614 tree vec_dest;
2615 tree scalar_dest;
2616 tree op, type;
2617 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2618 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2619 tree vectype_out, vectype_in;
2620 int nunits_in;
2621 int nunits_out;
2622 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2623 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2624 vec_info *vinfo = stmt_info->vinfo;
2625 tree fndecl, new_temp, rhs_type;
2626 gimple *def_stmt;
2627 enum vect_def_type dt[3]
2628 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2629 int ndts = 3;
2630 gimple *new_stmt = NULL;
2631 int ncopies, j;
2632 vec<tree> vargs = vNULL;
2633 enum { NARROW, NONE, WIDEN } modifier;
2634 size_t i, nargs;
2635 tree lhs;
2637 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2638 return false;
2640 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2641 && ! vec_stmt)
2642 return false;
2644 /* Is GS a vectorizable call? */
2645 stmt = dyn_cast <gcall *> (gs);
2646 if (!stmt)
2647 return false;
2649 if (gimple_call_internal_p (stmt)
2650 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2651 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2652 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2653 slp_node);
2655 if (gimple_call_lhs (stmt) == NULL_TREE
2656 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2657 return false;
2659 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2661 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2663 /* Process function arguments. */
2664 rhs_type = NULL_TREE;
2665 vectype_in = NULL_TREE;
2666 nargs = gimple_call_num_args (stmt);
2668 /* Bail out if the function has more than three arguments; we do not have
2669 interesting builtin functions to vectorize with more than two arguments
2670 except for fma. Having no arguments is not useful either. */
2671 if (nargs == 0 || nargs > 3)
2672 return false;
2674 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2675 if (gimple_call_internal_p (stmt)
2676 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2678 nargs = 0;
2679 rhs_type = unsigned_type_node;
2682 for (i = 0; i < nargs; i++)
2684 tree opvectype;
2686 op = gimple_call_arg (stmt, i);
2688 /* We can only handle calls with arguments of the same type. */
2689 if (rhs_type
2690 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2692 if (dump_enabled_p ())
2693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2694 "argument types differ.\n");
2695 return false;
2697 if (!rhs_type)
2698 rhs_type = TREE_TYPE (op);
2700 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2702 if (dump_enabled_p ())
2703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2704 "use not simple.\n");
2705 return false;
2708 if (!vectype_in)
2709 vectype_in = opvectype;
2710 else if (opvectype
2711 && opvectype != vectype_in)
2713 if (dump_enabled_p ())
2714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2715 "argument vector types differ.\n");
2716 return false;
2719 /* If all arguments are external or constant defs use a vector type with
2720 the same size as the output vector type. */
2721 if (!vectype_in)
2722 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2723 if (vec_stmt)
2724 gcc_assert (vectype_in);
2725 if (!vectype_in)
2727 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2730 "no vectype for scalar type ");
2731 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2732 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2735 return false;
2738 /* FORNOW */
2739 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2740 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2741 if (nunits_in == nunits_out / 2)
2742 modifier = NARROW;
2743 else if (nunits_out == nunits_in)
2744 modifier = NONE;
2745 else if (nunits_out == nunits_in / 2)
2746 modifier = WIDEN;
2747 else
2748 return false;
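/* Illustration of the classification above: vectype_in == V4SI and
   vectype_out == V8HI gives NARROW, V8HI in and V4SI out gives WIDEN,
   equal lane counts give NONE, and any other ratio is rejected.  */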
2750 /* We only handle functions that do not read or clobber memory. */
2751 if (gimple_vuse (stmt))
2753 if (dump_enabled_p ())
2754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2755 "function reads from or writes to memory.\n");
2756 return false;
2759 /* For now, we only vectorize functions if a target specific builtin
2760 is available. TODO -- in some cases, it might be profitable to
2761 insert the calls for pieces of the vector, in order to be able
2762 to vectorize other operations in the loop. */
2763 fndecl = NULL_TREE;
2764 internal_fn ifn = IFN_LAST;
2765 combined_fn cfn = gimple_call_combined_fn (stmt);
2766 tree callee = gimple_call_fndecl (stmt);
2768 /* First try using an internal function. */
2769 tree_code convert_code = ERROR_MARK;
2770 if (cfn != CFN_LAST
2771 && (modifier == NONE
2772 || (modifier == NARROW
2773 && simple_integer_narrowing (vectype_out, vectype_in,
2774 &convert_code))))
2775 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2776 vectype_in);
2778 /* If that fails, try asking for a target-specific built-in function. */
2779 if (ifn == IFN_LAST)
2781 if (cfn != CFN_LAST)
2782 fndecl = targetm.vectorize.builtin_vectorized_function
2783 (cfn, vectype_out, vectype_in);
2784 else
2785 fndecl = targetm.vectorize.builtin_md_vectorized_function
2786 (callee, vectype_out, vectype_in);
2789 if (ifn == IFN_LAST && !fndecl)
2791 if (cfn == CFN_GOMP_SIMD_LANE
2792 && !slp_node
2793 && loop_vinfo
2794 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2795 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2796 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2797 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2799 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2800 { 0, 1, 2, ... vf - 1 } vector. */
2801 gcc_assert (nargs == 0);
2803 else if (modifier == NONE
2804 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2805 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2806 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2807 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2808 vectype_in, dt);
2809 else
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2813 "function is not vectorizable.\n");
2814 return false;
2818 if (slp_node)
2819 ncopies = 1;
2820 else if (modifier == NARROW && ifn == IFN_LAST)
2821 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2822 else
2823 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2825 /* Sanity check: make sure that at least one copy of the vectorized stmt
2826 needs to be generated. */
2827 gcc_assert (ncopies >= 1);
2829 if (!vec_stmt) /* transformation not required. */
2831 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2832 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2834 "\n");
2835 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2836 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2837 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2838 vec_promote_demote, stmt_info, 0, vect_body);
2840 return true;
2843 /* Transform. */
2845 if (dump_enabled_p ())
2846 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2848 /* Handle def. */
2849 scalar_dest = gimple_call_lhs (stmt);
2850 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2852 prev_stmt_info = NULL;
2853 if (modifier == NONE || ifn != IFN_LAST)
2855 tree prev_res = NULL_TREE;
2856 for (j = 0; j < ncopies; ++j)
2858 /* Build argument list for the vectorized call. */
2859 if (j == 0)
2860 vargs.create (nargs);
2861 else
2862 vargs.truncate (0);
2864 if (slp_node)
2866 auto_vec<vec<tree> > vec_defs (nargs);
2867 vec<tree> vec_oprnds0;
2869 for (i = 0; i < nargs; i++)
2870 vargs.quick_push (gimple_call_arg (stmt, i));
2871 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2872 vec_oprnds0 = vec_defs[0];
2874 /* Arguments are ready. Create the new vector stmt. */
2875 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2877 size_t k;
2878 for (k = 0; k < nargs; k++)
2880 vec<tree> vec_oprndsk = vec_defs[k];
2881 vargs[k] = vec_oprndsk[i];
2883 if (modifier == NARROW)
2885 tree half_res = make_ssa_name (vectype_in);
2886 gcall *call
2887 = gimple_build_call_internal_vec (ifn, vargs);
2888 gimple_call_set_lhs (call, half_res);
2889 gimple_call_set_nothrow (call, true);
2890 new_stmt = call;
2891 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2892 if ((i & 1) == 0)
2894 prev_res = half_res;
2895 continue;
2897 new_temp = make_ssa_name (vec_dest);
2898 new_stmt = gimple_build_assign (new_temp, convert_code,
2899 prev_res, half_res);
2901 else
2903 gcall *call;
2904 if (ifn != IFN_LAST)
2905 call = gimple_build_call_internal_vec (ifn, vargs);
2906 else
2907 call = gimple_build_call_vec (fndecl, vargs);
2908 new_temp = make_ssa_name (vec_dest, call);
2909 gimple_call_set_lhs (call, new_temp);
2910 gimple_call_set_nothrow (call, true);
2911 new_stmt = call;
2913 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2914 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2917 for (i = 0; i < nargs; i++)
2919 vec<tree> vec_oprndsi = vec_defs[i];
2920 vec_oprndsi.release ();
2922 continue;
2925 for (i = 0; i < nargs; i++)
2927 op = gimple_call_arg (stmt, i);
2928 if (j == 0)
2929 vec_oprnd0
2930 = vect_get_vec_def_for_operand (op, stmt);
2931 else
2933 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2934 vec_oprnd0
2935 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2938 vargs.quick_push (vec_oprnd0);
2941 if (gimple_call_internal_p (stmt)
2942 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2944 auto_vec<tree, 32> v (nunits_out);
2945 for (int k = 0; k < nunits_out; ++k)
2946 v.quick_push (build_int_cst (unsigned_type_node,
2947 j * nunits_out + k));
2948 tree cst = build_vector (vectype_out, v);
2949 tree new_var
2950 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2951 gimple *init_stmt = gimple_build_assign (new_var, cst);
2952 vect_init_vector_1 (stmt, init_stmt, NULL);
2953 new_temp = make_ssa_name (vec_dest);
2954 new_stmt = gimple_build_assign (new_temp, new_var);
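/* Worked example: with nunits_out == 4 and ncopies == 2, copy j == 0
   materializes { 0, 1, 2, 3 } and copy j == 1 materializes
   { 4, 5, 6, 7 }, together forming the { 0, 1, ..., vf - 1 } vector
   described earlier.  */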
2956 else if (modifier == NARROW)
2958 tree half_res = make_ssa_name (vectype_in);
2959 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2960 gimple_call_set_lhs (call, half_res);
2961 gimple_call_set_nothrow (call, true);
2962 new_stmt = call;
2963 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2964 if ((j & 1) == 0)
2966 prev_res = half_res;
2967 continue;
2969 new_temp = make_ssa_name (vec_dest);
2970 new_stmt = gimple_build_assign (new_temp, convert_code,
2971 prev_res, half_res);
2973 else
2975 gcall *call;
2976 if (ifn != IFN_LAST)
2977 call = gimple_build_call_internal_vec (ifn, vargs);
2978 else
2979 call = gimple_build_call_vec (fndecl, vargs);
2980 new_temp = make_ssa_name (vec_dest, new_stmt);
2981 gimple_call_set_lhs (call, new_temp);
2982 gimple_call_set_nothrow (call, true);
2983 new_stmt = call;
2985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987 if (j == (modifier == NARROW ? 1 : 0))
2988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2989 else
2990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2992 prev_stmt_info = vinfo_for_stmt (new_stmt);
2995 else if (modifier == NARROW)
2997 for (j = 0; j < ncopies; ++j)
2999 /* Build argument list for the vectorized call. */
3000 if (j == 0)
3001 vargs.create (nargs * 2);
3002 else
3003 vargs.truncate (0);
3005 if (slp_node)
3007 auto_vec<vec<tree> > vec_defs (nargs);
3008 vec<tree> vec_oprnds0;
3010 for (i = 0; i < nargs; i++)
3011 vargs.quick_push (gimple_call_arg (stmt, i));
3012 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3013 vec_oprnds0 = vec_defs[0];
3015 /* Arguments are ready. Create the new vector stmt. */
3016 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3018 size_t k;
3019 vargs.truncate (0);
3020 for (k = 0; k < nargs; k++)
3022 vec<tree> vec_oprndsk = vec_defs[k];
3023 vargs.quick_push (vec_oprndsk[i]);
3024 vargs.quick_push (vec_oprndsk[i + 1]);
3026 gcall *call;
3027 if (ifn != IFN_LAST)
3028 call = gimple_build_call_internal_vec (ifn, vargs);
3029 else
3030 call = gimple_build_call_vec (fndecl, vargs);
3031 new_temp = make_ssa_name (vec_dest, call);
3032 gimple_call_set_lhs (call, new_temp);
3033 gimple_call_set_nothrow (call, true);
3034 new_stmt = call;
3035 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3036 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3039 for (i = 0; i < nargs; i++)
3041 vec<tree> vec_oprndsi = vec_defs[i];
3042 vec_oprndsi.release ();
3044 continue;
3047 for (i = 0; i < nargs; i++)
3049 op = gimple_call_arg (stmt, i);
3050 if (j == 0)
3052 vec_oprnd0
3053 = vect_get_vec_def_for_operand (op, stmt);
3054 vec_oprnd1
3055 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3057 else
3059 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3060 vec_oprnd0
3061 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3062 vec_oprnd1
3063 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3066 vargs.quick_push (vec_oprnd0);
3067 vargs.quick_push (vec_oprnd1);
3070 new_stmt = gimple_build_call_vec (fndecl, vargs);
3071 new_temp = make_ssa_name (vec_dest, new_stmt);
3072 gimple_call_set_lhs (new_stmt, new_temp);
3073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3075 if (j == 0)
3076 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3077 else
3078 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3080 prev_stmt_info = vinfo_for_stmt (new_stmt);
3083 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3085 else
3086 /* No current target implements this case. */
3087 return false;
3089 vargs.release ();
3091 /* The call in STMT might prevent it from being removed in dce.
3092 We cannot remove it here, however, because of the way the SSA name
3093 it defines is mapped to the new definition. So just replace the
3094 rhs of the statement with something harmless. */
3096 if (slp_node)
3097 return true;
3099 type = TREE_TYPE (scalar_dest);
3100 if (is_pattern_stmt_p (stmt_info))
3101 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3102 else
3103 lhs = gimple_call_lhs (stmt);
3105 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3106 set_vinfo_for_stmt (new_stmt, stmt_info);
3107 set_vinfo_for_stmt (stmt, NULL);
3108 STMT_VINFO_STMT (stmt_info) = new_stmt;
3109 gsi_replace (gsi, new_stmt, false);
3111 return true;
3115 struct simd_call_arg_info
3117 tree vectype;
3118 tree op;
3119 HOST_WIDE_INT linear_step;
3120 enum vect_def_type dt;
3121 unsigned int align;
3122 bool simd_lane_linear;
3125 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3126 is linear within simd lane (but not within whole loop), note it in
3127 *ARGINFO. */
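/* A typical pattern recognized below, in schematic GIMPLE (the names
   are placeholders):

     lane_1 = GOMP_SIMD_LANE (simduid_2);
     off_3 = lane_1 * 8;
     op_4 = &base + off_3;

   Here OP advances by 8 bytes per simd lane, so ARGINFO->op is set to
   &base, ARGINFO->linear_step to 8 and ARGINFO->simd_lane_linear to
   true.  */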
3129 static void
3130 vect_simd_lane_linear (tree op, struct loop *loop,
3131 struct simd_call_arg_info *arginfo)
3133 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3135 if (!is_gimple_assign (def_stmt)
3136 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3137 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3138 return;
3140 tree base = gimple_assign_rhs1 (def_stmt);
3141 HOST_WIDE_INT linear_step = 0;
3142 tree v = gimple_assign_rhs2 (def_stmt);
3143 while (TREE_CODE (v) == SSA_NAME)
3145 tree t;
3146 def_stmt = SSA_NAME_DEF_STMT (v);
3147 if (is_gimple_assign (def_stmt))
3148 switch (gimple_assign_rhs_code (def_stmt))
3150 case PLUS_EXPR:
3151 t = gimple_assign_rhs2 (def_stmt);
3152 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3153 return;
3154 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3155 v = gimple_assign_rhs1 (def_stmt);
3156 continue;
3157 case MULT_EXPR:
3158 t = gimple_assign_rhs2 (def_stmt);
3159 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3160 return;
3161 linear_step = tree_to_shwi (t);
3162 v = gimple_assign_rhs1 (def_stmt);
3163 continue;
3164 CASE_CONVERT:
3165 t = gimple_assign_rhs1 (def_stmt);
3166 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3167 || (TYPE_PRECISION (TREE_TYPE (v))
3168 < TYPE_PRECISION (TREE_TYPE (t))))
3169 return;
3170 if (!linear_step)
3171 linear_step = 1;
3172 v = t;
3173 continue;
3174 default:
3175 return;
3177 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3178 && loop->simduid
3179 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3180 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3181 == loop->simduid))
3183 if (!linear_step)
3184 linear_step = 1;
3185 arginfo->linear_step = linear_step;
3186 arginfo->op = base;
3187 arginfo->simd_lane_linear = true;
3188 return;
3193 /* Function vectorizable_simd_clone_call.
3195 Check if STMT performs a function call that can be vectorized
3196 by calling a simd clone of the function.
3197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3198 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
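/* Background, illustrative only: simd clones are the vector entry points
   GCC emits for functions declared with "#pragma omp declare simd" (or
   the "simd" attribute).  A scalar call in the loop body such as

     x = foo (a[i], b);

   can then be replaced by a call to the best-matching clone, with vector
   arguments for vector parameters and a single value for uniform ones
   (foo, a, b and x are placeholders).  */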
3201 static bool
3202 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3203 gimple **vec_stmt, slp_tree slp_node)
3205 tree vec_dest;
3206 tree scalar_dest;
3207 tree op, type;
3208 tree vec_oprnd0 = NULL_TREE;
3209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3210 tree vectype;
3211 unsigned int nunits;
3212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3213 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3214 vec_info *vinfo = stmt_info->vinfo;
3215 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3216 tree fndecl, new_temp;
3217 gimple *def_stmt;
3218 gimple *new_stmt = NULL;
3219 int ncopies, j;
3220 auto_vec<simd_call_arg_info> arginfo;
3221 vec<tree> vargs = vNULL;
3222 size_t i, nargs;
3223 tree lhs, rtype, ratype;
3224 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3226 /* Is STMT a vectorizable call? */
3227 if (!is_gimple_call (stmt))
3228 return false;
3230 fndecl = gimple_call_fndecl (stmt);
3231 if (fndecl == NULL_TREE)
3232 return false;
3234 struct cgraph_node *node = cgraph_node::get (fndecl);
3235 if (node == NULL || node->simd_clones == NULL)
3236 return false;
3238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3239 return false;
3241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3242 && ! vec_stmt)
3243 return false;
3245 if (gimple_call_lhs (stmt)
3246 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3247 return false;
3249 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3251 vectype = STMT_VINFO_VECTYPE (stmt_info);
3253 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3254 return false;
3256 /* FORNOW */
3257 if (slp_node)
3258 return false;
3260 /* Process function arguments. */
3261 nargs = gimple_call_num_args (stmt);
3263 /* Bail out if the function has zero arguments. */
3264 if (nargs == 0)
3265 return false;
3267 arginfo.reserve (nargs, true);
3269 for (i = 0; i < nargs; i++)
3271 simd_call_arg_info thisarginfo;
3272 affine_iv iv;
3274 thisarginfo.linear_step = 0;
3275 thisarginfo.align = 0;
3276 thisarginfo.op = NULL_TREE;
3277 thisarginfo.simd_lane_linear = false;
3279 op = gimple_call_arg (stmt, i);
3280 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3281 &thisarginfo.vectype)
3282 || thisarginfo.dt == vect_uninitialized_def)
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3286 "use not simple.\n");
3287 return false;
3290 if (thisarginfo.dt == vect_constant_def
3291 || thisarginfo.dt == vect_external_def)
3292 gcc_assert (thisarginfo.vectype == NULL_TREE);
3293 else
3294 gcc_assert (thisarginfo.vectype != NULL_TREE);
3296 /* For linear arguments, the analyze phase should have saved
3297 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3298 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3299 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3301 gcc_assert (vec_stmt);
3302 thisarginfo.linear_step
3303 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3304 thisarginfo.op
3305 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3306 thisarginfo.simd_lane_linear
3307 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3308 == boolean_true_node);
3309 /* If the loop has been peeled for alignment, we need to adjust it. */
3310 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3311 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3312 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3314 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3315 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3316 tree opt = TREE_TYPE (thisarginfo.op);
3317 bias = fold_convert (TREE_TYPE (step), bias);
3318 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3319 thisarginfo.op
3320 = fold_build2 (POINTER_TYPE_P (opt)
3321 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3322 thisarginfo.op, bias);
3325 else if (!vec_stmt
3326 && thisarginfo.dt != vect_constant_def
3327 && thisarginfo.dt != vect_external_def
3328 && loop_vinfo
3329 && TREE_CODE (op) == SSA_NAME
3330 && simple_iv (loop, loop_containing_stmt (stmt), op,
3331 &iv, false)
3332 && tree_fits_shwi_p (iv.step))
3334 thisarginfo.linear_step = tree_to_shwi (iv.step);
3335 thisarginfo.op = iv.base;
3337 else if ((thisarginfo.dt == vect_constant_def
3338 || thisarginfo.dt == vect_external_def)
3339 && POINTER_TYPE_P (TREE_TYPE (op)))
3340 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3341 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3342 linear too. */
3343 if (POINTER_TYPE_P (TREE_TYPE (op))
3344 && !thisarginfo.linear_step
3345 && !vec_stmt
3346 && thisarginfo.dt != vect_constant_def
3347 && thisarginfo.dt != vect_external_def
3348 && loop_vinfo
3349 && !slp_node
3350 && TREE_CODE (op) == SSA_NAME)
3351 vect_simd_lane_linear (op, loop, &thisarginfo);
3353 arginfo.quick_push (thisarginfo);
3356 unsigned int badness = 0;
3357 struct cgraph_node *bestn = NULL;
3358 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3359 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3360 else
3361 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3362 n = n->simdclone->next_clone)
3364 unsigned int this_badness = 0;
3365 if (n->simdclone->simdlen
3366 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3367 || n->simdclone->nargs != nargs)
3368 continue;
3369 if (n->simdclone->simdlen
3370 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3371 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3372 - exact_log2 (n->simdclone->simdlen)) * 1024;
3373 if (n->simdclone->inbranch)
3374 this_badness += 2048;
3375 int target_badness = targetm.simd_clone.usable (n);
3376 if (target_badness < 0)
3377 continue;
3378 this_badness += target_badness * 512;
3379 /* FORNOW: Have to add code to add the mask argument. */
3380 if (n->simdclone->inbranch)
3381 continue;
3382 for (i = 0; i < nargs; i++)
3384 switch (n->simdclone->args[i].arg_type)
3386 case SIMD_CLONE_ARG_TYPE_VECTOR:
3387 if (!useless_type_conversion_p
3388 (n->simdclone->args[i].orig_type,
3389 TREE_TYPE (gimple_call_arg (stmt, i))))
3390 i = -1;
3391 else if (arginfo[i].dt == vect_constant_def
3392 || arginfo[i].dt == vect_external_def
3393 || arginfo[i].linear_step)
3394 this_badness += 64;
3395 break;
3396 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3397 if (arginfo[i].dt != vect_constant_def
3398 && arginfo[i].dt != vect_external_def)
3399 i = -1;
3400 break;
3401 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3402 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3403 if (arginfo[i].dt == vect_constant_def
3404 || arginfo[i].dt == vect_external_def
3405 || (arginfo[i].linear_step
3406 != n->simdclone->args[i].linear_step))
3407 i = -1;
3408 break;
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3411 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3412 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3413 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3414 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3415 /* FORNOW */
3416 i = -1;
3417 break;
3418 case SIMD_CLONE_ARG_TYPE_MASK:
3419 gcc_unreachable ();
3421 if (i == (size_t) -1)
3422 break;
3423 if (n->simdclone->args[i].alignment > arginfo[i].align)
3425 i = -1;
3426 break;
3428 if (arginfo[i].align)
3429 this_badness += (exact_log2 (arginfo[i].align)
3430 - exact_log2 (n->simdclone->args[i].alignment));
3432 if (i == (size_t) -1)
3433 continue;
3434 if (bestn == NULL || this_badness < badness)
3436 bestn = n;
3437 badness = this_badness;
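/* Worked example of the scoring above: with a vectorization factor of 8,
   a clone with simdlen 8 adds nothing for the simdlen term, whereas a
   clone with simdlen 4 adds (log2 (8) - log2 (4)) * 1024 == 1024; since
   the lowest accumulated badness wins, the simdlen-8 clone is preferred,
   all else being equal.  */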
3441 if (bestn == NULL)
3442 return false;
3444 for (i = 0; i < nargs; i++)
3445 if ((arginfo[i].dt == vect_constant_def
3446 || arginfo[i].dt == vect_external_def)
3447 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3449 arginfo[i].vectype
3450 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3451 i)));
3452 if (arginfo[i].vectype == NULL
3453 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3454 > bestn->simdclone->simdlen))
3455 return false;
3458 fndecl = bestn->decl;
3459 nunits = bestn->simdclone->simdlen;
3460 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3462 /* If the function isn't const, only allow it in simd loops where the
3463 user has asserted that at least nunits consecutive iterations can be
3464 performed using SIMD instructions. */
3465 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3466 && gimple_vuse (stmt))
3467 return false;
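/* Example: for a chosen clone with simdlen 8, a call to a non-const
   function is only accepted when the enclosing simd loop guarantees at
   least 8 independent iterations, e.g. via "#pragma omp simd safelen(8)"
   (so loop->safelen >= 8); otherwise we give up here.  */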
3469 /* Sanity check: make sure that at least one copy of the vectorized stmt
3470 needs to be generated. */
3471 gcc_assert (ncopies >= 1);
3473 if (!vec_stmt) /* transformation not required. */
3475 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3476 for (i = 0; i < nargs; i++)
3477 if ((bestn->simdclone->args[i].arg_type
3478 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3479 || (bestn->simdclone->args[i].arg_type
3480 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3482 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3483 + 1);
3484 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3485 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3486 ? size_type_node : TREE_TYPE (arginfo[i].op);
3487 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3488 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3489 tree sll = arginfo[i].simd_lane_linear
3490 ? boolean_true_node : boolean_false_node;
3491 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3493 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3494 if (dump_enabled_p ())
3495 dump_printf_loc (MSG_NOTE, vect_location,
3496 "=== vectorizable_simd_clone_call ===\n");
3497 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3498 return true;
3501 /* Transform. */
3503 if (dump_enabled_p ())
3504 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3506 /* Handle def. */
3507 scalar_dest = gimple_call_lhs (stmt);
3508 vec_dest = NULL_TREE;
3509 rtype = NULL_TREE;
3510 ratype = NULL_TREE;
3511 if (scalar_dest)
3513 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3514 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3515 if (TREE_CODE (rtype) == ARRAY_TYPE)
3517 ratype = rtype;
3518 rtype = TREE_TYPE (ratype);
3522 prev_stmt_info = NULL;
3523 for (j = 0; j < ncopies; ++j)
3525 /* Build argument list for the vectorized call. */
3526 if (j == 0)
3527 vargs.create (nargs);
3528 else
3529 vargs.truncate (0);
3531 for (i = 0; i < nargs; i++)
3533 unsigned int k, l, m, o;
3534 tree atype;
3535 op = gimple_call_arg (stmt, i);
3536 switch (bestn->simdclone->args[i].arg_type)
3538 case SIMD_CLONE_ARG_TYPE_VECTOR:
3539 atype = bestn->simdclone->args[i].vector_type;
3540 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3541 for (m = j * o; m < (j + 1) * o; m++)
3543 if (TYPE_VECTOR_SUBPARTS (atype)
3544 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3546 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3547 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3548 / TYPE_VECTOR_SUBPARTS (atype));
3549 gcc_assert ((k & (k - 1)) == 0);
3550 if (m == 0)
3551 vec_oprnd0
3552 = vect_get_vec_def_for_operand (op, stmt);
3553 else
3555 vec_oprnd0 = arginfo[i].op;
3556 if ((m & (k - 1)) == 0)
3557 vec_oprnd0
3558 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3559 vec_oprnd0);
3561 arginfo[i].op = vec_oprnd0;
3562 vec_oprnd0
3563 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3564 bitsize_int (prec),
3565 bitsize_int ((m & (k - 1)) * prec));
3566 new_stmt
3567 = gimple_build_assign (make_ssa_name (atype),
3568 vec_oprnd0);
3569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3570 vargs.safe_push (gimple_assign_lhs (new_stmt));
3572 else
3574 k = (TYPE_VECTOR_SUBPARTS (atype)
3575 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3576 gcc_assert ((k & (k - 1)) == 0);
3577 vec<constructor_elt, va_gc> *ctor_elts;
3578 if (k != 1)
3579 vec_alloc (ctor_elts, k);
3580 else
3581 ctor_elts = NULL;
3582 for (l = 0; l < k; l++)
3584 if (m == 0 && l == 0)
3585 vec_oprnd0
3586 = vect_get_vec_def_for_operand (op, stmt);
3587 else
3588 vec_oprnd0
3589 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3590 arginfo[i].op);
3591 arginfo[i].op = vec_oprnd0;
3592 if (k == 1)
3593 break;
3594 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3595 vec_oprnd0);
3597 if (k == 1)
3598 vargs.safe_push (vec_oprnd0);
3599 else
3601 vec_oprnd0 = build_constructor (atype, ctor_elts);
3602 new_stmt
3603 = gimple_build_assign (make_ssa_name (atype),
3604 vec_oprnd0);
3605 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3606 vargs.safe_push (gimple_assign_lhs (new_stmt));
3610 break;
3611 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3612 vargs.safe_push (op);
3613 break;
3614 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3615 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3616 if (j == 0)
3618 gimple_seq stmts;
3619 arginfo[i].op
3620 = force_gimple_operand (arginfo[i].op, &stmts, true,
3621 NULL_TREE);
3622 if (stmts != NULL)
3624 basic_block new_bb;
3625 edge pe = loop_preheader_edge (loop);
3626 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3627 gcc_assert (!new_bb);
3629 if (arginfo[i].simd_lane_linear)
3631 vargs.safe_push (arginfo[i].op);
3632 break;
3634 tree phi_res = copy_ssa_name (op);
3635 gphi *new_phi = create_phi_node (phi_res, loop->header);
3636 set_vinfo_for_stmt (new_phi,
3637 new_stmt_vec_info (new_phi, loop_vinfo));
3638 add_phi_arg (new_phi, arginfo[i].op,
3639 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3640 enum tree_code code
3641 = POINTER_TYPE_P (TREE_TYPE (op))
3642 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3643 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3644 ? sizetype : TREE_TYPE (op);
3645 widest_int cst
3646 = wi::mul (bestn->simdclone->args[i].linear_step,
3647 ncopies * nunits);
3648 tree tcst = wide_int_to_tree (type, cst);
3649 tree phi_arg = copy_ssa_name (op);
3650 new_stmt
3651 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3652 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3653 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3654 set_vinfo_for_stmt (new_stmt,
3655 new_stmt_vec_info (new_stmt, loop_vinfo));
3656 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3657 UNKNOWN_LOCATION);
3658 arginfo[i].op = phi_res;
3659 vargs.safe_push (phi_res);
3661 else
3663 enum tree_code code
3664 = POINTER_TYPE_P (TREE_TYPE (op))
3665 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3666 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3667 ? sizetype : TREE_TYPE (op);
3668 widest_int cst
3669 = wi::mul (bestn->simdclone->args[i].linear_step,
3670 j * nunits);
3671 tree tcst = wide_int_to_tree (type, cst);
3672 new_temp = make_ssa_name (TREE_TYPE (op));
3673 new_stmt = gimple_build_assign (new_temp, code,
3674 arginfo[i].op, tcst);
3675 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3676 vargs.safe_push (new_temp);
3678 break;
3679 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3680 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3681 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3682 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3683 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3684 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3685 default:
3686 gcc_unreachable ();
3690 new_stmt = gimple_build_call_vec (fndecl, vargs);
3691 if (vec_dest)
3693 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3694 if (ratype)
3695 new_temp = create_tmp_var (ratype);
3696 else if (TYPE_VECTOR_SUBPARTS (vectype)
3697 == TYPE_VECTOR_SUBPARTS (rtype))
3698 new_temp = make_ssa_name (vec_dest, new_stmt);
3699 else
3700 new_temp = make_ssa_name (rtype, new_stmt);
3701 gimple_call_set_lhs (new_stmt, new_temp);
3703 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3705 if (vec_dest)
3707 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3709 unsigned int k, l;
3710 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3711 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3712 gcc_assert ((k & (k - 1)) == 0);
3713 for (l = 0; l < k; l++)
3715 tree t;
3716 if (ratype)
3718 t = build_fold_addr_expr (new_temp);
3719 t = build2 (MEM_REF, vectype, t,
3720 build_int_cst (TREE_TYPE (t),
3721 l * prec / BITS_PER_UNIT));
3723 else
3724 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3725 bitsize_int (prec), bitsize_int (l * prec));
3726 new_stmt
3727 = gimple_build_assign (make_ssa_name (vectype), t);
3728 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3729 if (j == 0 && l == 0)
3730 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3731 else
3732 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3734 prev_stmt_info = vinfo_for_stmt (new_stmt);
3737 if (ratype)
3739 tree clobber = build_constructor (ratype, NULL);
3740 TREE_THIS_VOLATILE (clobber) = 1;
3741 new_stmt = gimple_build_assign (new_temp, clobber);
3742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3744 continue;
3746 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3748 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3749 / TYPE_VECTOR_SUBPARTS (rtype));
3750 gcc_assert ((k & (k - 1)) == 0);
3751 if ((j & (k - 1)) == 0)
3752 vec_alloc (ret_ctor_elts, k);
3753 if (ratype)
3755 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3756 for (m = 0; m < o; m++)
3758 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3759 size_int (m), NULL_TREE, NULL_TREE);
3760 new_stmt
3761 = gimple_build_assign (make_ssa_name (rtype), tem);
3762 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3763 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3764 gimple_assign_lhs (new_stmt));
3766 tree clobber = build_constructor (ratype, NULL);
3767 TREE_THIS_VOLATILE (clobber) = 1;
3768 new_stmt = gimple_build_assign (new_temp, clobber);
3769 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3771 else
3772 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3773 if ((j & (k - 1)) != k - 1)
3774 continue;
3775 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3776 new_stmt
3777 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3778 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3780 if ((unsigned) j == k - 1)
3781 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3782 else
3783 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3785 prev_stmt_info = vinfo_for_stmt (new_stmt);
3786 continue;
3788 else if (ratype)
3790 tree t = build_fold_addr_expr (new_temp);
3791 t = build2 (MEM_REF, vectype, t,
3792 build_int_cst (TREE_TYPE (t), 0));
3793 new_stmt
3794 = gimple_build_assign (make_ssa_name (vec_dest), t);
3795 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3796 tree clobber = build_constructor (ratype, NULL);
3797 TREE_THIS_VOLATILE (clobber) = 1;
3798 vect_finish_stmt_generation (stmt,
3799 gimple_build_assign (new_temp,
3800 clobber), gsi);
3804 if (j == 0)
3805 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3806 else
3807 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3809 prev_stmt_info = vinfo_for_stmt (new_stmt);
3812 vargs.release ();
3814 /* The call in STMT might prevent it from being removed in DCE.
3815 However, we cannot remove it here, due to the way the SSA name
3816 it defines is mapped to the new definition. So just replace
3817 the rhs of the statement with something harmless. */
3819 if (slp_node)
3820 return true;
3822 if (scalar_dest)
3824 type = TREE_TYPE (scalar_dest);
3825 if (is_pattern_stmt_p (stmt_info))
3826 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3827 else
3828 lhs = gimple_call_lhs (stmt);
3829 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3831 else
3832 new_stmt = gimple_build_nop ();
3833 set_vinfo_for_stmt (new_stmt, stmt_info);
3834 set_vinfo_for_stmt (stmt, NULL);
3835 STMT_VINFO_STMT (stmt_info) = new_stmt;
3836 gsi_replace (gsi, new_stmt, true);
3837 unlink_stmt_vdef (stmt);
3839 return true;
3843 /* Function vect_gen_widened_results_half
3845 Create a vector stmt whose code, number of arguments, and result
3846 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3847 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3848 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3849 needs to be created (DECL is a function-decl of a target-builtin).
3850 STMT is the original scalar stmt that we are vectorizing. */
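/* For example (illustrative): with 128-bit vectors, widening a V8HI operand
   produces its result in two V4SI halves; the caller
   (vect_create_vectorized_promotion_stmts) invokes this helper once per
   half, passing the tree code or target builtin appropriate for that half.  */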
3852 static gimple *
3853 vect_gen_widened_results_half (enum tree_code code,
3854 tree decl,
3855 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3856 tree vec_dest, gimple_stmt_iterator *gsi,
3857 gimple *stmt)
3859 gimple *new_stmt;
3860 tree new_temp;
3862 /* Generate half of the widened result: */
3863 if (code == CALL_EXPR)
3865 /* Target specific support */
3866 if (op_type == binary_op)
3867 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3868 else
3869 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3870 new_temp = make_ssa_name (vec_dest, new_stmt);
3871 gimple_call_set_lhs (new_stmt, new_temp);
3873 else
3875 /* Generic support */
3876 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3877 if (op_type != binary_op)
3878 vec_oprnd1 = NULL;
3879 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3880 new_temp = make_ssa_name (vec_dest, new_stmt);
3881 gimple_assign_set_lhs (new_stmt, new_temp);
3883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3885 return new_stmt;
3889 /* Get vectorized definitions for loop-based vectorization. For the first
3890 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3891 the scalar operand), and for the rest we get a copy with
3892 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3893 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3894 The vectors are collected into VEC_OPRNDS. */
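/* For instance, with MULTI_STEP_CVT == 1 this collects four vector defs:
   the first is obtained from the scalar operand and each subsequent one
   is a copy-def of the previous vector definition.  */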
3896 static void
3897 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3898 vec<tree> *vec_oprnds, int multi_step_cvt)
3900 tree vec_oprnd;
3902 /* Get first vector operand. */
3903 /* All the vector operands except the very first one (which is the scalar oprnd)
3904 are stmt copies. */
3905 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3906 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3907 else
3908 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3910 vec_oprnds->quick_push (vec_oprnd);
3912 /* Get second vector operand. */
3913 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3914 vec_oprnds->quick_push (vec_oprnd);
3916 *oprnd = vec_oprnd;
3918 /* For conversion in multiple steps, continue to get operands
3919 recursively. */
3920 if (multi_step_cvt)
3921 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3925 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3926 For multi-step conversions store the resulting vectors and call the function
3927 recursively. */
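/* Roughly: each step combines pairs of input vectors into one narrower
   vector, e.g. two V4SI operands packed into a single V8HI result; the
   later steps of a multi-step demotion are emitted with
   VEC_PACK_TRUNC_EXPR (see the recursive call below).  */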
3929 static void
3930 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3931 int multi_step_cvt, gimple *stmt,
3932 vec<tree> vec_dsts,
3933 gimple_stmt_iterator *gsi,
3934 slp_tree slp_node, enum tree_code code,
3935 stmt_vec_info *prev_stmt_info)
3937 unsigned int i;
3938 tree vop0, vop1, new_tmp, vec_dest;
3939 gimple *new_stmt;
3940 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3942 vec_dest = vec_dsts.pop ();
3944 for (i = 0; i < vec_oprnds->length (); i += 2)
3946 /* Create demotion operation. */
3947 vop0 = (*vec_oprnds)[i];
3948 vop1 = (*vec_oprnds)[i + 1];
3949 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3950 new_tmp = make_ssa_name (vec_dest, new_stmt);
3951 gimple_assign_set_lhs (new_stmt, new_tmp);
3952 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3954 if (multi_step_cvt)
3955 /* Store the resulting vector for next recursive call. */
3956 (*vec_oprnds)[i/2] = new_tmp;
3957 else
3959 /* This is the last step of the conversion sequence. Store the
3960 vectors in SLP_NODE or in the vector info of the scalar statement
3961 (or in the STMT_VINFO_RELATED_STMT chain). */
3962 if (slp_node)
3963 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3964 else
3966 if (!*prev_stmt_info)
3967 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3968 else
3969 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3971 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3976 /* For multi-step demotion operations we first generate demotion operations
3977 from the source type to the intermediate types, and then combine the
3978 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3979 type. */
3980 if (multi_step_cvt)
3982 /* At each level of recursion we have half of the operands we had at the
3983 previous level. */
3984 vec_oprnds->truncate ((i+1)/2);
3985 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3986 stmt, vec_dsts, gsi, slp_node,
3987 VEC_PACK_TRUNC_EXPR,
3988 prev_stmt_info);
3991 vec_dsts.quick_push (vec_dest);
3995 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3996 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3997 the resulting vectors and call the function recursively. */
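/* In effect each input vector in VEC_OPRNDS0 yields two output vectors,
   one per half of the widened result (generated with CODE1 and CODE2),
   so on return VEC_OPRNDS0 holds twice as many vectors as before.  */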
3999 static void
4000 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4001 vec<tree> *vec_oprnds1,
4002 gimple *stmt, tree vec_dest,
4003 gimple_stmt_iterator *gsi,
4004 enum tree_code code1,
4005 enum tree_code code2, tree decl1,
4006 tree decl2, int op_type)
4008 int i;
4009 tree vop0, vop1, new_tmp1, new_tmp2;
4010 gimple *new_stmt1, *new_stmt2;
4011 vec<tree> vec_tmp = vNULL;
4013 vec_tmp.create (vec_oprnds0->length () * 2);
4014 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4016 if (op_type == binary_op)
4017 vop1 = (*vec_oprnds1)[i];
4018 else
4019 vop1 = NULL_TREE;
4021 /* Generate the two halves of promotion operation. */
4022 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4023 op_type, vec_dest, gsi, stmt);
4024 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4025 op_type, vec_dest, gsi, stmt);
4026 if (is_gimple_call (new_stmt1))
4028 new_tmp1 = gimple_call_lhs (new_stmt1);
4029 new_tmp2 = gimple_call_lhs (new_stmt2);
4031 else
4033 new_tmp1 = gimple_assign_lhs (new_stmt1);
4034 new_tmp2 = gimple_assign_lhs (new_stmt2);
4037 /* Store the results for the next step. */
4038 vec_tmp.quick_push (new_tmp1);
4039 vec_tmp.quick_push (new_tmp2);
4042 vec_oprnds0->release ();
4043 *vec_oprnds0 = vec_tmp;
4047 /* Check if STMT performs a conversion operation that can be vectorized.
4048 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4049 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4050 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
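/* The conversion is classified below as NONE, WIDEN or NARROW by comparing
   the number of elements of the input and output vector types.  E.g.
   (illustrative) a float -> double conversion is a WIDEN case: each input
   vector yields two wider output vectors, possibly going through an
   intermediate type when the target has no single-step operation.  */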
4052 static bool
4053 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4054 gimple **vec_stmt, slp_tree slp_node)
4056 tree vec_dest;
4057 tree scalar_dest;
4058 tree op0, op1 = NULL_TREE;
4059 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4060 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4061 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4062 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4063 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4064 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4065 tree new_temp;
4066 gimple *def_stmt;
4067 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4068 int ndts = 2;
4069 gimple *new_stmt = NULL;
4070 stmt_vec_info prev_stmt_info;
4071 int nunits_in;
4072 int nunits_out;
4073 tree vectype_out, vectype_in;
4074 int ncopies, i, j;
4075 tree lhs_type, rhs_type;
4076 enum { NARROW, NONE, WIDEN } modifier;
4077 vec<tree> vec_oprnds0 = vNULL;
4078 vec<tree> vec_oprnds1 = vNULL;
4079 tree vop0;
4080 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4081 vec_info *vinfo = stmt_info->vinfo;
4082 int multi_step_cvt = 0;
4083 vec<tree> interm_types = vNULL;
4084 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4085 int op_type;
4086 unsigned short fltsz;
4088 /* Is STMT a vectorizable conversion? */
4090 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4091 return false;
4093 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4094 && ! vec_stmt)
4095 return false;
4097 if (!is_gimple_assign (stmt))
4098 return false;
4100 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4101 return false;
4103 code = gimple_assign_rhs_code (stmt);
4104 if (!CONVERT_EXPR_CODE_P (code)
4105 && code != FIX_TRUNC_EXPR
4106 && code != FLOAT_EXPR
4107 && code != WIDEN_MULT_EXPR
4108 && code != WIDEN_LSHIFT_EXPR)
4109 return false;
4111 op_type = TREE_CODE_LENGTH (code);
4113 /* Check types of lhs and rhs. */
4114 scalar_dest = gimple_assign_lhs (stmt);
4115 lhs_type = TREE_TYPE (scalar_dest);
4116 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4118 op0 = gimple_assign_rhs1 (stmt);
4119 rhs_type = TREE_TYPE (op0);
4121 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4122 && !((INTEGRAL_TYPE_P (lhs_type)
4123 && INTEGRAL_TYPE_P (rhs_type))
4124 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4125 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4126 return false;
4128 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4129 && ((INTEGRAL_TYPE_P (lhs_type)
4130 && !type_has_mode_precision_p (lhs_type))
4131 || (INTEGRAL_TYPE_P (rhs_type)
4132 && !type_has_mode_precision_p (rhs_type))))
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136 "type conversion to/from bit-precision unsupported."
4137 "\n");
4138 return false;
4141 /* Check the operands of the operation. */
4142 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4144 if (dump_enabled_p ())
4145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4146 "use not simple.\n");
4147 return false;
4149 if (op_type == binary_op)
4151 bool ok;
4153 op1 = gimple_assign_rhs2 (stmt);
4154 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4155 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4156 OP1. */
4157 if (CONSTANT_CLASS_P (op0))
4158 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4159 else
4160 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4162 if (!ok)
4164 if (dump_enabled_p ())
4165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4166 "use not simple.\n");
4167 return false;
4171 /* If op0 is an external or constant def, use a vector type of
4172 the same size as the output vector type. */
4173 if (!vectype_in)
4174 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4175 if (vec_stmt)
4176 gcc_assert (vectype_in);
4177 if (!vectype_in)
4179 if (dump_enabled_p ())
4181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4182 "no vectype for scalar type ");
4183 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4184 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4187 return false;
4190 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4191 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4193 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4196 "can't convert between boolean and non "
4197 "boolean vectors");
4198 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4199 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4202 return false;
4205 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4206 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4207 if (nunits_in < nunits_out)
4208 modifier = NARROW;
4209 else if (nunits_out == nunits_in)
4210 modifier = NONE;
4211 else
4212 modifier = WIDEN;
4214 /* Multiple types in SLP are handled by creating the appropriate number of
4215 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4216 case of SLP. */
4217 if (slp_node)
4218 ncopies = 1;
4219 else if (modifier == NARROW)
4220 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4221 else
4222 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4224 /* Sanity check: make sure that at least one copy of the vectorized stmt
4225 needs to be generated. */
4226 gcc_assert (ncopies >= 1);
4228 bool found_mode = false;
4229 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4230 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4231 opt_scalar_mode rhs_mode_iter;
4233 /* Supportable by target? */
4234 switch (modifier)
4236 case NONE:
4237 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4238 return false;
4239 if (supportable_convert_operation (code, vectype_out, vectype_in,
4240 &decl1, &code1))
4241 break;
4242 /* FALLTHRU */
4243 unsupported:
4244 if (dump_enabled_p ())
4245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4246 "conversion not supported by target.\n");
4247 return false;
4249 case WIDEN:
4250 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4251 &code1, &code2, &multi_step_cvt,
4252 &interm_types))
4254 /* A binary widening operation can only be supported directly by the
4255 architecture. */
4256 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4257 break;
4260 if (code != FLOAT_EXPR
4261 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4262 goto unsupported;
4264 fltsz = GET_MODE_SIZE (lhs_mode);
4265 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4267 rhs_mode = rhs_mode_iter.require ();
4268 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4269 break;
4271 cvt_type
4272 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4273 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4274 if (cvt_type == NULL_TREE)
4275 goto unsupported;
4277 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4279 if (!supportable_convert_operation (code, vectype_out,
4280 cvt_type, &decl1, &codecvt1))
4281 goto unsupported;
4283 else if (!supportable_widening_operation (code, stmt, vectype_out,
4284 cvt_type, &codecvt1,
4285 &codecvt2, &multi_step_cvt,
4286 &interm_types))
4287 continue;
4288 else
4289 gcc_assert (multi_step_cvt == 0);
4291 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4292 vectype_in, &code1, &code2,
4293 &multi_step_cvt, &interm_types))
4295 found_mode = true;
4296 break;
4300 if (!found_mode)
4301 goto unsupported;
4303 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4304 codecvt2 = ERROR_MARK;
4305 else
4307 multi_step_cvt++;
4308 interm_types.safe_push (cvt_type);
4309 cvt_type = NULL_TREE;
4311 break;
4313 case NARROW:
4314 gcc_assert (op_type == unary_op);
4315 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4316 &code1, &multi_step_cvt,
4317 &interm_types))
4318 break;
4320 if (code != FIX_TRUNC_EXPR
4321 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4322 goto unsupported;
4324 cvt_type
4325 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4326 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4327 if (cvt_type == NULL_TREE)
4328 goto unsupported;
4329 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4330 &decl1, &codecvt1))
4331 goto unsupported;
4332 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4333 &code1, &multi_step_cvt,
4334 &interm_types))
4335 break;
4336 goto unsupported;
4338 default:
4339 gcc_unreachable ();
4342 if (!vec_stmt) /* transformation not required. */
4344 if (dump_enabled_p ())
4345 dump_printf_loc (MSG_NOTE, vect_location,
4346 "=== vectorizable_conversion ===\n");
4347 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4349 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4350 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4352 else if (modifier == NARROW)
4354 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4355 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4357 else
4359 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4360 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4362 interm_types.release ();
4363 return true;
4366 /* Transform. */
4367 if (dump_enabled_p ())
4368 dump_printf_loc (MSG_NOTE, vect_location,
4369 "transform conversion. ncopies = %d.\n", ncopies);
4371 if (op_type == binary_op)
4373 if (CONSTANT_CLASS_P (op0))
4374 op0 = fold_convert (TREE_TYPE (op1), op0);
4375 else if (CONSTANT_CLASS_P (op1))
4376 op1 = fold_convert (TREE_TYPE (op0), op1);
4379 /* In case of multi-step conversion, we first generate conversion operations
4380 to the intermediate types, and then from those types to the final one.
4381 We create vector destinations for the intermediate types (TYPES) received
4382 from supportable_*_operation, and store them in the correct order
4383 for future use in vect_create_vectorized_*_stmts (). */
4384 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4385 vec_dest = vect_create_destination_var (scalar_dest,
4386 (cvt_type && modifier == WIDEN)
4387 ? cvt_type : vectype_out);
4388 vec_dsts.quick_push (vec_dest);
4390 if (multi_step_cvt)
4392 for (i = interm_types.length () - 1;
4393 interm_types.iterate (i, &intermediate_type); i--)
4395 vec_dest = vect_create_destination_var (scalar_dest,
4396 intermediate_type);
4397 vec_dsts.quick_push (vec_dest);
4401 if (cvt_type)
4402 vec_dest = vect_create_destination_var (scalar_dest,
4403 modifier == WIDEN
4404 ? vectype_out : cvt_type);
4406 if (!slp_node)
4408 if (modifier == WIDEN)
4410 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4411 if (op_type == binary_op)
4412 vec_oprnds1.create (1);
4414 else if (modifier == NARROW)
4415 vec_oprnds0.create (
4416 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4418 else if (code == WIDEN_LSHIFT_EXPR)
4419 vec_oprnds1.create (slp_node->vec_stmts_size);
4421 last_oprnd = op0;
4422 prev_stmt_info = NULL;
4423 switch (modifier)
4425 case NONE:
4426 for (j = 0; j < ncopies; j++)
4428 if (j == 0)
4429 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4430 else
4431 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4433 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4435 /* Arguments are ready, create the new vector stmt. */
4436 if (code1 == CALL_EXPR)
4438 new_stmt = gimple_build_call (decl1, 1, vop0);
4439 new_temp = make_ssa_name (vec_dest, new_stmt);
4440 gimple_call_set_lhs (new_stmt, new_temp);
4442 else
4444 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4445 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4446 new_temp = make_ssa_name (vec_dest, new_stmt);
4447 gimple_assign_set_lhs (new_stmt, new_temp);
4450 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4451 if (slp_node)
4452 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4453 else
4455 if (!prev_stmt_info)
4456 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4457 else
4458 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4459 prev_stmt_info = vinfo_for_stmt (new_stmt);
4463 break;
4465 case WIDEN:
4466 /* In case the vectorization factor (VF) is bigger than the number
4467 of elements that we can fit in a vectype (nunits), we have to
4468 generate more than one vector stmt - i.e., we need to "unroll"
4469 the vector stmt by a factor of VF/nunits. */
4470 for (j = 0; j < ncopies; j++)
4472 /* Handle uses. */
4473 if (j == 0)
4475 if (slp_node)
4477 if (code == WIDEN_LSHIFT_EXPR)
4479 unsigned int k;
4481 vec_oprnd1 = op1;
4482 /* Store vec_oprnd1 for every vector stmt to be created
4483 for SLP_NODE. We check during the analysis that all
4484 the shift arguments are the same. */
4485 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4486 vec_oprnds1.quick_push (vec_oprnd1);
4488 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4489 slp_node);
4491 else
4492 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4493 &vec_oprnds1, slp_node);
4495 else
4497 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4498 vec_oprnds0.quick_push (vec_oprnd0);
4499 if (op_type == binary_op)
4501 if (code == WIDEN_LSHIFT_EXPR)
4502 vec_oprnd1 = op1;
4503 else
4504 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4505 vec_oprnds1.quick_push (vec_oprnd1);
4509 else
4511 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4512 vec_oprnds0.truncate (0);
4513 vec_oprnds0.quick_push (vec_oprnd0);
4514 if (op_type == binary_op)
4516 if (code == WIDEN_LSHIFT_EXPR)
4517 vec_oprnd1 = op1;
4518 else
4519 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4520 vec_oprnd1);
4521 vec_oprnds1.truncate (0);
4522 vec_oprnds1.quick_push (vec_oprnd1);
4526 /* Arguments are ready. Create the new vector stmts. */
4527 for (i = multi_step_cvt; i >= 0; i--)
4529 tree this_dest = vec_dsts[i];
4530 enum tree_code c1 = code1, c2 = code2;
4531 if (i == 0 && codecvt2 != ERROR_MARK)
4533 c1 = codecvt1;
4534 c2 = codecvt2;
4536 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4537 &vec_oprnds1,
4538 stmt, this_dest, gsi,
4539 c1, c2, decl1, decl2,
4540 op_type);
4543 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4545 if (cvt_type)
4547 if (codecvt1 == CALL_EXPR)
4549 new_stmt = gimple_build_call (decl1, 1, vop0);
4550 new_temp = make_ssa_name (vec_dest, new_stmt);
4551 gimple_call_set_lhs (new_stmt, new_temp);
4553 else
4555 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4556 new_temp = make_ssa_name (vec_dest);
4557 new_stmt = gimple_build_assign (new_temp, codecvt1,
4558 vop0);
4561 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4563 else
4564 new_stmt = SSA_NAME_DEF_STMT (vop0);
4566 if (slp_node)
4567 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4568 else
4570 if (!prev_stmt_info)
4571 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4572 else
4573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4574 prev_stmt_info = vinfo_for_stmt (new_stmt);
4579 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4580 break;
4582 case NARROW:
4583 /* In case the vectorization factor (VF) is bigger than the number
4584 of elements that we can fit in a vectype (nunits), we have to
4585 generate more than one vector stmt - i.e., we need to "unroll"
4586 the vector stmt by a factor of VF/nunits. */
4587 for (j = 0; j < ncopies; j++)
4589 /* Handle uses. */
4590 if (slp_node)
4591 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4592 slp_node);
4593 else
4595 vec_oprnds0.truncate (0);
4596 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4597 vect_pow2 (multi_step_cvt) - 1);
4600 /* Arguments are ready. Create the new vector stmts. */
4601 if (cvt_type)
4602 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4604 if (codecvt1 == CALL_EXPR)
4606 new_stmt = gimple_build_call (decl1, 1, vop0);
4607 new_temp = make_ssa_name (vec_dest, new_stmt);
4608 gimple_call_set_lhs (new_stmt, new_temp);
4610 else
4612 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4613 new_temp = make_ssa_name (vec_dest);
4614 new_stmt = gimple_build_assign (new_temp, codecvt1,
4615 vop0);
4618 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4619 vec_oprnds0[i] = new_temp;
4622 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4623 stmt, vec_dsts, gsi,
4624 slp_node, code1,
4625 &prev_stmt_info);
4628 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4629 break;
4632 vec_oprnds0.release ();
4633 vec_oprnds1.release ();
4634 interm_types.release ();
4636 return true;
4640 /* Function vectorizable_assignment.
4642 Check if STMT performs an assignment (copy) that can be vectorized.
4643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
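/* This covers plain copies as well as conversions that leave the vector
   layout untouched, e.g. (illustrative) a NOP_EXPR between integer types
   of the same width or a VIEW_CONVERT_EXPR; in the transform these become
   a single vector assignment whose rhs is a VIEW_CONVERT_EXPR of the
   vector operand.  */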
4647 static bool
4648 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4649 gimple **vec_stmt, slp_tree slp_node)
4651 tree vec_dest;
4652 tree scalar_dest;
4653 tree op;
4654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4655 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4656 tree new_temp;
4657 gimple *def_stmt;
4658 enum vect_def_type dt[1] = {vect_unknown_def_type};
4659 int ndts = 1;
4660 int ncopies;
4661 int i, j;
4662 vec<tree> vec_oprnds = vNULL;
4663 tree vop;
4664 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4665 vec_info *vinfo = stmt_info->vinfo;
4666 gimple *new_stmt = NULL;
4667 stmt_vec_info prev_stmt_info = NULL;
4668 enum tree_code code;
4669 tree vectype_in;
4671 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4672 return false;
4674 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4675 && ! vec_stmt)
4676 return false;
4678 /* Is vectorizable assignment? */
4679 if (!is_gimple_assign (stmt))
4680 return false;
4682 scalar_dest = gimple_assign_lhs (stmt);
4683 if (TREE_CODE (scalar_dest) != SSA_NAME)
4684 return false;
4686 code = gimple_assign_rhs_code (stmt);
4687 if (gimple_assign_single_p (stmt)
4688 || code == PAREN_EXPR
4689 || CONVERT_EXPR_CODE_P (code))
4690 op = gimple_assign_rhs1 (stmt);
4691 else
4692 return false;
4694 if (code == VIEW_CONVERT_EXPR)
4695 op = TREE_OPERAND (op, 0);
4697 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4698 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4700 /* Multiple types in SLP are handled by creating the appropriate number of
4701 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4702 case of SLP. */
4703 if (slp_node)
4704 ncopies = 1;
4705 else
4706 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4708 gcc_assert (ncopies >= 1);
4710 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4714 "use not simple.\n");
4715 return false;
4718 /* We can handle NOP_EXPR conversions that do not change the number
4719 of elements or the vector size. */
4720 if ((CONVERT_EXPR_CODE_P (code)
4721 || code == VIEW_CONVERT_EXPR)
4722 && (!vectype_in
4723 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4724 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4725 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4726 return false;
4728 /* We do not handle bit-precision changes. */
4729 if ((CONVERT_EXPR_CODE_P (code)
4730 || code == VIEW_CONVERT_EXPR)
4731 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4732 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4733 || !type_has_mode_precision_p (TREE_TYPE (op)))
4734 /* But a conversion that does not change the bit-pattern is ok. */
4735 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4736 > TYPE_PRECISION (TREE_TYPE (op)))
4737 && TYPE_UNSIGNED (TREE_TYPE (op)))
4738 /* Conversion between boolean types of different sizes is
4739 a simple assignment in case their vectypes are the same
4740 boolean vectors. */
4741 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4742 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4746 "type conversion to/from bit-precision "
4747 "unsupported.\n");
4748 return false;
4751 if (!vec_stmt) /* transformation not required. */
4753 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_NOTE, vect_location,
4756 "=== vectorizable_assignment ===\n");
4757 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4758 return true;
4761 /* Transform. */
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4765 /* Handle def. */
4766 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4768 /* Handle use. */
4769 for (j = 0; j < ncopies; j++)
4771 /* Handle uses. */
4772 if (j == 0)
4773 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4774 else
4775 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4777 /* Arguments are ready. Create the new vector stmt. */
4778 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4780 if (CONVERT_EXPR_CODE_P (code)
4781 || code == VIEW_CONVERT_EXPR)
4782 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4783 new_stmt = gimple_build_assign (vec_dest, vop);
4784 new_temp = make_ssa_name (vec_dest, new_stmt);
4785 gimple_assign_set_lhs (new_stmt, new_temp);
4786 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4787 if (slp_node)
4788 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4791 if (slp_node)
4792 continue;
4794 if (j == 0)
4795 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4796 else
4797 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4799 prev_stmt_info = vinfo_for_stmt (new_stmt);
4802 vec_oprnds.release ();
4803 return true;
4807 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE,
4808 either as a shift by a scalar or as a shift by a vector. */
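/* For example, a pattern recognizer can use this to check whether
   LSHIFT_EXPR on SCALAR_TYPE can be vectorized at all before committing
   to a rewrite; either the vector-shift-by-scalar or the
   vector-shift-by-vector optab is accepted.  */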
4810 bool
4811 vect_supportable_shift (enum tree_code code, tree scalar_type)
4814 machine_mode vec_mode;
4815 optab optab;
4816 int icode;
4817 tree vectype;
4819 vectype = get_vectype_for_scalar_type (scalar_type);
4820 if (!vectype)
4821 return false;
4823 optab = optab_for_tree_code (code, vectype, optab_scalar);
4824 if (!optab
4825 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4827 optab = optab_for_tree_code (code, vectype, optab_vector);
4828 if (!optab
4829 || (optab_handler (optab, TYPE_MODE (vectype))
4830 == CODE_FOR_nothing))
4831 return false;
4834 vec_mode = TYPE_MODE (vectype);
4835 icode = (int) optab_handler (optab, vec_mode);
4836 if (icode == CODE_FOR_nothing)
4837 return false;
4839 return true;
4843 /* Function vectorizable_shift.
4845 Check if STMT performs a shift operation that can be vectorized.
4846 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4847 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4848 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
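/* The main wrinkle handled here (illustrative summary): the shift amount
   may be a loop-invariant scalar, in which case the vector-shift-by-scalar
   optab is preferred and the scalar amount is used directly, or it may
   vary per element, in which case the vector-shift-by-vector optab is
   required and an invariant amount is broadcast to a vector if needed.  */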
4850 static bool
4851 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4852 gimple **vec_stmt, slp_tree slp_node)
4854 tree vec_dest;
4855 tree scalar_dest;
4856 tree op0, op1 = NULL;
4857 tree vec_oprnd1 = NULL_TREE;
4858 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4859 tree vectype;
4860 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4861 enum tree_code code;
4862 machine_mode vec_mode;
4863 tree new_temp;
4864 optab optab;
4865 int icode;
4866 machine_mode optab_op2_mode;
4867 gimple *def_stmt;
4868 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4869 int ndts = 2;
4870 gimple *new_stmt = NULL;
4871 stmt_vec_info prev_stmt_info;
4872 int nunits_in;
4873 int nunits_out;
4874 tree vectype_out;
4875 tree op1_vectype;
4876 int ncopies;
4877 int j, i;
4878 vec<tree> vec_oprnds0 = vNULL;
4879 vec<tree> vec_oprnds1 = vNULL;
4880 tree vop0, vop1;
4881 unsigned int k;
4882 bool scalar_shift_arg = true;
4883 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4884 vec_info *vinfo = stmt_info->vinfo;
4886 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4887 return false;
4889 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4890 && ! vec_stmt)
4891 return false;
4893 /* Is STMT a vectorizable shift or rotate operation? */
4894 if (!is_gimple_assign (stmt))
4895 return false;
4897 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4898 return false;
4900 code = gimple_assign_rhs_code (stmt);
4902 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4903 || code == RROTATE_EXPR))
4904 return false;
4906 scalar_dest = gimple_assign_lhs (stmt);
4907 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4908 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4910 if (dump_enabled_p ())
4911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4912 "bit-precision shifts not supported.\n");
4913 return false;
4916 op0 = gimple_assign_rhs1 (stmt);
4917 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4919 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921 "use not simple.\n");
4922 return false;
4924 /* If op0 is an external or constant def use a vector type with
4925 the same size as the output vector type. */
4926 if (!vectype)
4927 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4928 if (vec_stmt)
4929 gcc_assert (vectype);
4930 if (!vectype)
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4934 "no vectype for scalar type\n");
4935 return false;
4938 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4939 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4940 if (nunits_out != nunits_in)
4941 return false;
4943 op1 = gimple_assign_rhs2 (stmt);
4944 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4946 if (dump_enabled_p ())
4947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4948 "use not simple.\n");
4949 return false;
4952 /* Multiple types in SLP are handled by creating the appropriate number of
4953 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4954 case of SLP. */
4955 if (slp_node)
4956 ncopies = 1;
4957 else
4958 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4960 gcc_assert (ncopies >= 1);
4962 /* Determine whether the shift amount is a vector, or scalar. If the
4963 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4965 if ((dt[1] == vect_internal_def
4966 || dt[1] == vect_induction_def)
4967 && !slp_node)
4968 scalar_shift_arg = false;
4969 else if (dt[1] == vect_constant_def
4970 || dt[1] == vect_external_def
4971 || dt[1] == vect_internal_def)
4973 /* In SLP, we need to check whether the shift count is the same in all
4974 statements; in loops, if it is a constant or invariant, it is always
4975 a scalar shift. */
4976 if (slp_node)
4978 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4979 gimple *slpstmt;
4981 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4982 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4983 scalar_shift_arg = false;
4986 /* If the shift amount is computed by a pattern stmt we cannot
4987 use the scalar amount directly, thus give up and use a vector
4988 shift. */
4989 if (dt[1] == vect_internal_def)
4991 gimple *def = SSA_NAME_DEF_STMT (op1);
4992 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4993 scalar_shift_arg = false;
4996 else
4998 if (dump_enabled_p ())
4999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5000 "operand mode requires invariant argument.\n");
5001 return false;
5004 /* Vector shifted by vector. */
5005 if (!scalar_shift_arg)
5007 optab = optab_for_tree_code (code, vectype, optab_vector);
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE, vect_location,
5010 "vector/vector shift/rotate found.\n");
5012 if (!op1_vectype)
5013 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5014 if (op1_vectype == NULL_TREE
5015 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5019 "unusable type for last operand in"
5020 " vector/vector shift/rotate.\n");
5021 return false;
5024 /* See if the machine has a vector-shift-by-scalar insn, and if not,
5025 see if it has a vector-shift-by-vector insn. */
5026 else
5028 optab = optab_for_tree_code (code, vectype, optab_scalar);
5029 if (optab
5030 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5032 if (dump_enabled_p ())
5033 dump_printf_loc (MSG_NOTE, vect_location,
5034 "vector/scalar shift/rotate found.\n");
5036 else
5038 optab = optab_for_tree_code (code, vectype, optab_vector);
5039 if (optab
5040 && (optab_handler (optab, TYPE_MODE (vectype))
5041 != CODE_FOR_nothing))
5043 scalar_shift_arg = false;
5045 if (dump_enabled_p ())
5046 dump_printf_loc (MSG_NOTE, vect_location,
5047 "vector/vector shift/rotate found.\n");
5049 /* Unlike the other binary operators, shifts/rotates take an int
5050 rhs rather than one of the same type as the lhs, so make sure
5051 the scalar has the right type if we are dealing with vectors
5052 of long long/long/short/char. */
5053 if (dt[1] == vect_constant_def)
5054 op1 = fold_convert (TREE_TYPE (vectype), op1);
5055 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5056 TREE_TYPE (op1)))
5058 if (slp_node
5059 && TYPE_MODE (TREE_TYPE (vectype))
5060 != TYPE_MODE (TREE_TYPE (op1)))
5062 if (dump_enabled_p ())
5063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5064 "unusable type for last operand in"
5065 " vector/vector shift/rotate.\n");
5066 return false;
5068 if (vec_stmt && !slp_node)
5070 op1 = fold_convert (TREE_TYPE (vectype), op1);
5071 op1 = vect_init_vector (stmt, op1,
5072 TREE_TYPE (vectype), NULL);
5079 /* Supportable by target? */
5080 if (!optab)
5082 if (dump_enabled_p ())
5083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5084 "no optab.\n");
5085 return false;
5087 vec_mode = TYPE_MODE (vectype);
5088 icode = (int) optab_handler (optab, vec_mode);
5089 if (icode == CODE_FOR_nothing)
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5093 "op not supported by target.\n");
5094 /* Check only during analysis. */
5095 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5096 || (!vec_stmt
5097 && !vect_worthwhile_without_simd_p (vinfo, code)))
5098 return false;
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_NOTE, vect_location,
5101 "proceeding using word mode.\n");
5104 /* Worthwhile without SIMD support? Check only during analysis. */
5105 if (!vec_stmt
5106 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5107 && !vect_worthwhile_without_simd_p (vinfo, code))
5109 if (dump_enabled_p ())
5110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5111 "not worthwhile without SIMD support.\n");
5112 return false;
5115 if (!vec_stmt) /* transformation not required. */
5117 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_NOTE, vect_location,
5120 "=== vectorizable_shift ===\n");
5121 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5122 return true;
5125 /* Transform. */
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_NOTE, vect_location,
5129 "transform binary/unary operation.\n");
5131 /* Handle def. */
5132 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5134 prev_stmt_info = NULL;
5135 for (j = 0; j < ncopies; j++)
5137 /* Handle uses. */
5138 if (j == 0)
5140 if (scalar_shift_arg)
5142 /* Vector shl and shr insn patterns can be defined with scalar
5143 operand 2 (shift operand). In this case, use constant or loop
5144 invariant op1 directly, without extending it to vector mode
5145 first. */
5146 optab_op2_mode = insn_data[icode].operand[2].mode;
5147 if (!VECTOR_MODE_P (optab_op2_mode))
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_NOTE, vect_location,
5151 "operand 1 using scalar mode.\n");
5152 vec_oprnd1 = op1;
5153 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5154 vec_oprnds1.quick_push (vec_oprnd1);
5155 if (slp_node)
5157 /* Store vec_oprnd1 for every vector stmt to be created
5158 for SLP_NODE. We check during the analysis that all
5159 the shift arguments are the same.
5160 TODO: Allow different constants for different vector
5161 stmts generated for an SLP instance. */
5162 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5163 vec_oprnds1.quick_push (vec_oprnd1);
5168 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5169 (a special case for certain kinds of vector shifts); otherwise,
5170 operand 1 should be of a vector type (the usual case). */
5171 if (vec_oprnd1)
5172 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5173 slp_node);
5174 else
5175 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5176 slp_node);
5178 else
5179 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5181 /* Arguments are ready. Create the new vector stmt. */
5182 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5184 vop1 = vec_oprnds1[i];
5185 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5186 new_temp = make_ssa_name (vec_dest, new_stmt);
5187 gimple_assign_set_lhs (new_stmt, new_temp);
5188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5189 if (slp_node)
5190 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5193 if (slp_node)
5194 continue;
5196 if (j == 0)
5197 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5198 else
5199 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5200 prev_stmt_info = vinfo_for_stmt (new_stmt);
5203 vec_oprnds0.release ();
5204 vec_oprnds1.release ();
5206 return true;
5210 /* Function vectorizable_operation.
5212 Check if STMT performs a binary, unary or ternary operation that can
5213 be vectorized.
5214 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5215 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5216 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
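/* E.g. (illustrative) for a scalar statement z_1 = x_2 + y_3 with V4SI
   operands this emits vz = vx + vy once per copy; how the copies are
   chained together is described in the big comment before the
   transformation loop below.  */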
5218 static bool
5219 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5220 gimple **vec_stmt, slp_tree slp_node)
5222 tree vec_dest;
5223 tree scalar_dest;
5224 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5226 tree vectype;
5227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5228 enum tree_code code;
5229 machine_mode vec_mode;
5230 tree new_temp;
5231 int op_type;
5232 optab optab;
5233 bool target_support_p;
5234 gimple *def_stmt;
5235 enum vect_def_type dt[3]
5236 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5237 int ndts = 3;
5238 gimple *new_stmt = NULL;
5239 stmt_vec_info prev_stmt_info;
5240 int nunits_in;
5241 int nunits_out;
5242 tree vectype_out;
5243 int ncopies;
5244 int j, i;
5245 vec<tree> vec_oprnds0 = vNULL;
5246 vec<tree> vec_oprnds1 = vNULL;
5247 vec<tree> vec_oprnds2 = vNULL;
5248 tree vop0, vop1, vop2;
5249 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5250 vec_info *vinfo = stmt_info->vinfo;
5252 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5253 return false;
5255 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5256 && ! vec_stmt)
5257 return false;
5259 /* Is STMT a vectorizable unary/binary/ternary operation? */
5260 if (!is_gimple_assign (stmt))
5261 return false;
5263 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5264 return false;
5266 code = gimple_assign_rhs_code (stmt);
5268 /* For pointer addition, we should use the normal plus for
5269 the vector addition. */
5270 if (code == POINTER_PLUS_EXPR)
5271 code = PLUS_EXPR;
5273 /* Support only unary, binary and ternary operations. */
5274 op_type = TREE_CODE_LENGTH (code);
5275 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5277 if (dump_enabled_p ())
5278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5279 "num. args = %d (not unary/binary/ternary op).\n",
5280 op_type);
5281 return false;
5284 scalar_dest = gimple_assign_lhs (stmt);
5285 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5287 /* Most operations cannot handle bit-precision types without extra
5288 truncations. */
5289 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5290 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5291 /* Exceptions are the bitwise binary operations. */
5292 && code != BIT_IOR_EXPR
5293 && code != BIT_XOR_EXPR
5294 && code != BIT_AND_EXPR)
5296 if (dump_enabled_p ())
5297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5298 "bit-precision arithmetic not supported.\n");
5299 return false;
5302 op0 = gimple_assign_rhs1 (stmt);
5303 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5305 if (dump_enabled_p ())
5306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5307 "use not simple.\n");
5308 return false;
5310 /* If op0 is an external or constant def use a vector type with
5311 the same size as the output vector type. */
5312 if (!vectype)
5314 /* For a boolean type we cannot determine the vectype from an
5315 invariant value (we don't know whether it is a vector
5316 of booleans or a vector of integers). Use the output
5317 vectype, because operations on booleans don't change
5318 the type. */
5319 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5321 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5323 if (dump_enabled_p ())
5324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5325 "not supported operation on bool value.\n");
5326 return false;
5328 vectype = vectype_out;
5330 else
5331 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5333 if (vec_stmt)
5334 gcc_assert (vectype);
5335 if (!vectype)
5337 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5340 "no vectype for scalar type ");
5341 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5342 TREE_TYPE (op0));
5343 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5346 return false;
5349 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5350 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5351 if (nunits_out != nunits_in)
5352 return false;
5354 if (op_type == binary_op || op_type == ternary_op)
5356 op1 = gimple_assign_rhs2 (stmt);
5357 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5359 if (dump_enabled_p ())
5360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5361 "use not simple.\n");
5362 return false;
5365 if (op_type == ternary_op)
5367 op2 = gimple_assign_rhs3 (stmt);
5368 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5370 if (dump_enabled_p ())
5371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5372 "use not simple.\n");
5373 return false;
5377 /* Multiple types in SLP are handled by creating the appropriate number of
5378 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5379 case of SLP. */
5380 if (slp_node)
5381 ncopies = 1;
5382 else
5383 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5385 gcc_assert (ncopies >= 1);
5387 /* Shifts are handled in vectorizable_shift (). */
5388 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5389 || code == RROTATE_EXPR)
5390 return false;
5392 /* Supportable by target? */
5394 vec_mode = TYPE_MODE (vectype);
5395 if (code == MULT_HIGHPART_EXPR)
5396 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5397 else
5399 optab = optab_for_tree_code (code, vectype, optab_default);
5400 if (!optab)
5402 if (dump_enabled_p ())
5403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5404 "no optab.\n");
5405 return false;
5407 target_support_p = (optab_handler (optab, vec_mode)
5408 != CODE_FOR_nothing);
5411 if (!target_support_p)
5413 if (dump_enabled_p ())
5414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5415 "op not supported by target.\n");
5416 /* Check only during analysis. */
5417 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5418 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5419 return false;
5420 if (dump_enabled_p ())
5421 dump_printf_loc (MSG_NOTE, vect_location,
5422 "proceeding using word mode.\n");
5425 /* Worthwhile without SIMD support? Check only during analysis. */
5426 if (!VECTOR_MODE_P (vec_mode)
5427 && !vec_stmt
5428 && !vect_worthwhile_without_simd_p (vinfo, code))
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5432 "not worthwhile without SIMD support.\n");
5433 return false;
5436 if (!vec_stmt) /* transformation not required. */
5438 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5439 if (dump_enabled_p ())
5440 dump_printf_loc (MSG_NOTE, vect_location,
5441 "=== vectorizable_operation ===\n");
5442 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5443 return true;
5446 /* Transform. */
5448 if (dump_enabled_p ())
5449 dump_printf_loc (MSG_NOTE, vect_location,
5450 "transform binary/unary operation.\n");
5452 /* Handle def. */
5453 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5455 /* In case the vectorization factor (VF) is bigger than the number
5456 of elements that we can fit in a vectype (nunits), we have to generate
5457 more than one vector stmt - i.e., we need to "unroll" the
5458 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
5459 from one copy of the vector stmt to the next, in the field
5460 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5461 stages to find the correct vector defs to be used when vectorizing
5462 stmts that use the defs of the current stmt. The example below
5463 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5464 we need to create 4 vectorized stmts):
5466 before vectorization:
5467 RELATED_STMT VEC_STMT
5468 S1: x = memref - -
5469 S2: z = x + 1 - -
5471 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5472 there):
5473 RELATED_STMT VEC_STMT
5474 VS1_0: vx0 = memref0 VS1_1 -
5475 VS1_1: vx1 = memref1 VS1_2 -
5476 VS1_2: vx2 = memref2 VS1_3 -
5477 VS1_3: vx3 = memref3 - -
5478 S1: x = load - VS1_0
5479 S2: z = x + 1 - -
5481 step2: vectorize stmt S2 (done here):
5482 To vectorize stmt S2 we first need to find the relevant vector
5483 def for the first operand 'x'. This is, as usual, obtained from
5484 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5485 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5486 relevant vector def 'vx0'. Having found 'vx0' we can generate
5487 the vector stmt VS2_0, and as usual, record it in the
5488 STMT_VINFO_VEC_STMT of stmt S2.
5489 When creating the second copy (VS2_1), we obtain the relevant vector
5490 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5491 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5492 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5493 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5494 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5495 chain of stmts and pointers:
5496 RELATED_STMT VEC_STMT
5497 VS1_0: vx0 = memref0 VS1_1 -
5498 VS1_1: vx1 = memref1 VS1_2 -
5499 VS1_2: vx2 = memref2 VS1_3 -
5500 VS1_3: vx3 = memref3 - -
5501 S1: x = load - VS1_0
5502 VS2_0: vz0 = vx0 + v1 VS2_1 -
5503 VS2_1: vz1 = vx1 + v1 VS2_2 -
5504 VS2_2: vz2 = vx2 + v1 VS2_3 -
5505 VS2_3: vz3 = vx3 + v1 - -
5506 S2: z = x + 1 - VS2_0 */
5508 prev_stmt_info = NULL;
5509 for (j = 0; j < ncopies; j++)
5511 /* Handle uses. */
5512 if (j == 0)
5514 if (op_type == binary_op || op_type == ternary_op)
5515 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5516 slp_node);
5517 else
5518 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5519 slp_node);
5520 if (op_type == ternary_op)
5521 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5522 slp_node);
5524 else
5526 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5527 if (op_type == ternary_op)
5529 tree vec_oprnd = vec_oprnds2.pop ();
5530 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5531 vec_oprnd));
5535 /* Arguments are ready. Create the new vector stmt. */
5536 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5538 vop1 = ((op_type == binary_op || op_type == ternary_op)
5539 ? vec_oprnds1[i] : NULL_TREE);
5540 vop2 = ((op_type == ternary_op)
5541 ? vec_oprnds2[i] : NULL_TREE);
5542 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5543 new_temp = make_ssa_name (vec_dest, new_stmt);
5544 gimple_assign_set_lhs (new_stmt, new_temp);
5545 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5546 if (slp_node)
5547 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5550 if (slp_node)
5551 continue;
5553 if (j == 0)
5554 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5555 else
5556 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5557 prev_stmt_info = vinfo_for_stmt (new_stmt);
5560 vec_oprnds0.release ();
5561 vec_oprnds1.release ();
5562 vec_oprnds2.release ();
5564 return true;
5567 /* A helper function to ensure data reference DR's base alignment. */
5569 static void
5570 ensure_base_align (struct data_reference *dr)
5572 if (!dr->aux)
5573 return;
5575 if (DR_VECT_AUX (dr)->base_misaligned)
5577 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5579 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5581 if (decl_in_symtab_p (base_decl))
5582 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5583 else
5585 SET_DECL_ALIGN (base_decl, align_base_to);
5586 DECL_USER_ALIGN (base_decl) = 1;
5588 DR_VECT_AUX (dr)->base_misaligned = false;
5593 /* Function get_group_alias_ptr_type.
5595 Return the alias type for the group starting at FIRST_STMT. */
5597 static tree
5598 get_group_alias_ptr_type (gimple *first_stmt)
5600 struct data_reference *first_dr, *next_dr;
5601 gimple *next_stmt;
5603 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5604 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5605 while (next_stmt)
5607 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5608 if (get_alias_set (DR_REF (first_dr))
5609 != get_alias_set (DR_REF (next_dr)))
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_NOTE, vect_location,
5613 "conflicting alias set types.\n");
5614 return ptr_type_node;
5616 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5618 return reference_alias_ptr_type (DR_REF (first_dr));
5622 /* Function vectorizable_store.
5624 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5625 can be vectorized.
5626 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5627 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5628 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5630 static bool
5631 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5632 slp_tree slp_node)
5634 tree scalar_dest;
5635 tree data_ref;
5636 tree op;
5637 tree vec_oprnd = NULL_TREE;
5638 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5639 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5640 tree elem_type;
5641 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5642 struct loop *loop = NULL;
5643 machine_mode vec_mode;
5644 tree dummy;
5645 enum dr_alignment_support alignment_support_scheme;
5646 gimple *def_stmt;
5647 enum vect_def_type dt;
5648 stmt_vec_info prev_stmt_info = NULL;
5649 tree dataref_ptr = NULL_TREE;
5650 tree dataref_offset = NULL_TREE;
5651 gimple *ptr_incr = NULL;
5652 int ncopies;
5653 int j;
5654 gimple *next_stmt, *first_stmt;
5655 bool grouped_store;
5656 unsigned int group_size, i;
5657 vec<tree> oprnds = vNULL;
5658 vec<tree> result_chain = vNULL;
5659 bool inv_p;
5660 tree offset = NULL_TREE;
5661 vec<tree> vec_oprnds = vNULL;
5662 bool slp = (slp_node != NULL);
5663 unsigned int vec_num;
5664 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5665 vec_info *vinfo = stmt_info->vinfo;
5666 tree aggr_type;
5667 gather_scatter_info gs_info;
5668 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5669 gimple *new_stmt;
5670 int vf;
5671 vec_load_store_type vls_type;
5672 tree ref_type;
5674 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5675 return false;
5677 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5678 && ! vec_stmt)
5679 return false;
5681 /* Is vectorizable store? */
5683 if (!is_gimple_assign (stmt))
5684 return false;
5686 scalar_dest = gimple_assign_lhs (stmt);
5687 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5688 && is_pattern_stmt_p (stmt_info))
5689 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5690 if (TREE_CODE (scalar_dest) != ARRAY_REF
5691 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5692 && TREE_CODE (scalar_dest) != INDIRECT_REF
5693 && TREE_CODE (scalar_dest) != COMPONENT_REF
5694 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5695 && TREE_CODE (scalar_dest) != REALPART_EXPR
5696 && TREE_CODE (scalar_dest) != MEM_REF)
5697 return false;
5699 /* Cannot have hybrid store SLP -- that would mean storing to the
5700 same location twice. */
5701 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5703 gcc_assert (gimple_assign_single_p (stmt));
5705 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5706 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5708 if (loop_vinfo)
5710 loop = LOOP_VINFO_LOOP (loop_vinfo);
5711 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5713 else
5714 vf = 1;
5716 /* Multiple types in SLP are handled by creating the appropriate number of
5717 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5718 case of SLP. */
5719 if (slp)
5720 ncopies = 1;
5721 else
5722 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5724 gcc_assert (ncopies >= 1);
5726 /* FORNOW. This restriction should be relaxed. */
5727 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5731 "multiple types in nested loop.\n");
5732 return false;
5735 op = gimple_assign_rhs1 (stmt);
5737 /* In the case this is a store from a constant, make sure
5738 native_encode_expr can handle it. */
5739 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
5740 return false;
5742 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5744 if (dump_enabled_p ())
5745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5746 "use not simple.\n");
5747 return false;
5750 if (dt == vect_constant_def || dt == vect_external_def)
5751 vls_type = VLS_STORE_INVARIANT;
5752 else
5753 vls_type = VLS_STORE;
5755 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5756 return false;
5758 elem_type = TREE_TYPE (vectype);
5759 vec_mode = TYPE_MODE (vectype);
5761 /* FORNOW. In some cases can vectorize even if data-type not supported
5762 (e.g. - array initialization with 0). */
5763 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5764 return false;
5766 if (!STMT_VINFO_DATA_REF (stmt_info))
5767 return false;
5769 vect_memory_access_type memory_access_type;
5770 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5771 &memory_access_type, &gs_info))
5772 return false;
5774 if (!vec_stmt) /* transformation not required. */
5776 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5777 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5778 /* The SLP costs are calculated during SLP analysis. */
5779 if (!PURE_SLP_STMT (stmt_info))
5780 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5781 NULL, NULL, NULL);
5782 return true;
5784 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5786 /* Transform. */
5788 ensure_base_align (dr);
5790 if (memory_access_type == VMAT_GATHER_SCATTER)
5792 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5793 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5794 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5795 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5796 edge pe = loop_preheader_edge (loop);
5797 gimple_seq seq;
5798 basic_block new_bb;
5799 enum { NARROW, NONE, WIDEN } modifier;
5800 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5802 if (nunits == (unsigned int) scatter_off_nunits)
5803 modifier = NONE;
5804 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5806 modifier = WIDEN;
5808 auto_vec_perm_indices sel (scatter_off_nunits);
5809 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5810 sel.quick_push (i | nunits);
5812 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5813 gcc_assert (perm_mask != NULL_TREE);
5815 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5817 modifier = NARROW;
5819 auto_vec_perm_indices sel (nunits);
5820 for (i = 0; i < (unsigned int) nunits; ++i)
5821 sel.quick_push (i | scatter_off_nunits);
5823 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5824 gcc_assert (perm_mask != NULL_TREE);
5825 ncopies *= 2;
5827 else
5828 gcc_unreachable ();
5830 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5831 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5832 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5833 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5834 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5835 scaletype = TREE_VALUE (arglist);
5837 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5838 && TREE_CODE (rettype) == VOID_TYPE);
5840 ptr = fold_convert (ptrtype, gs_info.base);
5841 if (!is_gimple_min_invariant (ptr))
5843 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5844 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5845 gcc_assert (!new_bb);
5848 /* Currently we support only unconditional scatter stores,
5849 so mask should be all ones. */
5850 mask = build_int_cst (masktype, -1);
5851 mask = vect_init_vector (stmt, mask, masktype, NULL);
5853 scale = build_int_cst (scaletype, gs_info.scale);
5855 prev_stmt_info = NULL;
5856 for (j = 0; j < ncopies; ++j)
5858 if (j == 0)
5860 src = vec_oprnd1
5861 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5862 op = vec_oprnd0
5863 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5865 else if (modifier != NONE && (j & 1))
5867 if (modifier == WIDEN)
5869 src = vec_oprnd1
5870 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5871 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5872 stmt, gsi);
5874 else if (modifier == NARROW)
5876 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5877 stmt, gsi);
5878 op = vec_oprnd0
5879 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5880 vec_oprnd0);
5882 else
5883 gcc_unreachable ();
5885 else
5887 src = vec_oprnd1
5888 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5889 op = vec_oprnd0
5890 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5891 vec_oprnd0);
5894 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5896 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5897 == TYPE_VECTOR_SUBPARTS (srctype));
5898 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5899 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5900 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5901 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5902 src = var;
5905 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5907 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5908 == TYPE_VECTOR_SUBPARTS (idxtype));
5909 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5910 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5911 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5912 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5913 op = var;
5916 new_stmt
5917 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5919 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5921 if (prev_stmt_info == NULL)
5922 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5923 else
5924 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5925 prev_stmt_info = vinfo_for_stmt (new_stmt);
5927 return true;
5930 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5931 if (grouped_store)
5933 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5934 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5935 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5937 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5939 /* FORNOW */
5940 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5942 /* We vectorize all the stmts of the interleaving group when we
5943 reach the last stmt in the group. */
5944 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5945 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5946 && !slp)
5948 *vec_stmt = NULL;
5949 return true;
5952 if (slp)
5954 grouped_store = false;
5955 /* VEC_NUM is the number of vect stmts to be created for this
5956 group. */
5957 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5958 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5959 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5960 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5961 op = gimple_assign_rhs1 (first_stmt);
5963 else
5964 /* VEC_NUM is the number of vect stmts to be created for this
5965 group. */
5966 vec_num = group_size;
5968 ref_type = get_group_alias_ptr_type (first_stmt);
5970 else
5972 first_stmt = stmt;
5973 first_dr = dr;
5974 group_size = vec_num = 1;
5975 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5978 if (dump_enabled_p ())
5979 dump_printf_loc (MSG_NOTE, vect_location,
5980 "transform store. ncopies = %d\n", ncopies);
5982 if (memory_access_type == VMAT_ELEMENTWISE
5983 || memory_access_type == VMAT_STRIDED_SLP)
5985 gimple_stmt_iterator incr_gsi;
5986 bool insert_after;
5987 gimple *incr;
5988 tree offvar;
5989 tree ivstep;
5990 tree running_off;
5991 gimple_seq stmts = NULL;
5992 tree stride_base, stride_step, alias_off;
5993 tree vec_oprnd;
5994 unsigned int g;
5996 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5998 stride_base
5999 = fold_build_pointer_plus
6000 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6001 size_binop (PLUS_EXPR,
6002 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6003 convert_to_ptrofftype (DR_INIT (first_dr))));
6004 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6006 /* For a store with loop-invariant (but other than power-of-2)
6007 stride (i.e. not a grouped access) like so:
6009 for (i = 0; i < n; i += stride)
6010 array[i] = ...;
6012 we generate a new induction variable and new stores from
6013 the components of the (vectorized) rhs:
6015 for (j = 0; ; j += VF*stride)
6016 vectemp = ...;
6017 tmp1 = vectemp[0];
6018 array[j] = tmp1;
6019 tmp2 = vectemp[1];
6020 array[j + stride] = tmp2;
6021 ...
6022 */
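/* A concrete instance of the transformation above, assuming nunits == 4
   (the names vectemp/array come from the sketch above; everything else is
   illustrative): each copy of the vectorized rhs is scattered into four
   strided scalar stores

     array[j]            = vectemp[0];
     array[j + stride]   = vectemp[1];
     array[j + 2*stride] = vectemp[2];
     array[j + 3*stride] = vectemp[3];

   and the induction variable created below advances j by VF*stride per
   vector iteration.  */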
6024 unsigned nstores = nunits;
6025 unsigned lnel = 1;
6026 tree ltype = elem_type;
6027 tree lvectype = vectype;
6028 if (slp)
6030 if (group_size < nunits
6031 && nunits % group_size == 0)
6033 nstores = nunits / group_size;
6034 lnel = group_size;
6035 ltype = build_vector_type (elem_type, group_size);
6036 lvectype = vectype;
6038 /* First check if vec_extract optab doesn't support extraction
6039 of vector elts directly. */
6040 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6041 machine_mode vmode;
6042 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6043 || !VECTOR_MODE_P (vmode)
6044 || (convert_optab_handler (vec_extract_optab,
6045 TYPE_MODE (vectype), vmode)
6046 == CODE_FOR_nothing))
6048 /* Try to avoid emitting an extract of vector elements
6049 by performing the extracts using an integer type of the
6050 same size, extracting from a vector of those and then
6051 re-interpreting it as the original vector type if
6052 supported. */
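/* Worked example of the integer-mode approach described above (modes are
   illustrative): for a V4SI vectype and group_size == 2, lsize is
   2 * 32 = 64 bits, so ltype becomes a 64-bit integer and lvectype a
   2-element vector of such integers.  Each of the nstores == 2 stores then
   writes one 64-bit chunk covering two SImode group elements, instead of
   doing two separate 32-bit element extracts per chunk.  */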
6053 unsigned lsize
6054 = group_size * GET_MODE_BITSIZE (elmode);
6055 elmode = int_mode_for_size (lsize, 0).require ();
6056 /* If we can't construct such a vector fall back to
6057 element extracts from the original vector type and
6058 element size stores. */
6059 if (mode_for_vector (elmode,
6060 nunits / group_size).exists (&vmode)
6061 && VECTOR_MODE_P (vmode)
6062 && (convert_optab_handler (vec_extract_optab,
6063 vmode, elmode)
6064 != CODE_FOR_nothing))
6066 nstores = nunits / group_size;
6067 lnel = group_size;
6068 ltype = build_nonstandard_integer_type (lsize, 1);
6069 lvectype = build_vector_type (ltype, nstores);
6071 /* Else fall back to vector extraction anyway.
6072 Fewer stores are more important than avoiding spilling
6073 of the vector we extract from. Compared to the
6074 construction case in vectorizable_load, no store-forwarding
6075 issue exists here for reasonable archs. */
6078 else if (group_size >= nunits
6079 && group_size % nunits == 0)
6081 nstores = 1;
6082 lnel = nunits;
6083 ltype = vectype;
6084 lvectype = vectype;
6086 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6087 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6090 ivstep = stride_step;
6091 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6092 build_int_cst (TREE_TYPE (ivstep), vf));
6094 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6096 create_iv (stride_base, ivstep, NULL,
6097 loop, &incr_gsi, insert_after,
6098 &offvar, NULL);
6099 incr = gsi_stmt (incr_gsi);
6100 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6102 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6103 if (stmts)
6104 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6106 prev_stmt_info = NULL;
6107 alias_off = build_int_cst (ref_type, 0);
6108 next_stmt = first_stmt;
6109 for (g = 0; g < group_size; g++)
6111 running_off = offvar;
6112 if (g)
6114 tree size = TYPE_SIZE_UNIT (ltype);
6115 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6116 size);
6117 tree newoff = copy_ssa_name (running_off, NULL);
6118 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6119 running_off, pos);
6120 vect_finish_stmt_generation (stmt, incr, gsi);
6121 running_off = newoff;
6123 unsigned int group_el = 0;
6124 unsigned HOST_WIDE_INT
6125 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6126 for (j = 0; j < ncopies; j++)
6128 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6129 and first_stmt == stmt. */
6130 if (j == 0)
6132 if (slp)
6134 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6135 slp_node);
6136 vec_oprnd = vec_oprnds[0];
6138 else
6140 gcc_assert (gimple_assign_single_p (next_stmt));
6141 op = gimple_assign_rhs1 (next_stmt);
6142 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6145 else
6147 if (slp)
6148 vec_oprnd = vec_oprnds[j];
6149 else
6151 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6152 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6155 /* Pun the vector to extract from if necessary. */
6156 if (lvectype != vectype)
6158 tree tem = make_ssa_name (lvectype);
6159 gimple *pun
6160 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6161 lvectype, vec_oprnd));
6162 vect_finish_stmt_generation (stmt, pun, gsi);
6163 vec_oprnd = tem;
6165 for (i = 0; i < nstores; i++)
6167 tree newref, newoff;
6168 gimple *incr, *assign;
6169 tree size = TYPE_SIZE (ltype);
6170 /* Extract the i'th component. */
6171 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6172 bitsize_int (i), size);
6173 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6174 size, pos);
6176 elem = force_gimple_operand_gsi (gsi, elem, true,
6177 NULL_TREE, true,
6178 GSI_SAME_STMT);
6180 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6181 group_el * elsz);
6182 newref = build2 (MEM_REF, ltype,
6183 running_off, this_off);
6185 /* And store it to *running_off. */
6186 assign = gimple_build_assign (newref, elem);
6187 vect_finish_stmt_generation (stmt, assign, gsi);
6189 group_el += lnel;
6190 if (! slp
6191 || group_el == group_size)
6193 newoff = copy_ssa_name (running_off, NULL);
6194 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6195 running_off, stride_step);
6196 vect_finish_stmt_generation (stmt, incr, gsi);
6198 running_off = newoff;
6199 group_el = 0;
6201 if (g == group_size - 1
6202 && !slp)
6204 if (j == 0 && i == 0)
6205 STMT_VINFO_VEC_STMT (stmt_info)
6206 = *vec_stmt = assign;
6207 else
6208 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6209 prev_stmt_info = vinfo_for_stmt (assign);
6213 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6214 if (slp)
6215 break;
6218 vec_oprnds.release ();
6219 return true;
6222 auto_vec<tree> dr_chain (group_size);
6223 oprnds.create (group_size);
6225 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6226 gcc_assert (alignment_support_scheme);
6227 /* Targets with store-lane instructions must not require explicit
6228 realignment. */
6229 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6230 || alignment_support_scheme == dr_aligned
6231 || alignment_support_scheme == dr_unaligned_supported);
6233 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6234 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6235 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6237 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6238 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6239 else
6240 aggr_type = vectype;
6242 /* In case the vectorization factor (VF) is bigger than the number
6243 of elements that we can fit in a vectype (nunits), we have to generate
6244 more than one vector stmt - i.e., we need to "unroll" the
6245 vector stmt by a factor VF/nunits. For more details see documentation in
6246 vect_get_vec_def_for_copy_stmt. */
6248 /* In case of interleaving (non-unit grouped access):
6250 S1: &base + 2 = x2
6251 S2: &base = x0
6252 S3: &base + 1 = x1
6253 S4: &base + 3 = x3
6255 We create vectorized stores starting from the base address (the access of
6256 the first stmt in the chain, S2 in the above example) when the last store
6257 stmt of the chain (S4) is reached:
6259 VS1: &base = vx2
6260 VS2: &base + vec_size*1 = vx0
6261 VS3: &base + vec_size*2 = vx1
6262 VS4: &base + vec_size*3 = vx3
6264 Then permutation statements are generated:
6266 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6267 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6270 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6271 (the order of the data-refs in the output of vect_permute_store_chain
6272 corresponds to the order of scalar stmts in the interleaving chain - see
6273 the documentation of vect_permute_store_chain()).
6275 In case of both multiple types and interleaving, above vector stores and
6276 permutation stmts are created for every copy. The result vector stmts are
6277 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6278 STMT_VINFO_RELATED_STMT for the next copies.
6279 */
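/* For illustration, the S1..S4 pattern above corresponds to a scalar loop
   along the lines of (the indexing is illustrative):

     for (i = 0; i < n; i++)
       {
         S1:  base[4*i + 2] = x2;
         S2:  base[4*i]     = x0;
         S3:  base[4*i + 1] = x1;
         S4:  base[4*i + 3] = x3;
       }

   i.e. a grouped store with group size 4 whose elements appear in the loop
   body in a different order than in memory.  */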
6281 prev_stmt_info = NULL;
6282 for (j = 0; j < ncopies; j++)
6285 if (j == 0)
6287 if (slp)
6289 /* Get vectorized arguments for SLP_NODE. */
6290 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6291 NULL, slp_node);
6293 vec_oprnd = vec_oprnds[0];
6295 else
6297 /* For interleaved stores we collect vectorized defs for all the
6298 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6299 used as an input to vect_permute_store_chain(), and OPRNDS as
6300 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6302 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6303 OPRNDS are of size 1. */
6304 next_stmt = first_stmt;
6305 for (i = 0; i < group_size; i++)
6307 /* Since gaps are not supported for interleaved stores,
6308 GROUP_SIZE is the exact number of stmts in the chain.
6309 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6310 there is no interleaving, GROUP_SIZE is 1, and only one
6311 iteration of the loop will be executed. */
6312 gcc_assert (next_stmt
6313 && gimple_assign_single_p (next_stmt));
6314 op = gimple_assign_rhs1 (next_stmt);
6316 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6317 dr_chain.quick_push (vec_oprnd);
6318 oprnds.quick_push (vec_oprnd);
6319 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6323 /* We should have caught mismatched types earlier. */
6324 gcc_assert (useless_type_conversion_p (vectype,
6325 TREE_TYPE (vec_oprnd)));
6326 bool simd_lane_access_p
6327 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6328 if (simd_lane_access_p
6329 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6330 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6331 && integer_zerop (DR_OFFSET (first_dr))
6332 && integer_zerop (DR_INIT (first_dr))
6333 && alias_sets_conflict_p (get_alias_set (aggr_type),
6334 get_alias_set (TREE_TYPE (ref_type))))
6336 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6337 dataref_offset = build_int_cst (ref_type, 0);
6338 inv_p = false;
6340 else
6341 dataref_ptr
6342 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6343 simd_lane_access_p ? loop : NULL,
6344 offset, &dummy, gsi, &ptr_incr,
6345 simd_lane_access_p, &inv_p);
6346 gcc_assert (bb_vinfo || !inv_p);
6348 else
6350 /* For interleaved stores we created vectorized defs for all the
6351 defs stored in OPRNDS in the previous iteration (previous copy).
6352 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6353 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6354 next copy.
6355 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6356 OPRNDS are of size 1. */
6357 for (i = 0; i < group_size; i++)
6359 op = oprnds[i];
6360 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6361 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6362 dr_chain[i] = vec_oprnd;
6363 oprnds[i] = vec_oprnd;
6365 if (dataref_offset)
6366 dataref_offset
6367 = int_const_binop (PLUS_EXPR, dataref_offset,
6368 TYPE_SIZE_UNIT (aggr_type));
6369 else
6370 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6371 TYPE_SIZE_UNIT (aggr_type));
6374 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6376 tree vec_array;
6378 /* Combine all the vectors into an array. */
6379 vec_array = create_vector_array (vectype, vec_num);
6380 for (i = 0; i < vec_num; i++)
6382 vec_oprnd = dr_chain[i];
6383 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6386 /* Emit:
6387 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6388 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6389 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6390 vec_array);
6391 gimple_call_set_lhs (call, data_ref);
6392 gimple_call_set_nothrow (call, true);
6393 new_stmt = call;
6394 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6396 else
6398 new_stmt = NULL;
6399 if (grouped_store)
6401 if (j == 0)
6402 result_chain.create (group_size);
6403 /* Permute. */
6404 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6405 &result_chain);
6408 next_stmt = first_stmt;
6409 for (i = 0; i < vec_num; i++)
6411 unsigned align, misalign;
6413 if (i > 0)
6414 /* Bump the vector pointer. */
6415 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6416 stmt, NULL_TREE);
6418 if (slp)
6419 vec_oprnd = vec_oprnds[i];
6420 else if (grouped_store)
6421 /* For grouped stores vectorized defs are interleaved in
6422 vect_permute_store_chain(). */
6423 vec_oprnd = result_chain[i];
6425 data_ref = fold_build2 (MEM_REF, vectype,
6426 dataref_ptr,
6427 dataref_offset
6428 ? dataref_offset
6429 : build_int_cst (ref_type, 0));
6430 align = DR_TARGET_ALIGNMENT (first_dr);
6431 if (aligned_access_p (first_dr))
6432 misalign = 0;
6433 else if (DR_MISALIGNMENT (first_dr) == -1)
6435 align = dr_alignment (vect_dr_behavior (first_dr));
6436 misalign = 0;
6437 TREE_TYPE (data_ref)
6438 = build_aligned_type (TREE_TYPE (data_ref),
6439 align * BITS_PER_UNIT);
6441 else
6443 TREE_TYPE (data_ref)
6444 = build_aligned_type (TREE_TYPE (data_ref),
6445 TYPE_ALIGN (elem_type));
6446 misalign = DR_MISALIGNMENT (first_dr);
6448 if (dataref_offset == NULL_TREE
6449 && TREE_CODE (dataref_ptr) == SSA_NAME)
6450 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6451 misalign);
6453 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6455 tree perm_mask = perm_mask_for_reverse (vectype);
6456 tree perm_dest
6457 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6458 vectype);
6459 tree new_temp = make_ssa_name (perm_dest);
6461 /* Generate the permute statement. */
6462 gimple *perm_stmt
6463 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6464 vec_oprnd, perm_mask);
6465 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6467 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6468 vec_oprnd = new_temp;
6471 /* Arguments are ready. Create the new vector stmt. */
6472 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6473 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6475 if (slp)
6476 continue;
6478 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6479 if (!next_stmt)
6480 break;
6483 if (!slp)
6485 if (j == 0)
6486 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6487 else
6488 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6489 prev_stmt_info = vinfo_for_stmt (new_stmt);
6493 oprnds.release ();
6494 result_chain.release ();
6495 vec_oprnds.release ();
6497 return true;
6500 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6501 VECTOR_CST mask. No checks are made that the target platform supports the
6502 mask, so callers may wish to test can_vec_perm_p separately, or use
6503 vect_gen_perm_mask_checked. */
6505 tree
6506 vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel)
6508 tree mask_elt_type, mask_type, mask_vec;
6510 unsigned int nunits = sel.length ();
6511 gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6513 mask_elt_type = lang_hooks.types.type_for_mode
6514 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
6515 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6517 auto_vec<tree, 32> mask_elts (nunits);
6518 for (unsigned int i = 0; i < nunits; ++i)
6519 mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
6520 mask_vec = build_vector (mask_type, mask_elts);
6522 return mask_vec;
6525 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6526 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6528 tree
6529 vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel)
6531 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
6532 return vect_gen_perm_mask_any (vectype, sel);
6535 /* Given vector variables X and Y that were generated for the scalar
6536 STMT, generate instructions to permute the vector elements of X and Y
6537 using permutation mask MASK_VEC, insert them at *GSI and return the
6538 permuted vector variable. */
6540 static tree
6541 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6542 gimple_stmt_iterator *gsi)
6544 tree vectype = TREE_TYPE (x);
6545 tree perm_dest, data_ref;
6546 gimple *perm_stmt;
6548 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6549 data_ref = make_ssa_name (perm_dest);
6551 /* Generate the permute statement. */
6552 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6553 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6555 return data_ref;
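/* Usage sketch for the two helpers above, mirroring the reverse-permute
   handling in vectorizable_store (NUNITS, VECTYPE, VEC_OPRND, STMT and GSI
   are assumed to be in scope; the reversal is just an example):

     auto_vec_perm_indices sel (nunits);
     for (i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     tree mask = vect_gen_perm_mask_checked (vectype, sel);
     tree rev = permute_vec_elements (vec_oprnd, vec_oprnd, mask, stmt, gsi);

   MASK is the VECTOR_CST { nunits-1, ..., 1, 0 } and REV is a new SSA name
   holding the element-reversed vector.  */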
6558 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6559 inserting them on the loop's preheader edge. Returns true if we
6560 were successful in doing so (and thus STMT can be moved then),
6561 otherwise returns false. */
6563 static bool
6564 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6566 ssa_op_iter i;
6567 tree op;
6568 bool any = false;
6570 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6572 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6573 if (!gimple_nop_p (def_stmt)
6574 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6576 /* Make sure we don't need to recurse. While we could do
6577 so in simple cases, when there are more complex use webs
6578 we don't have an easy way to preserve stmt order to fulfil
6579 dependencies within them. */
6580 tree op2;
6581 ssa_op_iter i2;
6582 if (gimple_code (def_stmt) == GIMPLE_PHI)
6583 return false;
6584 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6586 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6587 if (!gimple_nop_p (def_stmt2)
6588 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6589 return false;
6591 any = true;
6595 if (!any)
6596 return true;
6598 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6600 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6601 if (!gimple_nop_p (def_stmt)
6602 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6604 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6605 gsi_remove (&gsi, false);
6606 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6610 return true;
6613 /* vectorizable_load.
6615 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6616 can be vectorized.
6617 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6618 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6619 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6621 static bool
6622 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6623 slp_tree slp_node, slp_instance slp_node_instance)
6625 tree scalar_dest;
6626 tree vec_dest = NULL;
6627 tree data_ref = NULL;
6628 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6629 stmt_vec_info prev_stmt_info;
6630 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6631 struct loop *loop = NULL;
6632 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6633 bool nested_in_vect_loop = false;
6634 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6635 tree elem_type;
6636 tree new_temp;
6637 machine_mode mode;
6638 gimple *new_stmt = NULL;
6639 tree dummy;
6640 enum dr_alignment_support alignment_support_scheme;
6641 tree dataref_ptr = NULL_TREE;
6642 tree dataref_offset = NULL_TREE;
6643 gimple *ptr_incr = NULL;
6644 int ncopies;
6645 int i, j, group_size, group_gap_adj;
6646 tree msq = NULL_TREE, lsq;
6647 tree offset = NULL_TREE;
6648 tree byte_offset = NULL_TREE;
6649 tree realignment_token = NULL_TREE;
6650 gphi *phi = NULL;
6651 vec<tree> dr_chain = vNULL;
6652 bool grouped_load = false;
6653 gimple *first_stmt;
6654 gimple *first_stmt_for_drptr = NULL;
6655 bool inv_p;
6656 bool compute_in_loop = false;
6657 struct loop *at_loop;
6658 int vec_num;
6659 bool slp = (slp_node != NULL);
6660 bool slp_perm = false;
6661 enum tree_code code;
6662 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6663 int vf;
6664 tree aggr_type;
6665 gather_scatter_info gs_info;
6666 vec_info *vinfo = stmt_info->vinfo;
6667 tree ref_type;
6669 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6670 return false;
6672 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6673 && ! vec_stmt)
6674 return false;
6676 /* Is vectorizable load? */
6677 if (!is_gimple_assign (stmt))
6678 return false;
6680 scalar_dest = gimple_assign_lhs (stmt);
6681 if (TREE_CODE (scalar_dest) != SSA_NAME)
6682 return false;
6684 code = gimple_assign_rhs_code (stmt);
6685 if (code != ARRAY_REF
6686 && code != BIT_FIELD_REF
6687 && code != INDIRECT_REF
6688 && code != COMPONENT_REF
6689 && code != IMAGPART_EXPR
6690 && code != REALPART_EXPR
6691 && code != MEM_REF
6692 && TREE_CODE_CLASS (code) != tcc_declaration)
6693 return false;
6695 if (!STMT_VINFO_DATA_REF (stmt_info))
6696 return false;
6698 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6699 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6701 if (loop_vinfo)
6703 loop = LOOP_VINFO_LOOP (loop_vinfo);
6704 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6705 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6707 else
6708 vf = 1;
6710 /* Multiple types in SLP are handled by creating the appropriate number of
6711 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6712 case of SLP. */
6713 if (slp)
6714 ncopies = 1;
6715 else
6716 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6718 gcc_assert (ncopies >= 1);
6720 /* FORNOW. This restriction should be relaxed. */
6721 if (nested_in_vect_loop && ncopies > 1)
6723 if (dump_enabled_p ())
6724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6725 "multiple types in nested loop.\n");
6726 return false;
6729 /* Invalidate assumptions made by dependence analysis when vectorization
6730 on the unrolled body effectively re-orders stmts. */
6731 if (ncopies > 1
6732 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6733 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6734 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6736 if (dump_enabled_p ())
6737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6738 "cannot perform implicit CSE when unrolling "
6739 "with negative dependence distance\n");
6740 return false;
6743 elem_type = TREE_TYPE (vectype);
6744 mode = TYPE_MODE (vectype);
6746 /* FORNOW. In some cases can vectorize even if data-type not supported
6747 (e.g. - data copies). */
6748 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6750 if (dump_enabled_p ())
6751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6752 "Aligned load, but unsupported type.\n");
6753 return false;
6756 /* Check if the load is a part of an interleaving chain. */
6757 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6759 grouped_load = true;
6760 /* FORNOW */
6761 gcc_assert (!nested_in_vect_loop);
6762 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6764 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6765 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6767 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6768 slp_perm = true;
6770 /* Invalidate assumptions made by dependence analysis when vectorization
6771 on the unrolled body effectively re-orders stmts. */
6772 if (!PURE_SLP_STMT (stmt_info)
6773 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6774 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6775 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6777 if (dump_enabled_p ())
6778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6779 "cannot perform implicit CSE when performing "
6780 "group loads with negative dependence distance\n");
6781 return false;
6784 /* Similarly, when the stmt is a load that is both part of an SLP
6785 instance and a loop-vectorized stmt via the same-dr mechanism,
6786 we have to give up. */
6787 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6788 && (STMT_SLP_TYPE (stmt_info)
6789 != STMT_SLP_TYPE (vinfo_for_stmt
6790 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6792 if (dump_enabled_p ())
6793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6794 "conflicting SLP types for CSEd load\n");
6795 return false;
6799 vect_memory_access_type memory_access_type;
6800 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6801 &memory_access_type, &gs_info))
6802 return false;
6804 if (!vec_stmt) /* transformation not required. */
6806 if (!slp)
6807 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6808 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6809 /* The SLP costs are calculated during SLP analysis. */
6810 if (!PURE_SLP_STMT (stmt_info))
6811 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6812 NULL, NULL, NULL);
6813 return true;
6816 if (!slp)
6817 gcc_assert (memory_access_type
6818 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6820 if (dump_enabled_p ())
6821 dump_printf_loc (MSG_NOTE, vect_location,
6822 "transform load. ncopies = %d\n", ncopies);
6824 /* Transform. */
6826 ensure_base_align (dr);
6828 if (memory_access_type == VMAT_GATHER_SCATTER)
6830 tree vec_oprnd0 = NULL_TREE, op;
6831 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6832 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6833 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6834 edge pe = loop_preheader_edge (loop);
6835 gimple_seq seq;
6836 basic_block new_bb;
6837 enum { NARROW, NONE, WIDEN } modifier;
6838 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6840 if (nunits == gather_off_nunits)
6841 modifier = NONE;
6842 else if (nunits == gather_off_nunits / 2)
6844 modifier = WIDEN;
6846 auto_vec_perm_indices sel (gather_off_nunits);
6847 for (i = 0; i < gather_off_nunits; ++i)
6848 sel.quick_push (i | nunits);
6850 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6852 else if (nunits == gather_off_nunits * 2)
6854 modifier = NARROW;
6856 auto_vec_perm_indices sel (nunits);
6857 for (i = 0; i < nunits; ++i)
6858 sel.quick_push (i < gather_off_nunits
6859 ? i : i + nunits - gather_off_nunits);
6861 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6862 ncopies *= 2;
6864 else
6865 gcc_unreachable ();
6867 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6868 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6869 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6870 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6871 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6872 scaletype = TREE_VALUE (arglist);
6873 gcc_checking_assert (types_compatible_p (srctype, rettype));
6875 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6877 ptr = fold_convert (ptrtype, gs_info.base);
6878 if (!is_gimple_min_invariant (ptr))
6880 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6881 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6882 gcc_assert (!new_bb);
6885 /* Currently we support only unconditional gather loads,
6886 so mask should be all ones. */
6887 if (TREE_CODE (masktype) == INTEGER_TYPE)
6888 mask = build_int_cst (masktype, -1);
6889 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6891 mask = build_int_cst (TREE_TYPE (masktype), -1);
6892 mask = build_vector_from_val (masktype, mask);
6893 mask = vect_init_vector (stmt, mask, masktype, NULL);
6895 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6897 REAL_VALUE_TYPE r;
6898 long tmp[6];
6899 for (j = 0; j < 6; ++j)
6900 tmp[j] = -1;
6901 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6902 mask = build_real (TREE_TYPE (masktype), r);
6903 mask = build_vector_from_val (masktype, mask);
6904 mask = vect_init_vector (stmt, mask, masktype, NULL);
6906 else
6907 gcc_unreachable ();
6909 scale = build_int_cst (scaletype, gs_info.scale);
6911 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6912 merge = build_int_cst (TREE_TYPE (rettype), 0);
6913 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6915 REAL_VALUE_TYPE r;
6916 long tmp[6];
6917 for (j = 0; j < 6; ++j)
6918 tmp[j] = 0;
6919 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6920 merge = build_real (TREE_TYPE (rettype), r);
6922 else
6923 gcc_unreachable ();
6924 merge = build_vector_from_val (rettype, merge);
6925 merge = vect_init_vector (stmt, merge, rettype, NULL);
6927 prev_stmt_info = NULL;
6928 for (j = 0; j < ncopies; ++j)
6930 if (modifier == WIDEN && (j & 1))
6931 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6932 perm_mask, stmt, gsi);
6933 else if (j == 0)
6934 op = vec_oprnd0
6935 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6936 else
6937 op = vec_oprnd0
6938 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6940 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6942 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6943 == TYPE_VECTOR_SUBPARTS (idxtype));
6944 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6945 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6946 new_stmt
6947 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6948 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6949 op = var;
6952 new_stmt
6953 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6955 if (!useless_type_conversion_p (vectype, rettype))
6957 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6958 == TYPE_VECTOR_SUBPARTS (rettype));
6959 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6960 gimple_call_set_lhs (new_stmt, op);
6961 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6962 var = make_ssa_name (vec_dest);
6963 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6964 new_stmt
6965 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6967 else
6969 var = make_ssa_name (vec_dest, new_stmt);
6970 gimple_call_set_lhs (new_stmt, var);
6973 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6975 if (modifier == NARROW)
6977 if ((j & 1) == 0)
6979 prev_res = var;
6980 continue;
6982 var = permute_vec_elements (prev_res, var,
6983 perm_mask, stmt, gsi);
6984 new_stmt = SSA_NAME_DEF_STMT (var);
6987 if (prev_stmt_info == NULL)
6988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6989 else
6990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6991 prev_stmt_info = vinfo_for_stmt (new_stmt);
6993 return true;
6996 if (memory_access_type == VMAT_ELEMENTWISE
6997 || memory_access_type == VMAT_STRIDED_SLP)
6999 gimple_stmt_iterator incr_gsi;
7000 bool insert_after;
7001 gimple *incr;
7002 tree offvar;
7003 tree ivstep;
7004 tree running_off;
7005 vec<constructor_elt, va_gc> *v = NULL;
7006 gimple_seq stmts = NULL;
7007 tree stride_base, stride_step, alias_off;
7009 gcc_assert (!nested_in_vect_loop);
7011 if (slp && grouped_load)
7013 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7014 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7015 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7016 ref_type = get_group_alias_ptr_type (first_stmt);
7018 else
7020 first_stmt = stmt;
7021 first_dr = dr;
7022 group_size = 1;
7023 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7026 stride_base
7027 = fold_build_pointer_plus
7028 (DR_BASE_ADDRESS (first_dr),
7029 size_binop (PLUS_EXPR,
7030 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7031 convert_to_ptrofftype (DR_INIT (first_dr))));
7032 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7034 /* For a load with loop-invariant (but other than power-of-2)
7035 stride (i.e. not a grouped access) like so:
7037 for (i = 0; i < n; i += stride)
7038 ... = array[i];
7040 we generate a new induction variable and new accesses to
7041 form a new vector (or vectors, depending on ncopies):
7043 for (j = 0; ; j += VF*stride)
7044 tmp1 = array[j];
7045 tmp2 = array[j + stride];
7047 vectemp = {tmp1, tmp2, ...}
7048 */
7050 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7051 build_int_cst (TREE_TYPE (stride_step), vf));
7053 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7055 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7056 loop, &incr_gsi, insert_after,
7057 &offvar, NULL);
7058 incr = gsi_stmt (incr_gsi);
7059 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7061 stride_step = force_gimple_operand (unshare_expr (stride_step),
7062 &stmts, true, NULL_TREE);
7063 if (stmts)
7064 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7066 prev_stmt_info = NULL;
7067 running_off = offvar;
7068 alias_off = build_int_cst (ref_type, 0);
7069 int nloads = nunits;
7070 int lnel = 1;
7071 tree ltype = TREE_TYPE (vectype);
7072 tree lvectype = vectype;
7073 auto_vec<tree> dr_chain;
7074 if (memory_access_type == VMAT_STRIDED_SLP)
7076 if (group_size < nunits)
7078 /* First check if vec_init optab supports construction from
7079 vector elts directly. */
7080 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7081 machine_mode vmode;
7082 if (mode_for_vector (elmode, group_size).exists (&vmode)
7083 && VECTOR_MODE_P (vmode)
7084 && (convert_optab_handler (vec_init_optab,
7085 TYPE_MODE (vectype), vmode)
7086 != CODE_FOR_nothing))
7088 nloads = nunits / group_size;
7089 lnel = group_size;
7090 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7092 else
7094 /* Otherwise avoid emitting a constructor of vector elements
7095 by performing the loads using an integer type of the same
7096 size, constructing a vector of those and then
7097 re-interpreting it as the original vector type.
7098 This avoids a huge runtime penalty due to the general
7099 inability to perform store forwarding from smaller stores
7100 to a larger load. */
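/* Worked example of the fallback described above (modes are illustrative):
   for a V4SI vectype and group_size == 2, lsize is 2 * 32 = 64 bits, so the
   group is loaded with nloads == 2 64-bit integer loads, a 2-element
   integer vector is built from them, and the result is VIEW_CONVERTed back
   to V4SI further below.  One 64-bit load per pair of group elements
   replaces two 32-bit element loads feeding a vector constructor.  */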
7101 unsigned lsize
7102 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7103 elmode = int_mode_for_size (lsize, 0).require ();
7104 /* If we can't construct such a vector fall back to
7105 element loads of the original vector type. */
7106 if (mode_for_vector (elmode,
7107 nunits / group_size).exists (&vmode)
7108 && VECTOR_MODE_P (vmode)
7109 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7110 != CODE_FOR_nothing))
7112 nloads = nunits / group_size;
7113 lnel = group_size;
7114 ltype = build_nonstandard_integer_type (lsize, 1);
7115 lvectype = build_vector_type (ltype, nloads);
7119 else
7121 nloads = 1;
7122 lnel = nunits;
7123 ltype = vectype;
7125 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7127 if (slp)
7129 /* For SLP permutation support we need to load the whole group,
7130 not only the number of vector stmts the permutation result
7131 fits in. */
7132 if (slp_perm)
7134 ncopies = (group_size * vf + nunits - 1) / nunits;
7135 dr_chain.create (ncopies);
7137 else
7138 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7140 int group_el = 0;
7141 unsigned HOST_WIDE_INT
7142 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7143 for (j = 0; j < ncopies; j++)
7145 if (nloads > 1)
7146 vec_alloc (v, nloads);
7147 for (i = 0; i < nloads; i++)
7149 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7150 group_el * elsz);
7151 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7152 build2 (MEM_REF, ltype,
7153 running_off, this_off));
7154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7155 if (nloads > 1)
7156 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7157 gimple_assign_lhs (new_stmt));
7159 group_el += lnel;
7160 if (! slp
7161 || group_el == group_size)
7163 tree newoff = copy_ssa_name (running_off);
7164 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7165 running_off, stride_step);
7166 vect_finish_stmt_generation (stmt, incr, gsi);
7168 running_off = newoff;
7169 group_el = 0;
7172 if (nloads > 1)
7174 tree vec_inv = build_constructor (lvectype, v);
7175 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7176 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7177 if (lvectype != vectype)
7179 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7180 VIEW_CONVERT_EXPR,
7181 build1 (VIEW_CONVERT_EXPR,
7182 vectype, new_temp));
7183 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7187 if (slp)
7189 if (slp_perm)
7190 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7191 else
7192 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7194 else
7196 if (j == 0)
7197 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7198 else
7199 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7200 prev_stmt_info = vinfo_for_stmt (new_stmt);
7203 if (slp_perm)
7205 unsigned n_perms;
7206 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7207 slp_node_instance, false, &n_perms);
7209 return true;
7212 if (grouped_load)
7214 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7215 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7216 /* For SLP vectorization we directly vectorize a subchain
7217 without permutation. */
7218 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7219 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7220 /* For BB vectorization always use the first stmt to base
7221 the data ref pointer on. */
7222 if (bb_vinfo)
7223 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7225 /* Check if the chain of loads is already vectorized. */
7226 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7227 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7228 ??? But we can only do so if there is exactly one
7229 as we have no way to get at the rest. Leave the CSE
7230 opportunity alone.
7231 ??? With the group load eventually participating
7232 in multiple different permutations (having multiple
7233 slp nodes which refer to the same group) the CSE
7234 is even wrong code. See PR56270. */
7235 && !slp)
7237 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7238 return true;
7240 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7241 group_gap_adj = 0;
7243 /* VEC_NUM is the number of vect stmts to be created for this group. */
7244 if (slp)
7246 grouped_load = false;
7247 /* For SLP permutation support we need to load the whole group,
7248 not only the number of vector stmts the permutation result
7249 fits in. */
7250 if (slp_perm)
7252 vec_num = (group_size * vf + nunits - 1) / nunits;
7253 group_gap_adj = vf * group_size - nunits * vec_num;
7255 else
7257 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7258 group_gap_adj
7259 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7262 else
7263 vec_num = group_size;
7265 ref_type = get_group_alias_ptr_type (first_stmt);
7267 else
7269 first_stmt = stmt;
7270 first_dr = dr;
7271 group_size = vec_num = 1;
7272 group_gap_adj = 0;
7273 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7276 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7277 gcc_assert (alignment_support_scheme);
7278 /* Targets with load-lane instructions must not require explicit
7279 realignment. */
7280 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7281 || alignment_support_scheme == dr_aligned
7282 || alignment_support_scheme == dr_unaligned_supported);
7284 /* In case the vectorization factor (VF) is bigger than the number
7285 of elements that we can fit in a vectype (nunits), we have to generate
7286 more than one vector stmt - i.e., we need to "unroll" the
7287 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7288 from one copy of the vector stmt to the next, in the field
7289 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7290 stages to find the correct vector defs to be used when vectorizing
7291 stmts that use the defs of the current stmt. The example below
7292 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7293 need to create 4 vectorized stmts):
7295 before vectorization:
7296 RELATED_STMT VEC_STMT
7297 S1: x = memref - -
7298 S2: z = x + 1 - -
7300 step 1: vectorize stmt S1:
7301 We first create the vector stmt VS1_0, and, as usual, record a
7302 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7303 Next, we create the vector stmt VS1_1, and record a pointer to
7304 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7305 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7306 stmts and pointers:
7307 RELATED_STMT VEC_STMT
7308 VS1_0: vx0 = memref0 VS1_1 -
7309 VS1_1: vx1 = memref1 VS1_2 -
7310 VS1_2: vx2 = memref2 VS1_3 -
7311 VS1_3: vx3 = memref3 - -
7312 S1: x = load - VS1_0
7313 S2: z = x + 1 - -
7315 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7316 information we recorded in RELATED_STMT field is used to vectorize
7317 stmt S2. */
7319 /* In case of interleaving (non-unit grouped access):
7321 S1: x2 = &base + 2
7322 S2: x0 = &base
7323 S3: x1 = &base + 1
7324 S4: x3 = &base + 3
7326 Vectorized loads are created in the order of memory accesses
7327 starting from the access of the first stmt of the chain:
7329 VS1: vx0 = &base
7330 VS2: vx1 = &base + vec_size*1
7331 VS3: vx2 = &base + vec_size*2
7332 VS4: vx3 = &base + vec_size*3
7334 Then permutation statements are generated:
7336 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7337 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7340 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7341 (the order of the data-refs in the output of vect_permute_load_chain
7342 corresponds to the order of scalar stmts in the interleaving chain - see
7343 the documentation of vect_permute_load_chain()).
7344 The generation of permutation stmts and recording them in
7345 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7347 In case of both multiple types and interleaving, the vector loads and
7348 permutation stmts above are created for every copy. The result vector
7349 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7350 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7352 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7353 on a target that supports unaligned accesses (dr_unaligned_supported)
7354 we generate the following code:
7355 p = initial_addr;
7356 indx = 0;
7357 loop {
7358 p = p + indx * vectype_size;
7359 vec_dest = *(p);
7360 indx = indx + 1;
7363 Otherwise, the data reference is potentially unaligned on a target that
7364 does not support unaligned accesses (dr_explicit_realign_optimized) -
7365 then generate the following code, in which the data in each iteration is
7366 obtained by two vector loads, one from the previous iteration, and one
7367 from the current iteration:
7368 p1 = initial_addr;
7369 msq_init = *(floor(p1))
7370 p2 = initial_addr + VS - 1;
7371 realignment_token = call target_builtin;
7372 indx = 0;
7373 loop {
7374 p2 = p2 + indx * vectype_size
7375 lsq = *(floor(p2))
7376 vec_dest = realign_load (msq, lsq, realignment_token)
7377 indx = indx + 1;
7378 msq = lsq;
7379 } */
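/* Concrete illustration (numbers chosen for exposition): with 16-byte
   vectors, a 16-byte aligned BASE and initial_addr == BASE + 4, the scheme
   above loads
     msq = *(BASE)        covering bytes 0..15
     lsq = *(BASE + 16)   covering bytes 16..31
   and REALIGN_LOAD combines the two into the requested unaligned vector,
   i.e. bytes 4..19.  */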
7381 /* If the misalignment remains the same throughout the execution of the
7382 loop, we can create the init_addr and permutation mask at the loop
7383 preheader. Otherwise, it needs to be created inside the loop.
7384 This can only occur when vectorizing memory accesses in the inner-loop
7385 nested within an outer-loop that is being vectorized. */
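/* Illustrative case: for an inner-loop access such as a[i][j] where the
   outer-loop step of the data reference is, say, 12 bytes while the vector
   size is 16 bytes, the misalignment changes from one outer-loop iteration
   to the next, so the realignment data must be computed inside the loop.  */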
7387 if (nested_in_vect_loop
7388 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7390 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7391 compute_in_loop = true;
7394 if ((alignment_support_scheme == dr_explicit_realign_optimized
7395 || alignment_support_scheme == dr_explicit_realign)
7396 && !compute_in_loop)
7398 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7399 alignment_support_scheme, NULL_TREE,
7400 &at_loop);
7401 if (alignment_support_scheme == dr_explicit_realign_optimized)
7403 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7404 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7405 size_one_node);
7408 else
7409 at_loop = loop;
7411 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7412 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7414 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7415 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7416 else
7417 aggr_type = vectype;
7419 prev_stmt_info = NULL;
7420 int group_elt = 0;
7421 for (j = 0; j < ncopies; j++)
7423 /* 1. Create the vector or array pointer update chain. */
7424 if (j == 0)
7426 bool simd_lane_access_p
7427 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7428 if (simd_lane_access_p
7429 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7430 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7431 && integer_zerop (DR_OFFSET (first_dr))
7432 && integer_zerop (DR_INIT (first_dr))
7433 && alias_sets_conflict_p (get_alias_set (aggr_type),
7434 get_alias_set (TREE_TYPE (ref_type)))
7435 && (alignment_support_scheme == dr_aligned
7436 || alignment_support_scheme == dr_unaligned_supported))
7438 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7439 dataref_offset = build_int_cst (ref_type, 0);
7440 inv_p = false;
7442 else if (first_stmt_for_drptr
7443 && first_stmt != first_stmt_for_drptr)
7445 dataref_ptr
7446 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7447 at_loop, offset, &dummy, gsi,
7448 &ptr_incr, simd_lane_access_p,
7449 &inv_p, byte_offset);
7450 /* Adjust the pointer by the difference to first_stmt. */
7451 data_reference_p ptrdr
7452 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7453 tree diff = fold_convert (sizetype,
7454 size_binop (MINUS_EXPR,
7455 DR_INIT (first_dr),
7456 DR_INIT (ptrdr)));
7457 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7458 stmt, diff);
7460 else
7461 dataref_ptr
7462 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7463 offset, &dummy, gsi, &ptr_incr,
7464 simd_lane_access_p, &inv_p,
7465 byte_offset);
7467 else if (dataref_offset)
7468 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7469 TYPE_SIZE_UNIT (aggr_type));
7470 else
7471 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7472 TYPE_SIZE_UNIT (aggr_type));
7474 if (grouped_load || slp_perm)
7475 dr_chain.create (vec_num);
7477 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7479 tree vec_array;
7481 vec_array = create_vector_array (vectype, vec_num);
7483 /* Emit:
7484 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7485 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7486 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7487 data_ref);
7488 gimple_call_set_lhs (call, vec_array);
7489 gimple_call_set_nothrow (call, true);
7490 new_stmt = call;
7491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7493 /* Extract each vector into an SSA_NAME. */
7494 for (i = 0; i < vec_num; i++)
7496 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7497 vec_array, i);
7498 dr_chain.quick_push (new_temp);
7501 /* Record the mapping between SSA_NAMEs and statements. */
7502 vect_record_grouped_load_vectors (stmt, dr_chain);
7504 else
7506 for (i = 0; i < vec_num; i++)
7508 if (i > 0)
7509 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7510 stmt, NULL_TREE);
7512 /* 2. Create the vector-load in the loop. */
7513 switch (alignment_support_scheme)
7515 case dr_aligned:
7516 case dr_unaligned_supported:
7518 unsigned int align, misalign;
7520 data_ref
7521 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7522 dataref_offset
7523 ? dataref_offset
7524 : build_int_cst (ref_type, 0));
7525 align = DR_TARGET_ALIGNMENT (dr);
7526 if (alignment_support_scheme == dr_aligned)
7528 gcc_assert (aligned_access_p (first_dr));
7529 misalign = 0;
7531 else if (DR_MISALIGNMENT (first_dr) == -1)
7533 align = dr_alignment (vect_dr_behavior (first_dr));
7534 misalign = 0;
7535 TREE_TYPE (data_ref)
7536 = build_aligned_type (TREE_TYPE (data_ref),
7537 align * BITS_PER_UNIT);
7539 else
7541 TREE_TYPE (data_ref)
7542 = build_aligned_type (TREE_TYPE (data_ref),
7543 TYPE_ALIGN (elem_type));
7544 misalign = DR_MISALIGNMENT (first_dr);
7546 if (dataref_offset == NULL_TREE
7547 && TREE_CODE (dataref_ptr) == SSA_NAME)
7548 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7549 align, misalign);
7550 break;
7552 case dr_explicit_realign:
7554 tree ptr, bump;
7556 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7558 if (compute_in_loop)
7559 msq = vect_setup_realignment (first_stmt, gsi,
7560 &realignment_token,
7561 dr_explicit_realign,
7562 dataref_ptr, NULL);
7564 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7565 ptr = copy_ssa_name (dataref_ptr);
7566 else
7567 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7568 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7569 new_stmt = gimple_build_assign
7570 (ptr, BIT_AND_EXPR, dataref_ptr,
7571 build_int_cst
7572 (TREE_TYPE (dataref_ptr),
7573 -(HOST_WIDE_INT) align));
7574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7575 data_ref
7576 = build2 (MEM_REF, vectype, ptr,
7577 build_int_cst (ref_type, 0));
7578 vec_dest = vect_create_destination_var (scalar_dest,
7579 vectype);
7580 new_stmt = gimple_build_assign (vec_dest, data_ref);
7581 new_temp = make_ssa_name (vec_dest, new_stmt);
7582 gimple_assign_set_lhs (new_stmt, new_temp);
7583 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7584 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7585 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7586 msq = new_temp;
7588 bump = size_binop (MULT_EXPR, vs,
7589 TYPE_SIZE_UNIT (elem_type));
7590 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7591 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7592 new_stmt = gimple_build_assign
7593 (NULL_TREE, BIT_AND_EXPR, ptr,
7594 build_int_cst
7595 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7596 ptr = copy_ssa_name (ptr, new_stmt);
7597 gimple_assign_set_lhs (new_stmt, ptr);
7598 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7599 data_ref
7600 = build2 (MEM_REF, vectype, ptr,
7601 build_int_cst (ref_type, 0));
7602 break;
7604 case dr_explicit_realign_optimized:
7606 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7607 new_temp = copy_ssa_name (dataref_ptr);
7608 else
7609 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7610 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7611 new_stmt = gimple_build_assign
7612 (new_temp, BIT_AND_EXPR, dataref_ptr,
7613 build_int_cst (TREE_TYPE (dataref_ptr),
7614 -(HOST_WIDE_INT) align));
7615 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7616 data_ref
7617 = build2 (MEM_REF, vectype, new_temp,
7618 build_int_cst (ref_type, 0));
7619 break;
7621 default:
7622 gcc_unreachable ();
7624 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7625 new_stmt = gimple_build_assign (vec_dest, data_ref);
7626 new_temp = make_ssa_name (vec_dest, new_stmt);
7627 gimple_assign_set_lhs (new_stmt, new_temp);
7628 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7630 /* 3. Handle explicit realignment if necessary/supported.
7631 Create in loop:
7632 vec_dest = realign_load (msq, lsq, realignment_token) */
7633 if (alignment_support_scheme == dr_explicit_realign_optimized
7634 || alignment_support_scheme == dr_explicit_realign)
7636 lsq = gimple_assign_lhs (new_stmt);
7637 if (!realignment_token)
7638 realignment_token = dataref_ptr;
7639 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7640 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7641 msq, lsq, realignment_token);
7642 new_temp = make_ssa_name (vec_dest, new_stmt);
7643 gimple_assign_set_lhs (new_stmt, new_temp);
7644 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7646 if (alignment_support_scheme == dr_explicit_realign_optimized)
7648 gcc_assert (phi);
7649 if (i == vec_num - 1 && j == ncopies - 1)
7650 add_phi_arg (phi, lsq,
7651 loop_latch_edge (containing_loop),
7652 UNKNOWN_LOCATION);
7653 msq = lsq;
7657 /* 4. Handle invariant-load. */
7658 if (inv_p && !bb_vinfo)
7660 gcc_assert (!grouped_load);
7661 /* If we have versioned for aliasing or the loop doesn't
7662 have any data dependencies that would preclude this,
7663 then we are sure this is a loop invariant load and
7664 thus we can insert it on the preheader edge. */
7665 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7666 && !nested_in_vect_loop
7667 && hoist_defs_of_uses (stmt, loop))
7669 if (dump_enabled_p ())
7671 dump_printf_loc (MSG_NOTE, vect_location,
7672 "hoisting out of the vectorized "
7673 "loop: ");
7674 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7676 tree tem = copy_ssa_name (scalar_dest);
7677 gsi_insert_on_edge_immediate
7678 (loop_preheader_edge (loop),
7679 gimple_build_assign (tem,
7680 unshare_expr
7681 (gimple_assign_rhs1 (stmt))));
7682 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7683 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7684 set_vinfo_for_stmt (new_stmt,
7685 new_stmt_vec_info (new_stmt, vinfo));
7687 else
7689 gimple_stmt_iterator gsi2 = *gsi;
7690 gsi_next (&gsi2);
7691 new_temp = vect_init_vector (stmt, scalar_dest,
7692 vectype, &gsi2);
7693 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7697 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7699 tree perm_mask = perm_mask_for_reverse (vectype);
7700 new_temp = permute_vec_elements (new_temp, new_temp,
7701 perm_mask, stmt, gsi);
7702 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7705 /* Collect vector loads and later create their permutation in
7706 vect_transform_grouped_load (). */
7707 if (grouped_load || slp_perm)
7708 dr_chain.quick_push (new_temp);
7710 /* Store vector loads in the corresponding SLP_NODE. */
7711 if (slp && !slp_perm)
7712 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7714 /* With SLP permutation we load the gaps as well; without it
7715 we need to skip the gaps after we manage to fully load
7716 all elements. group_gap_adj is GROUP_SIZE here. */
7717 group_elt += nunits;
7718 if (group_gap_adj != 0 && ! slp_perm
7719 && group_elt == group_size - group_gap_adj)
7721 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7722 * group_gap_adj);
7723 tree bump = wide_int_to_tree (sizetype, bump_val);
7724 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7725 stmt, bump);
7726 group_elt = 0;
7729 /* Bump the vector pointer to account for a gap or for excess
7730 elements loaded for a permuted SLP load. */
7731 if (group_gap_adj != 0 && slp_perm)
7733 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7734 * group_gap_adj);
7735 tree bump = wide_int_to_tree (sizetype, bump_val);
7736 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7737 stmt, bump);
7741 if (slp && !slp_perm)
7742 continue;
7744 if (slp_perm)
7746 unsigned n_perms;
7747 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7748 slp_node_instance, false,
7749 &n_perms))
7751 dr_chain.release ();
7752 return false;
7755 else
7757 if (grouped_load)
7759 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7760 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7761 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7763 else
7765 if (j == 0)
7766 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7767 else
7768 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7769 prev_stmt_info = vinfo_for_stmt (new_stmt);
7772 dr_chain.release ();
7775 return true;
7778 /* Function vect_is_simple_cond.
7780 Input:
7781 VINFO - the vect info of the loop or basic block that is being vectorized.
7782 COND - Condition that is checked for simple use.
7784 Output:
7785 *COMP_VECTYPE - the vector type for the comparison.
7786 *DTS - The def types for the arguments of the comparison.
7788 Returns whether a COND can be vectorized. Checks whether
7789 condition operands are supportable using vect_is_simple_use. */
7791 static bool
7792 vect_is_simple_cond (tree cond, vec_info *vinfo,
7793 tree *comp_vectype, enum vect_def_type *dts)
7795 tree lhs, rhs;
7796 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7798 /* Mask case. */
7799 if (TREE_CODE (cond) == SSA_NAME
7800 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7802 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7803 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7804 &dts[0], comp_vectype)
7805 || !*comp_vectype
7806 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7807 return false;
7808 return true;
7811 if (!COMPARISON_CLASS_P (cond))
7812 return false;
7814 lhs = TREE_OPERAND (cond, 0);
7815 rhs = TREE_OPERAND (cond, 1);
7817 if (TREE_CODE (lhs) == SSA_NAME)
7819 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7820 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7821 return false;
7823 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7824 || TREE_CODE (lhs) == FIXED_CST)
7825 dts[0] = vect_constant_def;
7826 else
7827 return false;
7829 if (TREE_CODE (rhs) == SSA_NAME)
7831 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7832 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7833 return false;
7835 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7836 || TREE_CODE (rhs) == FIXED_CST)
7837 dts[1] = vect_constant_def;
7838 else
7839 return false;
7841 if (vectype1 && vectype2
7842 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7843 return false;
7845 *comp_vectype = vectype1 ? vectype1 : vectype2;
7846 return true;
7849 /* vectorizable_condition.
7851 Check if STMT is a conditional modify expression that can be vectorized.
7852 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7853 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7854 at GSI.
7856 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7857 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7858 the else clause if it is 2).
7860 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
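/* Illustration (hypothetical scalar GIMPLE): a statement such as

     x = a < b ? c : d;

   is vectorized as a vector comparison feeding a VEC_COND_EXPR, roughly

     vmask = va < vb;
     vx = VEC_COND_EXPR <vmask, vc, vd>;

   with one such copy per vector statement when ncopies > 1.  */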
7862 bool
7863 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7864 gimple **vec_stmt, tree reduc_def, int reduc_index,
7865 slp_tree slp_node)
7867 tree scalar_dest = NULL_TREE;
7868 tree vec_dest = NULL_TREE;
7869 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7870 tree then_clause, else_clause;
7871 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7872 tree comp_vectype = NULL_TREE;
7873 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7874 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7875 tree vec_compare;
7876 tree new_temp;
7877 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7878 enum vect_def_type dts[4]
7879 = {vect_unknown_def_type, vect_unknown_def_type,
7880 vect_unknown_def_type, vect_unknown_def_type};
7881 int ndts = 4;
7882 int ncopies;
7883 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7884 stmt_vec_info prev_stmt_info = NULL;
7885 int i, j;
7886 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7887 vec<tree> vec_oprnds0 = vNULL;
7888 vec<tree> vec_oprnds1 = vNULL;
7889 vec<tree> vec_oprnds2 = vNULL;
7890 vec<tree> vec_oprnds3 = vNULL;
7891 tree vec_cmp_type;
7892 bool masked = false;
7894 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7895 return false;
7897 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7899 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7900 return false;
7902 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7903 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7904 && reduc_def))
7905 return false;
7907 /* FORNOW: not yet supported. */
7908 if (STMT_VINFO_LIVE_P (stmt_info))
7910 if (dump_enabled_p ())
7911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7912 "value used after loop.\n");
7913 return false;
7917 /* Is this a vectorizable conditional operation? */
7918 if (!is_gimple_assign (stmt))
7919 return false;
7921 code = gimple_assign_rhs_code (stmt);
7923 if (code != COND_EXPR)
7924 return false;
7926 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7927 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7929 if (slp_node)
7930 ncopies = 1;
7931 else
7932 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7934 gcc_assert (ncopies >= 1);
7935 if (reduc_index && ncopies > 1)
7936 return false; /* FORNOW */
7938 cond_expr = gimple_assign_rhs1 (stmt);
7939 then_clause = gimple_assign_rhs2 (stmt);
7940 else_clause = gimple_assign_rhs3 (stmt);
7942 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7943 &comp_vectype, &dts[0])
7944 || !comp_vectype)
7945 return false;
7947 gimple *def_stmt;
7948 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7949 &vectype1))
7950 return false;
7951 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7952 &vectype2))
7953 return false;
7955 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7956 return false;
7958 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7959 return false;
7961 masked = !COMPARISON_CLASS_P (cond_expr);
7962 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7964 if (vec_cmp_type == NULL_TREE)
7965 return false;
7967 cond_code = TREE_CODE (cond_expr);
7968 if (!masked)
7970 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7971 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7974 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7976 /* Boolean values may have another representation in vectors
7977 and therefore we prefer bit operations over comparison for
7978 them (which also works for scalar masks). We store opcodes
7979 to use in bitop1 and bitop2. Statement is vectorized as
7980 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7981 depending on bitop1 and bitop2 arity. */
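/* Worked example: with boolean operands, a > b is equivalent to a & ~b,
   so the GT_EXPR case below sets bitop1 = BIT_NOT_EXPR (applied to the
   second operand) and bitop2 = BIT_AND_EXPR (combining the result with the
   first operand); NE_EXPR needs only a single XOR, so bitop2 stays
   NOP_EXPR.  */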
7982 switch (cond_code)
7984 case GT_EXPR:
7985 bitop1 = BIT_NOT_EXPR;
7986 bitop2 = BIT_AND_EXPR;
7987 break;
7988 case GE_EXPR:
7989 bitop1 = BIT_NOT_EXPR;
7990 bitop2 = BIT_IOR_EXPR;
7991 break;
7992 case LT_EXPR:
7993 bitop1 = BIT_NOT_EXPR;
7994 bitop2 = BIT_AND_EXPR;
7995 std::swap (cond_expr0, cond_expr1);
7996 break;
7997 case LE_EXPR:
7998 bitop1 = BIT_NOT_EXPR;
7999 bitop2 = BIT_IOR_EXPR;
8000 std::swap (cond_expr0, cond_expr1);
8001 break;
8002 case NE_EXPR:
8003 bitop1 = BIT_XOR_EXPR;
8004 break;
8005 case EQ_EXPR:
8006 bitop1 = BIT_XOR_EXPR;
8007 bitop2 = BIT_NOT_EXPR;
8008 break;
8009 default:
8010 return false;
8012 cond_code = SSA_NAME;
8015 if (!vec_stmt)
8017 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8018 if (bitop1 != NOP_EXPR)
8020 machine_mode mode = TYPE_MODE (comp_vectype);
8021 optab optab;
8023 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8024 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8025 return false;
8027 if (bitop2 != NOP_EXPR)
8029 optab = optab_for_tree_code (bitop2, comp_vectype,
8030 optab_default);
8031 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8032 return false;
8035 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8036 cond_code))
8038 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8039 return true;
8041 return false;
8044 /* Transform. */
8046 if (!slp_node)
8048 vec_oprnds0.create (1);
8049 vec_oprnds1.create (1);
8050 vec_oprnds2.create (1);
8051 vec_oprnds3.create (1);
8054 /* Handle def. */
8055 scalar_dest = gimple_assign_lhs (stmt);
8056 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8058 /* Handle cond expr. */
8059 for (j = 0; j < ncopies; j++)
8061 gassign *new_stmt = NULL;
8062 if (j == 0)
8064 if (slp_node)
8066 auto_vec<tree, 4> ops;
8067 auto_vec<vec<tree>, 4> vec_defs;
8069 if (masked)
8070 ops.safe_push (cond_expr);
8071 else
8073 ops.safe_push (cond_expr0);
8074 ops.safe_push (cond_expr1);
8076 ops.safe_push (then_clause);
8077 ops.safe_push (else_clause);
8078 vect_get_slp_defs (ops, slp_node, &vec_defs);
8079 vec_oprnds3 = vec_defs.pop ();
8080 vec_oprnds2 = vec_defs.pop ();
8081 if (!masked)
8082 vec_oprnds1 = vec_defs.pop ();
8083 vec_oprnds0 = vec_defs.pop ();
8085 else
8087 gimple *gtemp;
8088 if (masked)
8090 vec_cond_lhs
8091 = vect_get_vec_def_for_operand (cond_expr, stmt,
8092 comp_vectype);
8093 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8094 &gtemp, &dts[0]);
8096 else
8098 vec_cond_lhs
8099 = vect_get_vec_def_for_operand (cond_expr0,
8100 stmt, comp_vectype);
8101 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8103 vec_cond_rhs
8104 = vect_get_vec_def_for_operand (cond_expr1,
8105 stmt, comp_vectype);
8106 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8108 if (reduc_index == 1)
8109 vec_then_clause = reduc_def;
8110 else
8112 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8113 stmt);
8114 vect_is_simple_use (then_clause, loop_vinfo,
8115 &gtemp, &dts[2]);
8117 if (reduc_index == 2)
8118 vec_else_clause = reduc_def;
8119 else
8121 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8122 stmt);
8123 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8127 else
8129 vec_cond_lhs
8130 = vect_get_vec_def_for_stmt_copy (dts[0],
8131 vec_oprnds0.pop ());
8132 if (!masked)
8133 vec_cond_rhs
8134 = vect_get_vec_def_for_stmt_copy (dts[1],
8135 vec_oprnds1.pop ());
8137 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8138 vec_oprnds2.pop ());
8139 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8140 vec_oprnds3.pop ());
8143 if (!slp_node)
8145 vec_oprnds0.quick_push (vec_cond_lhs);
8146 if (!masked)
8147 vec_oprnds1.quick_push (vec_cond_rhs);
8148 vec_oprnds2.quick_push (vec_then_clause);
8149 vec_oprnds3.quick_push (vec_else_clause);
8152 /* Arguments are ready. Create the new vector stmt. */
8153 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8155 vec_then_clause = vec_oprnds2[i];
8156 vec_else_clause = vec_oprnds3[i];
8158 if (masked)
8159 vec_compare = vec_cond_lhs;
8160 else
8162 vec_cond_rhs = vec_oprnds1[i];
8163 if (bitop1 == NOP_EXPR)
8164 vec_compare = build2 (cond_code, vec_cmp_type,
8165 vec_cond_lhs, vec_cond_rhs);
8166 else
8168 new_temp = make_ssa_name (vec_cmp_type);
8169 if (bitop1 == BIT_NOT_EXPR)
8170 new_stmt = gimple_build_assign (new_temp, bitop1,
8171 vec_cond_rhs);
8172 else
8173 new_stmt
8174 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8175 vec_cond_rhs);
8176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8177 if (bitop2 == NOP_EXPR)
8178 vec_compare = new_temp;
8179 else if (bitop2 == BIT_NOT_EXPR)
8181 /* Instead of doing ~x ? y : z do x ? z : y. */
8182 vec_compare = new_temp;
8183 std::swap (vec_then_clause, vec_else_clause);
8185 else
8187 vec_compare = make_ssa_name (vec_cmp_type);
8188 new_stmt
8189 = gimple_build_assign (vec_compare, bitop2,
8190 vec_cond_lhs, new_temp);
8191 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8195 new_temp = make_ssa_name (vec_dest);
8196 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8197 vec_compare, vec_then_clause,
8198 vec_else_clause);
8199 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8200 if (slp_node)
8201 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8204 if (slp_node)
8205 continue;
8207 if (j == 0)
8208 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8209 else
8210 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8212 prev_stmt_info = vinfo_for_stmt (new_stmt);
8215 vec_oprnds0.release ();
8216 vec_oprnds1.release ();
8217 vec_oprnds2.release ();
8218 vec_oprnds3.release ();
8220 return true;
8223 /* vectorizable_comparison.
8225 Check if STMT is a comparison expression that can be vectorized.
8226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8227 comparison, put it in VEC_STMT, and insert it at GSI.
8229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
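/* Illustration (hypothetical scalar GIMPLE): a mask-producing statement

     m = a < b;

   is vectorized into a comparison whose result has a boolean vector type,

     vm = va < vb;

   which can then feed VEC_COND_EXPRs or other mask consumers.  */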
8231 static bool
8232 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8233 gimple **vec_stmt, tree reduc_def,
8234 slp_tree slp_node)
8236 tree lhs, rhs1, rhs2;
8237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8238 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8239 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8240 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8241 tree new_temp;
8242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8243 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8244 int ndts = 2;
8245 unsigned nunits;
8246 int ncopies;
8247 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8248 stmt_vec_info prev_stmt_info = NULL;
8249 int i, j;
8250 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8251 vec<tree> vec_oprnds0 = vNULL;
8252 vec<tree> vec_oprnds1 = vNULL;
8253 gimple *def_stmt;
8254 tree mask_type;
8255 tree mask;
8257 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8258 return false;
8260 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8261 return false;
8263 mask_type = vectype;
8264 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8266 if (slp_node)
8267 ncopies = 1;
8268 else
8269 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8271 gcc_assert (ncopies >= 1);
8272 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8273 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8274 && reduc_def))
8275 return false;
8277 if (STMT_VINFO_LIVE_P (stmt_info))
8279 if (dump_enabled_p ())
8280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8281 "value used after loop.\n");
8282 return false;
8285 if (!is_gimple_assign (stmt))
8286 return false;
8288 code = gimple_assign_rhs_code (stmt);
8290 if (TREE_CODE_CLASS (code) != tcc_comparison)
8291 return false;
8293 rhs1 = gimple_assign_rhs1 (stmt);
8294 rhs2 = gimple_assign_rhs2 (stmt);
8296 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8297 &dts[0], &vectype1))
8298 return false;
8300 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8301 &dts[1], &vectype2))
8302 return false;
8304 if (vectype1 && vectype2
8305 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8306 return false;
8308 vectype = vectype1 ? vectype1 : vectype2;
8310 /* Invariant comparison. */
8311 if (!vectype)
8313 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8314 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8315 return false;
8317 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8318 return false;
8320 /* Can't compare mask and non-mask types. */
8321 if (vectype1 && vectype2
8322 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8323 return false;
8325 /* Boolean values may have another representation in vectors
8326 and therefore we prefer bit operations over comparison for
8327 them (which also works for scalar masks). We store opcodes
8328 to use in bitop1 and bitop2. Statement is vectorized as
8329 BITOP2 (rhs1 BITOP1 rhs2) or
8330 rhs1 BITOP2 (BITOP1 rhs2)
8331 depending on bitop1 and bitop2 arity. */
8332 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8334 if (code == GT_EXPR)
8336 bitop1 = BIT_NOT_EXPR;
8337 bitop2 = BIT_AND_EXPR;
8339 else if (code == GE_EXPR)
8341 bitop1 = BIT_NOT_EXPR;
8342 bitop2 = BIT_IOR_EXPR;
8344 else if (code == LT_EXPR)
8346 bitop1 = BIT_NOT_EXPR;
8347 bitop2 = BIT_AND_EXPR;
8348 std::swap (rhs1, rhs2);
8349 std::swap (dts[0], dts[1]);
8351 else if (code == LE_EXPR)
8353 bitop1 = BIT_NOT_EXPR;
8354 bitop2 = BIT_IOR_EXPR;
8355 std::swap (rhs1, rhs2);
8356 std::swap (dts[0], dts[1]);
8358 else
8360 bitop1 = BIT_XOR_EXPR;
8361 if (code == EQ_EXPR)
8362 bitop2 = BIT_NOT_EXPR;
8366 if (!vec_stmt)
8368 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8369 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8370 dts, ndts, NULL, NULL);
8371 if (bitop1 == NOP_EXPR)
8372 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8373 else
8375 machine_mode mode = TYPE_MODE (vectype);
8376 optab optab;
8378 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8379 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8380 return false;
8382 if (bitop2 != NOP_EXPR)
8384 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8385 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8386 return false;
8388 return true;
8392 /* Transform. */
8393 if (!slp_node)
8395 vec_oprnds0.create (1);
8396 vec_oprnds1.create (1);
8399 /* Handle def. */
8400 lhs = gimple_assign_lhs (stmt);
8401 mask = vect_create_destination_var (lhs, mask_type);
8403 /* Handle cmp expr. */
8404 for (j = 0; j < ncopies; j++)
8406 gassign *new_stmt = NULL;
8407 if (j == 0)
8409 if (slp_node)
8411 auto_vec<tree, 2> ops;
8412 auto_vec<vec<tree>, 2> vec_defs;
8414 ops.safe_push (rhs1);
8415 ops.safe_push (rhs2);
8416 vect_get_slp_defs (ops, slp_node, &vec_defs);
8417 vec_oprnds1 = vec_defs.pop ();
8418 vec_oprnds0 = vec_defs.pop ();
8420 else
8422 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8423 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8426 else
8428 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8429 vec_oprnds0.pop ());
8430 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8431 vec_oprnds1.pop ());
8434 if (!slp_node)
8436 vec_oprnds0.quick_push (vec_rhs1);
8437 vec_oprnds1.quick_push (vec_rhs2);
8440 /* Arguments are ready. Create the new vector stmt. */
8441 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8443 vec_rhs2 = vec_oprnds1[i];
8445 new_temp = make_ssa_name (mask);
8446 if (bitop1 == NOP_EXPR)
8448 new_stmt = gimple_build_assign (new_temp, code,
8449 vec_rhs1, vec_rhs2);
8450 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8452 else
8454 if (bitop1 == BIT_NOT_EXPR)
8455 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8456 else
8457 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8458 vec_rhs2);
8459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8460 if (bitop2 != NOP_EXPR)
8462 tree res = make_ssa_name (mask);
8463 if (bitop2 == BIT_NOT_EXPR)
8464 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8465 else
8466 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8467 new_temp);
8468 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8471 if (slp_node)
8472 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8475 if (slp_node)
8476 continue;
8478 if (j == 0)
8479 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8480 else
8481 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8483 prev_stmt_info = vinfo_for_stmt (new_stmt);
8486 vec_oprnds0.release ();
8487 vec_oprnds1.release ();
8489 return true;
8492 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8493 can handle all live statements in the node. Otherwise return true
8494 if STMT is not live or if vectorizable_live_operation can handle it.
8495 GSI and VEC_STMT are as for vectorizable_live_operation. */
8497 static bool
8498 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8499 slp_tree slp_node, gimple **vec_stmt)
8501 if (slp_node)
8503 gimple *slp_stmt;
8504 unsigned int i;
8505 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8507 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8508 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8509 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8510 vec_stmt))
8511 return false;
8514 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8515 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8516 return false;
8518 return true;
8521 /* Make sure the statement is vectorizable. */
8523 bool
8524 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8525 slp_instance node_instance)
8527 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8528 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8529 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8530 bool ok;
8531 gimple *pattern_stmt;
8532 gimple_seq pattern_def_seq;
8534 if (dump_enabled_p ())
8536 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8537 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8540 if (gimple_has_volatile_ops (stmt))
8542 if (dump_enabled_p ())
8543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8544 "not vectorized: stmt has volatile operands\n");
8546 return false;
8549 /* Skip stmts that do not need to be vectorized. In loops this is expected
8550 to include:
8551 - the COND_EXPR which is the loop exit condition
8552 - any LABEL_EXPRs in the loop
8553 - computations that are used only for array indexing or loop control.
8554 In basic blocks we only analyze statements that are a part of some SLP
8555 instance, therefore, all the statements are relevant.
8557 A pattern statement needs to be analyzed instead of the original statement
8558 if the original statement is not relevant. Otherwise, we analyze both
8559 statements. In basic blocks we are called from some SLP instance
8560 traversal; there we don't analyze pattern stmts instead, as the pattern
8561 stmts will already be part of an SLP instance. */
8563 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8564 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8565 && !STMT_VINFO_LIVE_P (stmt_info))
8567 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8568 && pattern_stmt
8569 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8570 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8572 /* Analyze PATTERN_STMT instead of the original stmt. */
8573 stmt = pattern_stmt;
8574 stmt_info = vinfo_for_stmt (pattern_stmt);
8575 if (dump_enabled_p ())
8577 dump_printf_loc (MSG_NOTE, vect_location,
8578 "==> examining pattern statement: ");
8579 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8582 else
8584 if (dump_enabled_p ())
8585 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8587 return true;
8590 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8591 && node == NULL
8592 && pattern_stmt
8593 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8594 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8596 /* Analyze PATTERN_STMT too. */
8597 if (dump_enabled_p ())
8599 dump_printf_loc (MSG_NOTE, vect_location,
8600 "==> examining pattern statement: ");
8601 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8604 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8605 node_instance))
8606 return false;
8609 if (is_pattern_stmt_p (stmt_info)
8610 && node == NULL
8611 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8613 gimple_stmt_iterator si;
8615 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8617 gimple *pattern_def_stmt = gsi_stmt (si);
8618 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8619 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8621 /* Analyze def stmt of STMT if it's a pattern stmt. */
8622 if (dump_enabled_p ())
8624 dump_printf_loc (MSG_NOTE, vect_location,
8625 "==> examining pattern def statement: ");
8626 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8629 if (!vect_analyze_stmt (pattern_def_stmt,
8630 need_to_vectorize, node, node_instance))
8631 return false;
8636 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8638 case vect_internal_def:
8639 break;
8641 case vect_reduction_def:
8642 case vect_nested_cycle:
8643 gcc_assert (!bb_vinfo
8644 && (relevance == vect_used_in_outer
8645 || relevance == vect_used_in_outer_by_reduction
8646 || relevance == vect_used_by_reduction
8647 || relevance == vect_unused_in_scope
8648 || relevance == vect_used_only_live));
8649 break;
8651 case vect_induction_def:
8652 gcc_assert (!bb_vinfo);
8653 break;
8655 case vect_constant_def:
8656 case vect_external_def:
8657 case vect_unknown_def_type:
8658 default:
8659 gcc_unreachable ();
8662 if (STMT_VINFO_RELEVANT_P (stmt_info))
8664 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8665 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8666 || (is_gimple_call (stmt)
8667 && gimple_call_lhs (stmt) == NULL_TREE));
8668 *need_to_vectorize = true;
8671 if (PURE_SLP_STMT (stmt_info) && !node)
8673 dump_printf_loc (MSG_NOTE, vect_location,
8674 "handled only by SLP analysis\n");
8675 return true;
8678 ok = true;
8679 if (!bb_vinfo
8680 && (STMT_VINFO_RELEVANT_P (stmt_info)
8681 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8682 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8683 || vectorizable_conversion (stmt, NULL, NULL, node)
8684 || vectorizable_shift (stmt, NULL, NULL, node)
8685 || vectorizable_operation (stmt, NULL, NULL, node)
8686 || vectorizable_assignment (stmt, NULL, NULL, node)
8687 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8688 || vectorizable_call (stmt, NULL, NULL, node)
8689 || vectorizable_store (stmt, NULL, NULL, node)
8690 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8691 || vectorizable_induction (stmt, NULL, NULL, node)
8692 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8693 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8694 else
8696 if (bb_vinfo)
8697 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8698 || vectorizable_conversion (stmt, NULL, NULL, node)
8699 || vectorizable_shift (stmt, NULL, NULL, node)
8700 || vectorizable_operation (stmt, NULL, NULL, node)
8701 || vectorizable_assignment (stmt, NULL, NULL, node)
8702 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8703 || vectorizable_call (stmt, NULL, NULL, node)
8704 || vectorizable_store (stmt, NULL, NULL, node)
8705 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8706 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8709 if (!ok)
8711 if (dump_enabled_p ())
8713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8714 "not vectorized: relevant stmt not ");
8715 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8716 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8719 return false;
8722 if (bb_vinfo)
8723 return true;
8725 /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
8726 need extra handling, except for vectorizable reductions. */
8727 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8728 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8730 if (dump_enabled_p ())
8732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8733 "not vectorized: live stmt not supported: ");
8734 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8737 return false;
8740 return true;
8744 /* Function vect_transform_stmt.
8746 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8748 bool
8749 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8750 bool *grouped_store, slp_tree slp_node,
8751 slp_instance slp_node_instance)
8753 bool is_store = false;
8754 gimple *vec_stmt = NULL;
8755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8756 bool done;
8758 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8759 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8761 switch (STMT_VINFO_TYPE (stmt_info))
8763 case type_demotion_vec_info_type:
8764 case type_promotion_vec_info_type:
8765 case type_conversion_vec_info_type:
8766 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8767 gcc_assert (done);
8768 break;
8770 case induc_vec_info_type:
8771 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8772 gcc_assert (done);
8773 break;
8775 case shift_vec_info_type:
8776 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8777 gcc_assert (done);
8778 break;
8780 case op_vec_info_type:
8781 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8782 gcc_assert (done);
8783 break;
8785 case assignment_vec_info_type:
8786 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8787 gcc_assert (done);
8788 break;
8790 case load_vec_info_type:
8791 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8792 slp_node_instance);
8793 gcc_assert (done);
8794 break;
8796 case store_vec_info_type:
8797 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8798 gcc_assert (done);
8799 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8801 /* In case of interleaving, the whole chain is vectorized when the
8802 last store in the chain is reached. Store stmts before the last
8803 one are skipped, and their vec_stmt_info shouldn't be freed
8804 meanwhile. */
8805 *grouped_store = true;
8806 if (STMT_VINFO_VEC_STMT (stmt_info))
8807 is_store = true;
8809 else
8810 is_store = true;
8811 break;
8813 case condition_vec_info_type:
8814 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8815 gcc_assert (done);
8816 break;
8818 case comparison_vec_info_type:
8819 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8820 gcc_assert (done);
8821 break;
8823 case call_vec_info_type:
8824 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8825 stmt = gsi_stmt (*gsi);
8826 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8827 is_store = true;
8828 break;
8830 case call_simd_clone_vec_info_type:
8831 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8832 stmt = gsi_stmt (*gsi);
8833 break;
8835 case reduc_vec_info_type:
8836 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8837 slp_node_instance);
8838 gcc_assert (done);
8839 break;
8841 default:
8842 if (!STMT_VINFO_LIVE_P (stmt_info))
8844 if (dump_enabled_p ())
8845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8846 "stmt not supported.\n");
8847 gcc_unreachable ();
8851 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8852 This would break hybrid SLP vectorization. */
8853 if (slp_node)
8854 gcc_assert (!vec_stmt
8855 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8857 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8858 is being vectorized, but outside the immediately enclosing loop. */
8859 if (vec_stmt
8860 && STMT_VINFO_LOOP_VINFO (stmt_info)
8861 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8862 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8863 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8864 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8865 || STMT_VINFO_RELEVANT (stmt_info) ==
8866 vect_used_in_outer_by_reduction))
8868 struct loop *innerloop = LOOP_VINFO_LOOP (
8869 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8870 imm_use_iterator imm_iter;
8871 use_operand_p use_p;
8872 tree scalar_dest;
8873 gimple *exit_phi;
8875 if (dump_enabled_p ())
8876 dump_printf_loc (MSG_NOTE, vect_location,
8877 "Record the vdef for outer-loop vectorization.\n");
8879 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8880 (to be used when vectorizing outer-loop stmts that use the DEF of
8881 STMT). */
8882 if (gimple_code (stmt) == GIMPLE_PHI)
8883 scalar_dest = PHI_RESULT (stmt);
8884 else
8885 scalar_dest = gimple_assign_lhs (stmt);
8887 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8889 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8891 exit_phi = USE_STMT (use_p);
8892 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8897 /* Handle stmts whose DEF is used outside the loop-nest that is
8898 being vectorized. */
8899 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8901 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8902 gcc_assert (done);
8905 if (vec_stmt)
8906 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8908 return is_store;
8912 /* Remove a group of stores (for SLP or interleaving), free their
8913 stmt_vec_info. */
8915 void
8916 vect_remove_stores (gimple *first_stmt)
8918 gimple *next = first_stmt;
8919 gimple *tmp;
8920 gimple_stmt_iterator next_si;
8922 while (next)
8924 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8926 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8927 if (is_pattern_stmt_p (stmt_info))
8928 next = STMT_VINFO_RELATED_STMT (stmt_info);
8929 /* Free the attached stmt_vec_info and remove the stmt. */
8930 next_si = gsi_for_stmt (next);
8931 unlink_stmt_vdef (next);
8932 gsi_remove (&next_si, true);
8933 release_defs (next);
8934 free_stmt_vec_info (next);
8935 next = tmp;
8940 /* Function new_stmt_vec_info.
8942 Create and initialize a new stmt_vec_info struct for STMT. */
8944 stmt_vec_info
8945 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8947 stmt_vec_info res;
8948 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8950 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8951 STMT_VINFO_STMT (res) = stmt;
8952 res->vinfo = vinfo;
8953 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8954 STMT_VINFO_LIVE_P (res) = false;
8955 STMT_VINFO_VECTYPE (res) = NULL;
8956 STMT_VINFO_VEC_STMT (res) = NULL;
8957 STMT_VINFO_VECTORIZABLE (res) = true;
8958 STMT_VINFO_IN_PATTERN_P (res) = false;
8959 STMT_VINFO_RELATED_STMT (res) = NULL;
8960 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8961 STMT_VINFO_DATA_REF (res) = NULL;
8962 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8963 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8965 if (gimple_code (stmt) == GIMPLE_PHI
8966 && is_loop_header_bb_p (gimple_bb (stmt)))
8967 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8968 else
8969 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8971 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8972 STMT_SLP_TYPE (res) = loop_vect;
8973 STMT_VINFO_NUM_SLP_USES (res) = 0;
8975 GROUP_FIRST_ELEMENT (res) = NULL;
8976 GROUP_NEXT_ELEMENT (res) = NULL;
8977 GROUP_SIZE (res) = 0;
8978 GROUP_STORE_COUNT (res) = 0;
8979 GROUP_GAP (res) = 0;
8980 GROUP_SAME_DR_STMT (res) = NULL;
8982 return res;
8986 /* Create the global vector for stmt_vec_info structs. */
8988 void
8989 init_stmt_vec_info_vec (void)
8991 gcc_assert (!stmt_vec_info_vec.exists ());
8992 stmt_vec_info_vec.create (50);
8996 /* Free the global vector for stmt_vec_info structs. */
8998 void
8999 free_stmt_vec_info_vec (void)
9001 unsigned int i;
9002 stmt_vec_info info;
9003 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9004 if (info != NULL)
9005 free_stmt_vec_info (STMT_VINFO_STMT (info));
9006 gcc_assert (stmt_vec_info_vec.exists ());
9007 stmt_vec_info_vec.release ();
9011 /* Free stmt vectorization related info. */
9013 void
9014 free_stmt_vec_info (gimple *stmt)
9016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9018 if (!stmt_info)
9019 return;
9021 /* Check if this statement has a related "pattern stmt"
9022 (introduced by the vectorizer during the pattern recognition
9023 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9024 too. */
9025 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9027 stmt_vec_info patt_info
9028 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9029 if (patt_info)
9031 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9032 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9033 gimple_set_bb (patt_stmt, NULL);
9034 tree lhs = gimple_get_lhs (patt_stmt);
9035 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9036 release_ssa_name (lhs);
9037 if (seq)
9039 gimple_stmt_iterator si;
9040 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9042 gimple *seq_stmt = gsi_stmt (si);
9043 gimple_set_bb (seq_stmt, NULL);
9044 lhs = gimple_get_lhs (seq_stmt);
9045 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9046 release_ssa_name (lhs);
9047 free_stmt_vec_info (seq_stmt);
9050 free_stmt_vec_info (patt_stmt);
9054 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9055 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9056 set_vinfo_for_stmt (stmt, NULL);
9057 free (stmt_info);
9061 /* Function get_vectype_for_scalar_type_and_size.
9063 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9064 by the target. */
9066 static tree
9067 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9069 tree orig_scalar_type = scalar_type;
9070 scalar_mode inner_mode;
9071 machine_mode simd_mode;
9072 int nunits;
9073 tree vectype;
9075 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9076 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9077 return NULL_TREE;
9079 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9081 /* For vector types of elements whose mode precision doesn't
9082 match their type's precision we use an element type of mode
9083 precision. The vectorization routines will have to make sure
9084 they support the proper result truncation/extension.
9085 We also make sure to build vector types with INTEGER_TYPE
9086 component type only. */
9087 if (INTEGRAL_TYPE_P (scalar_type)
9088 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9089 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9090 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9091 TYPE_UNSIGNED (scalar_type));
9093 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9094 When the component mode passes the above test simply use a type
9095 corresponding to that mode. The theory is that any use that
9096 would cause problems with this will disable vectorization anyway. */
9097 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9098 && !INTEGRAL_TYPE_P (scalar_type))
9099 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9101 /* We can't build a vector type of elements with alignment bigger than
9102 their size. */
9103 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9104 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9105 TYPE_UNSIGNED (scalar_type));
9107 /* If we fell back to using the mode, fail if there was
9108 no scalar type for it. */
9109 if (scalar_type == NULL_TREE)
9110 return NULL_TREE;
9112 /* If no size was supplied, use the mode the target prefers. Otherwise
9113 look up a vector mode of the specified size. */
9114 if (size == 0)
9115 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9116 else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
9117 return NULL_TREE;
9118 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9119 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9120 if (nunits < 1)
9121 return NULL_TREE;
9123 vectype = build_vector_type (scalar_type, nunits);
9125 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9126 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9127 return NULL_TREE;
9129 /* Re-attach the address-space qualifier if we canonicalized the scalar
9130 type. */
9131 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9132 return build_qualified_type
9133 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9135 return vectype;
9138 unsigned int current_vector_size;
9140 /* Function get_vectype_for_scalar_type.
9142 Returns the vector type corresponding to SCALAR_TYPE as supported
9143 by the target. */
9145 tree
9146 get_vectype_for_scalar_type (tree scalar_type)
9148 tree vectype;
9149 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9150 current_vector_size);
9151 if (vectype
9152 && current_vector_size == 0)
9153 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9154 return vectype;
9157 /* Function get_mask_type_for_scalar_type.
9159 Returns the mask type corresponding to the result of a comparison
9160 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9162 tree
9163 get_mask_type_for_scalar_type (tree scalar_type)
9165 tree vectype = get_vectype_for_scalar_type (scalar_type);
9167 if (!vectype)
9168 return NULL;
9170 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9171 current_vector_size);
9174 /* Function get_same_sized_vectype
9176 Returns a vector type corresponding to SCALAR_TYPE with the same size
9177 as VECTOR_TYPE, if supported by the target. */
9179 tree
9180 get_same_sized_vectype (tree scalar_type, tree vector_type)
9182 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9183 return build_same_sized_truth_vector_type (vector_type);
9185 return get_vectype_for_scalar_type_and_size
9186 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9189 /* Function vect_is_simple_use.
9191 Input:
9192 VINFO - the vect info of the loop or basic block that is being vectorized.
9193 OPERAND - operand in the loop or bb.
9194 Output:
9195 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9196 DT - the type of definition
9198 Returns whether a stmt with OPERAND can be vectorized.
9199 For loops, supportable operands are constants, loop invariants, and operands
9200 that are defined by the current iteration of the loop. Unsupportable
9201 operands are those that are defined by a previous iteration of the loop (as
9202 is the case in reduction/induction computations).
9203 For basic blocks, supportable operands are constants and bb invariants.
9204 For now, operands defined outside the basic block are not supported. */
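/* Typical usage (an illustrative sketch only; OP and VINFO stand for
   whatever operand and vec_info the caller already has):

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
       return false;
     if (dt == vect_constant_def || dt == vect_external_def)
       ... the operand is invariant in the vectorized region ...

   The overload further below additionally returns the operand's vector
   type.  */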
9206 bool
9207 vect_is_simple_use (tree operand, vec_info *vinfo,
9208 gimple **def_stmt, enum vect_def_type *dt)
9210 *def_stmt = NULL;
9211 *dt = vect_unknown_def_type;
9213 if (dump_enabled_p ())
9215 dump_printf_loc (MSG_NOTE, vect_location,
9216 "vect_is_simple_use: operand ");
9217 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9218 dump_printf (MSG_NOTE, "\n");
9221 if (CONSTANT_CLASS_P (operand))
9223 *dt = vect_constant_def;
9224 return true;
9227 if (is_gimple_min_invariant (operand))
9229 *dt = vect_external_def;
9230 return true;
9233 if (TREE_CODE (operand) != SSA_NAME)
9235 if (dump_enabled_p ())
9236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9237 "not ssa-name.\n");
9238 return false;
9241 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9243 *dt = vect_external_def;
9244 return true;
9247 *def_stmt = SSA_NAME_DEF_STMT (operand);
9248 if (dump_enabled_p ())
9250 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9251 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9254 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9255 *dt = vect_external_def;
9256 else
9258 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9259 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9262 if (dump_enabled_p ())
9264 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9265 switch (*dt)
9267 case vect_uninitialized_def:
9268 dump_printf (MSG_NOTE, "uninitialized\n");
9269 break;
9270 case vect_constant_def:
9271 dump_printf (MSG_NOTE, "constant\n");
9272 break;
9273 case vect_external_def:
9274 dump_printf (MSG_NOTE, "external\n");
9275 break;
9276 case vect_internal_def:
9277 dump_printf (MSG_NOTE, "internal\n");
9278 break;
9279 case vect_induction_def:
9280 dump_printf (MSG_NOTE, "induction\n");
9281 break;
9282 case vect_reduction_def:
9283 dump_printf (MSG_NOTE, "reduction\n");
9284 break;
9285 case vect_double_reduction_def:
9286 dump_printf (MSG_NOTE, "double reduction\n");
9287 break;
9288 case vect_nested_cycle:
9289 dump_printf (MSG_NOTE, "nested cycle\n");
9290 break;
9291 case vect_unknown_def_type:
9292 dump_printf (MSG_NOTE, "unknown\n");
9293 break;
9297 if (*dt == vect_unknown_def_type)
9299 if (dump_enabled_p ())
9300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9301 "Unsupported pattern.\n");
9302 return false;
9305 switch (gimple_code (*def_stmt))
9307 case GIMPLE_PHI:
9308 case GIMPLE_ASSIGN:
9309 case GIMPLE_CALL:
9310 break;
9311 default:
9312 if (dump_enabled_p ())
9313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9314 "unsupported defining stmt:\n");
9315 return false;
9318 return true;
9321 /* Function vect_is_simple_use.
9323 Same as vect_is_simple_use but also determines the vector operand
9324 type of OPERAND and stores it in *VECTYPE. If the definition of
9325 OPERAND is vect_uninitialized_def, vect_constant_def or
9326 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9327 is responsible for computing the best-suited vector type for the
9328 scalar operand. */
9330 bool
9331 vect_is_simple_use (tree operand, vec_info *vinfo,
9332 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9334 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9335 return false;
9337 /* Now get a vector type if the def is internal, otherwise supply
9338 NULL_TREE and leave it up to the caller to figure out a proper
9339 type for the use stmt. */
9340 if (*dt == vect_internal_def
9341 || *dt == vect_induction_def
9342 || *dt == vect_reduction_def
9343 || *dt == vect_double_reduction_def
9344 || *dt == vect_nested_cycle)
9346 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
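/* If the defining statement has been replaced by a pattern statement and
   is itself neither relevant nor live, the vector type is the one
   recorded on the related pattern statement; use that instead.  */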
9348 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9349 && !STMT_VINFO_RELEVANT (stmt_info)
9350 && !STMT_VINFO_LIVE_P (stmt_info))
9351 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9353 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9354 gcc_assert (*vectype != NULL_TREE);
9356 else if (*dt == vect_uninitialized_def
9357 || *dt == vect_constant_def
9358 || *dt == vect_external_def)
9359 *vectype = NULL_TREE;
9360 else
9361 gcc_unreachable ();
9363 return true;
9367 /* Function supportable_widening_operation
9369 Check whether an operation represented by the code CODE is a
9370 widening operation that is supported by the target platform in
9371 vector form (i.e., when operating on arguments of type VECTYPE_IN
9372 producing a result of type VECTYPE_OUT).
9374 Widening operations we currently support are NOP (CONVERT), FLOAT,
9375 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if
9376 these operations are supported by the target platform either directly
9377 (via vector tree-codes), or via target builtins.
9379 Output:
9380 - CODE1 and CODE2 are codes of vector operations to be used when
9381 vectorizing the operation, if available.
9382 - MULTI_STEP_CVT determines the number of required intermediate steps in
9383 case of multi-step conversion (like char->short->int - in that case
9384 MULTI_STEP_CVT will be 1).
9385 - INTERM_TYPES contains the intermediate type required to perform the
9386 widening operation (short in the above example). */
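/* For instance (illustrative): widening a vector of chars to a vector of
   ints on a target that only provides adjacent-width unpacks would return
   CODE1/CODE2 = VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, MULTI_STEP_CVT = 1
   and INTERM_TYPES = { vector short }.  */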
9388 bool
9389 supportable_widening_operation (enum tree_code code, gimple *stmt,
9390 tree vectype_out, tree vectype_in,
9391 enum tree_code *code1, enum tree_code *code2,
9392 int *multi_step_cvt,
9393 vec<tree> *interm_types)
9395 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9396 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9397 struct loop *vect_loop = NULL;
9398 machine_mode vec_mode;
9399 enum insn_code icode1, icode2;
9400 optab optab1, optab2;
9401 tree vectype = vectype_in;
9402 tree wide_vectype = vectype_out;
9403 enum tree_code c1, c2;
9404 int i;
9405 tree prev_type, intermediate_type;
9406 machine_mode intermediate_mode, prev_mode;
9407 optab optab3, optab4;
9409 *multi_step_cvt = 0;
9410 if (loop_info)
9411 vect_loop = LOOP_VINFO_LOOP (loop_info);
9413 switch (code)
9415 case WIDEN_MULT_EXPR:
9416 /* The result of a vectorized widening operation usually requires
9417 two vectors (because the widened results do not fit into one vector).
9418 The generated vector results would normally be expected to be
9419 generated in the same order as in the original scalar computation,
9420 i.e. if 8 results are generated in each vector iteration, they are
9421 to be organized as follows:
9422 vect1: [res1,res2,res3,res4],
9423 vect2: [res5,res6,res7,res8].
9425 However, in the special case that the result of the widening
9426 operation is used in a reduction computation only, the order doesn't
9427 matter (because when vectorizing a reduction we change the order of
9428 the computation). Some targets can take advantage of this and
9429 generate more efficient code. For example, targets like Altivec,
9430 which implement widen_mult as a {mult_even,mult_odd} pair,
9431 generate the following vectors:
9432 vect1: [res1,res3,res5,res7],
9433 vect2: [res2,res4,res6,res8].
9435 When vectorizing outer-loops, we execute the inner-loop sequentially
9436 (each vectorized inner-loop iteration contributes to VF outer-loop
9437 iterations in parallel). We therefore don't allow the order of the
9438 computation in the inner-loop to change during outer-loop
9439 vectorization. */
9440 /* TODO: Another case in which order doesn't *really* matter is when we
9441 widen and then contract again, e.g. (short)((int)x * y >> 8).
9442 Normally, pack_trunc performs an even/odd permute, whereas the
9443 repack from an even/odd expansion would be an interleave, which
9444 would be significantly simpler for e.g. AVX2. */
9445 /* In any case, in order to avoid duplicating the code below, recurse
9446 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9447 are properly set up for the caller. If we fail, we'll continue with
9448 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9449 if (vect_loop
9450 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9451 && !nested_in_vect_loop_p (vect_loop, stmt)
9452 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9453 stmt, vectype_out, vectype_in,
9454 code1, code2, multi_step_cvt,
9455 interm_types))
9457 /* Elements in a vector with the vect_used_by_reduction property cannot
9458 be reordered if the use chain with this property does not have the
9459 same operation. One such example is s += a * b, where elements
9460 in a and b cannot be reordered. Here we check that the vector defined
9461 by STMT is used only directly in the reduction statement. */
9462 tree lhs = gimple_assign_lhs (stmt);
9463 use_operand_p dummy;
9464 gimple *use_stmt;
9465 stmt_vec_info use_stmt_info = NULL;
9466 if (single_imm_use (lhs, &dummy, &use_stmt)
9467 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9468 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9469 return true;
9471 c1 = VEC_WIDEN_MULT_LO_EXPR;
9472 c2 = VEC_WIDEN_MULT_HI_EXPR;
9473 break;
9475 case DOT_PROD_EXPR:
9476 c1 = DOT_PROD_EXPR;
9477 c2 = DOT_PROD_EXPR;
9478 break;
9480 case SAD_EXPR:
9481 c1 = SAD_EXPR;
9482 c2 = SAD_EXPR;
9483 break;
9485 case VEC_WIDEN_MULT_EVEN_EXPR:
9486 /* Support the recursion induced just above. */
9487 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9488 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9489 break;
9491 case WIDEN_LSHIFT_EXPR:
9492 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9493 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9494 break;
9496 CASE_CONVERT:
9497 c1 = VEC_UNPACK_LO_EXPR;
9498 c2 = VEC_UNPACK_HI_EXPR;
9499 break;
9501 case FLOAT_EXPR:
9502 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9503 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9504 break;
9506 case FIX_TRUNC_EXPR:
9507 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9508 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9509 computing the operation. */
9510 return false;
9512 default:
9513 gcc_unreachable ();
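/* On big-endian targets the _LO/_HI halves are interpreted the other way
   round, so swap the two codes; the even/odd variants are unaffected by
   endianness.  */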
9516 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9517 std::swap (c1, c2);
9519 if (code == FIX_TRUNC_EXPR)
9521 /* The signedness is determined from the output operand. */
9522 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9523 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9525 else
9527 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9528 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9531 if (!optab1 || !optab2)
9532 return false;
9534 vec_mode = TYPE_MODE (vectype);
9535 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9536 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9537 return false;
9539 *code1 = c1;
9540 *code2 = c2;
9542 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9543 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9544 /* For scalar masks we may have different boolean
9545 vector types sharing the same QImode. Thus we
9546 add an additional check on the number of elements. */
9547 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9548 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9549 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9551 /* Check if it's a multi-step conversion that can be done using intermediate
9552 types. */
9554 prev_type = vectype;
9555 prev_mode = vec_mode;
9557 if (!CONVERT_EXPR_CODE_P (code))
9558 return false;
9560 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9561 intermediate steps in the promotion sequence. We try up to
9562 MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
9563 not. */
9564 interm_types->create (MAX_INTERM_CVT_STEPS);
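/* Each candidate step must both be reachable from PREV_MODE (the optabs
   for the previous type must produce results in INTERMEDIATE_MODE) and
   support the widening codes on INTERMEDIATE_TYPE itself, so that the
   next step can start from it.  */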
9565 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9567 intermediate_mode = insn_data[icode1].operand[0].mode;
9568 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9570 intermediate_type
9571 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9572 current_vector_size);
9573 if (intermediate_mode != TYPE_MODE (intermediate_type))
9574 return false;
9576 else
9577 intermediate_type
9578 = lang_hooks.types.type_for_mode (intermediate_mode,
9579 TYPE_UNSIGNED (prev_type));
9581 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9582 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9584 if (!optab3 || !optab4
9585 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9586 || insn_data[icode1].operand[0].mode != intermediate_mode
9587 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9588 || insn_data[icode2].operand[0].mode != intermediate_mode
9589 || ((icode1 = optab_handler (optab3, intermediate_mode))
9590 == CODE_FOR_nothing)
9591 || ((icode2 = optab_handler (optab4, intermediate_mode))
9592 == CODE_FOR_nothing))
9593 break;
9595 interm_types->quick_push (intermediate_type);
9596 (*multi_step_cvt)++;
9598 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9599 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9600 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9601 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9602 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9604 prev_type = intermediate_type;
9605 prev_mode = intermediate_mode;
9608 interm_types->release ();
9609 return false;
9613 /* Function supportable_narrowing_operation
9615 Check whether an operation represented by the code CODE is a
9616 narrowing operation that is supported by the target platform in
9617 vector form (i.e., when operating on arguments of type VECTYPE_IN
9618 and producing a result of type VECTYPE_OUT).
9620 Narrowing operations we currently support are NOP (CONVERT) and
9621 FIX_TRUNC. This function checks if these operations are supported by
9622 the target platform directly via vector tree-codes.
9624 Output:
9625 - CODE1 is the code of a vector operation to be used when
9626 vectorizing the operation, if available.
9627 - MULTI_STEP_CVT determines the number of required intermediate steps in
9628 case of multi-step conversion (like int->short->char - in that case
9629 MULTI_STEP_CVT will be 1).
9630 - INTERM_TYPES contains the intermediate type required to perform the
9631 narrowing operation (short in the above example). */
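/* For instance (illustrative): narrowing a vector of ints to a vector of
   chars on a target that only provides adjacent-width packs would return
   CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1 and
   INTERM_TYPES = { vector short }.  */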
9633 bool
9634 supportable_narrowing_operation (enum tree_code code,
9635 tree vectype_out, tree vectype_in,
9636 enum tree_code *code1, int *multi_step_cvt,
9637 vec<tree> *interm_types)
9639 machine_mode vec_mode;
9640 enum insn_code icode1;
9641 optab optab1, interm_optab;
9642 tree vectype = vectype_in;
9643 tree narrow_vectype = vectype_out;
9644 enum tree_code c1;
9645 tree intermediate_type, prev_type;
9646 machine_mode intermediate_mode, prev_mode;
9647 int i;
9648 bool uns;
9650 *multi_step_cvt = 0;
9651 switch (code)
9653 CASE_CONVERT:
9654 c1 = VEC_PACK_TRUNC_EXPR;
9655 break;
9657 case FIX_TRUNC_EXPR:
9658 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9659 break;
9661 case FLOAT_EXPR:
9662 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9663 tree code and optabs used for computing the operation. */
9664 return false;
9666 default:
9667 gcc_unreachable ();
9670 if (code == FIX_TRUNC_EXPR)
9671 /* The signedness is determined from the output operand. */
9672 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9673 else
9674 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9676 if (!optab1)
9677 return false;
9679 vec_mode = TYPE_MODE (vectype);
9680 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9681 return false;
9683 *code1 = c1;
9685 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9686 /* For scalar masks we may have different boolean
9687 vector types sharing the same QImode. Thus we
9688 add an additional check on the number of elements. */
9689 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9690 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9691 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9693 /* Check if it's a multi-step conversion that can be done using intermediate
9694 types. */
9695 prev_mode = vec_mode;
9696 prev_type = vectype;
9697 if (code == FIX_TRUNC_EXPR)
9698 uns = TYPE_UNSIGNED (vectype_out);
9699 else
9700 uns = TYPE_UNSIGNED (vectype);
9702 /* For a multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
9703 integer conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR
9704 is often more costly than the signed variant. */
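/* Concretely: if the same narrowing is also supported through the signed
   type with VECTYPE_OUT's mode and yields an identical result mode, do
   the float-to-integer step signed and let the remaining steps use
   signed packs.  */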
9705 if (code == FIX_TRUNC_EXPR && uns)
9707 enum insn_code icode2;
9709 intermediate_type
9710 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9711 interm_optab
9712 = optab_for_tree_code (c1, intermediate_type, optab_default);
9713 if (interm_optab != unknown_optab
9714 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9715 && insn_data[icode1].operand[0].mode
9716 == insn_data[icode2].operand[0].mode)
9718 uns = false;
9719 optab1 = interm_optab;
9720 icode1 = icode2;
9724 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9725 intermediate steps in the narrowing sequence. We try up to
9726 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
9727 interm_types->create (MAX_INTERM_CVT_STEPS);
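/* As in the widening case, each step must be reachable from PREV_MODE and
   must itself support a further VEC_PACK_TRUNC_EXPR on the intermediate
   type.  */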
9728 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9730 intermediate_mode = insn_data[icode1].operand[0].mode;
9731 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9733 intermediate_type
9734 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9735 current_vector_size);
9736 if (intermediate_mode != TYPE_MODE (intermediate_type))
9737 return false;
9739 else
9740 intermediate_type
9741 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9742 interm_optab
9743 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9744 optab_default);
9745 if (!interm_optab
9746 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9747 || insn_data[icode1].operand[0].mode != intermediate_mode
9748 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9749 == CODE_FOR_nothing))
9750 break;
9752 interm_types->quick_push (intermediate_type);
9753 (*multi_step_cvt)++;
9755 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9756 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9757 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9758 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9760 prev_mode = intermediate_mode;
9761 prev_type = intermediate_type;
9762 optab1 = interm_optab;
9765 interm_types->release ();
9766 return false;