[AArch64] PR target/79913: VEC_SELECT bugs in aarch64 patterns
[official-gcc.git] / gcc / tree-vect-stmts.c
blob9f28321280af53eb899b8381e48db566159f6fc8
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern, in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - control stmts in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that its of one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT cause it had already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
583 vect_mark_relevant (worklist, def_stmt, relevant, false);
584 return true;
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
592 for i...
593 for j...
594 1. T0 = i + j
595 2. T1 = a[T0]
597 3. j = j + 1
599 Stmt 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
604 bool
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
609 unsigned int nbbs = loop->num_nodes;
610 gimple_stmt_iterator si;
611 gimple *stmt;
612 unsigned int i;
613 stmt_vec_info stmt_vinfo;
614 basic_block bb;
615 gimple *phi;
616 bool live_p;
617 enum vect_relevant relevant;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE, vect_location,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec<gimple *, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
628 bb = bbs[i];
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 phi = gsi_stmt (si);
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
638 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
639 vect_mark_relevant (&worklist, phi, relevant, live_p);
641 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
643 stmt = gsi_stmt (si);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
650 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
651 vect_mark_relevant (&worklist, stmt, relevant, live_p);
655 /* 2. Process_worklist */
656 while (worklist.length () > 0)
658 use_operand_p use_p;
659 ssa_op_iter iter;
661 stmt = worklist.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
670 of STMT. */
671 stmt_vinfo = vinfo_for_stmt (stmt);
672 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
687 case vect_reduction_def:
688 gcc_assert (relevant != vect_unused_in_scope);
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_scope
691 && relevant != vect_used_by_reduction
692 && relevant != vect_used_only_live)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of reduction.\n");
697 return false;
699 break;
701 case vect_nested_cycle:
702 if (relevant != vect_unused_in_scope
703 && relevant != vect_used_in_outer_by_reduction
704 && relevant != vect_used_in_outer)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "unsupported use of nested cycle.\n");
710 return false;
712 break;
714 case vect_double_reduction_def:
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_by_reduction
717 && relevant != vect_used_only_live)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of double reduction.\n");
723 return false;
725 break;
727 default:
728 break;
731 if (is_pattern_stmt_p (stmt_vinfo))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt))
738 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
739 tree op = gimple_assign_rhs1 (stmt);
741 i = 1;
742 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
744 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
745 relevant, &worklist, false)
746 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
747 relevant, &worklist, false))
748 return false;
749 i = 2;
751 for (; i < gimple_num_ops (stmt); i++)
753 op = gimple_op (stmt, i);
754 if (TREE_CODE (op) == SSA_NAME
755 && !process_use (stmt, op, loop_vinfo, relevant,
756 &worklist, false))
757 return false;
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, relevant,
766 &worklist, false))
767 return false;
771 else
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, relevant,
776 &worklist, false))
777 return false;
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
782 gather_scatter_info gs_info;
783 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
784 gcc_unreachable ();
785 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
786 &worklist, true))
787 return false;
789 } /* while worklist */
791 return true;
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
801 void
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 stmt_vector_for_cost *prologue_cost_vec,
805 stmt_vector_for_cost *body_cost_vec)
807 int i;
808 int inside_cost = 0, prologue_cost = 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info))
812 return;
814 /* FORNOW: Assuming maximum 2 args per stmts. */
815 for (i = 0; i < 2; i++)
816 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
817 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
818 stmt_info, 0, vect_prologue);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
822 stmt_info, 0, vect_body);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE, vect_location,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
836 static void
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
838 enum vect_def_type *dt, int pwr)
840 int i, tmp;
841 int inside_cost = 0, prologue_cost = 0;
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
844 void *target_cost_data;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info))
848 return;
850 if (loop_vinfo)
851 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
852 else
853 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
855 for (i = 0; i < pwr + 1; i++)
857 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
858 (i + 1) : i;
859 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
860 vec_promote_demote, stmt_info, 0,
861 vect_body);
864 /* FORNOW: Assuming maximum 2 args per stmts. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
881 void
882 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
883 vect_memory_access_type memory_access_type,
884 enum vect_def_type dt, slp_tree slp_node,
885 stmt_vector_for_cost *prologue_cost_vec,
886 stmt_vector_for_cost *body_cost_vec)
888 unsigned int inside_cost = 0, prologue_cost = 0;
889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
891 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
893 if (dt == vect_constant_def || dt == vect_external_def)
894 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
895 stmt_info, 0, vect_prologue);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node && grouped_access_p)
901 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
902 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
914 if (first_stmt_p
915 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
917 /* Uses a high and low interleave or shuffle operations for each
918 needed permute. */
919 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
920 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
921 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
922 stmt_info, 0, vect_body);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE, vect_location,
926 "vect_model_store_cost: strided group_size = %d .\n",
927 group_size);
930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
931 /* Costs of the stores. */
932 if (memory_access_type == VMAT_ELEMENTWISE)
933 /* N scalar stores plus extracting the elements. */
934 inside_cost += record_stmt_cost (body_cost_vec,
935 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
936 scalar_store, stmt_info, 0, vect_body);
937 else
938 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
940 if (memory_access_type == VMAT_ELEMENTWISE
941 || memory_access_type == VMAT_STRIDED_SLP)
942 inside_cost += record_stmt_cost (body_cost_vec,
943 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
944 vec_to_scalar, stmt_info, 0, vect_body);
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE, vect_location,
948 "vect_model_store_cost: inside_cost = %d, "
949 "prologue_cost = %d .\n", inside_cost, prologue_cost);
953 /* Calculate cost of DR's memory access. */
954 void
955 vect_get_store_cost (struct data_reference *dr, int ncopies,
956 unsigned int *inside_cost,
957 stmt_vector_for_cost *body_cost_vec)
959 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
960 gimple *stmt = DR_STMT (dr);
961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
963 switch (alignment_support_scheme)
965 case dr_aligned:
967 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
968 vector_store, stmt_info, 0,
969 vect_body);
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: aligned.\n");
974 break;
977 case dr_unaligned_supported:
979 /* Here, we assign an additional cost for the unaligned store. */
980 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
981 unaligned_store, stmt_info,
982 DR_MISALIGNMENT (dr), vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: unaligned supported by "
986 "hardware.\n");
987 break;
990 case dr_unaligned_unsupported:
992 *inside_cost = VECT_MAX_COST;
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
996 "vect_model_store_cost: unsupported access.\n");
997 break;
1000 default:
1001 gcc_unreachable ();
1006 /* Function vect_model_load_cost
1008 Models cost for loads. In the case of grouped accesses, one access has
1009 the overhead of the grouped access attributed to it. Since unaligned
1010 accesses are supported for loads, we also account for the costs of the
1011 access scheme chosen. */
1013 void
1014 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1015 vect_memory_access_type memory_access_type,
1016 slp_tree slp_node,
1017 stmt_vector_for_cost *prologue_cost_vec,
1018 stmt_vector_for_cost *body_cost_vec)
1020 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1021 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1022 unsigned int inside_cost = 0, prologue_cost = 0;
1023 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1025 /* Grouped loads read all elements in the group at once,
1026 so we want the DR for the first statement. */
1027 if (!slp_node && grouped_access_p)
1029 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1030 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1033 /* True if we should include any once-per-group costs as well as
1034 the cost of the statement itself. For SLP we only get called
1035 once per group anyhow. */
1036 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1038 /* We assume that the cost of a single load-lanes instruction is
1039 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1040 access is instead being provided by a load-and-permute operation,
1041 include the cost of the permutes. */
1042 if (first_stmt_p
1043 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1045 /* Uses an even and odd extract operations or shuffle operations
1046 for each needed permute. */
1047 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1048 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1049 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1050 stmt_info, 0, vect_body);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE, vect_location,
1054 "vect_model_load_cost: strided group_size = %d .\n",
1055 group_size);
1058 /* The loads themselves. */
1059 if (memory_access_type == VMAT_ELEMENTWISE)
1061 /* N scalar loads plus gathering them into a vector. */
1062 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1063 inside_cost += record_stmt_cost (body_cost_vec,
1064 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1065 scalar_load, stmt_info, 0, vect_body);
1067 else
1068 vect_get_load_cost (dr, ncopies, first_stmt_p,
1069 &inside_cost, &prologue_cost,
1070 prologue_cost_vec, body_cost_vec, true);
1071 if (memory_access_type == VMAT_ELEMENTWISE
1072 || memory_access_type == VMAT_STRIDED_SLP)
1073 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1074 stmt_info, 0, vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 /* Calculate cost of DR's memory access. */
1084 void
1085 vect_get_load_cost (struct data_reference *dr, int ncopies,
1086 bool add_realign_cost, unsigned int *inside_cost,
1087 unsigned int *prologue_cost,
1088 stmt_vector_for_cost *prologue_cost_vec,
1089 stmt_vector_for_cost *body_cost_vec,
1090 bool record_prologue_costs)
1092 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1093 gimple *stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 switch (alignment_support_scheme)
1098 case dr_aligned:
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: aligned.\n");
1107 break;
1109 case dr_unaligned_supported:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1113 unaligned_load, stmt_info,
1114 DR_MISALIGNMENT (dr), vect_body);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: unaligned supported by "
1119 "hardware.\n");
1121 break;
1123 case dr_explicit_realign:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1126 vector_load, stmt_info, 0, vect_body);
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 vec_perm, stmt_info, 0, vect_body);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1132 prologue costs. */
1133 if (targetm.vectorize.builtin_mask_for_load)
1134 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: explicit realign\n");
1141 break;
1143 case dr_explicit_realign_optimized:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned software "
1148 "pipelined.\n");
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost && record_prologue_costs)
1159 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1160 vector_stmt, stmt_info,
1161 0, vect_prologue);
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1164 vector_stmt, stmt_info,
1165 0, vect_prologue);
1168 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1169 stmt_info, 0, vect_body);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: explicit realign optimized"
1176 "\n");
1178 break;
1181 case dr_unaligned_unsupported:
1183 *inside_cost = VECT_MAX_COST;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1187 "vect_model_load_cost: unsupported access.\n");
1188 break;
1191 default:
1192 gcc_unreachable ();
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1199 static void
1200 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1202 if (gsi)
1203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1204 else
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1209 if (loop_vinfo)
1211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1212 basic_block new_bb;
1213 edge pe;
1215 if (nested_in_vect_loop_p (loop, stmt))
1216 loop = loop->inner;
1218 pe = loop_preheader_edge (loop);
1219 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1220 gcc_assert (!new_bb);
1222 else
1224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1225 basic_block bb;
1226 gimple_stmt_iterator gsi_bb_start;
1228 gcc_assert (bb_vinfo);
1229 bb = BB_VINFO_BB (bb_vinfo);
1230 gsi_bb_start = gsi_after_labels (bb);
1231 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 vector type a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
1253 tree
1254 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1256 gimple *init_stmt;
1257 tree new_temp;
1259 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1260 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1262 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1263 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1265 /* Scalar boolean value should be transformed into
1266 all zeros or all ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type))
1269 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1270 tree false_val = build_zero_cst (TREE_TYPE (type));
1272 if (CONSTANT_CLASS_P (val))
1273 val = integer_zerop (val) ? false_val : true_val;
1274 else
1276 new_temp = make_ssa_name (TREE_TYPE (type));
1277 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1278 val, true_val, false_val);
1279 vect_init_vector_1 (stmt, init_stmt, gsi);
1280 val = new_temp;
1283 else if (CONSTANT_CLASS_P (val))
1284 val = fold_convert (TREE_TYPE (type), val);
1285 else
1287 new_temp = make_ssa_name (TREE_TYPE (type));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1289 init_stmt = gimple_build_assign (new_temp,
1290 fold_build1 (VIEW_CONVERT_EXPR,
1291 TREE_TYPE (type),
1292 val));
1293 else
1294 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1295 vect_init_vector_1 (stmt, init_stmt, gsi);
1296 val = new_temp;
1299 val = build_vector_from_val (type, val);
1302 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1303 init_stmt = gimple_build_assign (new_temp, val);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
1305 return new_temp;
1308 /* Function vect_get_vec_def_for_operand_1.
1310 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1311 DT that will be used in the vectorized stmt. */
1313 tree
1314 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1316 tree vec_oprnd;
1317 gimple *vec_stmt;
1318 stmt_vec_info def_stmt_info = NULL;
1320 switch (dt)
1322 /* operand is a constant or a loop invariant. */
1323 case vect_constant_def:
1324 case vect_external_def:
1325 /* Code should use vect_get_vec_def_for_operand. */
1326 gcc_unreachable ();
1328 /* operand is defined inside the loop. */
1329 case vect_internal_def:
1331 /* Get the def from the vectorized stmt. */
1332 def_stmt_info = vinfo_for_stmt (def_stmt);
1334 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1335 /* Get vectorized pattern statement. */
1336 if (!vec_stmt
1337 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1338 && !STMT_VINFO_RELEVANT (def_stmt_info))
1339 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1340 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1341 gcc_assert (vec_stmt);
1342 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1343 vec_oprnd = PHI_RESULT (vec_stmt);
1344 else if (is_gimple_call (vec_stmt))
1345 vec_oprnd = gimple_call_lhs (vec_stmt);
1346 else
1347 vec_oprnd = gimple_assign_lhs (vec_stmt);
1348 return vec_oprnd;
1351 /* operand is defined by a loop header phi - reduction */
1352 case vect_reduction_def:
1353 case vect_double_reduction_def:
1354 case vect_nested_cycle:
1355 /* Code should use get_initial_def_for_reduction. */
1356 gcc_unreachable ();
1358 /* operand is defined by loop-header phi - induction. */
1359 case vect_induction_def:
1361 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1363 /* Get the def from the vectorized stmt. */
1364 def_stmt_info = vinfo_for_stmt (def_stmt);
1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1366 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1367 vec_oprnd = PHI_RESULT (vec_stmt);
1368 else
1369 vec_oprnd = gimple_get_lhs (vec_stmt);
1370 return vec_oprnd;
1373 default:
1374 gcc_unreachable ();
1379 /* Function vect_get_vec_def_for_operand.
1381 OP is an operand in STMT. This function returns a (vector) def that will be
1382 used in the vectorized stmt for STMT.
1384 In the case that OP is an SSA_NAME which is defined in the loop, then
1385 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1387 In case OP is an invariant or constant, a new stmt that creates a vector def
1388 needs to be introduced. VECTYPE may be used to specify a required type for
1389 vector invariant. */
1391 tree
1392 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1394 gimple *def_stmt;
1395 enum vect_def_type dt;
1396 bool is_simple_use;
1397 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1398 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1400 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location,
1403 "vect_get_vec_def_for_operand: ");
1404 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1405 dump_printf (MSG_NOTE, "\n");
1408 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1409 gcc_assert (is_simple_use);
1410 if (def_stmt && dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1416 if (dt == vect_constant_def || dt == vect_external_def)
1418 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1419 tree vector_type;
1421 if (vectype)
1422 vector_type = vectype;
1423 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1424 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1425 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1426 else
1427 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1429 gcc_assert (vector_type);
1430 return vect_init_vector (stmt, op, vector_type, NULL);
1432 else
1433 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1437 /* Function vect_get_vec_def_for_stmt_copy
1439 Return a vector-def for an operand. This function is used when the
1440 vectorized stmt to be created (by the caller to this function) is a "copy"
1441 created in case the vectorized result cannot fit in one vector, and several
1442 copies of the vector-stmt are required. In this case the vector-def is
1443 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1444 of the stmt that defines VEC_OPRND.
1445 DT is the type of the vector def VEC_OPRND.
1447 Context:
1448 In case the vectorization factor (VF) is bigger than the number
1449 of elements that can fit in a vectype (nunits), we have to generate
1450 more than one vector stmt to vectorize the scalar stmt. This situation
1451 arises when there are multiple data-types operated upon in the loop; the
1452 smallest data-type determines the VF, and as a result, when vectorizing
1453 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1454 vector stmt (each computing a vector of 'nunits' results, and together
1455 computing 'VF' results in each iteration). This function is called when
1456 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1457 which VF=16 and nunits=4, so the number of copies required is 4):
1459 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1461 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1462 VS1.1: vx.1 = memref1 VS1.2
1463 VS1.2: vx.2 = memref2 VS1.3
1464 VS1.3: vx.3 = memref3
1466 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1467 VSnew.1: vz1 = vx.1 + ... VSnew.2
1468 VSnew.2: vz2 = vx.2 + ... VSnew.3
1469 VSnew.3: vz3 = vx.3 + ...
1471 The vectorization of S1 is explained in vectorizable_load.
1472 The vectorization of S2:
1473 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1474 the function 'vect_get_vec_def_for_operand' is called to
1475 get the relevant vector-def for each operand of S2. For operand x it
1476 returns the vector-def 'vx.0'.
1478 To create the remaining copies of the vector-stmt (VSnew.j), this
1479 function is called to get the relevant vector-def for each operand. It is
1480 obtained from the respective VS1.j stmt, which is recorded in the
1481 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1483 For example, to obtain the vector-def 'vx.1' in order to create the
1484 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1485 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1486 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1487 and return its def ('vx.1').
1488 Overall, to create the above sequence this function will be called 3 times:
1489 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1490 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1491 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1493 tree
1494 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1496 gimple *vec_stmt_for_operand;
1497 stmt_vec_info def_stmt_info;
1499 /* Do nothing; can reuse same def. */
1500 if (dt == vect_external_def || dt == vect_constant_def )
1501 return vec_oprnd;
1503 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1504 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1505 gcc_assert (def_stmt_info);
1506 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1507 gcc_assert (vec_stmt_for_operand);
1508 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1509 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1510 else
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1512 return vec_oprnd;
1516 /* Get vectorized definitions for the operands to create a copy of an original
1517 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1519 static void
1520 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1521 vec<tree> *vec_oprnds0,
1522 vec<tree> *vec_oprnds1)
1524 tree vec_oprnd = vec_oprnds0->pop ();
1526 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1527 vec_oprnds0->quick_push (vec_oprnd);
1529 if (vec_oprnds1 && vec_oprnds1->length ())
1531 vec_oprnd = vec_oprnds1->pop ();
1532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1533 vec_oprnds1->quick_push (vec_oprnd);
1538 /* Get vectorized definitions for OP0 and OP1.
1539 REDUC_INDEX is the index of reduction operand in case of reduction,
1540 and -1 otherwise. */
1542 void
1543 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1544 vec<tree> *vec_oprnds0,
1545 vec<tree> *vec_oprnds1,
1546 slp_tree slp_node, int reduc_index)
1548 if (slp_node)
1550 int nops = (op1 == NULL_TREE) ? 1 : 2;
1551 auto_vec<tree> ops (nops);
1552 auto_vec<vec<tree> > vec_defs (nops);
1554 ops.quick_push (op0);
1555 if (op1)
1556 ops.quick_push (op1);
1558 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1560 *vec_oprnds0 = vec_defs[0];
1561 if (op1)
1562 *vec_oprnds1 = vec_defs[1];
1564 else
1566 tree vec_oprnd;
1568 vec_oprnds0->create (1);
1569 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1570 vec_oprnds0->quick_push (vec_oprnd);
1572 if (op1)
1574 vec_oprnds1->create (1);
1575 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1576 vec_oprnds1->quick_push (vec_oprnd);
1582 /* Function vect_finish_stmt_generation.
1584 Insert a new stmt. */
1586 void
1587 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1588 gimple_stmt_iterator *gsi)
1590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1591 vec_info *vinfo = stmt_info->vinfo;
1593 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1595 if (!gsi_end_p (*gsi)
1596 && gimple_has_mem_ops (vec_stmt))
1598 gimple *at_stmt = gsi_stmt (*gsi);
1599 tree vuse = gimple_vuse (at_stmt);
1600 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1602 tree vdef = gimple_vdef (at_stmt);
1603 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1604 /* If we have an SSA vuse and insert a store, update virtual
1605 SSA form to avoid triggering the renamer. Do so only
1606 if we can easily see all uses - which is what almost always
1607 happens with the way vectorized stmts are inserted. */
1608 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1609 && ((is_gimple_assign (vec_stmt)
1610 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1611 || (is_gimple_call (vec_stmt)
1612 && !(gimple_call_flags (vec_stmt)
1613 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1615 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1616 gimple_set_vdef (vec_stmt, new_vdef);
1617 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1621 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1623 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1625 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1631 gimple_set_location (vec_stmt, gimple_location (stmt));
1633 /* While EH edges will generally prevent vectorization, stmt might
1634 e.g. be in a must-not-throw region. Ensure newly created stmts
1635 that could throw are part of the same region. */
1636 int lp_nr = lookup_stmt_eh_lp (stmt);
1637 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1638 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1641 /* We want to vectorize a call to combined function CFN with function
1642 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1643 as the types of all inputs. Check whether this is possible using
1644 an internal function, returning its code if so or IFN_LAST if not. */
1646 static internal_fn
1647 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1648 tree vectype_out, tree vectype_in)
1650 internal_fn ifn;
1651 if (internal_fn_p (cfn))
1652 ifn = as_internal_fn (cfn);
1653 else
1654 ifn = associated_internal_fn (fndecl);
1655 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1657 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1658 if (info.vectorizable)
1660 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1661 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1662 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1663 OPTIMIZE_FOR_SPEED))
1664 return ifn;
1667 return IFN_LAST;
1671 static tree permute_vec_elements (tree, tree, tree, gimple *,
1672 gimple_stmt_iterator *);
1674 /* STMT is a non-strided load or store, meaning that it accesses
1675 elements with a known constant step. Return -1 if that step
1676 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1678 static int
1679 compare_step_with_zero (gimple *stmt)
1681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1682 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1683 tree step;
1684 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1685 step = STMT_VINFO_DR_STEP (stmt_info);
1686 else
1687 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1688 return tree_int_cst_compare (step, size_zero_node);
1691 /* If the target supports a permute mask that reverses the elements in
1692 a vector of type VECTYPE, return that mask, otherwise return null. */
1694 static tree
1695 perm_mask_for_reverse (tree vectype)
1697 int i, nunits;
1698 unsigned char *sel;
1700 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1701 sel = XALLOCAVEC (unsigned char, nunits);
1703 for (i = 0; i < nunits; ++i)
1704 sel[i] = nunits - 1 - i;
1706 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1707 return NULL_TREE;
1708 return vect_gen_perm_mask_checked (vectype, sel);
1711 /* A subroutine of get_load_store_type, with a subset of the same
1712 arguments. Handle the case where STMT is part of a grouped load
1713 or store.
1715 For stores, the statements in the group are all consecutive
1716 and there is no gap at the end. For loads, the statements in the
1717 group might not be consecutive; there can be gaps between statements
1718 as well as at the end. */
1720 static bool
1721 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1722 vec_load_store_type vls_type,
1723 vect_memory_access_type *memory_access_type)
1725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1726 vec_info *vinfo = stmt_info->vinfo;
1727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1728 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1729 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1730 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1731 bool single_element_p = (stmt == first_stmt
1732 && !GROUP_NEXT_ELEMENT (stmt_info));
1733 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1734 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1736 /* True if the vectorized statements would access beyond the last
1737 statement in the group. */
1738 bool overrun_p = false;
1740 /* True if we can cope with such overrun by peeling for gaps, so that
1741 there is at least one final scalar iteration after the vector loop. */
1742 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1744 /* There can only be a gap at the end of the group if the stride is
1745 known at compile time. */
1746 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1748 /* Stores can't yet have gaps. */
1749 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1751 if (slp)
1753 if (STMT_VINFO_STRIDED_P (stmt_info))
1755 /* Try to use consecutive accesses of GROUP_SIZE elements,
1756 separated by the stride, until we have a complete vector.
1757 Fall back to scalar accesses if that isn't possible. */
1758 if (nunits % group_size == 0)
1759 *memory_access_type = VMAT_STRIDED_SLP;
1760 else
1761 *memory_access_type = VMAT_ELEMENTWISE;
1763 else
1765 overrun_p = loop_vinfo && gap != 0;
1766 if (overrun_p && vls_type != VLS_LOAD)
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "Grouped store with gaps requires"
1770 " non-consecutive accesses\n");
1771 return false;
1773 /* If the access is aligned an overrun is fine. */
1774 if (overrun_p
1775 && aligned_access_p
1776 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1777 overrun_p = false;
1778 if (overrun_p && !can_overrun_p)
1780 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1782 "Peeling for outer loop is not supported\n");
1783 return false;
1785 *memory_access_type = VMAT_CONTIGUOUS;
1788 else
1790 /* We can always handle this case using elementwise accesses,
1791 but see if something more efficient is available. */
1792 *memory_access_type = VMAT_ELEMENTWISE;
1794 /* If there is a gap at the end of the group then these optimizations
1795 would access excess elements in the last iteration. */
1796 bool would_overrun_p = (gap != 0);
1797 /* If the access is aligned an overrun is fine, but only if the
1798 overrun is not inside an unused vector (if the gap is as large
1799 or larger than a vector). */
1800 if (would_overrun_p
1801 && gap < nunits
1802 && aligned_access_p
1803 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1804 would_overrun_p = false;
1805 if (!STMT_VINFO_STRIDED_P (stmt_info)
1806 && (can_overrun_p || !would_overrun_p)
1807 && compare_step_with_zero (stmt) > 0)
1809 /* First try using LOAD/STORE_LANES. */
1810 if (vls_type == VLS_LOAD
1811 ? vect_load_lanes_supported (vectype, group_size)
1812 : vect_store_lanes_supported (vectype, group_size))
1814 *memory_access_type = VMAT_LOAD_STORE_LANES;
1815 overrun_p = would_overrun_p;
1818 /* If that fails, try using permuting loads. */
1819 if (*memory_access_type == VMAT_ELEMENTWISE
1820 && (vls_type == VLS_LOAD
1821 ? vect_grouped_load_supported (vectype, single_element_p,
1822 group_size)
1823 : vect_grouped_store_supported (vectype, group_size)))
1825 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1826 overrun_p = would_overrun_p;
1831 if (vls_type != VLS_LOAD && first_stmt == stmt)
1833 /* STMT is the leader of the group. Check the operands of all the
1834 stmts of the group. */
1835 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1836 while (next_stmt)
1838 gcc_assert (gimple_assign_single_p (next_stmt));
1839 tree op = gimple_assign_rhs1 (next_stmt);
1840 gimple *def_stmt;
1841 enum vect_def_type dt;
1842 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "use not simple.\n");
1847 return false;
1849 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1853 if (overrun_p)
1855 gcc_assert (can_overrun_p);
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858 "Data access with gaps requires scalar "
1859 "epilogue loop\n");
1860 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1863 return true;
1866 /* A subroutine of get_load_store_type, with a subset of the same
1867 arguments. Handle the case where STMT is a load or store that
1868 accesses consecutive elements with a negative step. */
1870 static vect_memory_access_type
1871 get_negative_load_store_type (gimple *stmt, tree vectype,
1872 vec_load_store_type vls_type,
1873 unsigned int ncopies)
1875 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1876 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1877 dr_alignment_support alignment_support_scheme;
1879 if (ncopies > 1)
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1883 "multiple types with negative step.\n");
1884 return VMAT_ELEMENTWISE;
1887 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1888 if (alignment_support_scheme != dr_aligned
1889 && alignment_support_scheme != dr_unaligned_supported)
1891 if (dump_enabled_p ())
1892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1893 "negative step but alignment required.\n");
1894 return VMAT_ELEMENTWISE;
1897 if (vls_type == VLS_STORE_INVARIANT)
1899 if (dump_enabled_p ())
1900 dump_printf_loc (MSG_NOTE, vect_location,
1901 "negative step with invariant source;"
1902 " no permute needed.\n");
1903 return VMAT_CONTIGUOUS_DOWN;
1906 if (!perm_mask_for_reverse (vectype))
1908 if (dump_enabled_p ())
1909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1910 "negative step and reversing not supported.\n");
1911 return VMAT_ELEMENTWISE;
1914 return VMAT_CONTIGUOUS_REVERSE;
1917 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1918 if there is a memory access type that the vectorized form can use,
1919 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1920 or scatters, fill in GS_INFO accordingly.
1922 SLP says whether we're performing SLP rather than loop vectorization.
1923 VECTYPE is the vector type that the vectorized statements will use.
1924 NCOPIES is the number of vector statements that will be needed. */
1926 static bool
1927 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1928 vec_load_store_type vls_type, unsigned int ncopies,
1929 vect_memory_access_type *memory_access_type,
1930 gather_scatter_info *gs_info)
1932 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1933 vec_info *vinfo = stmt_info->vinfo;
1934 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1935 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1937 *memory_access_type = VMAT_GATHER_SCATTER;
1938 gimple *def_stmt;
1939 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1940 gcc_unreachable ();
1941 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1942 &gs_info->offset_dt,
1943 &gs_info->offset_vectype))
1945 if (dump_enabled_p ())
1946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1947 "%s index use not simple.\n",
1948 vls_type == VLS_LOAD ? "gather" : "scatter");
1949 return false;
1952 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1954 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1955 memory_access_type))
1956 return false;
1958 else if (STMT_VINFO_STRIDED_P (stmt_info))
1960 gcc_assert (!slp);
1961 *memory_access_type = VMAT_ELEMENTWISE;
1963 else
1965 int cmp = compare_step_with_zero (stmt);
1966 if (cmp < 0)
1967 *memory_access_type = get_negative_load_store_type
1968 (stmt, vectype, vls_type, ncopies);
1969 else if (cmp == 0)
1971 gcc_assert (vls_type == VLS_LOAD);
1972 *memory_access_type = VMAT_INVARIANT;
1974 else
1975 *memory_access_type = VMAT_CONTIGUOUS;
1978 /* FIXME: At the moment the cost model seems to underestimate the
1979 cost of using elementwise accesses. This check preserves the
1980 traditional behavior until that can be fixed. */
1981 if (*memory_access_type == VMAT_ELEMENTWISE
1982 && !STMT_VINFO_STRIDED_P (stmt_info))
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1986 "not falling back to elementwise accesses\n");
1987 return false;
1989 return true;
1992 /* Function vectorizable_mask_load_store.
1994 Check if STMT performs a conditional load or store that can be vectorized.
1995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1996 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1999 static bool
2000 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2001 gimple **vec_stmt, slp_tree slp_node)
2003 tree vec_dest = NULL;
2004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2005 stmt_vec_info prev_stmt_info;
2006 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2007 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2008 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2009 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2010 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2011 tree rhs_vectype = NULL_TREE;
2012 tree mask_vectype;
2013 tree elem_type;
2014 gimple *new_stmt;
2015 tree dummy;
2016 tree dataref_ptr = NULL_TREE;
2017 gimple *ptr_incr;
2018 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2019 int ncopies;
2020 int i, j;
2021 bool inv_p;
2022 gather_scatter_info gs_info;
2023 vec_load_store_type vls_type;
2024 tree mask;
2025 gimple *def_stmt;
2026 enum vect_def_type dt;
2028 if (slp_node != NULL)
2029 return false;
2031 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2032 gcc_assert (ncopies >= 1);
2034 mask = gimple_call_arg (stmt, 2);
2036 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2037 return false;
2039 /* FORNOW. This restriction should be relaxed. */
2040 if (nested_in_vect_loop && ncopies > 1)
2042 if (dump_enabled_p ())
2043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2044 "multiple types in nested loop.");
2045 return false;
2048 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2049 return false;
2051 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2052 && ! vec_stmt)
2053 return false;
2055 if (!STMT_VINFO_DATA_REF (stmt_info))
2056 return false;
2058 elem_type = TREE_TYPE (vectype);
2060 if (TREE_CODE (mask) != SSA_NAME)
2061 return false;
2063 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2064 return false;
2066 if (!mask_vectype)
2067 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2069 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2070 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2071 return false;
2073 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2075 tree rhs = gimple_call_arg (stmt, 3);
2076 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2077 return false;
2078 if (dt == vect_constant_def || dt == vect_external_def)
2079 vls_type = VLS_STORE_INVARIANT;
2080 else
2081 vls_type = VLS_STORE;
2083 else
2084 vls_type = VLS_LOAD;
2086 vect_memory_access_type memory_access_type;
2087 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2088 &memory_access_type, &gs_info))
2089 return false;
2091 if (memory_access_type == VMAT_GATHER_SCATTER)
2093 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2094 tree masktype
2095 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2096 if (TREE_CODE (masktype) == INTEGER_TYPE)
2098 if (dump_enabled_p ())
2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2100 "masked gather with integer mask not supported.");
2101 return false;
2104 else if (memory_access_type != VMAT_CONTIGUOUS)
2106 if (dump_enabled_p ())
2107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2108 "unsupported access type for masked %s.\n",
2109 vls_type == VLS_LOAD ? "load" : "store");
2110 return false;
2112 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2113 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2114 TYPE_MODE (mask_vectype),
2115 vls_type == VLS_LOAD)
2116 || (rhs_vectype
2117 && !useless_type_conversion_p (vectype, rhs_vectype)))
2118 return false;
2120 if (!vec_stmt) /* transformation not required. */
2122 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2123 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2124 if (vls_type == VLS_LOAD)
2125 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2126 NULL, NULL, NULL);
2127 else
2128 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2129 dt, NULL, NULL, NULL);
2130 return true;
2132 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2134 /** Transform. **/
2136 if (memory_access_type == VMAT_GATHER_SCATTER)
2138 tree vec_oprnd0 = NULL_TREE, op;
2139 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2140 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2141 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2142 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2143 tree mask_perm_mask = NULL_TREE;
2144 edge pe = loop_preheader_edge (loop);
2145 gimple_seq seq;
2146 basic_block new_bb;
2147 enum { NARROW, NONE, WIDEN } modifier;
2148 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2150 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2151 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2152 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2153 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2154 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2155 scaletype = TREE_VALUE (arglist);
2156 gcc_checking_assert (types_compatible_p (srctype, rettype)
2157 && types_compatible_p (srctype, masktype));
2159 if (nunits == gather_off_nunits)
2160 modifier = NONE;
2161 else if (nunits == gather_off_nunits / 2)
2163 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2164 modifier = WIDEN;
2166 for (i = 0; i < gather_off_nunits; ++i)
2167 sel[i] = i | nunits;
2169 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2171 else if (nunits == gather_off_nunits * 2)
2173 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2174 modifier = NARROW;
2176 for (i = 0; i < nunits; ++i)
2177 sel[i] = i < gather_off_nunits
2178 ? i : i + nunits - gather_off_nunits;
2180 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2181 ncopies *= 2;
2182 for (i = 0; i < nunits; ++i)
2183 sel[i] = i | gather_off_nunits;
2184 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2186 else
2187 gcc_unreachable ();
2189 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2191 ptr = fold_convert (ptrtype, gs_info.base);
2192 if (!is_gimple_min_invariant (ptr))
2194 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2195 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2196 gcc_assert (!new_bb);
2199 scale = build_int_cst (scaletype, gs_info.scale);
2201 prev_stmt_info = NULL;
2202 for (j = 0; j < ncopies; ++j)
2204 if (modifier == WIDEN && (j & 1))
2205 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2206 perm_mask, stmt, gsi);
2207 else if (j == 0)
2208 op = vec_oprnd0
2209 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2210 else
2211 op = vec_oprnd0
2212 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2214 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2216 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2217 == TYPE_VECTOR_SUBPARTS (idxtype));
2218 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2219 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2220 new_stmt
2221 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2223 op = var;
2226 if (mask_perm_mask && (j & 1))
2227 mask_op = permute_vec_elements (mask_op, mask_op,
2228 mask_perm_mask, stmt, gsi);
2229 else
2231 if (j == 0)
2232 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2233 else
2235 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2236 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2239 mask_op = vec_mask;
2240 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2242 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2243 == TYPE_VECTOR_SUBPARTS (masktype));
2244 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2245 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2246 new_stmt
2247 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2249 mask_op = var;
2253 new_stmt
2254 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2255 scale);
2257 if (!useless_type_conversion_p (vectype, rettype))
2259 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2260 == TYPE_VECTOR_SUBPARTS (rettype));
2261 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2262 gimple_call_set_lhs (new_stmt, op);
2263 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2264 var = make_ssa_name (vec_dest);
2265 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2266 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2268 else
2270 var = make_ssa_name (vec_dest, new_stmt);
2271 gimple_call_set_lhs (new_stmt, var);
2274 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2276 if (modifier == NARROW)
2278 if ((j & 1) == 0)
2280 prev_res = var;
2281 continue;
2283 var = permute_vec_elements (prev_res, var,
2284 perm_mask, stmt, gsi);
2285 new_stmt = SSA_NAME_DEF_STMT (var);
2288 if (prev_stmt_info == NULL)
2289 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2290 else
2291 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2292 prev_stmt_info = vinfo_for_stmt (new_stmt);
2295 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2296 from the IL. */
2297 if (STMT_VINFO_RELATED_STMT (stmt_info))
2299 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2300 stmt_info = vinfo_for_stmt (stmt);
2302 tree lhs = gimple_call_lhs (stmt);
2303 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2304 set_vinfo_for_stmt (new_stmt, stmt_info);
2305 set_vinfo_for_stmt (stmt, NULL);
2306 STMT_VINFO_STMT (stmt_info) = new_stmt;
2307 gsi_replace (gsi, new_stmt, true);
2308 return true;
2310 else if (vls_type != VLS_LOAD)
2312 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2313 prev_stmt_info = NULL;
2314 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2315 for (i = 0; i < ncopies; i++)
2317 unsigned align, misalign;
2319 if (i == 0)
2321 tree rhs = gimple_call_arg (stmt, 3);
2322 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2323 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2324 /* We should have catched mismatched types earlier. */
2325 gcc_assert (useless_type_conversion_p (vectype,
2326 TREE_TYPE (vec_rhs)));
2327 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2328 NULL_TREE, &dummy, gsi,
2329 &ptr_incr, false, &inv_p);
2330 gcc_assert (!inv_p);
2332 else
2334 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2335 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2336 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2337 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2338 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2339 TYPE_SIZE_UNIT (vectype));
2342 align = TYPE_ALIGN_UNIT (vectype);
2343 if (aligned_access_p (dr))
2344 misalign = 0;
2345 else if (DR_MISALIGNMENT (dr) == -1)
2347 align = TYPE_ALIGN_UNIT (elem_type);
2348 misalign = 0;
2350 else
2351 misalign = DR_MISALIGNMENT (dr);
2352 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2353 misalign);
2354 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2355 misalign ? least_bit_hwi (misalign) : align);
2356 new_stmt
2357 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2358 ptr, vec_mask, vec_rhs);
2359 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2360 if (i == 0)
2361 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2362 else
2363 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2364 prev_stmt_info = vinfo_for_stmt (new_stmt);
2367 else
2369 tree vec_mask = NULL_TREE;
2370 prev_stmt_info = NULL;
2371 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2372 for (i = 0; i < ncopies; i++)
2374 unsigned align, misalign;
2376 if (i == 0)
2378 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2379 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2380 NULL_TREE, &dummy, gsi,
2381 &ptr_incr, false, &inv_p);
2382 gcc_assert (!inv_p);
2384 else
2386 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2387 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2388 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2389 TYPE_SIZE_UNIT (vectype));
2392 align = TYPE_ALIGN_UNIT (vectype);
2393 if (aligned_access_p (dr))
2394 misalign = 0;
2395 else if (DR_MISALIGNMENT (dr) == -1)
2397 align = TYPE_ALIGN_UNIT (elem_type);
2398 misalign = 0;
2400 else
2401 misalign = DR_MISALIGNMENT (dr);
2402 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2403 misalign);
2404 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2405 misalign ? least_bit_hwi (misalign) : align);
2406 new_stmt
2407 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2408 ptr, vec_mask);
2409 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2410 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2411 if (i == 0)
2412 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2413 else
2414 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2415 prev_stmt_info = vinfo_for_stmt (new_stmt);
2419 if (vls_type == VLS_LOAD)
2421 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2422 from the IL. */
2423 if (STMT_VINFO_RELATED_STMT (stmt_info))
2425 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2426 stmt_info = vinfo_for_stmt (stmt);
2428 tree lhs = gimple_call_lhs (stmt);
2429 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2430 set_vinfo_for_stmt (new_stmt, stmt_info);
2431 set_vinfo_for_stmt (stmt, NULL);
2432 STMT_VINFO_STMT (stmt_info) = new_stmt;
2433 gsi_replace (gsi, new_stmt, true);
2436 return true;
2439 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2441 static bool
2442 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2443 gimple **vec_stmt, slp_tree slp_node,
2444 tree vectype_in, enum vect_def_type *dt)
2446 tree op, vectype;
2447 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2448 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2449 unsigned ncopies, nunits;
2451 op = gimple_call_arg (stmt, 0);
2452 vectype = STMT_VINFO_VECTYPE (stmt_info);
2453 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2455 /* Multiple types in SLP are handled by creating the appropriate number of
2456 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2457 case of SLP. */
2458 if (slp_node)
2459 ncopies = 1;
2460 else
2461 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2463 gcc_assert (ncopies >= 1);
2465 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2466 if (! char_vectype)
2467 return false;
2469 unsigned char *elts
2470 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2471 unsigned char *elt = elts;
2472 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2473 for (unsigned i = 0; i < nunits; ++i)
2474 for (unsigned j = 0; j < word_bytes; ++j)
2475 *elt++ = (i + 1) * word_bytes - j - 1;
2477 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2478 return false;
2480 if (! vec_stmt)
2482 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2485 "\n");
2486 if (! PURE_SLP_STMT (stmt_info))
2488 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2489 1, vector_stmt, stmt_info, 0, vect_prologue);
2490 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2491 ncopies, vec_perm, stmt_info, 0, vect_body);
2493 return true;
2496 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2497 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2498 telts[i] = build_int_cst (char_type_node, elts[i]);
2499 tree bswap_vconst = build_vector (char_vectype, telts);
2501 /* Transform. */
2502 vec<tree> vec_oprnds = vNULL;
2503 gimple *new_stmt = NULL;
2504 stmt_vec_info prev_stmt_info = NULL;
2505 for (unsigned j = 0; j < ncopies; j++)
2507 /* Handle uses. */
2508 if (j == 0)
2509 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2510 else
2511 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2513 /* Arguments are ready. create the new vector stmt. */
2514 unsigned i;
2515 tree vop;
2516 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2518 tree tem = make_ssa_name (char_vectype);
2519 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2520 char_vectype, vop));
2521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2522 tree tem2 = make_ssa_name (char_vectype);
2523 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2524 tem, tem, bswap_vconst);
2525 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2526 tem = make_ssa_name (vectype);
2527 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2528 vectype, tem2));
2529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2530 if (slp_node)
2531 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2534 if (slp_node)
2535 continue;
2537 if (j == 0)
2538 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2539 else
2540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2542 prev_stmt_info = vinfo_for_stmt (new_stmt);
2545 vec_oprnds.release ();
2546 return true;
2549 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2550 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2551 in a single step. On success, store the binary pack code in
2552 *CONVERT_CODE. */
2554 static bool
2555 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2556 tree_code *convert_code)
2558 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2559 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2560 return false;
2562 tree_code code;
2563 int multi_step_cvt = 0;
2564 auto_vec <tree, 8> interm_types;
2565 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2566 &code, &multi_step_cvt,
2567 &interm_types)
2568 || multi_step_cvt)
2569 return false;
2571 *convert_code = code;
2572 return true;
2575 /* Function vectorizable_call.
2577 Check if GS performs a function call that can be vectorized.
2578 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2579 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2580 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2582 static bool
2583 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2584 slp_tree slp_node)
2586 gcall *stmt;
2587 tree vec_dest;
2588 tree scalar_dest;
2589 tree op, type;
2590 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2591 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2592 tree vectype_out, vectype_in;
2593 int nunits_in;
2594 int nunits_out;
2595 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2596 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2597 vec_info *vinfo = stmt_info->vinfo;
2598 tree fndecl, new_temp, rhs_type;
2599 gimple *def_stmt;
2600 enum vect_def_type dt[3]
2601 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2602 gimple *new_stmt = NULL;
2603 int ncopies, j;
2604 vec<tree> vargs = vNULL;
2605 enum { NARROW, NONE, WIDEN } modifier;
2606 size_t i, nargs;
2607 tree lhs;
2609 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2610 return false;
2612 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2613 && ! vec_stmt)
2614 return false;
2616 /* Is GS a vectorizable call? */
2617 stmt = dyn_cast <gcall *> (gs);
2618 if (!stmt)
2619 return false;
2621 if (gimple_call_internal_p (stmt)
2622 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2623 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2624 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2625 slp_node);
2627 if (gimple_call_lhs (stmt) == NULL_TREE
2628 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2629 return false;
2631 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2633 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2635 /* Process function arguments. */
2636 rhs_type = NULL_TREE;
2637 vectype_in = NULL_TREE;
2638 nargs = gimple_call_num_args (stmt);
2640 /* Bail out if the function has more than three arguments, we do not have
2641 interesting builtin functions to vectorize with more than two arguments
2642 except for fma. No arguments is also not good. */
2643 if (nargs == 0 || nargs > 3)
2644 return false;
2646 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2647 if (gimple_call_internal_p (stmt)
2648 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2650 nargs = 0;
2651 rhs_type = unsigned_type_node;
2654 for (i = 0; i < nargs; i++)
2656 tree opvectype;
2658 op = gimple_call_arg (stmt, i);
2660 /* We can only handle calls with arguments of the same type. */
2661 if (rhs_type
2662 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2664 if (dump_enabled_p ())
2665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2666 "argument types differ.\n");
2667 return false;
2669 if (!rhs_type)
2670 rhs_type = TREE_TYPE (op);
2672 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2674 if (dump_enabled_p ())
2675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2676 "use not simple.\n");
2677 return false;
2680 if (!vectype_in)
2681 vectype_in = opvectype;
2682 else if (opvectype
2683 && opvectype != vectype_in)
2685 if (dump_enabled_p ())
2686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2687 "argument vector types differ.\n");
2688 return false;
2691 /* If all arguments are external or constant defs use a vector type with
2692 the same size as the output vector type. */
2693 if (!vectype_in)
2694 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2695 if (vec_stmt)
2696 gcc_assert (vectype_in);
2697 if (!vectype_in)
2699 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2702 "no vectype for scalar type ");
2703 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2704 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2707 return false;
2710 /* FORNOW */
2711 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2712 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2713 if (nunits_in == nunits_out / 2)
2714 modifier = NARROW;
2715 else if (nunits_out == nunits_in)
2716 modifier = NONE;
2717 else if (nunits_out == nunits_in / 2)
2718 modifier = WIDEN;
2719 else
2720 return false;
2722 /* We only handle functions that do not read or clobber memory. */
2723 if (gimple_vuse (stmt))
2725 if (dump_enabled_p ())
2726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2727 "function reads from or writes to memory.\n");
2728 return false;
2731 /* For now, we only vectorize functions if a target specific builtin
2732 is available. TODO -- in some cases, it might be profitable to
2733 insert the calls for pieces of the vector, in order to be able
2734 to vectorize other operations in the loop. */
2735 fndecl = NULL_TREE;
2736 internal_fn ifn = IFN_LAST;
2737 combined_fn cfn = gimple_call_combined_fn (stmt);
2738 tree callee = gimple_call_fndecl (stmt);
2740 /* First try using an internal function. */
2741 tree_code convert_code = ERROR_MARK;
2742 if (cfn != CFN_LAST
2743 && (modifier == NONE
2744 || (modifier == NARROW
2745 && simple_integer_narrowing (vectype_out, vectype_in,
2746 &convert_code))))
2747 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2748 vectype_in);
2750 /* If that fails, try asking for a target-specific built-in function. */
2751 if (ifn == IFN_LAST)
2753 if (cfn != CFN_LAST)
2754 fndecl = targetm.vectorize.builtin_vectorized_function
2755 (cfn, vectype_out, vectype_in);
2756 else
2757 fndecl = targetm.vectorize.builtin_md_vectorized_function
2758 (callee, vectype_out, vectype_in);
2761 if (ifn == IFN_LAST && !fndecl)
2763 if (cfn == CFN_GOMP_SIMD_LANE
2764 && !slp_node
2765 && loop_vinfo
2766 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2767 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2768 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2769 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2771 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2772 { 0, 1, 2, ... vf - 1 } vector. */
2773 gcc_assert (nargs == 0);
2775 else if (modifier == NONE
2776 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2777 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2778 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2779 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2780 vectype_in, dt);
2781 else
2783 if (dump_enabled_p ())
2784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2785 "function is not vectorizable.\n");
2786 return false;
2790 if (slp_node)
2791 ncopies = 1;
2792 else if (modifier == NARROW && ifn == IFN_LAST)
2793 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2794 else
2795 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2797 /* Sanity check: make sure that at least one copy of the vectorized stmt
2798 needs to be generated. */
2799 gcc_assert (ncopies >= 1);
2801 if (!vec_stmt) /* transformation not required. */
2803 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2804 if (dump_enabled_p ())
2805 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2806 "\n");
2807 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2808 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2809 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2810 vec_promote_demote, stmt_info, 0, vect_body);
2812 return true;
2815 /** Transform. **/
2817 if (dump_enabled_p ())
2818 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2820 /* Handle def. */
2821 scalar_dest = gimple_call_lhs (stmt);
2822 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2824 prev_stmt_info = NULL;
2825 if (modifier == NONE || ifn != IFN_LAST)
2827 tree prev_res = NULL_TREE;
2828 for (j = 0; j < ncopies; ++j)
2830 /* Build argument list for the vectorized call. */
2831 if (j == 0)
2832 vargs.create (nargs);
2833 else
2834 vargs.truncate (0);
2836 if (slp_node)
2838 auto_vec<vec<tree> > vec_defs (nargs);
2839 vec<tree> vec_oprnds0;
2841 for (i = 0; i < nargs; i++)
2842 vargs.quick_push (gimple_call_arg (stmt, i));
2843 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2844 vec_oprnds0 = vec_defs[0];
2846 /* Arguments are ready. Create the new vector stmt. */
2847 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2849 size_t k;
2850 for (k = 0; k < nargs; k++)
2852 vec<tree> vec_oprndsk = vec_defs[k];
2853 vargs[k] = vec_oprndsk[i];
2855 if (modifier == NARROW)
2857 tree half_res = make_ssa_name (vectype_in);
2858 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2859 gimple_call_set_lhs (new_stmt, half_res);
2860 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2861 if ((i & 1) == 0)
2863 prev_res = half_res;
2864 continue;
2866 new_temp = make_ssa_name (vec_dest);
2867 new_stmt = gimple_build_assign (new_temp, convert_code,
2868 prev_res, half_res);
2870 else
2872 if (ifn != IFN_LAST)
2873 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2874 else
2875 new_stmt = gimple_build_call_vec (fndecl, vargs);
2876 new_temp = make_ssa_name (vec_dest, new_stmt);
2877 gimple_call_set_lhs (new_stmt, new_temp);
2879 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2880 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2883 for (i = 0; i < nargs; i++)
2885 vec<tree> vec_oprndsi = vec_defs[i];
2886 vec_oprndsi.release ();
2888 continue;
2891 for (i = 0; i < nargs; i++)
2893 op = gimple_call_arg (stmt, i);
2894 if (j == 0)
2895 vec_oprnd0
2896 = vect_get_vec_def_for_operand (op, stmt);
2897 else
2899 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2900 vec_oprnd0
2901 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2904 vargs.quick_push (vec_oprnd0);
2907 if (gimple_call_internal_p (stmt)
2908 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2910 tree *v = XALLOCAVEC (tree, nunits_out);
2911 int k;
2912 for (k = 0; k < nunits_out; ++k)
2913 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2914 tree cst = build_vector (vectype_out, v);
2915 tree new_var
2916 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2917 gimple *init_stmt = gimple_build_assign (new_var, cst);
2918 vect_init_vector_1 (stmt, init_stmt, NULL);
2919 new_temp = make_ssa_name (vec_dest);
2920 new_stmt = gimple_build_assign (new_temp, new_var);
2922 else if (modifier == NARROW)
2924 tree half_res = make_ssa_name (vectype_in);
2925 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2926 gimple_call_set_lhs (new_stmt, half_res);
2927 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2928 if ((j & 1) == 0)
2930 prev_res = half_res;
2931 continue;
2933 new_temp = make_ssa_name (vec_dest);
2934 new_stmt = gimple_build_assign (new_temp, convert_code,
2935 prev_res, half_res);
2937 else
2939 if (ifn != IFN_LAST)
2940 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2941 else
2942 new_stmt = gimple_build_call_vec (fndecl, vargs);
2943 new_temp = make_ssa_name (vec_dest, new_stmt);
2944 gimple_call_set_lhs (new_stmt, new_temp);
2946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2948 if (j == (modifier == NARROW ? 1 : 0))
2949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2950 else
2951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2953 prev_stmt_info = vinfo_for_stmt (new_stmt);
2956 else if (modifier == NARROW)
2958 for (j = 0; j < ncopies; ++j)
2960 /* Build argument list for the vectorized call. */
2961 if (j == 0)
2962 vargs.create (nargs * 2);
2963 else
2964 vargs.truncate (0);
2966 if (slp_node)
2968 auto_vec<vec<tree> > vec_defs (nargs);
2969 vec<tree> vec_oprnds0;
2971 for (i = 0; i < nargs; i++)
2972 vargs.quick_push (gimple_call_arg (stmt, i));
2973 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2974 vec_oprnds0 = vec_defs[0];
2976 /* Arguments are ready. Create the new vector stmt. */
2977 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2979 size_t k;
2980 vargs.truncate (0);
2981 for (k = 0; k < nargs; k++)
2983 vec<tree> vec_oprndsk = vec_defs[k];
2984 vargs.quick_push (vec_oprndsk[i]);
2985 vargs.quick_push (vec_oprndsk[i + 1]);
2987 if (ifn != IFN_LAST)
2988 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2989 else
2990 new_stmt = gimple_build_call_vec (fndecl, vargs);
2991 new_temp = make_ssa_name (vec_dest, new_stmt);
2992 gimple_call_set_lhs (new_stmt, new_temp);
2993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2994 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2997 for (i = 0; i < nargs; i++)
2999 vec<tree> vec_oprndsi = vec_defs[i];
3000 vec_oprndsi.release ();
3002 continue;
3005 for (i = 0; i < nargs; i++)
3007 op = gimple_call_arg (stmt, i);
3008 if (j == 0)
3010 vec_oprnd0
3011 = vect_get_vec_def_for_operand (op, stmt);
3012 vec_oprnd1
3013 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3015 else
3017 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3018 vec_oprnd0
3019 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3020 vec_oprnd1
3021 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3024 vargs.quick_push (vec_oprnd0);
3025 vargs.quick_push (vec_oprnd1);
3028 new_stmt = gimple_build_call_vec (fndecl, vargs);
3029 new_temp = make_ssa_name (vec_dest, new_stmt);
3030 gimple_call_set_lhs (new_stmt, new_temp);
3031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3033 if (j == 0)
3034 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3035 else
3036 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3038 prev_stmt_info = vinfo_for_stmt (new_stmt);
3041 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3043 else
3044 /* No current target implements this case. */
3045 return false;
3047 vargs.release ();
3049 /* The call in STMT might prevent it from being removed in dce.
3050 We however cannot remove it here, due to the way the ssa name
3051 it defines is mapped to the new definition. So just replace
3052 rhs of the statement with something harmless. */
3054 if (slp_node)
3055 return true;
3057 type = TREE_TYPE (scalar_dest);
3058 if (is_pattern_stmt_p (stmt_info))
3059 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3060 else
3061 lhs = gimple_call_lhs (stmt);
3063 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3064 set_vinfo_for_stmt (new_stmt, stmt_info);
3065 set_vinfo_for_stmt (stmt, NULL);
3066 STMT_VINFO_STMT (stmt_info) = new_stmt;
3067 gsi_replace (gsi, new_stmt, false);
3069 return true;
3073 struct simd_call_arg_info
3075 tree vectype;
3076 tree op;
3077 enum vect_def_type dt;
3078 HOST_WIDE_INT linear_step;
3079 unsigned int align;
3080 bool simd_lane_linear;
3083 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3084 is linear within simd lane (but not within whole loop), note it in
3085 *ARGINFO. */
3087 static void
3088 vect_simd_lane_linear (tree op, struct loop *loop,
3089 struct simd_call_arg_info *arginfo)
3091 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3093 if (!is_gimple_assign (def_stmt)
3094 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3095 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3096 return;
3098 tree base = gimple_assign_rhs1 (def_stmt);
3099 HOST_WIDE_INT linear_step = 0;
3100 tree v = gimple_assign_rhs2 (def_stmt);
3101 while (TREE_CODE (v) == SSA_NAME)
3103 tree t;
3104 def_stmt = SSA_NAME_DEF_STMT (v);
3105 if (is_gimple_assign (def_stmt))
3106 switch (gimple_assign_rhs_code (def_stmt))
3108 case PLUS_EXPR:
3109 t = gimple_assign_rhs2 (def_stmt);
3110 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3111 return;
3112 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3113 v = gimple_assign_rhs1 (def_stmt);
3114 continue;
3115 case MULT_EXPR:
3116 t = gimple_assign_rhs2 (def_stmt);
3117 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3118 return;
3119 linear_step = tree_to_shwi (t);
3120 v = gimple_assign_rhs1 (def_stmt);
3121 continue;
3122 CASE_CONVERT:
3123 t = gimple_assign_rhs1 (def_stmt);
3124 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3125 || (TYPE_PRECISION (TREE_TYPE (v))
3126 < TYPE_PRECISION (TREE_TYPE (t))))
3127 return;
3128 if (!linear_step)
3129 linear_step = 1;
3130 v = t;
3131 continue;
3132 default:
3133 return;
3135 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3136 && loop->simduid
3137 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3138 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3139 == loop->simduid))
3141 if (!linear_step)
3142 linear_step = 1;
3143 arginfo->linear_step = linear_step;
3144 arginfo->op = base;
3145 arginfo->simd_lane_linear = true;
3146 return;
3151 /* Function vectorizable_simd_clone_call.
3153 Check if STMT performs a function call that can be vectorized
3154 by calling a simd clone of the function.
3155 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3156 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3157 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3159 static bool
3160 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3161 gimple **vec_stmt, slp_tree slp_node)
3163 tree vec_dest;
3164 tree scalar_dest;
3165 tree op, type;
3166 tree vec_oprnd0 = NULL_TREE;
3167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3168 tree vectype;
3169 unsigned int nunits;
3170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3171 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3172 vec_info *vinfo = stmt_info->vinfo;
3173 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3174 tree fndecl, new_temp;
3175 gimple *def_stmt;
3176 gimple *new_stmt = NULL;
3177 int ncopies, j;
3178 auto_vec<simd_call_arg_info> arginfo;
3179 vec<tree> vargs = vNULL;
3180 size_t i, nargs;
3181 tree lhs, rtype, ratype;
3182 vec<constructor_elt, va_gc> *ret_ctor_elts;
3184 /* Is STMT a vectorizable call? */
3185 if (!is_gimple_call (stmt))
3186 return false;
3188 fndecl = gimple_call_fndecl (stmt);
3189 if (fndecl == NULL_TREE)
3190 return false;
3192 struct cgraph_node *node = cgraph_node::get (fndecl);
3193 if (node == NULL || node->simd_clones == NULL)
3194 return false;
3196 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3197 return false;
3199 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3200 && ! vec_stmt)
3201 return false;
3203 if (gimple_call_lhs (stmt)
3204 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3205 return false;
3207 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3209 vectype = STMT_VINFO_VECTYPE (stmt_info);
3211 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3212 return false;
3214 /* FORNOW */
3215 if (slp_node)
3216 return false;
3218 /* Process function arguments. */
3219 nargs = gimple_call_num_args (stmt);
3221 /* Bail out if the function has zero arguments. */
3222 if (nargs == 0)
3223 return false;
3225 arginfo.reserve (nargs, true);
3227 for (i = 0; i < nargs; i++)
3229 simd_call_arg_info thisarginfo;
3230 affine_iv iv;
3232 thisarginfo.linear_step = 0;
3233 thisarginfo.align = 0;
3234 thisarginfo.op = NULL_TREE;
3235 thisarginfo.simd_lane_linear = false;
3237 op = gimple_call_arg (stmt, i);
3238 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3239 &thisarginfo.vectype)
3240 || thisarginfo.dt == vect_uninitialized_def)
3242 if (dump_enabled_p ())
3243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3244 "use not simple.\n");
3245 return false;
3248 if (thisarginfo.dt == vect_constant_def
3249 || thisarginfo.dt == vect_external_def)
3250 gcc_assert (thisarginfo.vectype == NULL_TREE);
3251 else
3252 gcc_assert (thisarginfo.vectype != NULL_TREE);
3254 /* For linear arguments, the analyze phase should have saved
3255 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3256 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3257 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3259 gcc_assert (vec_stmt);
3260 thisarginfo.linear_step
3261 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3262 thisarginfo.op
3263 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3264 thisarginfo.simd_lane_linear
3265 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3266 == boolean_true_node);
3267 /* If loop has been peeled for alignment, we need to adjust it. */
3268 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3269 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3270 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3272 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3273 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3274 tree opt = TREE_TYPE (thisarginfo.op);
3275 bias = fold_convert (TREE_TYPE (step), bias);
3276 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3277 thisarginfo.op
3278 = fold_build2 (POINTER_TYPE_P (opt)
3279 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3280 thisarginfo.op, bias);
3283 else if (!vec_stmt
3284 && thisarginfo.dt != vect_constant_def
3285 && thisarginfo.dt != vect_external_def
3286 && loop_vinfo
3287 && TREE_CODE (op) == SSA_NAME
3288 && simple_iv (loop, loop_containing_stmt (stmt), op,
3289 &iv, false)
3290 && tree_fits_shwi_p (iv.step))
3292 thisarginfo.linear_step = tree_to_shwi (iv.step);
3293 thisarginfo.op = iv.base;
3295 else if ((thisarginfo.dt == vect_constant_def
3296 || thisarginfo.dt == vect_external_def)
3297 && POINTER_TYPE_P (TREE_TYPE (op)))
3298 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3299 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3300 linear too. */
3301 if (POINTER_TYPE_P (TREE_TYPE (op))
3302 && !thisarginfo.linear_step
3303 && !vec_stmt
3304 && thisarginfo.dt != vect_constant_def
3305 && thisarginfo.dt != vect_external_def
3306 && loop_vinfo
3307 && !slp_node
3308 && TREE_CODE (op) == SSA_NAME)
3309 vect_simd_lane_linear (op, loop, &thisarginfo);
3311 arginfo.quick_push (thisarginfo);
3314 unsigned int badness = 0;
3315 struct cgraph_node *bestn = NULL;
3316 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3317 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3318 else
3319 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3320 n = n->simdclone->next_clone)
3322 unsigned int this_badness = 0;
3323 if (n->simdclone->simdlen
3324 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3325 || n->simdclone->nargs != nargs)
3326 continue;
3327 if (n->simdclone->simdlen
3328 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3329 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3330 - exact_log2 (n->simdclone->simdlen)) * 1024;
3331 if (n->simdclone->inbranch)
3332 this_badness += 2048;
3333 int target_badness = targetm.simd_clone.usable (n);
3334 if (target_badness < 0)
3335 continue;
3336 this_badness += target_badness * 512;
3337 /* FORNOW: Have to add code to add the mask argument. */
3338 if (n->simdclone->inbranch)
3339 continue;
3340 for (i = 0; i < nargs; i++)
3342 switch (n->simdclone->args[i].arg_type)
3344 case SIMD_CLONE_ARG_TYPE_VECTOR:
3345 if (!useless_type_conversion_p
3346 (n->simdclone->args[i].orig_type,
3347 TREE_TYPE (gimple_call_arg (stmt, i))))
3348 i = -1;
3349 else if (arginfo[i].dt == vect_constant_def
3350 || arginfo[i].dt == vect_external_def
3351 || arginfo[i].linear_step)
3352 this_badness += 64;
3353 break;
3354 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3355 if (arginfo[i].dt != vect_constant_def
3356 && arginfo[i].dt != vect_external_def)
3357 i = -1;
3358 break;
3359 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3360 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3361 if (arginfo[i].dt == vect_constant_def
3362 || arginfo[i].dt == vect_external_def
3363 || (arginfo[i].linear_step
3364 != n->simdclone->args[i].linear_step))
3365 i = -1;
3366 break;
3367 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3368 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3369 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3370 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3371 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3372 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3373 /* FORNOW */
3374 i = -1;
3375 break;
3376 case SIMD_CLONE_ARG_TYPE_MASK:
3377 gcc_unreachable ();
3379 if (i == (size_t) -1)
3380 break;
3381 if (n->simdclone->args[i].alignment > arginfo[i].align)
3383 i = -1;
3384 break;
3386 if (arginfo[i].align)
3387 this_badness += (exact_log2 (arginfo[i].align)
3388 - exact_log2 (n->simdclone->args[i].alignment));
3390 if (i == (size_t) -1)
3391 continue;
3392 if (bestn == NULL || this_badness < badness)
3394 bestn = n;
3395 badness = this_badness;
3399 if (bestn == NULL)
3400 return false;
3402 for (i = 0; i < nargs; i++)
3403 if ((arginfo[i].dt == vect_constant_def
3404 || arginfo[i].dt == vect_external_def)
3405 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3407 arginfo[i].vectype
3408 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3409 i)));
3410 if (arginfo[i].vectype == NULL
3411 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3412 > bestn->simdclone->simdlen))
3413 return false;
3416 fndecl = bestn->decl;
3417 nunits = bestn->simdclone->simdlen;
3418 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3420 /* If the function isn't const, only allow it in simd loops where user
3421 has asserted that at least nunits consecutive iterations can be
3422 performed using SIMD instructions. */
3423 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3424 && gimple_vuse (stmt))
3425 return false;
3427 /* Sanity check: make sure that at least one copy of the vectorized stmt
3428 needs to be generated. */
3429 gcc_assert (ncopies >= 1);
3431 if (!vec_stmt) /* transformation not required. */
3433 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3434 for (i = 0; i < nargs; i++)
3435 if ((bestn->simdclone->args[i].arg_type
3436 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3437 || (bestn->simdclone->args[i].arg_type
3438 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3440 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3441 + 1);
3442 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3443 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3444 ? size_type_node : TREE_TYPE (arginfo[i].op);
3445 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3446 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3447 tree sll = arginfo[i].simd_lane_linear
3448 ? boolean_true_node : boolean_false_node;
3449 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3451 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3452 if (dump_enabled_p ())
3453 dump_printf_loc (MSG_NOTE, vect_location,
3454 "=== vectorizable_simd_clone_call ===\n");
3455 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3456 return true;
3459 /** Transform. **/
3461 if (dump_enabled_p ())
3462 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3464 /* Handle def. */
3465 scalar_dest = gimple_call_lhs (stmt);
3466 vec_dest = NULL_TREE;
3467 rtype = NULL_TREE;
3468 ratype = NULL_TREE;
3469 if (scalar_dest)
3471 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3472 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3473 if (TREE_CODE (rtype) == ARRAY_TYPE)
3475 ratype = rtype;
3476 rtype = TREE_TYPE (ratype);
3480 prev_stmt_info = NULL;
3481 for (j = 0; j < ncopies; ++j)
3483 /* Build argument list for the vectorized call. */
3484 if (j == 0)
3485 vargs.create (nargs);
3486 else
3487 vargs.truncate (0);
3489 for (i = 0; i < nargs; i++)
3491 unsigned int k, l, m, o;
3492 tree atype;
3493 op = gimple_call_arg (stmt, i);
3494 switch (bestn->simdclone->args[i].arg_type)
3496 case SIMD_CLONE_ARG_TYPE_VECTOR:
3497 atype = bestn->simdclone->args[i].vector_type;
3498 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3499 for (m = j * o; m < (j + 1) * o; m++)
3501 if (TYPE_VECTOR_SUBPARTS (atype)
3502 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3504 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3505 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3506 / TYPE_VECTOR_SUBPARTS (atype));
3507 gcc_assert ((k & (k - 1)) == 0);
3508 if (m == 0)
3509 vec_oprnd0
3510 = vect_get_vec_def_for_operand (op, stmt);
3511 else
3513 vec_oprnd0 = arginfo[i].op;
3514 if ((m & (k - 1)) == 0)
3515 vec_oprnd0
3516 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3517 vec_oprnd0);
3519 arginfo[i].op = vec_oprnd0;
3520 vec_oprnd0
3521 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3522 size_int (prec),
3523 bitsize_int ((m & (k - 1)) * prec));
3524 new_stmt
3525 = gimple_build_assign (make_ssa_name (atype),
3526 vec_oprnd0);
3527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3528 vargs.safe_push (gimple_assign_lhs (new_stmt));
3530 else
3532 k = (TYPE_VECTOR_SUBPARTS (atype)
3533 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3534 gcc_assert ((k & (k - 1)) == 0);
3535 vec<constructor_elt, va_gc> *ctor_elts;
3536 if (k != 1)
3537 vec_alloc (ctor_elts, k);
3538 else
3539 ctor_elts = NULL;
3540 for (l = 0; l < k; l++)
3542 if (m == 0 && l == 0)
3543 vec_oprnd0
3544 = vect_get_vec_def_for_operand (op, stmt);
3545 else
3546 vec_oprnd0
3547 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3548 arginfo[i].op);
3549 arginfo[i].op = vec_oprnd0;
3550 if (k == 1)
3551 break;
3552 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3553 vec_oprnd0);
3555 if (k == 1)
3556 vargs.safe_push (vec_oprnd0);
3557 else
3559 vec_oprnd0 = build_constructor (atype, ctor_elts);
3560 new_stmt
3561 = gimple_build_assign (make_ssa_name (atype),
3562 vec_oprnd0);
3563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3564 vargs.safe_push (gimple_assign_lhs (new_stmt));
3568 break;
3569 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3570 vargs.safe_push (op);
3571 break;
3572 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3573 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3574 if (j == 0)
3576 gimple_seq stmts;
3577 arginfo[i].op
3578 = force_gimple_operand (arginfo[i].op, &stmts, true,
3579 NULL_TREE);
3580 if (stmts != NULL)
3582 basic_block new_bb;
3583 edge pe = loop_preheader_edge (loop);
3584 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3585 gcc_assert (!new_bb);
3587 if (arginfo[i].simd_lane_linear)
3589 vargs.safe_push (arginfo[i].op);
3590 break;
3592 tree phi_res = copy_ssa_name (op);
3593 gphi *new_phi = create_phi_node (phi_res, loop->header);
3594 set_vinfo_for_stmt (new_phi,
3595 new_stmt_vec_info (new_phi, loop_vinfo));
3596 add_phi_arg (new_phi, arginfo[i].op,
3597 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3598 enum tree_code code
3599 = POINTER_TYPE_P (TREE_TYPE (op))
3600 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3601 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3602 ? sizetype : TREE_TYPE (op);
3603 widest_int cst
3604 = wi::mul (bestn->simdclone->args[i].linear_step,
3605 ncopies * nunits);
3606 tree tcst = wide_int_to_tree (type, cst);
3607 tree phi_arg = copy_ssa_name (op);
3608 new_stmt
3609 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3610 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3611 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3612 set_vinfo_for_stmt (new_stmt,
3613 new_stmt_vec_info (new_stmt, loop_vinfo));
3614 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3615 UNKNOWN_LOCATION);
3616 arginfo[i].op = phi_res;
3617 vargs.safe_push (phi_res);
3619 else
3621 enum tree_code code
3622 = POINTER_TYPE_P (TREE_TYPE (op))
3623 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3624 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3625 ? sizetype : TREE_TYPE (op);
3626 widest_int cst
3627 = wi::mul (bestn->simdclone->args[i].linear_step,
3628 j * nunits);
3629 tree tcst = wide_int_to_tree (type, cst);
3630 new_temp = make_ssa_name (TREE_TYPE (op));
3631 new_stmt = gimple_build_assign (new_temp, code,
3632 arginfo[i].op, tcst);
3633 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3634 vargs.safe_push (new_temp);
3636 break;
3637 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3638 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3639 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3640 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3641 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3642 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3643 default:
3644 gcc_unreachable ();
3648 new_stmt = gimple_build_call_vec (fndecl, vargs);
3649 if (vec_dest)
3651 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3652 if (ratype)
3653 new_temp = create_tmp_var (ratype);
3654 else if (TYPE_VECTOR_SUBPARTS (vectype)
3655 == TYPE_VECTOR_SUBPARTS (rtype))
3656 new_temp = make_ssa_name (vec_dest, new_stmt);
3657 else
3658 new_temp = make_ssa_name (rtype, new_stmt);
3659 gimple_call_set_lhs (new_stmt, new_temp);
3661 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3663 if (vec_dest)
3665 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3667 unsigned int k, l;
3668 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3669 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3670 gcc_assert ((k & (k - 1)) == 0);
3671 for (l = 0; l < k; l++)
3673 tree t;
3674 if (ratype)
3676 t = build_fold_addr_expr (new_temp);
3677 t = build2 (MEM_REF, vectype, t,
3678 build_int_cst (TREE_TYPE (t),
3679 l * prec / BITS_PER_UNIT));
3681 else
3682 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3683 size_int (prec), bitsize_int (l * prec));
3684 new_stmt
3685 = gimple_build_assign (make_ssa_name (vectype), t);
3686 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3687 if (j == 0 && l == 0)
3688 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3689 else
3690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3692 prev_stmt_info = vinfo_for_stmt (new_stmt);
3695 if (ratype)
3697 tree clobber = build_constructor (ratype, NULL);
3698 TREE_THIS_VOLATILE (clobber) = 1;
3699 new_stmt = gimple_build_assign (new_temp, clobber);
3700 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3702 continue;
3704 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3706 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3707 / TYPE_VECTOR_SUBPARTS (rtype));
3708 gcc_assert ((k & (k - 1)) == 0);
3709 if ((j & (k - 1)) == 0)
3710 vec_alloc (ret_ctor_elts, k);
3711 if (ratype)
3713 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3714 for (m = 0; m < o; m++)
3716 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3717 size_int (m), NULL_TREE, NULL_TREE);
3718 new_stmt
3719 = gimple_build_assign (make_ssa_name (rtype), tem);
3720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3721 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3722 gimple_assign_lhs (new_stmt));
3724 tree clobber = build_constructor (ratype, NULL);
3725 TREE_THIS_VOLATILE (clobber) = 1;
3726 new_stmt = gimple_build_assign (new_temp, clobber);
3727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3729 else
3730 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3731 if ((j & (k - 1)) != k - 1)
3732 continue;
3733 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3734 new_stmt
3735 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3738 if ((unsigned) j == k - 1)
3739 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3740 else
3741 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3743 prev_stmt_info = vinfo_for_stmt (new_stmt);
3744 continue;
3746 else if (ratype)
3748 tree t = build_fold_addr_expr (new_temp);
3749 t = build2 (MEM_REF, vectype, t,
3750 build_int_cst (TREE_TYPE (t), 0));
3751 new_stmt
3752 = gimple_build_assign (make_ssa_name (vec_dest), t);
3753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3754 tree clobber = build_constructor (ratype, NULL);
3755 TREE_THIS_VOLATILE (clobber) = 1;
3756 vect_finish_stmt_generation (stmt,
3757 gimple_build_assign (new_temp,
3758 clobber), gsi);
3762 if (j == 0)
3763 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3764 else
3765 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3767 prev_stmt_info = vinfo_for_stmt (new_stmt);
3770 vargs.release ();
3772 /* The call in STMT might prevent it from being removed in dce.
3773 We however cannot remove it here, due to the way the ssa name
3774 it defines is mapped to the new definition. So just replace
3775 rhs of the statement with something harmless. */
3777 if (slp_node)
3778 return true;
3780 if (scalar_dest)
3782 type = TREE_TYPE (scalar_dest);
3783 if (is_pattern_stmt_p (stmt_info))
3784 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3785 else
3786 lhs = gimple_call_lhs (stmt);
3787 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3789 else
3790 new_stmt = gimple_build_nop ();
3791 set_vinfo_for_stmt (new_stmt, stmt_info);
3792 set_vinfo_for_stmt (stmt, NULL);
3793 STMT_VINFO_STMT (stmt_info) = new_stmt;
3794 gsi_replace (gsi, new_stmt, true);
3795 unlink_stmt_vdef (stmt);
3797 return true;
3801 /* Function vect_gen_widened_results_half
3803 Create a vector stmt whose code, type, number of arguments, and result
3804 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3805 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3806 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3807 needs to be created (DECL is a function-decl of a target-builtin).
3808 STMT is the original scalar stmt that we are vectorizing. */
3810 static gimple *
3811 vect_gen_widened_results_half (enum tree_code code,
3812 tree decl,
3813 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3814 tree vec_dest, gimple_stmt_iterator *gsi,
3815 gimple *stmt)
3817 gimple *new_stmt;
3818 tree new_temp;
3820 /* Generate half of the widened result: */
3821 if (code == CALL_EXPR)
3823 /* Target specific support */
3824 if (op_type == binary_op)
3825 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3826 else
3827 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3828 new_temp = make_ssa_name (vec_dest, new_stmt);
3829 gimple_call_set_lhs (new_stmt, new_temp);
3831 else
3833 /* Generic support */
3834 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3835 if (op_type != binary_op)
3836 vec_oprnd1 = NULL;
3837 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3838 new_temp = make_ssa_name (vec_dest, new_stmt);
3839 gimple_assign_set_lhs (new_stmt, new_temp);
3841 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3843 return new_stmt;
3847 /* Get vectorized definitions for loop-based vectorization. For the first
3848 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3849 scalar operand), and for the rest we get a copy with
3850 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3851 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3852 The vectors are collected into VEC_OPRNDS. */
3854 static void
3855 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3856 vec<tree> *vec_oprnds, int multi_step_cvt)
3858 tree vec_oprnd;
3860 /* Get first vector operand. */
3861 /* All the vector operands except the very first one (that is scalar oprnd)
3862 are stmt copies. */
3863 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3864 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3865 else
3866 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3868 vec_oprnds->quick_push (vec_oprnd);
3870 /* Get second vector operand. */
3871 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3872 vec_oprnds->quick_push (vec_oprnd);
3874 *oprnd = vec_oprnd;
3876 /* For conversion in multiple steps, continue to get operands
3877 recursively. */
3878 if (multi_step_cvt)
3879 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3883 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3884 For multi-step conversions store the resulting vectors and call the function
3885 recursively. */
3887 static void
3888 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3889 int multi_step_cvt, gimple *stmt,
3890 vec<tree> vec_dsts,
3891 gimple_stmt_iterator *gsi,
3892 slp_tree slp_node, enum tree_code code,
3893 stmt_vec_info *prev_stmt_info)
3895 unsigned int i;
3896 tree vop0, vop1, new_tmp, vec_dest;
3897 gimple *new_stmt;
3898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3900 vec_dest = vec_dsts.pop ();
3902 for (i = 0; i < vec_oprnds->length (); i += 2)
3904 /* Create demotion operation. */
3905 vop0 = (*vec_oprnds)[i];
3906 vop1 = (*vec_oprnds)[i + 1];
3907 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3908 new_tmp = make_ssa_name (vec_dest, new_stmt);
3909 gimple_assign_set_lhs (new_stmt, new_tmp);
3910 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3912 if (multi_step_cvt)
3913 /* Store the resulting vector for next recursive call. */
3914 (*vec_oprnds)[i/2] = new_tmp;
3915 else
3917 /* This is the last step of the conversion sequence. Store the
3918 vectors in SLP_NODE or in vector info of the scalar statement
3919 (or in STMT_VINFO_RELATED_STMT chain). */
3920 if (slp_node)
3921 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3922 else
3924 if (!*prev_stmt_info)
3925 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3926 else
3927 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3929 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3934 /* For multi-step demotion operations we first generate demotion operations
3935 from the source type to the intermediate types, and then combine the
3936 results (stored in VEC_OPRNDS) in demotion operation to the destination
3937 type. */
3938 if (multi_step_cvt)
3940 /* At each level of recursion we have half of the operands we had at the
3941 previous level. */
3942 vec_oprnds->truncate ((i+1)/2);
3943 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3944 stmt, vec_dsts, gsi, slp_node,
3945 VEC_PACK_TRUNC_EXPR,
3946 prev_stmt_info);
3949 vec_dsts.quick_push (vec_dest);
3953 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3954 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3955 the resulting vectors and call the function recursively. */
3957 static void
3958 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3959 vec<tree> *vec_oprnds1,
3960 gimple *stmt, tree vec_dest,
3961 gimple_stmt_iterator *gsi,
3962 enum tree_code code1,
3963 enum tree_code code2, tree decl1,
3964 tree decl2, int op_type)
3966 int i;
3967 tree vop0, vop1, new_tmp1, new_tmp2;
3968 gimple *new_stmt1, *new_stmt2;
3969 vec<tree> vec_tmp = vNULL;
3971 vec_tmp.create (vec_oprnds0->length () * 2);
3972 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3974 if (op_type == binary_op)
3975 vop1 = (*vec_oprnds1)[i];
3976 else
3977 vop1 = NULL_TREE;
3979 /* Generate the two halves of promotion operation. */
3980 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3981 op_type, vec_dest, gsi, stmt);
3982 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3983 op_type, vec_dest, gsi, stmt);
3984 if (is_gimple_call (new_stmt1))
3986 new_tmp1 = gimple_call_lhs (new_stmt1);
3987 new_tmp2 = gimple_call_lhs (new_stmt2);
3989 else
3991 new_tmp1 = gimple_assign_lhs (new_stmt1);
3992 new_tmp2 = gimple_assign_lhs (new_stmt2);
3995 /* Store the results for the next step. */
3996 vec_tmp.quick_push (new_tmp1);
3997 vec_tmp.quick_push (new_tmp2);
4000 vec_oprnds0->release ();
4001 *vec_oprnds0 = vec_tmp;
4005 /* Check if STMT performs a conversion operation, that can be vectorized.
4006 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4007 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4010 static bool
4011 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4012 gimple **vec_stmt, slp_tree slp_node)
4014 tree vec_dest;
4015 tree scalar_dest;
4016 tree op0, op1 = NULL_TREE;
4017 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4020 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4021 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4022 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4023 tree new_temp;
4024 gimple *def_stmt;
4025 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4026 gimple *new_stmt = NULL;
4027 stmt_vec_info prev_stmt_info;
4028 int nunits_in;
4029 int nunits_out;
4030 tree vectype_out, vectype_in;
4031 int ncopies, i, j;
4032 tree lhs_type, rhs_type;
4033 enum { NARROW, NONE, WIDEN } modifier;
4034 vec<tree> vec_oprnds0 = vNULL;
4035 vec<tree> vec_oprnds1 = vNULL;
4036 tree vop0;
4037 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4038 vec_info *vinfo = stmt_info->vinfo;
4039 int multi_step_cvt = 0;
4040 vec<tree> interm_types = vNULL;
4041 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4042 int op_type;
4043 machine_mode rhs_mode;
4044 unsigned short fltsz;
4046 /* Is STMT a vectorizable conversion? */
4048 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4049 return false;
4051 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4052 && ! vec_stmt)
4053 return false;
4055 if (!is_gimple_assign (stmt))
4056 return false;
4058 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4059 return false;
4061 code = gimple_assign_rhs_code (stmt);
4062 if (!CONVERT_EXPR_CODE_P (code)
4063 && code != FIX_TRUNC_EXPR
4064 && code != FLOAT_EXPR
4065 && code != WIDEN_MULT_EXPR
4066 && code != WIDEN_LSHIFT_EXPR)
4067 return false;
4069 op_type = TREE_CODE_LENGTH (code);
4071 /* Check types of lhs and rhs. */
4072 scalar_dest = gimple_assign_lhs (stmt);
4073 lhs_type = TREE_TYPE (scalar_dest);
4074 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4076 op0 = gimple_assign_rhs1 (stmt);
4077 rhs_type = TREE_TYPE (op0);
4079 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4080 && !((INTEGRAL_TYPE_P (lhs_type)
4081 && INTEGRAL_TYPE_P (rhs_type))
4082 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4083 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4084 return false;
4086 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4087 && ((INTEGRAL_TYPE_P (lhs_type)
4088 && (TYPE_PRECISION (lhs_type)
4089 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4090 || (INTEGRAL_TYPE_P (rhs_type)
4091 && (TYPE_PRECISION (rhs_type)
4092 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4094 if (dump_enabled_p ())
4095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4096 "type conversion to/from bit-precision unsupported."
4097 "\n");
4098 return false;
4101 /* Check the operands of the operation. */
4102 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4104 if (dump_enabled_p ())
4105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4106 "use not simple.\n");
4107 return false;
4109 if (op_type == binary_op)
4111 bool ok;
4113 op1 = gimple_assign_rhs2 (stmt);
4114 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4115 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4116 OP1. */
4117 if (CONSTANT_CLASS_P (op0))
4118 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4119 else
4120 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4122 if (!ok)
4124 if (dump_enabled_p ())
4125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4126 "use not simple.\n");
4127 return false;
4131 /* If op0 is an external or constant defs use a vector type of
4132 the same size as the output vector type. */
4133 if (!vectype_in)
4134 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4135 if (vec_stmt)
4136 gcc_assert (vectype_in);
4137 if (!vectype_in)
4139 if (dump_enabled_p ())
4141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4142 "no vectype for scalar type ");
4143 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4144 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4147 return false;
4150 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4151 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4153 if (dump_enabled_p ())
4155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4156 "can't convert between boolean and non "
4157 "boolean vectors");
4158 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4159 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4162 return false;
4165 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4166 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4167 if (nunits_in < nunits_out)
4168 modifier = NARROW;
4169 else if (nunits_out == nunits_in)
4170 modifier = NONE;
4171 else
4172 modifier = WIDEN;
4174 /* Multiple types in SLP are handled by creating the appropriate number of
4175 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4176 case of SLP. */
4177 if (slp_node)
4178 ncopies = 1;
4179 else if (modifier == NARROW)
4180 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4181 else
4182 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4184 /* Sanity check: make sure that at least one copy of the vectorized stmt
4185 needs to be generated. */
4186 gcc_assert (ncopies >= 1);
4188 /* Supportable by target? */
4189 switch (modifier)
4191 case NONE:
4192 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4193 return false;
4194 if (supportable_convert_operation (code, vectype_out, vectype_in,
4195 &decl1, &code1))
4196 break;
4197 /* FALLTHRU */
4198 unsupported:
4199 if (dump_enabled_p ())
4200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4201 "conversion not supported by target.\n");
4202 return false;
4204 case WIDEN:
4205 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4206 &code1, &code2, &multi_step_cvt,
4207 &interm_types))
4209 /* Binary widening operation can only be supported directly by the
4210 architecture. */
4211 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4212 break;
4215 if (code != FLOAT_EXPR
4216 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4217 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4218 goto unsupported;
4220 rhs_mode = TYPE_MODE (rhs_type);
4221 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4222 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4223 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4224 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4226 cvt_type
4227 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4228 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4229 if (cvt_type == NULL_TREE)
4230 goto unsupported;
4232 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4234 if (!supportable_convert_operation (code, vectype_out,
4235 cvt_type, &decl1, &codecvt1))
4236 goto unsupported;
4238 else if (!supportable_widening_operation (code, stmt, vectype_out,
4239 cvt_type, &codecvt1,
4240 &codecvt2, &multi_step_cvt,
4241 &interm_types))
4242 continue;
4243 else
4244 gcc_assert (multi_step_cvt == 0);
4246 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4247 vectype_in, &code1, &code2,
4248 &multi_step_cvt, &interm_types))
4249 break;
4252 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4253 goto unsupported;
4255 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4256 codecvt2 = ERROR_MARK;
4257 else
4259 multi_step_cvt++;
4260 interm_types.safe_push (cvt_type);
4261 cvt_type = NULL_TREE;
4263 break;
4265 case NARROW:
4266 gcc_assert (op_type == unary_op);
4267 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4268 &code1, &multi_step_cvt,
4269 &interm_types))
4270 break;
4272 if (code != FIX_TRUNC_EXPR
4273 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4274 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4275 goto unsupported;
4277 rhs_mode = TYPE_MODE (rhs_type);
4278 cvt_type
4279 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4280 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4281 if (cvt_type == NULL_TREE)
4282 goto unsupported;
4283 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4284 &decl1, &codecvt1))
4285 goto unsupported;
4286 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4287 &code1, &multi_step_cvt,
4288 &interm_types))
4289 break;
4290 goto unsupported;
4292 default:
4293 gcc_unreachable ();
4296 if (!vec_stmt) /* transformation not required. */
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_NOTE, vect_location,
4300 "=== vectorizable_conversion ===\n");
4301 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4303 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4304 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4306 else if (modifier == NARROW)
4308 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4309 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4311 else
4313 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4314 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4316 interm_types.release ();
4317 return true;
4320 /** Transform. **/
4321 if (dump_enabled_p ())
4322 dump_printf_loc (MSG_NOTE, vect_location,
4323 "transform conversion. ncopies = %d.\n", ncopies);
4325 if (op_type == binary_op)
4327 if (CONSTANT_CLASS_P (op0))
4328 op0 = fold_convert (TREE_TYPE (op1), op0);
4329 else if (CONSTANT_CLASS_P (op1))
4330 op1 = fold_convert (TREE_TYPE (op0), op1);
4333 /* In case of multi-step conversion, we first generate conversion operations
4334 to the intermediate types, and then from that types to the final one.
4335 We create vector destinations for the intermediate type (TYPES) received
4336 from supportable_*_operation, and store them in the correct order
4337 for future use in vect_create_vectorized_*_stmts (). */
4338 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4339 vec_dest = vect_create_destination_var (scalar_dest,
4340 (cvt_type && modifier == WIDEN)
4341 ? cvt_type : vectype_out);
4342 vec_dsts.quick_push (vec_dest);
4344 if (multi_step_cvt)
4346 for (i = interm_types.length () - 1;
4347 interm_types.iterate (i, &intermediate_type); i--)
4349 vec_dest = vect_create_destination_var (scalar_dest,
4350 intermediate_type);
4351 vec_dsts.quick_push (vec_dest);
4355 if (cvt_type)
4356 vec_dest = vect_create_destination_var (scalar_dest,
4357 modifier == WIDEN
4358 ? vectype_out : cvt_type);
4360 if (!slp_node)
4362 if (modifier == WIDEN)
4364 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4365 if (op_type == binary_op)
4366 vec_oprnds1.create (1);
4368 else if (modifier == NARROW)
4369 vec_oprnds0.create (
4370 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4372 else if (code == WIDEN_LSHIFT_EXPR)
4373 vec_oprnds1.create (slp_node->vec_stmts_size);
4375 last_oprnd = op0;
4376 prev_stmt_info = NULL;
4377 switch (modifier)
4379 case NONE:
4380 for (j = 0; j < ncopies; j++)
4382 if (j == 0)
4383 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4384 -1);
4385 else
4386 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4388 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4390 /* Arguments are ready, create the new vector stmt. */
4391 if (code1 == CALL_EXPR)
4393 new_stmt = gimple_build_call (decl1, 1, vop0);
4394 new_temp = make_ssa_name (vec_dest, new_stmt);
4395 gimple_call_set_lhs (new_stmt, new_temp);
4397 else
4399 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4400 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4401 new_temp = make_ssa_name (vec_dest, new_stmt);
4402 gimple_assign_set_lhs (new_stmt, new_temp);
4405 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4406 if (slp_node)
4407 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4408 else
4410 if (!prev_stmt_info)
4411 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4412 else
4413 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4414 prev_stmt_info = vinfo_for_stmt (new_stmt);
4418 break;
4420 case WIDEN:
4421 /* In case the vectorization factor (VF) is bigger than the number
4422 of elements that we can fit in a vectype (nunits), we have to
4423 generate more than one vector stmt - i.e - we need to "unroll"
4424 the vector stmt by a factor VF/nunits. */
4425 for (j = 0; j < ncopies; j++)
4427 /* Handle uses. */
4428 if (j == 0)
4430 if (slp_node)
4432 if (code == WIDEN_LSHIFT_EXPR)
4434 unsigned int k;
4436 vec_oprnd1 = op1;
4437 /* Store vec_oprnd1 for every vector stmt to be created
4438 for SLP_NODE. We check during the analysis that all
4439 the shift arguments are the same. */
4440 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4441 vec_oprnds1.quick_push (vec_oprnd1);
4443 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4444 slp_node, -1);
4446 else
4447 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4448 &vec_oprnds1, slp_node, -1);
4450 else
4452 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4453 vec_oprnds0.quick_push (vec_oprnd0);
4454 if (op_type == binary_op)
4456 if (code == WIDEN_LSHIFT_EXPR)
4457 vec_oprnd1 = op1;
4458 else
4459 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4460 vec_oprnds1.quick_push (vec_oprnd1);
4464 else
4466 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4467 vec_oprnds0.truncate (0);
4468 vec_oprnds0.quick_push (vec_oprnd0);
4469 if (op_type == binary_op)
4471 if (code == WIDEN_LSHIFT_EXPR)
4472 vec_oprnd1 = op1;
4473 else
4474 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4475 vec_oprnd1);
4476 vec_oprnds1.truncate (0);
4477 vec_oprnds1.quick_push (vec_oprnd1);
4481 /* Arguments are ready. Create the new vector stmts. */
4482 for (i = multi_step_cvt; i >= 0; i--)
4484 tree this_dest = vec_dsts[i];
4485 enum tree_code c1 = code1, c2 = code2;
4486 if (i == 0 && codecvt2 != ERROR_MARK)
4488 c1 = codecvt1;
4489 c2 = codecvt2;
4491 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4492 &vec_oprnds1,
4493 stmt, this_dest, gsi,
4494 c1, c2, decl1, decl2,
4495 op_type);
4498 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4500 if (cvt_type)
4502 if (codecvt1 == CALL_EXPR)
4504 new_stmt = gimple_build_call (decl1, 1, vop0);
4505 new_temp = make_ssa_name (vec_dest, new_stmt);
4506 gimple_call_set_lhs (new_stmt, new_temp);
4508 else
4510 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4511 new_temp = make_ssa_name (vec_dest);
4512 new_stmt = gimple_build_assign (new_temp, codecvt1,
4513 vop0);
4516 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4518 else
4519 new_stmt = SSA_NAME_DEF_STMT (vop0);
4521 if (slp_node)
4522 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4523 else
4525 if (!prev_stmt_info)
4526 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4527 else
4528 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4529 prev_stmt_info = vinfo_for_stmt (new_stmt);
4534 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4535 break;
4537 case NARROW:
4538 /* In case the vectorization factor (VF) is bigger than the number
4539 of elements that we can fit in a vectype (nunits), we have to
4540 generate more than one vector stmt - i.e - we need to "unroll"
4541 the vector stmt by a factor VF/nunits. */
4542 for (j = 0; j < ncopies; j++)
4544 /* Handle uses. */
4545 if (slp_node)
4546 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4547 slp_node, -1);
4548 else
4550 vec_oprnds0.truncate (0);
4551 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4552 vect_pow2 (multi_step_cvt) - 1);
4555 /* Arguments are ready. Create the new vector stmts. */
4556 if (cvt_type)
4557 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4559 if (codecvt1 == CALL_EXPR)
4561 new_stmt = gimple_build_call (decl1, 1, vop0);
4562 new_temp = make_ssa_name (vec_dest, new_stmt);
4563 gimple_call_set_lhs (new_stmt, new_temp);
4565 else
4567 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4568 new_temp = make_ssa_name (vec_dest);
4569 new_stmt = gimple_build_assign (new_temp, codecvt1,
4570 vop0);
4573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4574 vec_oprnds0[i] = new_temp;
4577 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4578 stmt, vec_dsts, gsi,
4579 slp_node, code1,
4580 &prev_stmt_info);
4583 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4584 break;
4587 vec_oprnds0.release ();
4588 vec_oprnds1.release ();
4589 interm_types.release ();
4591 return true;
4595 /* Function vectorizable_assignment.
4597 Check if STMT performs an assignment (copy) that can be vectorized.
4598 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4599 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4600 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4602 static bool
4603 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4604 gimple **vec_stmt, slp_tree slp_node)
4606 tree vec_dest;
4607 tree scalar_dest;
4608 tree op;
4609 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4610 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4611 tree new_temp;
4612 gimple *def_stmt;
4613 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4614 int ncopies;
4615 int i, j;
4616 vec<tree> vec_oprnds = vNULL;
4617 tree vop;
4618 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4619 vec_info *vinfo = stmt_info->vinfo;
4620 gimple *new_stmt = NULL;
4621 stmt_vec_info prev_stmt_info = NULL;
4622 enum tree_code code;
4623 tree vectype_in;
4625 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4626 return false;
4628 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4629 && ! vec_stmt)
4630 return false;
4632 /* Is vectorizable assignment? */
4633 if (!is_gimple_assign (stmt))
4634 return false;
4636 scalar_dest = gimple_assign_lhs (stmt);
4637 if (TREE_CODE (scalar_dest) != SSA_NAME)
4638 return false;
4640 code = gimple_assign_rhs_code (stmt);
4641 if (gimple_assign_single_p (stmt)
4642 || code == PAREN_EXPR
4643 || CONVERT_EXPR_CODE_P (code))
4644 op = gimple_assign_rhs1 (stmt);
4645 else
4646 return false;
4648 if (code == VIEW_CONVERT_EXPR)
4649 op = TREE_OPERAND (op, 0);
4651 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4652 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4654 /* Multiple types in SLP are handled by creating the appropriate number of
4655 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4656 case of SLP. */
4657 if (slp_node)
4658 ncopies = 1;
4659 else
4660 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4662 gcc_assert (ncopies >= 1);
4664 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4668 "use not simple.\n");
4669 return false;
4672 /* We can handle NOP_EXPR conversions that do not change the number
4673 of elements or the vector size. */
4674 if ((CONVERT_EXPR_CODE_P (code)
4675 || code == VIEW_CONVERT_EXPR)
4676 && (!vectype_in
4677 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4678 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4679 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4680 return false;
4682 /* We do not handle bit-precision changes. */
4683 if ((CONVERT_EXPR_CODE_P (code)
4684 || code == VIEW_CONVERT_EXPR)
4685 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4686 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4687 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4688 || ((TYPE_PRECISION (TREE_TYPE (op))
4689 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4690 /* But a conversion that does not change the bit-pattern is ok. */
4691 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4692 > TYPE_PRECISION (TREE_TYPE (op)))
4693 && TYPE_UNSIGNED (TREE_TYPE (op)))
4694 /* Conversion between boolean types of different sizes is
4695 a simple assignment in case their vectypes are same
4696 boolean vectors. */
4697 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4698 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4700 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4702 "type conversion to/from bit-precision "
4703 "unsupported.\n");
4704 return false;
4707 if (!vec_stmt) /* transformation not required. */
4709 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4710 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_NOTE, vect_location,
4712 "=== vectorizable_assignment ===\n");
4713 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4714 return true;
4717 /** Transform. **/
4718 if (dump_enabled_p ())
4719 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4721 /* Handle def. */
4722 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4724 /* Handle use. */
4725 for (j = 0; j < ncopies; j++)
4727 /* Handle uses. */
4728 if (j == 0)
4729 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4730 else
4731 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4733 /* Arguments are ready. create the new vector stmt. */
4734 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4736 if (CONVERT_EXPR_CODE_P (code)
4737 || code == VIEW_CONVERT_EXPR)
4738 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4739 new_stmt = gimple_build_assign (vec_dest, vop);
4740 new_temp = make_ssa_name (vec_dest, new_stmt);
4741 gimple_assign_set_lhs (new_stmt, new_temp);
4742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4743 if (slp_node)
4744 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4747 if (slp_node)
4748 continue;
4750 if (j == 0)
4751 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4752 else
4753 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4755 prev_stmt_info = vinfo_for_stmt (new_stmt);
4758 vec_oprnds.release ();
4759 return true;
4763 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4764 either as shift by a scalar or by a vector. */
4766 bool
4767 vect_supportable_shift (enum tree_code code, tree scalar_type)
4770 machine_mode vec_mode;
4771 optab optab;
4772 int icode;
4773 tree vectype;
4775 vectype = get_vectype_for_scalar_type (scalar_type);
4776 if (!vectype)
4777 return false;
4779 optab = optab_for_tree_code (code, vectype, optab_scalar);
4780 if (!optab
4781 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4783 optab = optab_for_tree_code (code, vectype, optab_vector);
4784 if (!optab
4785 || (optab_handler (optab, TYPE_MODE (vectype))
4786 == CODE_FOR_nothing))
4787 return false;
4790 vec_mode = TYPE_MODE (vectype);
4791 icode = (int) optab_handler (optab, vec_mode);
4792 if (icode == CODE_FOR_nothing)
4793 return false;
4795 return true;
4799 /* Function vectorizable_shift.
4801 Check if STMT performs a shift operation that can be vectorized.
4802 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4803 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4804 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4806 static bool
4807 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4808 gimple **vec_stmt, slp_tree slp_node)
4810 tree vec_dest;
4811 tree scalar_dest;
4812 tree op0, op1 = NULL;
4813 tree vec_oprnd1 = NULL_TREE;
4814 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4815 tree vectype;
4816 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4817 enum tree_code code;
4818 machine_mode vec_mode;
4819 tree new_temp;
4820 optab optab;
4821 int icode;
4822 machine_mode optab_op2_mode;
4823 gimple *def_stmt;
4824 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4825 gimple *new_stmt = NULL;
4826 stmt_vec_info prev_stmt_info;
4827 int nunits_in;
4828 int nunits_out;
4829 tree vectype_out;
4830 tree op1_vectype;
4831 int ncopies;
4832 int j, i;
4833 vec<tree> vec_oprnds0 = vNULL;
4834 vec<tree> vec_oprnds1 = vNULL;
4835 tree vop0, vop1;
4836 unsigned int k;
4837 bool scalar_shift_arg = true;
4838 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4839 vec_info *vinfo = stmt_info->vinfo;
4840 int vf;
4842 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4843 return false;
4845 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4846 && ! vec_stmt)
4847 return false;
4849 /* Is STMT a vectorizable binary/unary operation? */
4850 if (!is_gimple_assign (stmt))
4851 return false;
4853 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4854 return false;
4856 code = gimple_assign_rhs_code (stmt);
4858 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4859 || code == RROTATE_EXPR))
4860 return false;
4862 scalar_dest = gimple_assign_lhs (stmt);
4863 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4864 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4865 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4869 "bit-precision shifts not supported.\n");
4870 return false;
4873 op0 = gimple_assign_rhs1 (stmt);
4874 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4876 if (dump_enabled_p ())
4877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4878 "use not simple.\n");
4879 return false;
4881 /* If op0 is an external or constant def use a vector type with
4882 the same size as the output vector type. */
4883 if (!vectype)
4884 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4885 if (vec_stmt)
4886 gcc_assert (vectype);
4887 if (!vectype)
4889 if (dump_enabled_p ())
4890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4891 "no vectype for scalar type\n");
4892 return false;
4895 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4896 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4897 if (nunits_out != nunits_in)
4898 return false;
4900 op1 = gimple_assign_rhs2 (stmt);
4901 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4903 if (dump_enabled_p ())
4904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4905 "use not simple.\n");
4906 return false;
4909 if (loop_vinfo)
4910 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4911 else
4912 vf = 1;
4914 /* Multiple types in SLP are handled by creating the appropriate number of
4915 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4916 case of SLP. */
4917 if (slp_node)
4918 ncopies = 1;
4919 else
4920 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4922 gcc_assert (ncopies >= 1);
4924 /* Determine whether the shift amount is a vector, or scalar. If the
4925 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4927 if ((dt[1] == vect_internal_def
4928 || dt[1] == vect_induction_def)
4929 && !slp_node)
4930 scalar_shift_arg = false;
4931 else if (dt[1] == vect_constant_def
4932 || dt[1] == vect_external_def
4933 || dt[1] == vect_internal_def)
4935 /* In SLP, need to check whether the shift count is the same,
4936 in loops if it is a constant or invariant, it is always
4937 a scalar shift. */
4938 if (slp_node)
4940 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4941 gimple *slpstmt;
4943 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4944 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4945 scalar_shift_arg = false;
4948 /* If the shift amount is computed by a pattern stmt we cannot
4949 use the scalar amount directly thus give up and use a vector
4950 shift. */
4951 if (dt[1] == vect_internal_def)
4953 gimple *def = SSA_NAME_DEF_STMT (op1);
4954 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4955 scalar_shift_arg = false;
4958 else
4960 if (dump_enabled_p ())
4961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4962 "operand mode requires invariant argument.\n");
4963 return false;
4966 /* Vector shifted by vector. */
4967 if (!scalar_shift_arg)
4969 optab = optab_for_tree_code (code, vectype, optab_vector);
4970 if (dump_enabled_p ())
4971 dump_printf_loc (MSG_NOTE, vect_location,
4972 "vector/vector shift/rotate found.\n");
4974 if (!op1_vectype)
4975 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4976 if (op1_vectype == NULL_TREE
4977 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4981 "unusable type for last operand in"
4982 " vector/vector shift/rotate.\n");
4983 return false;
4986 /* See if the machine has a vector shifted by scalar insn and if not
4987 then see if it has a vector shifted by vector insn. */
4988 else
4990 optab = optab_for_tree_code (code, vectype, optab_scalar);
4991 if (optab
4992 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4994 if (dump_enabled_p ())
4995 dump_printf_loc (MSG_NOTE, vect_location,
4996 "vector/scalar shift/rotate found.\n");
4998 else
5000 optab = optab_for_tree_code (code, vectype, optab_vector);
5001 if (optab
5002 && (optab_handler (optab, TYPE_MODE (vectype))
5003 != CODE_FOR_nothing))
5005 scalar_shift_arg = false;
5007 if (dump_enabled_p ())
5008 dump_printf_loc (MSG_NOTE, vect_location,
5009 "vector/vector shift/rotate found.\n");
5011 /* Unlike the other binary operators, shifts/rotates have
5012 the rhs being int, instead of the same type as the lhs,
5013 so make sure the scalar is the right type if we are
5014 dealing with vectors of long long/long/short/char. */
5015 if (dt[1] == vect_constant_def)
5016 op1 = fold_convert (TREE_TYPE (vectype), op1);
5017 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5018 TREE_TYPE (op1)))
5020 if (slp_node
5021 && TYPE_MODE (TREE_TYPE (vectype))
5022 != TYPE_MODE (TREE_TYPE (op1)))
5024 if (dump_enabled_p ())
5025 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5026 "unusable type for last operand in"
5027 " vector/vector shift/rotate.\n");
5028 return false;
5030 if (vec_stmt && !slp_node)
5032 op1 = fold_convert (TREE_TYPE (vectype), op1);
5033 op1 = vect_init_vector (stmt, op1,
5034 TREE_TYPE (vectype), NULL);
5041 /* Supportable by target? */
5042 if (!optab)
5044 if (dump_enabled_p ())
5045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5046 "no optab.\n");
5047 return false;
5049 vec_mode = TYPE_MODE (vectype);
5050 icode = (int) optab_handler (optab, vec_mode);
5051 if (icode == CODE_FOR_nothing)
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5055 "op not supported by target.\n");
5056 /* Check only during analysis. */
5057 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5058 || (vf < vect_min_worthwhile_factor (code)
5059 && !vec_stmt))
5060 return false;
5061 if (dump_enabled_p ())
5062 dump_printf_loc (MSG_NOTE, vect_location,
5063 "proceeding using word mode.\n");
5066 /* Worthwhile without SIMD support? Check only during analysis. */
5067 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5068 && vf < vect_min_worthwhile_factor (code)
5069 && !vec_stmt)
5071 if (dump_enabled_p ())
5072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5073 "not worthwhile without SIMD support.\n");
5074 return false;
5077 if (!vec_stmt) /* transformation not required. */
5079 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5080 if (dump_enabled_p ())
5081 dump_printf_loc (MSG_NOTE, vect_location,
5082 "=== vectorizable_shift ===\n");
5083 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5084 return true;
5087 /** Transform. **/
5089 if (dump_enabled_p ())
5090 dump_printf_loc (MSG_NOTE, vect_location,
5091 "transform binary/unary operation.\n");
5093 /* Handle def. */
5094 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5096 prev_stmt_info = NULL;
5097 for (j = 0; j < ncopies; j++)
5099 /* Handle uses. */
5100 if (j == 0)
5102 if (scalar_shift_arg)
5104 /* Vector shl and shr insn patterns can be defined with scalar
5105 operand 2 (shift operand). In this case, use constant or loop
5106 invariant op1 directly, without extending it to vector mode
5107 first. */
5108 optab_op2_mode = insn_data[icode].operand[2].mode;
5109 if (!VECTOR_MODE_P (optab_op2_mode))
5111 if (dump_enabled_p ())
5112 dump_printf_loc (MSG_NOTE, vect_location,
5113 "operand 1 using scalar mode.\n");
5114 vec_oprnd1 = op1;
5115 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5116 vec_oprnds1.quick_push (vec_oprnd1);
5117 if (slp_node)
5119 /* Store vec_oprnd1 for every vector stmt to be created
5120 for SLP_NODE. We check during the analysis that all
5121 the shift arguments are the same.
5122 TODO: Allow different constants for different vector
5123 stmts generated for an SLP instance. */
5124 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5125 vec_oprnds1.quick_push (vec_oprnd1);
5130 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5131 (a special case for certain kind of vector shifts); otherwise,
5132 operand 1 should be of a vector type (the usual case). */
5133 if (vec_oprnd1)
5134 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5135 slp_node, -1);
5136 else
5137 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5138 slp_node, -1);
5140 else
5141 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5143 /* Arguments are ready. Create the new vector stmt. */
5144 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5146 vop1 = vec_oprnds1[i];
5147 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5148 new_temp = make_ssa_name (vec_dest, new_stmt);
5149 gimple_assign_set_lhs (new_stmt, new_temp);
5150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5151 if (slp_node)
5152 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5155 if (slp_node)
5156 continue;
5158 if (j == 0)
5159 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5160 else
5161 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5162 prev_stmt_info = vinfo_for_stmt (new_stmt);
5165 vec_oprnds0.release ();
5166 vec_oprnds1.release ();
5168 return true;
5172 /* Function vectorizable_operation.
5174 Check if STMT performs a binary, unary or ternary operation that can
5175 be vectorized.
5176 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5177 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5178 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5180 static bool
5181 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5182 gimple **vec_stmt, slp_tree slp_node)
5184 tree vec_dest;
5185 tree scalar_dest;
5186 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5188 tree vectype;
5189 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5190 enum tree_code code;
5191 machine_mode vec_mode;
5192 tree new_temp;
5193 int op_type;
5194 optab optab;
5195 bool target_support_p;
5196 gimple *def_stmt;
5197 enum vect_def_type dt[3]
5198 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5199 gimple *new_stmt = NULL;
5200 stmt_vec_info prev_stmt_info;
5201 int nunits_in;
5202 int nunits_out;
5203 tree vectype_out;
5204 int ncopies;
5205 int j, i;
5206 vec<tree> vec_oprnds0 = vNULL;
5207 vec<tree> vec_oprnds1 = vNULL;
5208 vec<tree> vec_oprnds2 = vNULL;
5209 tree vop0, vop1, vop2;
5210 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5211 vec_info *vinfo = stmt_info->vinfo;
5212 int vf;
5214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5215 return false;
5217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5218 && ! vec_stmt)
5219 return false;
5221 /* Is STMT a vectorizable binary/unary operation? */
5222 if (!is_gimple_assign (stmt))
5223 return false;
5225 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5226 return false;
5228 code = gimple_assign_rhs_code (stmt);
5230 /* For pointer addition, we should use the normal plus for
5231 the vector addition. */
5232 if (code == POINTER_PLUS_EXPR)
5233 code = PLUS_EXPR;
5235 /* Support only unary or binary operations. */
5236 op_type = TREE_CODE_LENGTH (code);
5237 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5239 if (dump_enabled_p ())
5240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5241 "num. args = %d (not unary/binary/ternary op).\n",
5242 op_type);
5243 return false;
5246 scalar_dest = gimple_assign_lhs (stmt);
5247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5249 /* Most operations cannot handle bit-precision types without extra
5250 truncations. */
5251 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5252 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5253 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5254 /* Exception are bitwise binary operations. */
5255 && code != BIT_IOR_EXPR
5256 && code != BIT_XOR_EXPR
5257 && code != BIT_AND_EXPR)
5259 if (dump_enabled_p ())
5260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5261 "bit-precision arithmetic not supported.\n");
5262 return false;
5265 op0 = gimple_assign_rhs1 (stmt);
5266 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5268 if (dump_enabled_p ())
5269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5270 "use not simple.\n");
5271 return false;
5273 /* If op0 is an external or constant def use a vector type with
5274 the same size as the output vector type. */
5275 if (!vectype)
5277 /* For boolean type we cannot determine vectype by
5278 invariant value (don't know whether it is a vector
5279 of booleans or vector of integers). We use output
5280 vectype because operations on boolean don't change
5281 type. */
5282 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5284 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5286 if (dump_enabled_p ())
5287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5288 "not supported operation on bool value.\n");
5289 return false;
5291 vectype = vectype_out;
5293 else
5294 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5296 if (vec_stmt)
5297 gcc_assert (vectype);
5298 if (!vectype)
5300 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "no vectype for scalar type ");
5304 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5305 TREE_TYPE (op0));
5306 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5309 return false;
5312 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5313 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5314 if (nunits_out != nunits_in)
5315 return false;
5317 if (op_type == binary_op || op_type == ternary_op)
5319 op1 = gimple_assign_rhs2 (stmt);
5320 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "use not simple.\n");
5325 return false;
5328 if (op_type == ternary_op)
5330 op2 = gimple_assign_rhs3 (stmt);
5331 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "use not simple.\n");
5336 return false;
5340 if (loop_vinfo)
5341 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5342 else
5343 vf = 1;
5345 /* Multiple types in SLP are handled by creating the appropriate number of
5346 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5347 case of SLP. */
5348 if (slp_node)
5349 ncopies = 1;
5350 else
5351 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5353 gcc_assert (ncopies >= 1);
5355 /* Shifts are handled in vectorizable_shift (). */
5356 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5357 || code == RROTATE_EXPR)
5358 return false;
5360 /* Supportable by target? */
5362 vec_mode = TYPE_MODE (vectype);
5363 if (code == MULT_HIGHPART_EXPR)
5364 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5365 else
5367 optab = optab_for_tree_code (code, vectype, optab_default);
5368 if (!optab)
5370 if (dump_enabled_p ())
5371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5372 "no optab.\n");
5373 return false;
5375 target_support_p = (optab_handler (optab, vec_mode)
5376 != CODE_FOR_nothing);
5379 if (!target_support_p)
5381 if (dump_enabled_p ())
5382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5383 "op not supported by target.\n");
5384 /* Check only during analysis. */
5385 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5386 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5387 return false;
5388 if (dump_enabled_p ())
5389 dump_printf_loc (MSG_NOTE, vect_location,
5390 "proceeding using word mode.\n");
5393 /* Worthwhile without SIMD support? Check only during analysis. */
5394 if (!VECTOR_MODE_P (vec_mode)
5395 && !vec_stmt
5396 && vf < vect_min_worthwhile_factor (code))
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400 "not worthwhile without SIMD support.\n");
5401 return false;
5404 if (!vec_stmt) /* transformation not required. */
5406 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_NOTE, vect_location,
5409 "=== vectorizable_operation ===\n");
5410 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5411 return true;
5414 /** Transform. **/
5416 if (dump_enabled_p ())
5417 dump_printf_loc (MSG_NOTE, vect_location,
5418 "transform binary/unary operation.\n");
5420 /* Handle def. */
5421 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5423 /* In case the vectorization factor (VF) is bigger than the number
5424 of elements that we can fit in a vectype (nunits), we have to generate
5425 more than one vector stmt - i.e - we need to "unroll" the
5426 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5427 from one copy of the vector stmt to the next, in the field
5428 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5429 stages to find the correct vector defs to be used when vectorizing
5430 stmts that use the defs of the current stmt. The example below
5431 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5432 we need to create 4 vectorized stmts):
5434 before vectorization:
5435 RELATED_STMT VEC_STMT
5436 S1: x = memref - -
5437 S2: z = x + 1 - -
5439 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5440 there):
5441 RELATED_STMT VEC_STMT
5442 VS1_0: vx0 = memref0 VS1_1 -
5443 VS1_1: vx1 = memref1 VS1_2 -
5444 VS1_2: vx2 = memref2 VS1_3 -
5445 VS1_3: vx3 = memref3 - -
5446 S1: x = load - VS1_0
5447 S2: z = x + 1 - -
5449 step2: vectorize stmt S2 (done here):
5450 To vectorize stmt S2 we first need to find the relevant vector
5451 def for the first operand 'x'. This is, as usual, obtained from
5452 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5453 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5454 relevant vector def 'vx0'. Having found 'vx0' we can generate
5455 the vector stmt VS2_0, and as usual, record it in the
5456 STMT_VINFO_VEC_STMT of stmt S2.
5457 When creating the second copy (VS2_1), we obtain the relevant vector
5458 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5459 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5460 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5461 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5462 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5463 chain of stmts and pointers:
5464 RELATED_STMT VEC_STMT
5465 VS1_0: vx0 = memref0 VS1_1 -
5466 VS1_1: vx1 = memref1 VS1_2 -
5467 VS1_2: vx2 = memref2 VS1_3 -
5468 VS1_3: vx3 = memref3 - -
5469 S1: x = load - VS1_0
5470 VS2_0: vz0 = vx0 + v1 VS2_1 -
5471 VS2_1: vz1 = vx1 + v1 VS2_2 -
5472 VS2_2: vz2 = vx2 + v1 VS2_3 -
5473 VS2_3: vz3 = vx3 + v1 - -
5474 S2: z = x + 1 - VS2_0 */
5476 prev_stmt_info = NULL;
5477 for (j = 0; j < ncopies; j++)
5479 /* Handle uses. */
5480 if (j == 0)
5482 if (op_type == binary_op || op_type == ternary_op)
5483 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5484 slp_node, -1);
5485 else
5486 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5487 slp_node, -1);
5488 if (op_type == ternary_op)
5489 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5490 slp_node, -1);
5492 else
5494 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5495 if (op_type == ternary_op)
5497 tree vec_oprnd = vec_oprnds2.pop ();
5498 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5499 vec_oprnd));
5503 /* Arguments are ready. Create the new vector stmt. */
5504 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5506 vop1 = ((op_type == binary_op || op_type == ternary_op)
5507 ? vec_oprnds1[i] : NULL_TREE);
5508 vop2 = ((op_type == ternary_op)
5509 ? vec_oprnds2[i] : NULL_TREE);
5510 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5511 new_temp = make_ssa_name (vec_dest, new_stmt);
5512 gimple_assign_set_lhs (new_stmt, new_temp);
5513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5514 if (slp_node)
5515 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5518 if (slp_node)
5519 continue;
5521 if (j == 0)
5522 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5523 else
5524 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5525 prev_stmt_info = vinfo_for_stmt (new_stmt);
5528 vec_oprnds0.release ();
5529 vec_oprnds1.release ();
5530 vec_oprnds2.release ();
5532 return true;
5535 /* A helper function to ensure data reference DR's base alignment
5536 for STMT_INFO. */
5538 static void
5539 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5541 if (!dr->aux)
5542 return;
5544 if (DR_VECT_AUX (dr)->base_misaligned)
5546 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5547 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5549 if (decl_in_symtab_p (base_decl))
5550 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5551 else
5553 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5554 DECL_USER_ALIGN (base_decl) = 1;
5556 DR_VECT_AUX (dr)->base_misaligned = false;
5561 /* Function get_group_alias_ptr_type.
5563 Return the alias type for the group starting at FIRST_STMT. */
5565 static tree
5566 get_group_alias_ptr_type (gimple *first_stmt)
5568 struct data_reference *first_dr, *next_dr;
5569 gimple *next_stmt;
5571 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5572 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5573 while (next_stmt)
5575 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5576 if (get_alias_set (DR_REF (first_dr))
5577 != get_alias_set (DR_REF (next_dr)))
5579 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_NOTE, vect_location,
5581 "conflicting alias set types.\n");
5582 return ptr_type_node;
5584 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5586 return reference_alias_ptr_type (DR_REF (first_dr));
5590 /* Function vectorizable_store.
5592 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5593 can be vectorized.
5594 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5595 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5596 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5598 static bool
5599 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5600 slp_tree slp_node)
5602 tree scalar_dest;
5603 tree data_ref;
5604 tree op;
5605 tree vec_oprnd = NULL_TREE;
5606 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5607 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5608 tree elem_type;
5609 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5610 struct loop *loop = NULL;
5611 machine_mode vec_mode;
5612 tree dummy;
5613 enum dr_alignment_support alignment_support_scheme;
5614 gimple *def_stmt;
5615 enum vect_def_type dt;
5616 stmt_vec_info prev_stmt_info = NULL;
5617 tree dataref_ptr = NULL_TREE;
5618 tree dataref_offset = NULL_TREE;
5619 gimple *ptr_incr = NULL;
5620 int ncopies;
5621 int j;
5622 gimple *next_stmt, *first_stmt;
5623 bool grouped_store;
5624 unsigned int group_size, i;
5625 vec<tree> oprnds = vNULL;
5626 vec<tree> result_chain = vNULL;
5627 bool inv_p;
5628 tree offset = NULL_TREE;
5629 vec<tree> vec_oprnds = vNULL;
5630 bool slp = (slp_node != NULL);
5631 unsigned int vec_num;
5632 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5633 vec_info *vinfo = stmt_info->vinfo;
5634 tree aggr_type;
5635 gather_scatter_info gs_info;
5636 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5637 gimple *new_stmt;
5638 int vf;
5639 vec_load_store_type vls_type;
5640 tree ref_type;
5642 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5643 return false;
5645 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5646 && ! vec_stmt)
5647 return false;
5649 /* Is vectorizable store? */
5651 if (!is_gimple_assign (stmt))
5652 return false;
5654 scalar_dest = gimple_assign_lhs (stmt);
5655 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5656 && is_pattern_stmt_p (stmt_info))
5657 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5658 if (TREE_CODE (scalar_dest) != ARRAY_REF
5659 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5660 && TREE_CODE (scalar_dest) != INDIRECT_REF
5661 && TREE_CODE (scalar_dest) != COMPONENT_REF
5662 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5663 && TREE_CODE (scalar_dest) != REALPART_EXPR
5664 && TREE_CODE (scalar_dest) != MEM_REF)
5665 return false;
5667 /* Cannot have hybrid store SLP -- that would mean storing to the
5668 same location twice. */
5669 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5671 gcc_assert (gimple_assign_single_p (stmt));
5673 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5674 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5676 if (loop_vinfo)
5678 loop = LOOP_VINFO_LOOP (loop_vinfo);
5679 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5681 else
5682 vf = 1;
5684 /* Multiple types in SLP are handled by creating the appropriate number of
5685 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5686 case of SLP. */
5687 if (slp)
5688 ncopies = 1;
5689 else
5690 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5692 gcc_assert (ncopies >= 1);
5694 /* FORNOW. This restriction should be relaxed. */
5695 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5699 "multiple types in nested loop.\n");
5700 return false;
5703 op = gimple_assign_rhs1 (stmt);
5705 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "use not simple.\n");
5710 return false;
5713 if (dt == vect_constant_def || dt == vect_external_def)
5714 vls_type = VLS_STORE_INVARIANT;
5715 else
5716 vls_type = VLS_STORE;
5718 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5719 return false;
5721 elem_type = TREE_TYPE (vectype);
5722 vec_mode = TYPE_MODE (vectype);
5724 /* FORNOW. In some cases can vectorize even if data-type not supported
5725 (e.g. - array initialization with 0). */
5726 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5727 return false;
5729 if (!STMT_VINFO_DATA_REF (stmt_info))
5730 return false;
5732 vect_memory_access_type memory_access_type;
5733 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5734 &memory_access_type, &gs_info))
5735 return false;
5737 if (!vec_stmt) /* transformation not required. */
5739 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5740 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5741 /* The SLP costs are calculated during SLP analysis. */
5742 if (!PURE_SLP_STMT (stmt_info))
5743 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5744 NULL, NULL, NULL);
5745 return true;
5747 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5749 /** Transform. **/
5751 ensure_base_align (stmt_info, dr);
5753 if (memory_access_type == VMAT_GATHER_SCATTER)
5755 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5756 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5757 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5758 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5759 edge pe = loop_preheader_edge (loop);
5760 gimple_seq seq;
5761 basic_block new_bb;
5762 enum { NARROW, NONE, WIDEN } modifier;
5763 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5765 if (nunits == (unsigned int) scatter_off_nunits)
5766 modifier = NONE;
5767 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5769 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5770 modifier = WIDEN;
5772 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5773 sel[i] = i | nunits;
5775 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5776 gcc_assert (perm_mask != NULL_TREE);
5778 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5780 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5781 modifier = NARROW;
5783 for (i = 0; i < (unsigned int) nunits; ++i)
5784 sel[i] = i | scatter_off_nunits;
5786 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5787 gcc_assert (perm_mask != NULL_TREE);
5788 ncopies *= 2;
5790 else
5791 gcc_unreachable ();
5793 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5794 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5795 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5796 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5797 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5798 scaletype = TREE_VALUE (arglist);
5800 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5801 && TREE_CODE (rettype) == VOID_TYPE);
5803 ptr = fold_convert (ptrtype, gs_info.base);
5804 if (!is_gimple_min_invariant (ptr))
5806 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5807 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5808 gcc_assert (!new_bb);
5811 /* Currently we support only unconditional scatter stores,
5812 so mask should be all ones. */
5813 mask = build_int_cst (masktype, -1);
5814 mask = vect_init_vector (stmt, mask, masktype, NULL);
5816 scale = build_int_cst (scaletype, gs_info.scale);
5818 prev_stmt_info = NULL;
5819 for (j = 0; j < ncopies; ++j)
5821 if (j == 0)
5823 src = vec_oprnd1
5824 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5825 op = vec_oprnd0
5826 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5828 else if (modifier != NONE && (j & 1))
5830 if (modifier == WIDEN)
5832 src = vec_oprnd1
5833 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5834 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5835 stmt, gsi);
5837 else if (modifier == NARROW)
5839 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5840 stmt, gsi);
5841 op = vec_oprnd0
5842 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5843 vec_oprnd0);
5845 else
5846 gcc_unreachable ();
5848 else
5850 src = vec_oprnd1
5851 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5852 op = vec_oprnd0
5853 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5854 vec_oprnd0);
5857 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5859 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5860 == TYPE_VECTOR_SUBPARTS (srctype));
5861 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5862 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5863 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5864 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5865 src = var;
5868 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5870 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5871 == TYPE_VECTOR_SUBPARTS (idxtype));
5872 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5873 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5874 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5875 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5876 op = var;
5879 new_stmt
5880 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5884 if (prev_stmt_info == NULL)
5885 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5886 else
5887 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5888 prev_stmt_info = vinfo_for_stmt (new_stmt);
5890 return true;
5893 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5894 if (grouped_store)
5896 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5897 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5898 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5900 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5902 /* FORNOW */
5903 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5905 /* We vectorize all the stmts of the interleaving group when we
5906 reach the last stmt in the group. */
5907 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5908 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5909 && !slp)
5911 *vec_stmt = NULL;
5912 return true;
5915 if (slp)
5917 grouped_store = false;
5918 /* VEC_NUM is the number of vect stmts to be created for this
5919 group. */
5920 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5921 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5922 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5923 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5924 op = gimple_assign_rhs1 (first_stmt);
5926 else
5927 /* VEC_NUM is the number of vect stmts to be created for this
5928 group. */
5929 vec_num = group_size;
5931 ref_type = get_group_alias_ptr_type (first_stmt);
5933 else
5935 first_stmt = stmt;
5936 first_dr = dr;
5937 group_size = vec_num = 1;
5938 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5941 if (dump_enabled_p ())
5942 dump_printf_loc (MSG_NOTE, vect_location,
5943 "transform store. ncopies = %d\n", ncopies);
5945 if (memory_access_type == VMAT_ELEMENTWISE
5946 || memory_access_type == VMAT_STRIDED_SLP)
5948 gimple_stmt_iterator incr_gsi;
5949 bool insert_after;
5950 gimple *incr;
5951 tree offvar;
5952 tree ivstep;
5953 tree running_off;
5954 gimple_seq stmts = NULL;
5955 tree stride_base, stride_step, alias_off;
5956 tree vec_oprnd;
5957 unsigned int g;
5959 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5961 stride_base
5962 = fold_build_pointer_plus
5963 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5964 size_binop (PLUS_EXPR,
5965 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5966 convert_to_ptrofftype (DR_INIT (first_dr))));
5967 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5969 /* For a store with loop-invariant (but other than power-of-2)
5970 stride (i.e. not a grouped access) like so:
5972 for (i = 0; i < n; i += stride)
5973 array[i] = ...;
5975 we generate a new induction variable and new stores from
5976 the components of the (vectorized) rhs:
5978 for (j = 0; ; j += VF*stride)
5979 vectemp = ...;
5980 tmp1 = vectemp[0];
5981 array[j] = tmp1;
5982 tmp2 = vectemp[1];
5983 array[j + stride] = tmp2;
5987 unsigned nstores = nunits;
5988 unsigned lnel = 1;
5989 tree ltype = elem_type;
5990 if (slp)
5992 if (group_size < nunits
5993 && nunits % group_size == 0)
5995 nstores = nunits / group_size;
5996 lnel = group_size;
5997 ltype = build_vector_type (elem_type, group_size);
5999 else if (group_size >= nunits
6000 && group_size % nunits == 0)
6002 nstores = 1;
6003 lnel = nunits;
6004 ltype = vectype;
6006 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6007 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6010 ivstep = stride_step;
6011 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6012 build_int_cst (TREE_TYPE (ivstep), vf));
6014 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6016 create_iv (stride_base, ivstep, NULL,
6017 loop, &incr_gsi, insert_after,
6018 &offvar, NULL);
6019 incr = gsi_stmt (incr_gsi);
6020 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6022 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6023 if (stmts)
6024 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6026 prev_stmt_info = NULL;
6027 alias_off = build_int_cst (ref_type, 0);
6028 next_stmt = first_stmt;
6029 for (g = 0; g < group_size; g++)
6031 running_off = offvar;
6032 if (g)
6034 tree size = TYPE_SIZE_UNIT (ltype);
6035 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6036 size);
6037 tree newoff = copy_ssa_name (running_off, NULL);
6038 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6039 running_off, pos);
6040 vect_finish_stmt_generation (stmt, incr, gsi);
6041 running_off = newoff;
6043 unsigned int group_el = 0;
6044 unsigned HOST_WIDE_INT
6045 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6046 for (j = 0; j < ncopies; j++)
6048 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6049 and first_stmt == stmt. */
6050 if (j == 0)
6052 if (slp)
6054 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6055 slp_node, -1);
6056 vec_oprnd = vec_oprnds[0];
6058 else
6060 gcc_assert (gimple_assign_single_p (next_stmt));
6061 op = gimple_assign_rhs1 (next_stmt);
6062 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6065 else
6067 if (slp)
6068 vec_oprnd = vec_oprnds[j];
6069 else
6071 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6072 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6076 for (i = 0; i < nstores; i++)
6078 tree newref, newoff;
6079 gimple *incr, *assign;
6080 tree size = TYPE_SIZE (ltype);
6081 /* Extract the i'th component. */
6082 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6083 bitsize_int (i), size);
6084 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6085 size, pos);
6087 elem = force_gimple_operand_gsi (gsi, elem, true,
6088 NULL_TREE, true,
6089 GSI_SAME_STMT);
6091 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6092 group_el * elsz);
6093 newref = build2 (MEM_REF, ltype,
6094 running_off, this_off);
6096 /* And store it to *running_off. */
6097 assign = gimple_build_assign (newref, elem);
6098 vect_finish_stmt_generation (stmt, assign, gsi);
6100 group_el += lnel;
6101 if (! slp
6102 || group_el == group_size)
6104 newoff = copy_ssa_name (running_off, NULL);
6105 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6106 running_off, stride_step);
6107 vect_finish_stmt_generation (stmt, incr, gsi);
6109 running_off = newoff;
6110 group_el = 0;
6112 if (g == group_size - 1
6113 && !slp)
6115 if (j == 0 && i == 0)
6116 STMT_VINFO_VEC_STMT (stmt_info)
6117 = *vec_stmt = assign;
6118 else
6119 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6120 prev_stmt_info = vinfo_for_stmt (assign);
6124 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6125 if (slp)
6126 break;
6128 return true;
6131 auto_vec<tree> dr_chain (group_size);
6132 oprnds.create (group_size);
6134 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6135 gcc_assert (alignment_support_scheme);
6136 /* Targets with store-lane instructions must not require explicit
6137 realignment. */
6138 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6139 || alignment_support_scheme == dr_aligned
6140 || alignment_support_scheme == dr_unaligned_supported);
6142 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6143 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6144 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6146 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6147 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6148 else
6149 aggr_type = vectype;
6151 /* In case the vectorization factor (VF) is bigger than the number
6152 of elements that we can fit in a vectype (nunits), we have to generate
6153 more than one vector stmt - i.e - we need to "unroll" the
6154 vector stmt by a factor VF/nunits. For more details see documentation in
6155 vect_get_vec_def_for_copy_stmt. */
6157 /* In case of interleaving (non-unit grouped access):
6159 S1: &base + 2 = x2
6160 S2: &base = x0
6161 S3: &base + 1 = x1
6162 S4: &base + 3 = x3
6164 We create vectorized stores starting from base address (the access of the
6165 first stmt in the chain (S2 in the above example), when the last store stmt
6166 of the chain (S4) is reached:
6168 VS1: &base = vx2
6169 VS2: &base + vec_size*1 = vx0
6170 VS3: &base + vec_size*2 = vx1
6171 VS4: &base + vec_size*3 = vx3
6173 Then permutation statements are generated:
6175 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6176 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6179 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6180 (the order of the data-refs in the output of vect_permute_store_chain
6181 corresponds to the order of scalar stmts in the interleaving chain - see
6182 the documentation of vect_permute_store_chain()).
6184 In case of both multiple types and interleaving, above vector stores and
6185 permutation stmts are created for every copy. The result vector stmts are
6186 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6187 STMT_VINFO_RELATED_STMT for the next copies.
6190 prev_stmt_info = NULL;
6191 for (j = 0; j < ncopies; j++)
6194 if (j == 0)
6196 if (slp)
6198 /* Get vectorized arguments for SLP_NODE. */
6199 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6200 NULL, slp_node, -1);
6202 vec_oprnd = vec_oprnds[0];
6204 else
6206 /* For interleaved stores we collect vectorized defs for all the
6207 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6208 used as an input to vect_permute_store_chain(), and OPRNDS as
6209 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6211 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6212 OPRNDS are of size 1. */
6213 next_stmt = first_stmt;
6214 for (i = 0; i < group_size; i++)
6216 /* Since gaps are not supported for interleaved stores,
6217 GROUP_SIZE is the exact number of stmts in the chain.
6218 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6219 there is no interleaving, GROUP_SIZE is 1, and only one
6220 iteration of the loop will be executed. */
6221 gcc_assert (next_stmt
6222 && gimple_assign_single_p (next_stmt));
6223 op = gimple_assign_rhs1 (next_stmt);
6225 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6226 dr_chain.quick_push (vec_oprnd);
6227 oprnds.quick_push (vec_oprnd);
6228 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6232 /* We should have catched mismatched types earlier. */
6233 gcc_assert (useless_type_conversion_p (vectype,
6234 TREE_TYPE (vec_oprnd)));
6235 bool simd_lane_access_p
6236 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6237 if (simd_lane_access_p
6238 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6239 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6240 && integer_zerop (DR_OFFSET (first_dr))
6241 && integer_zerop (DR_INIT (first_dr))
6242 && alias_sets_conflict_p (get_alias_set (aggr_type),
6243 get_alias_set (TREE_TYPE (ref_type))))
6245 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6246 dataref_offset = build_int_cst (ref_type, 0);
6247 inv_p = false;
6249 else
6250 dataref_ptr
6251 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6252 simd_lane_access_p ? loop : NULL,
6253 offset, &dummy, gsi, &ptr_incr,
6254 simd_lane_access_p, &inv_p);
6255 gcc_assert (bb_vinfo || !inv_p);
6257 else
6259 /* For interleaved stores we created vectorized defs for all the
6260 defs stored in OPRNDS in the previous iteration (previous copy).
6261 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6262 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6263 next copy.
6264 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6265 OPRNDS are of size 1. */
6266 for (i = 0; i < group_size; i++)
6268 op = oprnds[i];
6269 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6270 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6271 dr_chain[i] = vec_oprnd;
6272 oprnds[i] = vec_oprnd;
6274 if (dataref_offset)
6275 dataref_offset
6276 = int_const_binop (PLUS_EXPR, dataref_offset,
6277 TYPE_SIZE_UNIT (aggr_type));
6278 else
6279 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6280 TYPE_SIZE_UNIT (aggr_type));
6283 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6285 tree vec_array;
6287 /* Combine all the vectors into an array. */
6288 vec_array = create_vector_array (vectype, vec_num);
6289 for (i = 0; i < vec_num; i++)
6291 vec_oprnd = dr_chain[i];
6292 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6295 /* Emit:
6296 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6297 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6298 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6299 gimple_call_set_lhs (new_stmt, data_ref);
6300 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6302 else
6304 new_stmt = NULL;
6305 if (grouped_store)
6307 if (j == 0)
6308 result_chain.create (group_size);
6309 /* Permute. */
6310 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6311 &result_chain);
6314 next_stmt = first_stmt;
6315 for (i = 0; i < vec_num; i++)
6317 unsigned align, misalign;
6319 if (i > 0)
6320 /* Bump the vector pointer. */
6321 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6322 stmt, NULL_TREE);
6324 if (slp)
6325 vec_oprnd = vec_oprnds[i];
6326 else if (grouped_store)
6327 /* For grouped stores vectorized defs are interleaved in
6328 vect_permute_store_chain(). */
6329 vec_oprnd = result_chain[i];
6331 data_ref = fold_build2 (MEM_REF, vectype,
6332 dataref_ptr,
6333 dataref_offset
6334 ? dataref_offset
6335 : build_int_cst (ref_type, 0));
6336 align = TYPE_ALIGN_UNIT (vectype);
6337 if (aligned_access_p (first_dr))
6338 misalign = 0;
6339 else if (DR_MISALIGNMENT (first_dr) == -1)
6341 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6342 align = TYPE_ALIGN_UNIT (elem_type);
6343 else
6344 align = get_object_alignment (DR_REF (first_dr))
6345 / BITS_PER_UNIT;
6346 misalign = 0;
6347 TREE_TYPE (data_ref)
6348 = build_aligned_type (TREE_TYPE (data_ref),
6349 align * BITS_PER_UNIT);
6351 else
6353 TREE_TYPE (data_ref)
6354 = build_aligned_type (TREE_TYPE (data_ref),
6355 TYPE_ALIGN (elem_type));
6356 misalign = DR_MISALIGNMENT (first_dr);
6358 if (dataref_offset == NULL_TREE
6359 && TREE_CODE (dataref_ptr) == SSA_NAME)
6360 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6361 misalign);
6363 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6365 tree perm_mask = perm_mask_for_reverse (vectype);
6366 tree perm_dest
6367 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6368 vectype);
6369 tree new_temp = make_ssa_name (perm_dest);
6371 /* Generate the permute statement. */
6372 gimple *perm_stmt
6373 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6374 vec_oprnd, perm_mask);
6375 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6377 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6378 vec_oprnd = new_temp;
6381 /* Arguments are ready. Create the new vector stmt. */
6382 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6383 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6385 if (slp)
6386 continue;
6388 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6389 if (!next_stmt)
6390 break;
6393 if (!slp)
6395 if (j == 0)
6396 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6397 else
6398 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6399 prev_stmt_info = vinfo_for_stmt (new_stmt);
6403 oprnds.release ();
6404 result_chain.release ();
6405 vec_oprnds.release ();
6407 return true;
6410 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6411 VECTOR_CST mask. No checks are made that the target platform supports the
6412 mask, so callers may wish to test can_vec_perm_p separately, or use
6413 vect_gen_perm_mask_checked. */
6415 tree
6416 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6418 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6419 int i, nunits;
6421 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6423 mask_elt_type = lang_hooks.types.type_for_mode
6424 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6425 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6427 mask_elts = XALLOCAVEC (tree, nunits);
6428 for (i = nunits - 1; i >= 0; i--)
6429 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6430 mask_vec = build_vector (mask_type, mask_elts);
6432 return mask_vec;
6435 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6436 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6438 tree
6439 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6441 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6442 return vect_gen_perm_mask_any (vectype, sel);
6445 /* Given a vector variable X and Y, that was generated for the scalar
6446 STMT, generate instructions to permute the vector elements of X and Y
6447 using permutation mask MASK_VEC, insert them at *GSI and return the
6448 permuted vector variable. */
6450 static tree
6451 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6452 gimple_stmt_iterator *gsi)
6454 tree vectype = TREE_TYPE (x);
6455 tree perm_dest, data_ref;
6456 gimple *perm_stmt;
6458 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6459 data_ref = make_ssa_name (perm_dest);
6461 /* Generate the permute statement. */
6462 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6463 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6465 return data_ref;
6468 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6469 inserting them on the loops preheader edge. Returns true if we
6470 were successful in doing so (and thus STMT can be moved then),
6471 otherwise returns false. */
6473 static bool
6474 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6476 ssa_op_iter i;
6477 tree op;
6478 bool any = false;
6480 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6482 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6483 if (!gimple_nop_p (def_stmt)
6484 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6486 /* Make sure we don't need to recurse. While we could do
6487 so in simple cases when there are more complex use webs
6488 we don't have an easy way to preserve stmt order to fulfil
6489 dependencies within them. */
6490 tree op2;
6491 ssa_op_iter i2;
6492 if (gimple_code (def_stmt) == GIMPLE_PHI)
6493 return false;
6494 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6496 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6497 if (!gimple_nop_p (def_stmt2)
6498 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6499 return false;
6501 any = true;
6505 if (!any)
6506 return true;
6508 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6510 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6511 if (!gimple_nop_p (def_stmt)
6512 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6514 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6515 gsi_remove (&gsi, false);
6516 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6520 return true;
6523 /* vectorizable_load.
6525 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6526 can be vectorized.
6527 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6528 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6529 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6531 static bool
6532 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6533 slp_tree slp_node, slp_instance slp_node_instance)
6535 tree scalar_dest;
6536 tree vec_dest = NULL;
6537 tree data_ref = NULL;
6538 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6539 stmt_vec_info prev_stmt_info;
6540 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6541 struct loop *loop = NULL;
6542 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6543 bool nested_in_vect_loop = false;
6544 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6545 tree elem_type;
6546 tree new_temp;
6547 machine_mode mode;
6548 gimple *new_stmt = NULL;
6549 tree dummy;
6550 enum dr_alignment_support alignment_support_scheme;
6551 tree dataref_ptr = NULL_TREE;
6552 tree dataref_offset = NULL_TREE;
6553 gimple *ptr_incr = NULL;
6554 int ncopies;
6555 int i, j, group_size, group_gap_adj;
6556 tree msq = NULL_TREE, lsq;
6557 tree offset = NULL_TREE;
6558 tree byte_offset = NULL_TREE;
6559 tree realignment_token = NULL_TREE;
6560 gphi *phi = NULL;
6561 vec<tree> dr_chain = vNULL;
6562 bool grouped_load = false;
6563 gimple *first_stmt;
6564 gimple *first_stmt_for_drptr = NULL;
6565 bool inv_p;
6566 bool compute_in_loop = false;
6567 struct loop *at_loop;
6568 int vec_num;
6569 bool slp = (slp_node != NULL);
6570 bool slp_perm = false;
6571 enum tree_code code;
6572 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6573 int vf;
6574 tree aggr_type;
6575 gather_scatter_info gs_info;
6576 vec_info *vinfo = stmt_info->vinfo;
6577 tree ref_type;
6579 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6580 return false;
6582 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6583 && ! vec_stmt)
6584 return false;
6586 /* Is vectorizable load? */
6587 if (!is_gimple_assign (stmt))
6588 return false;
6590 scalar_dest = gimple_assign_lhs (stmt);
6591 if (TREE_CODE (scalar_dest) != SSA_NAME)
6592 return false;
6594 code = gimple_assign_rhs_code (stmt);
6595 if (code != ARRAY_REF
6596 && code != BIT_FIELD_REF
6597 && code != INDIRECT_REF
6598 && code != COMPONENT_REF
6599 && code != IMAGPART_EXPR
6600 && code != REALPART_EXPR
6601 && code != MEM_REF
6602 && TREE_CODE_CLASS (code) != tcc_declaration)
6603 return false;
6605 if (!STMT_VINFO_DATA_REF (stmt_info))
6606 return false;
6608 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6609 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6611 if (loop_vinfo)
6613 loop = LOOP_VINFO_LOOP (loop_vinfo);
6614 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6615 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6617 else
6618 vf = 1;
6620 /* Multiple types in SLP are handled by creating the appropriate number of
6621 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6622 case of SLP. */
6623 if (slp)
6624 ncopies = 1;
6625 else
6626 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6628 gcc_assert (ncopies >= 1);
6630 /* FORNOW. This restriction should be relaxed. */
6631 if (nested_in_vect_loop && ncopies > 1)
6633 if (dump_enabled_p ())
6634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6635 "multiple types in nested loop.\n");
6636 return false;
6639 /* Invalidate assumptions made by dependence analysis when vectorization
6640 on the unrolled body effectively re-orders stmts. */
6641 if (ncopies > 1
6642 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6643 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6644 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6648 "cannot perform implicit CSE when unrolling "
6649 "with negative dependence distance\n");
6650 return false;
6653 elem_type = TREE_TYPE (vectype);
6654 mode = TYPE_MODE (vectype);
6656 /* FORNOW. In some cases can vectorize even if data-type not supported
6657 (e.g. - data copies). */
6658 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6660 if (dump_enabled_p ())
6661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6662 "Aligned load, but unsupported type.\n");
6663 return false;
6666 /* Check if the load is a part of an interleaving chain. */
6667 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6669 grouped_load = true;
6670 /* FORNOW */
6671 gcc_assert (!nested_in_vect_loop);
6672 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6674 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6675 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6677 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6678 slp_perm = true;
6680 /* Invalidate assumptions made by dependence analysis when vectorization
6681 on the unrolled body effectively re-orders stmts. */
6682 if (!PURE_SLP_STMT (stmt_info)
6683 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6684 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6685 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6687 if (dump_enabled_p ())
6688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6689 "cannot perform implicit CSE when performing "
6690 "group loads with negative dependence distance\n");
6691 return false;
6694 /* Similarly when the stmt is a load that is both part of a SLP
6695 instance and a loop vectorized stmt via the same-dr mechanism
6696 we have to give up. */
6697 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6698 && (STMT_SLP_TYPE (stmt_info)
6699 != STMT_SLP_TYPE (vinfo_for_stmt
6700 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6702 if (dump_enabled_p ())
6703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6704 "conflicting SLP types for CSEd load\n");
6705 return false;
6709 vect_memory_access_type memory_access_type;
6710 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6711 &memory_access_type, &gs_info))
6712 return false;
6714 if (!vec_stmt) /* transformation not required. */
6716 if (!slp)
6717 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6718 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6719 /* The SLP costs are calculated during SLP analysis. */
6720 if (!PURE_SLP_STMT (stmt_info))
6721 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6722 NULL, NULL, NULL);
6723 return true;
6726 if (!slp)
6727 gcc_assert (memory_access_type
6728 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6730 if (dump_enabled_p ())
6731 dump_printf_loc (MSG_NOTE, vect_location,
6732 "transform load. ncopies = %d\n", ncopies);
6734 /** Transform. **/
6736 ensure_base_align (stmt_info, dr);
6738 if (memory_access_type == VMAT_GATHER_SCATTER)
6740 tree vec_oprnd0 = NULL_TREE, op;
6741 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6742 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6743 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6744 edge pe = loop_preheader_edge (loop);
6745 gimple_seq seq;
6746 basic_block new_bb;
6747 enum { NARROW, NONE, WIDEN } modifier;
6748 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6750 if (nunits == gather_off_nunits)
6751 modifier = NONE;
6752 else if (nunits == gather_off_nunits / 2)
6754 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6755 modifier = WIDEN;
6757 for (i = 0; i < gather_off_nunits; ++i)
6758 sel[i] = i | nunits;
6760 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6762 else if (nunits == gather_off_nunits * 2)
6764 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6765 modifier = NARROW;
6767 for (i = 0; i < nunits; ++i)
6768 sel[i] = i < gather_off_nunits
6769 ? i : i + nunits - gather_off_nunits;
6771 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6772 ncopies *= 2;
6774 else
6775 gcc_unreachable ();
6777 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6778 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6779 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6780 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6781 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6782 scaletype = TREE_VALUE (arglist);
6783 gcc_checking_assert (types_compatible_p (srctype, rettype));
6785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6787 ptr = fold_convert (ptrtype, gs_info.base);
6788 if (!is_gimple_min_invariant (ptr))
6790 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6791 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6792 gcc_assert (!new_bb);
6795 /* Currently we support only unconditional gather loads,
6796 so mask should be all ones. */
6797 if (TREE_CODE (masktype) == INTEGER_TYPE)
6798 mask = build_int_cst (masktype, -1);
6799 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6801 mask = build_int_cst (TREE_TYPE (masktype), -1);
6802 mask = build_vector_from_val (masktype, mask);
6803 mask = vect_init_vector (stmt, mask, masktype, NULL);
6805 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6807 REAL_VALUE_TYPE r;
6808 long tmp[6];
6809 for (j = 0; j < 6; ++j)
6810 tmp[j] = -1;
6811 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6812 mask = build_real (TREE_TYPE (masktype), r);
6813 mask = build_vector_from_val (masktype, mask);
6814 mask = vect_init_vector (stmt, mask, masktype, NULL);
6816 else
6817 gcc_unreachable ();
6819 scale = build_int_cst (scaletype, gs_info.scale);
6821 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6822 merge = build_int_cst (TREE_TYPE (rettype), 0);
6823 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6825 REAL_VALUE_TYPE r;
6826 long tmp[6];
6827 for (j = 0; j < 6; ++j)
6828 tmp[j] = 0;
6829 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6830 merge = build_real (TREE_TYPE (rettype), r);
6832 else
6833 gcc_unreachable ();
6834 merge = build_vector_from_val (rettype, merge);
6835 merge = vect_init_vector (stmt, merge, rettype, NULL);
6837 prev_stmt_info = NULL;
6838 for (j = 0; j < ncopies; ++j)
6840 if (modifier == WIDEN && (j & 1))
6841 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6842 perm_mask, stmt, gsi);
6843 else if (j == 0)
6844 op = vec_oprnd0
6845 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6846 else
6847 op = vec_oprnd0
6848 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6850 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6852 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6853 == TYPE_VECTOR_SUBPARTS (idxtype));
6854 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6855 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6856 new_stmt
6857 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6858 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6859 op = var;
6862 new_stmt
6863 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6865 if (!useless_type_conversion_p (vectype, rettype))
6867 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6868 == TYPE_VECTOR_SUBPARTS (rettype));
6869 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6870 gimple_call_set_lhs (new_stmt, op);
6871 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6872 var = make_ssa_name (vec_dest);
6873 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6874 new_stmt
6875 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6877 else
6879 var = make_ssa_name (vec_dest, new_stmt);
6880 gimple_call_set_lhs (new_stmt, var);
6883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6885 if (modifier == NARROW)
6887 if ((j & 1) == 0)
6889 prev_res = var;
6890 continue;
6892 var = permute_vec_elements (prev_res, var,
6893 perm_mask, stmt, gsi);
6894 new_stmt = SSA_NAME_DEF_STMT (var);
6897 if (prev_stmt_info == NULL)
6898 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6899 else
6900 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6901 prev_stmt_info = vinfo_for_stmt (new_stmt);
6903 return true;
6906 if (memory_access_type == VMAT_ELEMENTWISE
6907 || memory_access_type == VMAT_STRIDED_SLP)
6909 gimple_stmt_iterator incr_gsi;
6910 bool insert_after;
6911 gimple *incr;
6912 tree offvar;
6913 tree ivstep;
6914 tree running_off;
6915 vec<constructor_elt, va_gc> *v = NULL;
6916 gimple_seq stmts = NULL;
6917 tree stride_base, stride_step, alias_off;
6919 gcc_assert (!nested_in_vect_loop);
6921 if (slp && grouped_load)
6923 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6924 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6925 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6926 ref_type = get_group_alias_ptr_type (first_stmt);
6928 else
6930 first_stmt = stmt;
6931 first_dr = dr;
6932 group_size = 1;
6933 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6936 stride_base
6937 = fold_build_pointer_plus
6938 (DR_BASE_ADDRESS (first_dr),
6939 size_binop (PLUS_EXPR,
6940 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6941 convert_to_ptrofftype (DR_INIT (first_dr))));
6942 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6944 /* For a load with loop-invariant (but other than power-of-2)
6945 stride (i.e. not a grouped access) like so:
6947 for (i = 0; i < n; i += stride)
6948 ... = array[i];
6950 we generate a new induction variable and new accesses to
6951 form a new vector (or vectors, depending on ncopies):
6953 for (j = 0; ; j += VF*stride)
6954 tmp1 = array[j];
6955 tmp2 = array[j + stride];
6957 vectemp = {tmp1, tmp2, ...}
6960 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6961 build_int_cst (TREE_TYPE (stride_step), vf));
6963 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6965 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6966 loop, &incr_gsi, insert_after,
6967 &offvar, NULL);
6968 incr = gsi_stmt (incr_gsi);
6969 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6971 stride_step = force_gimple_operand (unshare_expr (stride_step),
6972 &stmts, true, NULL_TREE);
6973 if (stmts)
6974 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6976 prev_stmt_info = NULL;
6977 running_off = offvar;
6978 alias_off = build_int_cst (ref_type, 0);
6979 int nloads = nunits;
6980 int lnel = 1;
6981 tree ltype = TREE_TYPE (vectype);
6982 tree lvectype = vectype;
6983 auto_vec<tree> dr_chain;
6984 if (memory_access_type == VMAT_STRIDED_SLP)
6986 if (group_size < nunits)
6988 /* Avoid emitting a constructor of vector elements by performing
6989 the loads using an integer type of the same size,
6990 constructing a vector of those and then re-interpreting it
6991 as the original vector type. This works around the fact
6992 that the vec_init optab was only designed for scalar
6993 element modes and thus expansion goes through memory.
6994 This avoids a huge runtime penalty due to the general
6995 inability to perform store forwarding from smaller stores
6996 to a larger load. */
6997 unsigned lsize
6998 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
6999 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7000 enum machine_mode vmode = mode_for_vector (elmode,
7001 nunits / group_size);
7002 /* If we can't construct such a vector fall back to
7003 element loads of the original vector type. */
7004 if (VECTOR_MODE_P (vmode)
7005 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7007 nloads = nunits / group_size;
7008 lnel = group_size;
7009 ltype = build_nonstandard_integer_type (lsize, 1);
7010 lvectype = build_vector_type (ltype, nloads);
7013 else
7015 nloads = 1;
7016 lnel = nunits;
7017 ltype = vectype;
7019 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7021 if (slp)
7023 /* For SLP permutation support we need to load the whole group,
7024 not only the number of vector stmts the permutation result
7025 fits in. */
7026 if (slp_perm)
7028 ncopies = (group_size * vf + nunits - 1) / nunits;
7029 dr_chain.create (ncopies);
7031 else
7032 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7034 int group_el = 0;
7035 unsigned HOST_WIDE_INT
7036 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7037 for (j = 0; j < ncopies; j++)
7039 if (nloads > 1)
7040 vec_alloc (v, nloads);
7041 for (i = 0; i < nloads; i++)
7043 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7044 group_el * elsz);
7045 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7046 build2 (MEM_REF, ltype,
7047 running_off, this_off));
7048 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7049 if (nloads > 1)
7050 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7051 gimple_assign_lhs (new_stmt));
7053 group_el += lnel;
7054 if (! slp
7055 || group_el == group_size)
7057 tree newoff = copy_ssa_name (running_off);
7058 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7059 running_off, stride_step);
7060 vect_finish_stmt_generation (stmt, incr, gsi);
7062 running_off = newoff;
7063 group_el = 0;
7066 if (nloads > 1)
7068 tree vec_inv = build_constructor (lvectype, v);
7069 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7070 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7071 if (lvectype != vectype)
7073 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7074 VIEW_CONVERT_EXPR,
7075 build1 (VIEW_CONVERT_EXPR,
7076 vectype, new_temp));
7077 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7081 if (slp)
7083 if (slp_perm)
7084 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7085 else
7086 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7088 else
7090 if (j == 0)
7091 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7092 else
7093 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7094 prev_stmt_info = vinfo_for_stmt (new_stmt);
7097 if (slp_perm)
7099 unsigned n_perms;
7100 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7101 slp_node_instance, false, &n_perms);
7103 return true;
7106 if (grouped_load)
7108 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7109 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7110 /* For SLP vectorization we directly vectorize a subchain
7111 without permutation. */
7112 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7113 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7114 /* For BB vectorization always use the first stmt to base
7115 the data ref pointer on. */
7116 if (bb_vinfo)
7117 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7119 /* Check if the chain of loads is already vectorized. */
7120 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7121 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7122 ??? But we can only do so if there is exactly one
7123 as we have no way to get at the rest. Leave the CSE
7124 opportunity alone.
7125 ??? With the group load eventually participating
7126 in multiple different permutations (having multiple
7127 slp nodes which refer to the same group) the CSE
7128 is even wrong code. See PR56270. */
7129 && !slp)
7131 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7132 return true;
7134 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7135 group_gap_adj = 0;
7137 /* VEC_NUM is the number of vect stmts to be created for this group. */
7138 if (slp)
7140 grouped_load = false;
7141 /* For SLP permutation support we need to load the whole group,
7142 not only the number of vector stmts the permutation result
7143 fits in. */
7144 if (slp_perm)
7145 vec_num = (group_size * vf + nunits - 1) / nunits;
7146 else
7147 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7148 group_gap_adj = vf * group_size - nunits * vec_num;
7150 else
7151 vec_num = group_size;
7153 ref_type = get_group_alias_ptr_type (first_stmt);
7155 else
7157 first_stmt = stmt;
7158 first_dr = dr;
7159 group_size = vec_num = 1;
7160 group_gap_adj = 0;
7161 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7164 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7165 gcc_assert (alignment_support_scheme);
7166 /* Targets with load-lane instructions must not require explicit
7167 realignment. */
7168 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7169 || alignment_support_scheme == dr_aligned
7170 || alignment_support_scheme == dr_unaligned_supported);
7172 /* In case the vectorization factor (VF) is bigger than the number
7173 of elements that we can fit in a vectype (nunits), we have to generate
7174 more than one vector stmt - i.e - we need to "unroll" the
7175 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7176 from one copy of the vector stmt to the next, in the field
7177 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7178 stages to find the correct vector defs to be used when vectorizing
7179 stmts that use the defs of the current stmt. The example below
7180 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7181 need to create 4 vectorized stmts):
7183 before vectorization:
7184 RELATED_STMT VEC_STMT
7185 S1: x = memref - -
7186 S2: z = x + 1 - -
7188 step 1: vectorize stmt S1:
7189 We first create the vector stmt VS1_0, and, as usual, record a
7190 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7191 Next, we create the vector stmt VS1_1, and record a pointer to
7192 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7193 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7194 stmts and pointers:
7195 RELATED_STMT VEC_STMT
7196 VS1_0: vx0 = memref0 VS1_1 -
7197 VS1_1: vx1 = memref1 VS1_2 -
7198 VS1_2: vx2 = memref2 VS1_3 -
7199 VS1_3: vx3 = memref3 - -
7200 S1: x = load - VS1_0
7201 S2: z = x + 1 - -
7203 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7204 information we recorded in RELATED_STMT field is used to vectorize
7205 stmt S2. */
7207 /* In case of interleaving (non-unit grouped access):
7209 S1: x2 = &base + 2
7210 S2: x0 = &base
7211 S3: x1 = &base + 1
7212 S4: x3 = &base + 3
7214 Vectorized loads are created in the order of memory accesses
7215 starting from the access of the first stmt of the chain:
7217 VS1: vx0 = &base
7218 VS2: vx1 = &base + vec_size*1
7219 VS3: vx3 = &base + vec_size*2
7220 VS4: vx4 = &base + vec_size*3
7222 Then permutation statements are generated:
7224 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7225 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7228 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7229 (the order of the data-refs in the output of vect_permute_load_chain
7230 corresponds to the order of scalar stmts in the interleaving chain - see
7231 the documentation of vect_permute_load_chain()).
7232 The generation of permutation stmts and recording them in
7233 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7235 In case of both multiple types and interleaving, the vector loads and
7236 permutation stmts above are created for every copy. The result vector
7237 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7238 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7240 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7241 on a target that supports unaligned accesses (dr_unaligned_supported)
7242 we generate the following code:
7243 p = initial_addr;
7244 indx = 0;
7245 loop {
7246 p = p + indx * vectype_size;
7247 vec_dest = *(p);
7248 indx = indx + 1;
7251 Otherwise, the data reference is potentially unaligned on a target that
7252 does not support unaligned accesses (dr_explicit_realign_optimized) -
7253 then generate the following code, in which the data in each iteration is
7254 obtained by two vector loads, one from the previous iteration, and one
7255 from the current iteration:
7256 p1 = initial_addr;
7257 msq_init = *(floor(p1))
7258 p2 = initial_addr + VS - 1;
7259 realignment_token = call target_builtin;
7260 indx = 0;
7261 loop {
7262 p2 = p2 + indx * vectype_size
7263 lsq = *(floor(p2))
7264 vec_dest = realign_load (msq, lsq, realignment_token)
7265 indx = indx + 1;
7266 msq = lsq;
7267 } */
7269 /* If the misalignment remains the same throughout the execution of the
7270 loop, we can create the init_addr and permutation mask at the loop
7271 preheader. Otherwise, it needs to be created inside the loop.
7272 This can only occur when vectorizing memory accesses in the inner-loop
7273 nested within an outer-loop that is being vectorized. */
7275 if (nested_in_vect_loop
7276 && (TREE_INT_CST_LOW (DR_STEP (dr))
7277 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7279 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7280 compute_in_loop = true;
7283 if ((alignment_support_scheme == dr_explicit_realign_optimized
7284 || alignment_support_scheme == dr_explicit_realign)
7285 && !compute_in_loop)
7287 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7288 alignment_support_scheme, NULL_TREE,
7289 &at_loop);
7290 if (alignment_support_scheme == dr_explicit_realign_optimized)
7292 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7293 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7294 size_one_node);
7297 else
7298 at_loop = loop;
7300 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7301 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7303 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7304 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7305 else
7306 aggr_type = vectype;
7308 prev_stmt_info = NULL;
7309 for (j = 0; j < ncopies; j++)
7311 /* 1. Create the vector or array pointer update chain. */
7312 if (j == 0)
7314 bool simd_lane_access_p
7315 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7316 if (simd_lane_access_p
7317 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7318 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7319 && integer_zerop (DR_OFFSET (first_dr))
7320 && integer_zerop (DR_INIT (first_dr))
7321 && alias_sets_conflict_p (get_alias_set (aggr_type),
7322 get_alias_set (TREE_TYPE (ref_type)))
7323 && (alignment_support_scheme == dr_aligned
7324 || alignment_support_scheme == dr_unaligned_supported))
7326 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7327 dataref_offset = build_int_cst (ref_type, 0);
7328 inv_p = false;
7330 else if (first_stmt_for_drptr
7331 && first_stmt != first_stmt_for_drptr)
7333 dataref_ptr
7334 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7335 at_loop, offset, &dummy, gsi,
7336 &ptr_incr, simd_lane_access_p,
7337 &inv_p, byte_offset);
7338 /* Adjust the pointer by the difference to first_stmt. */
7339 data_reference_p ptrdr
7340 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7341 tree diff = fold_convert (sizetype,
7342 size_binop (MINUS_EXPR,
7343 DR_INIT (first_dr),
7344 DR_INIT (ptrdr)));
7345 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7346 stmt, diff);
7348 else
7349 dataref_ptr
7350 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7351 offset, &dummy, gsi, &ptr_incr,
7352 simd_lane_access_p, &inv_p,
7353 byte_offset);
7355 else if (dataref_offset)
7356 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7357 TYPE_SIZE_UNIT (aggr_type));
7358 else
7359 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7360 TYPE_SIZE_UNIT (aggr_type));
7362 if (grouped_load || slp_perm)
7363 dr_chain.create (vec_num);
7365 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7367 tree vec_array;
7369 vec_array = create_vector_array (vectype, vec_num);
7371 /* Emit:
7372 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7373 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7374 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7375 gimple_call_set_lhs (new_stmt, vec_array);
7376 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7378 /* Extract each vector into an SSA_NAME. */
7379 for (i = 0; i < vec_num; i++)
7381 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7382 vec_array, i);
7383 dr_chain.quick_push (new_temp);
7386 /* Record the mapping between SSA_NAMEs and statements. */
7387 vect_record_grouped_load_vectors (stmt, dr_chain);
7389 else
7391 for (i = 0; i < vec_num; i++)
7393 if (i > 0)
7394 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7395 stmt, NULL_TREE);
7397 /* 2. Create the vector-load in the loop. */
7398 switch (alignment_support_scheme)
7400 case dr_aligned:
7401 case dr_unaligned_supported:
7403 unsigned int align, misalign;
7405 data_ref
7406 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7407 dataref_offset
7408 ? dataref_offset
7409 : build_int_cst (ref_type, 0));
7410 align = TYPE_ALIGN_UNIT (vectype);
7411 if (alignment_support_scheme == dr_aligned)
7413 gcc_assert (aligned_access_p (first_dr));
7414 misalign = 0;
7416 else if (DR_MISALIGNMENT (first_dr) == -1)
7418 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7419 align = TYPE_ALIGN_UNIT (elem_type);
7420 else
7421 align = (get_object_alignment (DR_REF (first_dr))
7422 / BITS_PER_UNIT);
7423 misalign = 0;
7424 TREE_TYPE (data_ref)
7425 = build_aligned_type (TREE_TYPE (data_ref),
7426 align * BITS_PER_UNIT);
7428 else
7430 TREE_TYPE (data_ref)
7431 = build_aligned_type (TREE_TYPE (data_ref),
7432 TYPE_ALIGN (elem_type));
7433 misalign = DR_MISALIGNMENT (first_dr);
7435 if (dataref_offset == NULL_TREE
7436 && TREE_CODE (dataref_ptr) == SSA_NAME)
7437 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7438 align, misalign);
7439 break;
7441 case dr_explicit_realign:
7443 tree ptr, bump;
7445 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7447 if (compute_in_loop)
7448 msq = vect_setup_realignment (first_stmt, gsi,
7449 &realignment_token,
7450 dr_explicit_realign,
7451 dataref_ptr, NULL);
7453 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7454 ptr = copy_ssa_name (dataref_ptr);
7455 else
7456 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7457 new_stmt = gimple_build_assign
7458 (ptr, BIT_AND_EXPR, dataref_ptr,
7459 build_int_cst
7460 (TREE_TYPE (dataref_ptr),
7461 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7463 data_ref
7464 = build2 (MEM_REF, vectype, ptr,
7465 build_int_cst (ref_type, 0));
7466 vec_dest = vect_create_destination_var (scalar_dest,
7467 vectype);
7468 new_stmt = gimple_build_assign (vec_dest, data_ref);
7469 new_temp = make_ssa_name (vec_dest, new_stmt);
7470 gimple_assign_set_lhs (new_stmt, new_temp);
7471 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7472 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7473 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7474 msq = new_temp;
7476 bump = size_binop (MULT_EXPR, vs,
7477 TYPE_SIZE_UNIT (elem_type));
7478 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7479 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7480 new_stmt = gimple_build_assign
7481 (NULL_TREE, BIT_AND_EXPR, ptr,
7482 build_int_cst
7483 (TREE_TYPE (ptr),
7484 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7485 ptr = copy_ssa_name (ptr, new_stmt);
7486 gimple_assign_set_lhs (new_stmt, ptr);
7487 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7488 data_ref
7489 = build2 (MEM_REF, vectype, ptr,
7490 build_int_cst (ref_type, 0));
7491 break;
7493 case dr_explicit_realign_optimized:
7494 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7495 new_temp = copy_ssa_name (dataref_ptr);
7496 else
7497 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7498 new_stmt = gimple_build_assign
7499 (new_temp, BIT_AND_EXPR, dataref_ptr,
7500 build_int_cst
7501 (TREE_TYPE (dataref_ptr),
7502 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7503 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7504 data_ref
7505 = build2 (MEM_REF, vectype, new_temp,
7506 build_int_cst (ref_type, 0));
7507 break;
7508 default:
7509 gcc_unreachable ();
7511 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7512 new_stmt = gimple_build_assign (vec_dest, data_ref);
7513 new_temp = make_ssa_name (vec_dest, new_stmt);
7514 gimple_assign_set_lhs (new_stmt, new_temp);
7515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7517 /* 3. Handle explicit realignment if necessary/supported.
7518 Create in loop:
7519 vec_dest = realign_load (msq, lsq, realignment_token) */
7520 if (alignment_support_scheme == dr_explicit_realign_optimized
7521 || alignment_support_scheme == dr_explicit_realign)
7523 lsq = gimple_assign_lhs (new_stmt);
7524 if (!realignment_token)
7525 realignment_token = dataref_ptr;
7526 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7527 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7528 msq, lsq, realignment_token);
7529 new_temp = make_ssa_name (vec_dest, new_stmt);
7530 gimple_assign_set_lhs (new_stmt, new_temp);
7531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7533 if (alignment_support_scheme == dr_explicit_realign_optimized)
7535 gcc_assert (phi);
7536 if (i == vec_num - 1 && j == ncopies - 1)
7537 add_phi_arg (phi, lsq,
7538 loop_latch_edge (containing_loop),
7539 UNKNOWN_LOCATION);
7540 msq = lsq;
7544 /* 4. Handle invariant-load. */
7545 if (inv_p && !bb_vinfo)
7547 gcc_assert (!grouped_load);
7548 /* If we have versioned for aliasing or the loop doesn't
7549 have any data dependencies that would preclude this,
7550 then we are sure this is a loop invariant load and
7551 thus we can insert it on the preheader edge. */
7552 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7553 && !nested_in_vect_loop
7554 && hoist_defs_of_uses (stmt, loop))
7556 if (dump_enabled_p ())
7558 dump_printf_loc (MSG_NOTE, vect_location,
7559 "hoisting out of the vectorized "
7560 "loop: ");
7561 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7563 tree tem = copy_ssa_name (scalar_dest);
7564 gsi_insert_on_edge_immediate
7565 (loop_preheader_edge (loop),
7566 gimple_build_assign (tem,
7567 unshare_expr
7568 (gimple_assign_rhs1 (stmt))));
7569 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7570 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7571 set_vinfo_for_stmt (new_stmt,
7572 new_stmt_vec_info (new_stmt, vinfo));
7574 else
7576 gimple_stmt_iterator gsi2 = *gsi;
7577 gsi_next (&gsi2);
7578 new_temp = vect_init_vector (stmt, scalar_dest,
7579 vectype, &gsi2);
7580 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7584 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7586 tree perm_mask = perm_mask_for_reverse (vectype);
7587 new_temp = permute_vec_elements (new_temp, new_temp,
7588 perm_mask, stmt, gsi);
7589 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7592 /* Collect vector loads and later create their permutation in
7593 vect_transform_grouped_load (). */
7594 if (grouped_load || slp_perm)
7595 dr_chain.quick_push (new_temp);
7597 /* Store vector loads in the corresponding SLP_NODE. */
7598 if (slp && !slp_perm)
7599 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7601 /* Bump the vector pointer to account for a gap or for excess
7602 elements loaded for a permuted SLP load. */
7603 if (group_gap_adj != 0)
7605 bool ovf;
7606 tree bump
7607 = wide_int_to_tree (sizetype,
7608 wi::smul (TYPE_SIZE_UNIT (elem_type),
7609 group_gap_adj, &ovf));
7610 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7611 stmt, bump);
7615 if (slp && !slp_perm)
7616 continue;
7618 if (slp_perm)
7620 unsigned n_perms;
7621 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7622 slp_node_instance, false,
7623 &n_perms))
7625 dr_chain.release ();
7626 return false;
7629 else
7631 if (grouped_load)
7633 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7634 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7635 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7637 else
7639 if (j == 0)
7640 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7641 else
7642 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7643 prev_stmt_info = vinfo_for_stmt (new_stmt);
7646 dr_chain.release ();
7649 return true;
7652 /* Function vect_is_simple_cond.
7654 Input:
7655 LOOP - the loop that is being vectorized.
7656 COND - Condition that is checked for simple use.
7658 Output:
7659 *COMP_VECTYPE - the vector type for the comparison.
7661 Returns whether a COND can be vectorized. Checks whether
7662 condition operands are supportable using vec_is_simple_use. */
7664 static bool
7665 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7667 tree lhs, rhs;
7668 enum vect_def_type dt;
7669 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7671 /* Mask case. */
7672 if (TREE_CODE (cond) == SSA_NAME
7673 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7675 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7676 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7677 &dt, comp_vectype)
7678 || !*comp_vectype
7679 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7680 return false;
7681 return true;
7684 if (!COMPARISON_CLASS_P (cond))
7685 return false;
7687 lhs = TREE_OPERAND (cond, 0);
7688 rhs = TREE_OPERAND (cond, 1);
7690 if (TREE_CODE (lhs) == SSA_NAME)
7692 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7693 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7694 return false;
7696 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7697 && TREE_CODE (lhs) != FIXED_CST)
7698 return false;
7700 if (TREE_CODE (rhs) == SSA_NAME)
7702 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7703 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7704 return false;
7706 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7707 && TREE_CODE (rhs) != FIXED_CST)
7708 return false;
7710 if (vectype1 && vectype2
7711 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7712 return false;
7714 *comp_vectype = vectype1 ? vectype1 : vectype2;
7715 return true;
7718 /* vectorizable_condition.
7720 Check if STMT is conditional modify expression that can be vectorized.
7721 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7722 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7723 at GSI.
7725 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7726 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7727 else clause if it is 2).
7729 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7731 bool
7732 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7733 gimple **vec_stmt, tree reduc_def, int reduc_index,
7734 slp_tree slp_node)
7736 tree scalar_dest = NULL_TREE;
7737 tree vec_dest = NULL_TREE;
7738 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7739 tree then_clause, else_clause;
7740 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7741 tree comp_vectype = NULL_TREE;
7742 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7743 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7744 tree vec_compare;
7745 tree new_temp;
7746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7747 enum vect_def_type dt, dts[4];
7748 int ncopies;
7749 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7750 stmt_vec_info prev_stmt_info = NULL;
7751 int i, j;
7752 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7753 vec<tree> vec_oprnds0 = vNULL;
7754 vec<tree> vec_oprnds1 = vNULL;
7755 vec<tree> vec_oprnds2 = vNULL;
7756 vec<tree> vec_oprnds3 = vNULL;
7757 tree vec_cmp_type;
7758 bool masked = false;
7760 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7761 return false;
7763 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7765 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7766 return false;
7768 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7769 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7770 && reduc_def))
7771 return false;
7773 /* FORNOW: not yet supported. */
7774 if (STMT_VINFO_LIVE_P (stmt_info))
7776 if (dump_enabled_p ())
7777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7778 "value used after loop.\n");
7779 return false;
7783 /* Is vectorizable conditional operation? */
7784 if (!is_gimple_assign (stmt))
7785 return false;
7787 code = gimple_assign_rhs_code (stmt);
7789 if (code != COND_EXPR)
7790 return false;
7792 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7793 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7794 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7796 if (slp_node)
7797 ncopies = 1;
7798 else
7799 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7801 gcc_assert (ncopies >= 1);
7802 if (reduc_index && ncopies > 1)
7803 return false; /* FORNOW */
7805 cond_expr = gimple_assign_rhs1 (stmt);
7806 then_clause = gimple_assign_rhs2 (stmt);
7807 else_clause = gimple_assign_rhs3 (stmt);
7809 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7810 || !comp_vectype)
7811 return false;
7813 gimple *def_stmt;
7814 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7815 &vectype1))
7816 return false;
7817 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7818 &vectype2))
7819 return false;
7821 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7822 return false;
7824 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7825 return false;
7827 masked = !COMPARISON_CLASS_P (cond_expr);
7828 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7830 if (vec_cmp_type == NULL_TREE)
7831 return false;
7833 cond_code = TREE_CODE (cond_expr);
7834 if (!masked)
7836 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7837 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7840 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7842 /* Boolean values may have another representation in vectors
7843 and therefore we prefer bit operations over comparison for
7844 them (which also works for scalar masks). We store opcodes
7845 to use in bitop1 and bitop2. Statement is vectorized as
7846 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7847 depending on bitop1 and bitop2 arity. */
7848 switch (cond_code)
7850 case GT_EXPR:
7851 bitop1 = BIT_NOT_EXPR;
7852 bitop2 = BIT_AND_EXPR;
7853 break;
7854 case GE_EXPR:
7855 bitop1 = BIT_NOT_EXPR;
7856 bitop2 = BIT_IOR_EXPR;
7857 break;
7858 case LT_EXPR:
7859 bitop1 = BIT_NOT_EXPR;
7860 bitop2 = BIT_AND_EXPR;
7861 std::swap (cond_expr0, cond_expr1);
7862 break;
7863 case LE_EXPR:
7864 bitop1 = BIT_NOT_EXPR;
7865 bitop2 = BIT_IOR_EXPR;
7866 std::swap (cond_expr0, cond_expr1);
7867 break;
7868 case NE_EXPR:
7869 bitop1 = BIT_XOR_EXPR;
7870 break;
7871 case EQ_EXPR:
7872 bitop1 = BIT_XOR_EXPR;
7873 bitop2 = BIT_NOT_EXPR;
7874 break;
7875 default:
7876 return false;
7878 cond_code = SSA_NAME;
7881 if (!vec_stmt)
7883 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7884 if (bitop1 != NOP_EXPR)
7886 machine_mode mode = TYPE_MODE (comp_vectype);
7887 optab optab;
7889 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7890 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7891 return false;
7893 if (bitop2 != NOP_EXPR)
7895 optab = optab_for_tree_code (bitop2, comp_vectype,
7896 optab_default);
7897 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7898 return false;
7901 return expand_vec_cond_expr_p (vectype, comp_vectype,
7902 cond_code);
7905 /* Transform. */
7907 if (!slp_node)
7909 vec_oprnds0.create (1);
7910 vec_oprnds1.create (1);
7911 vec_oprnds2.create (1);
7912 vec_oprnds3.create (1);
7915 /* Handle def. */
7916 scalar_dest = gimple_assign_lhs (stmt);
7917 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7919 /* Handle cond expr. */
7920 for (j = 0; j < ncopies; j++)
7922 gassign *new_stmt = NULL;
7923 if (j == 0)
7925 if (slp_node)
7927 auto_vec<tree, 4> ops;
7928 auto_vec<vec<tree>, 4> vec_defs;
7930 if (masked)
7931 ops.safe_push (cond_expr);
7932 else
7934 ops.safe_push (cond_expr0);
7935 ops.safe_push (cond_expr1);
7937 ops.safe_push (then_clause);
7938 ops.safe_push (else_clause);
7939 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7940 vec_oprnds3 = vec_defs.pop ();
7941 vec_oprnds2 = vec_defs.pop ();
7942 if (!masked)
7943 vec_oprnds1 = vec_defs.pop ();
7944 vec_oprnds0 = vec_defs.pop ();
7946 else
7948 gimple *gtemp;
7949 if (masked)
7951 vec_cond_lhs
7952 = vect_get_vec_def_for_operand (cond_expr, stmt,
7953 comp_vectype);
7954 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7955 &gtemp, &dts[0]);
7957 else
7959 vec_cond_lhs
7960 = vect_get_vec_def_for_operand (cond_expr0,
7961 stmt, comp_vectype);
7962 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
7964 vec_cond_rhs
7965 = vect_get_vec_def_for_operand (cond_expr1,
7966 stmt, comp_vectype);
7967 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
7969 if (reduc_index == 1)
7970 vec_then_clause = reduc_def;
7971 else
7973 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7974 stmt);
7975 vect_is_simple_use (then_clause, loop_vinfo,
7976 &gtemp, &dts[2]);
7978 if (reduc_index == 2)
7979 vec_else_clause = reduc_def;
7980 else
7982 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7983 stmt);
7984 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7988 else
7990 vec_cond_lhs
7991 = vect_get_vec_def_for_stmt_copy (dts[0],
7992 vec_oprnds0.pop ());
7993 if (!masked)
7994 vec_cond_rhs
7995 = vect_get_vec_def_for_stmt_copy (dts[1],
7996 vec_oprnds1.pop ());
7998 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7999 vec_oprnds2.pop ());
8000 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8001 vec_oprnds3.pop ());
8004 if (!slp_node)
8006 vec_oprnds0.quick_push (vec_cond_lhs);
8007 if (!masked)
8008 vec_oprnds1.quick_push (vec_cond_rhs);
8009 vec_oprnds2.quick_push (vec_then_clause);
8010 vec_oprnds3.quick_push (vec_else_clause);
8013 /* Arguments are ready. Create the new vector stmt. */
8014 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8016 vec_then_clause = vec_oprnds2[i];
8017 vec_else_clause = vec_oprnds3[i];
8019 if (masked)
8020 vec_compare = vec_cond_lhs;
8021 else
8023 vec_cond_rhs = vec_oprnds1[i];
8024 if (bitop1 == NOP_EXPR)
8025 vec_compare = build2 (cond_code, vec_cmp_type,
8026 vec_cond_lhs, vec_cond_rhs);
8027 else
8029 new_temp = make_ssa_name (vec_cmp_type);
8030 if (bitop1 == BIT_NOT_EXPR)
8031 new_stmt = gimple_build_assign (new_temp, bitop1,
8032 vec_cond_rhs);
8033 else
8034 new_stmt
8035 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8036 vec_cond_rhs);
8037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8038 if (bitop2 == NOP_EXPR)
8039 vec_compare = new_temp;
8040 else if (bitop2 == BIT_NOT_EXPR)
8042 /* Instead of doing ~x ? y : z do x ? z : y. */
8043 vec_compare = new_temp;
8044 std::swap (vec_then_clause, vec_else_clause);
8046 else
8048 vec_compare = make_ssa_name (vec_cmp_type);
8049 new_stmt
8050 = gimple_build_assign (vec_compare, bitop2,
8051 vec_cond_lhs, new_temp);
8052 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8056 new_temp = make_ssa_name (vec_dest);
8057 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8058 vec_compare, vec_then_clause,
8059 vec_else_clause);
8060 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8061 if (slp_node)
8062 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8065 if (slp_node)
8066 continue;
8068 if (j == 0)
8069 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8070 else
8071 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8073 prev_stmt_info = vinfo_for_stmt (new_stmt);
8076 vec_oprnds0.release ();
8077 vec_oprnds1.release ();
8078 vec_oprnds2.release ();
8079 vec_oprnds3.release ();
8081 return true;
8084 /* vectorizable_comparison.
8086 Check if STMT is comparison expression that can be vectorized.
8087 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8088 comparison, put it in VEC_STMT, and insert it at GSI.
8090 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8092 static bool
8093 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8094 gimple **vec_stmt, tree reduc_def,
8095 slp_tree slp_node)
8097 tree lhs, rhs1, rhs2;
8098 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8099 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8100 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8101 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8102 tree new_temp;
8103 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8104 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8105 unsigned nunits;
8106 int ncopies;
8107 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8108 stmt_vec_info prev_stmt_info = NULL;
8109 int i, j;
8110 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8111 vec<tree> vec_oprnds0 = vNULL;
8112 vec<tree> vec_oprnds1 = vNULL;
8113 gimple *def_stmt;
8114 tree mask_type;
8115 tree mask;
8117 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8118 return false;
8120 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8121 return false;
8123 mask_type = vectype;
8124 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8126 if (slp_node)
8127 ncopies = 1;
8128 else
8129 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8131 gcc_assert (ncopies >= 1);
8132 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8133 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8134 && reduc_def))
8135 return false;
8137 if (STMT_VINFO_LIVE_P (stmt_info))
8139 if (dump_enabled_p ())
8140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8141 "value used after loop.\n");
8142 return false;
8145 if (!is_gimple_assign (stmt))
8146 return false;
8148 code = gimple_assign_rhs_code (stmt);
8150 if (TREE_CODE_CLASS (code) != tcc_comparison)
8151 return false;
8153 rhs1 = gimple_assign_rhs1 (stmt);
8154 rhs2 = gimple_assign_rhs2 (stmt);
8156 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8157 &dts[0], &vectype1))
8158 return false;
8160 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8161 &dts[1], &vectype2))
8162 return false;
8164 if (vectype1 && vectype2
8165 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8166 return false;
8168 vectype = vectype1 ? vectype1 : vectype2;
8170 /* Invariant comparison. */
8171 if (!vectype)
8173 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8174 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8175 return false;
8177 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8178 return false;
8180 /* Can't compare mask and non-mask types. */
8181 if (vectype1 && vectype2
8182 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8183 return false;
8185 /* Boolean values may have another representation in vectors
8186 and therefore we prefer bit operations over comparison for
8187 them (which also works for scalar masks). We store opcodes
8188 to use in bitop1 and bitop2. Statement is vectorized as
8189 BITOP2 (rhs1 BITOP1 rhs2) or
8190 rhs1 BITOP2 (BITOP1 rhs2)
8191 depending on bitop1 and bitop2 arity. */
8192 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8194 if (code == GT_EXPR)
8196 bitop1 = BIT_NOT_EXPR;
8197 bitop2 = BIT_AND_EXPR;
8199 else if (code == GE_EXPR)
8201 bitop1 = BIT_NOT_EXPR;
8202 bitop2 = BIT_IOR_EXPR;
8204 else if (code == LT_EXPR)
8206 bitop1 = BIT_NOT_EXPR;
8207 bitop2 = BIT_AND_EXPR;
8208 std::swap (rhs1, rhs2);
8209 std::swap (dts[0], dts[1]);
8211 else if (code == LE_EXPR)
8213 bitop1 = BIT_NOT_EXPR;
8214 bitop2 = BIT_IOR_EXPR;
8215 std::swap (rhs1, rhs2);
8216 std::swap (dts[0], dts[1]);
8218 else
8220 bitop1 = BIT_XOR_EXPR;
8221 if (code == EQ_EXPR)
8222 bitop2 = BIT_NOT_EXPR;
8226 if (!vec_stmt)
8228 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8229 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8230 dts, NULL, NULL);
8231 if (bitop1 == NOP_EXPR)
8232 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8233 else
8235 machine_mode mode = TYPE_MODE (vectype);
8236 optab optab;
8238 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8239 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8240 return false;
8242 if (bitop2 != NOP_EXPR)
8244 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8245 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8246 return false;
8248 return true;
8252 /* Transform. */
8253 if (!slp_node)
8255 vec_oprnds0.create (1);
8256 vec_oprnds1.create (1);
8259 /* Handle def. */
8260 lhs = gimple_assign_lhs (stmt);
8261 mask = vect_create_destination_var (lhs, mask_type);
8263 /* Handle cmp expr. */
8264 for (j = 0; j < ncopies; j++)
8266 gassign *new_stmt = NULL;
8267 if (j == 0)
8269 if (slp_node)
8271 auto_vec<tree, 2> ops;
8272 auto_vec<vec<tree>, 2> vec_defs;
8274 ops.safe_push (rhs1);
8275 ops.safe_push (rhs2);
8276 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8277 vec_oprnds1 = vec_defs.pop ();
8278 vec_oprnds0 = vec_defs.pop ();
8280 else
8282 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8283 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8286 else
8288 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8289 vec_oprnds0.pop ());
8290 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8291 vec_oprnds1.pop ());
8294 if (!slp_node)
8296 vec_oprnds0.quick_push (vec_rhs1);
8297 vec_oprnds1.quick_push (vec_rhs2);
8300 /* Arguments are ready. Create the new vector stmt. */
8301 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8303 vec_rhs2 = vec_oprnds1[i];
8305 new_temp = make_ssa_name (mask);
8306 if (bitop1 == NOP_EXPR)
8308 new_stmt = gimple_build_assign (new_temp, code,
8309 vec_rhs1, vec_rhs2);
8310 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8312 else
8314 if (bitop1 == BIT_NOT_EXPR)
8315 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8316 else
8317 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8318 vec_rhs2);
8319 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8320 if (bitop2 != NOP_EXPR)
8322 tree res = make_ssa_name (mask);
8323 if (bitop2 == BIT_NOT_EXPR)
8324 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8325 else
8326 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8327 new_temp);
8328 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8331 if (slp_node)
8332 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8335 if (slp_node)
8336 continue;
8338 if (j == 0)
8339 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8340 else
8341 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8343 prev_stmt_info = vinfo_for_stmt (new_stmt);
8346 vec_oprnds0.release ();
8347 vec_oprnds1.release ();
8349 return true;
8352 /* Make sure the statement is vectorizable. */
8354 bool
8355 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8357 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8358 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8359 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8360 bool ok;
8361 tree scalar_type, vectype;
8362 gimple *pattern_stmt;
8363 gimple_seq pattern_def_seq;
8365 if (dump_enabled_p ())
8367 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8368 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8371 if (gimple_has_volatile_ops (stmt))
8373 if (dump_enabled_p ())
8374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8375 "not vectorized: stmt has volatile operands\n");
8377 return false;
8380 /* Skip stmts that do not need to be vectorized. In loops this is expected
8381 to include:
8382 - the COND_EXPR which is the loop exit condition
8383 - any LABEL_EXPRs in the loop
8384 - computations that are used only for array indexing or loop control.
8385 In basic blocks we only analyze statements that are a part of some SLP
8386 instance, therefore, all the statements are relevant.
8388 Pattern statement needs to be analyzed instead of the original statement
8389 if the original statement is not relevant. Otherwise, we analyze both
8390 statements. In basic blocks we are called from some SLP instance
8391 traversal, don't analyze pattern stmts instead, the pattern stmts
8392 already will be part of SLP instance. */
8394 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8395 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8396 && !STMT_VINFO_LIVE_P (stmt_info))
8398 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8399 && pattern_stmt
8400 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8401 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8403 /* Analyze PATTERN_STMT instead of the original stmt. */
8404 stmt = pattern_stmt;
8405 stmt_info = vinfo_for_stmt (pattern_stmt);
8406 if (dump_enabled_p ())
8408 dump_printf_loc (MSG_NOTE, vect_location,
8409 "==> examining pattern statement: ");
8410 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8413 else
8415 if (dump_enabled_p ())
8416 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8418 return true;
8421 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8422 && node == NULL
8423 && pattern_stmt
8424 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8425 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8427 /* Analyze PATTERN_STMT too. */
8428 if (dump_enabled_p ())
8430 dump_printf_loc (MSG_NOTE, vect_location,
8431 "==> examining pattern statement: ");
8432 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8435 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8436 return false;
8439 if (is_pattern_stmt_p (stmt_info)
8440 && node == NULL
8441 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8443 gimple_stmt_iterator si;
8445 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8447 gimple *pattern_def_stmt = gsi_stmt (si);
8448 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8449 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8451 /* Analyze def stmt of STMT if it's a pattern stmt. */
8452 if (dump_enabled_p ())
8454 dump_printf_loc (MSG_NOTE, vect_location,
8455 "==> examining pattern def statement: ");
8456 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8459 if (!vect_analyze_stmt (pattern_def_stmt,
8460 need_to_vectorize, node))
8461 return false;
8466 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8468 case vect_internal_def:
8469 break;
8471 case vect_reduction_def:
8472 case vect_nested_cycle:
8473 gcc_assert (!bb_vinfo
8474 && (relevance == vect_used_in_outer
8475 || relevance == vect_used_in_outer_by_reduction
8476 || relevance == vect_used_by_reduction
8477 || relevance == vect_unused_in_scope
8478 || relevance == vect_used_only_live));
8479 break;
8481 case vect_induction_def:
8482 case vect_constant_def:
8483 case vect_external_def:
8484 case vect_unknown_def_type:
8485 default:
8486 gcc_unreachable ();
8489 if (bb_vinfo)
8491 gcc_assert (PURE_SLP_STMT (stmt_info));
8493 /* Memory accesses already got their vector type assigned
8494 in vect_analyze_data_refs. */
8495 if (! STMT_VINFO_DATA_REF (stmt_info))
8497 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8498 if (dump_enabled_p ())
8500 dump_printf_loc (MSG_NOTE, vect_location,
8501 "get vectype for scalar type: ");
8502 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8503 dump_printf (MSG_NOTE, "\n");
8506 vectype = get_vectype_for_scalar_type (scalar_type);
8507 if (!vectype)
8509 if (dump_enabled_p ())
8511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8512 "not SLPed: unsupported data-type ");
8513 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8514 scalar_type);
8515 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8517 return false;
8520 if (dump_enabled_p ())
8522 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8523 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8524 dump_printf (MSG_NOTE, "\n");
8527 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8531 if (STMT_VINFO_RELEVANT_P (stmt_info))
8533 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8534 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8535 || (is_gimple_call (stmt)
8536 && gimple_call_lhs (stmt) == NULL_TREE));
8537 *need_to_vectorize = true;
8540 if (PURE_SLP_STMT (stmt_info) && !node)
8542 dump_printf_loc (MSG_NOTE, vect_location,
8543 "handled only by SLP analysis\n");
8544 return true;
8547 ok = true;
8548 if (!bb_vinfo
8549 && (STMT_VINFO_RELEVANT_P (stmt_info)
8550 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8551 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8552 || vectorizable_conversion (stmt, NULL, NULL, node)
8553 || vectorizable_shift (stmt, NULL, NULL, node)
8554 || vectorizable_operation (stmt, NULL, NULL, node)
8555 || vectorizable_assignment (stmt, NULL, NULL, node)
8556 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8557 || vectorizable_call (stmt, NULL, NULL, node)
8558 || vectorizable_store (stmt, NULL, NULL, node)
8559 || vectorizable_reduction (stmt, NULL, NULL, node)
8560 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8561 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8562 else
8564 if (bb_vinfo)
8565 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8566 || vectorizable_conversion (stmt, NULL, NULL, node)
8567 || vectorizable_shift (stmt, NULL, NULL, node)
8568 || vectorizable_operation (stmt, NULL, NULL, node)
8569 || vectorizable_assignment (stmt, NULL, NULL, node)
8570 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8571 || vectorizable_call (stmt, NULL, NULL, node)
8572 || vectorizable_store (stmt, NULL, NULL, node)
8573 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8574 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8577 if (!ok)
8579 if (dump_enabled_p ())
8581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8582 "not vectorized: relevant stmt not ");
8583 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8584 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8587 return false;
8590 if (bb_vinfo)
8591 return true;
8593 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8594 need extra handling, except for vectorizable reductions. */
8595 if (STMT_VINFO_LIVE_P (stmt_info)
8596 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8597 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8599 if (!ok)
8601 if (dump_enabled_p ())
8603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8604 "not vectorized: live stmt not ");
8605 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8606 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8609 return false;
8612 return true;
8616 /* Function vect_transform_stmt.
8618 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8620 bool
8621 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8622 bool *grouped_store, slp_tree slp_node,
8623 slp_instance slp_node_instance)
8625 bool is_store = false;
8626 gimple *vec_stmt = NULL;
8627 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8628 bool done;
8630 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8631 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8633 switch (STMT_VINFO_TYPE (stmt_info))
8635 case type_demotion_vec_info_type:
8636 case type_promotion_vec_info_type:
8637 case type_conversion_vec_info_type:
8638 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8639 gcc_assert (done);
8640 break;
8642 case induc_vec_info_type:
8643 gcc_assert (!slp_node);
8644 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8645 gcc_assert (done);
8646 break;
8648 case shift_vec_info_type:
8649 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8650 gcc_assert (done);
8651 break;
8653 case op_vec_info_type:
8654 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8655 gcc_assert (done);
8656 break;
8658 case assignment_vec_info_type:
8659 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8660 gcc_assert (done);
8661 break;
8663 case load_vec_info_type:
8664 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8665 slp_node_instance);
8666 gcc_assert (done);
8667 break;
8669 case store_vec_info_type:
8670 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8671 gcc_assert (done);
8672 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8674 /* In case of interleaving, the whole chain is vectorized when the
8675 last store in the chain is reached. Store stmts before the last
8676 one are skipped, and there vec_stmt_info shouldn't be freed
8677 meanwhile. */
8678 *grouped_store = true;
8679 if (STMT_VINFO_VEC_STMT (stmt_info))
8680 is_store = true;
8682 else
8683 is_store = true;
8684 break;
8686 case condition_vec_info_type:
8687 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8688 gcc_assert (done);
8689 break;
8691 case comparison_vec_info_type:
8692 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8693 gcc_assert (done);
8694 break;
8696 case call_vec_info_type:
8697 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8698 stmt = gsi_stmt (*gsi);
8699 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8700 is_store = true;
8701 break;
8703 case call_simd_clone_vec_info_type:
8704 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8705 stmt = gsi_stmt (*gsi);
8706 break;
8708 case reduc_vec_info_type:
8709 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8710 gcc_assert (done);
8711 break;
8713 default:
8714 if (!STMT_VINFO_LIVE_P (stmt_info))
8716 if (dump_enabled_p ())
8717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8718 "stmt not supported.\n");
8719 gcc_unreachable ();
8723 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8724 This would break hybrid SLP vectorization. */
8725 if (slp_node)
8726 gcc_assert (!vec_stmt
8727 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8729 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8730 is being vectorized, but outside the immediately enclosing loop. */
8731 if (vec_stmt
8732 && STMT_VINFO_LOOP_VINFO (stmt_info)
8733 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8734 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8735 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8736 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8737 || STMT_VINFO_RELEVANT (stmt_info) ==
8738 vect_used_in_outer_by_reduction))
8740 struct loop *innerloop = LOOP_VINFO_LOOP (
8741 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8742 imm_use_iterator imm_iter;
8743 use_operand_p use_p;
8744 tree scalar_dest;
8745 gimple *exit_phi;
8747 if (dump_enabled_p ())
8748 dump_printf_loc (MSG_NOTE, vect_location,
8749 "Record the vdef for outer-loop vectorization.\n");
8751 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8752 (to be used when vectorizing outer-loop stmts that use the DEF of
8753 STMT). */
8754 if (gimple_code (stmt) == GIMPLE_PHI)
8755 scalar_dest = PHI_RESULT (stmt);
8756 else
8757 scalar_dest = gimple_assign_lhs (stmt);
8759 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8761 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8763 exit_phi = USE_STMT (use_p);
8764 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8769 /* Handle stmts whose DEF is used outside the loop-nest that is
8770 being vectorized. */
8771 if (slp_node)
8773 gimple *slp_stmt;
8774 int i;
8775 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8777 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8778 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8779 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8781 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8782 &vec_stmt);
8783 gcc_assert (done);
8787 else if (STMT_VINFO_LIVE_P (stmt_info)
8788 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8790 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8791 gcc_assert (done);
8794 if (vec_stmt)
8795 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8797 return is_store;
8801 /* Remove a group of stores (for SLP or interleaving), free their
8802 stmt_vec_info. */
8804 void
8805 vect_remove_stores (gimple *first_stmt)
8807 gimple *next = first_stmt;
8808 gimple *tmp;
8809 gimple_stmt_iterator next_si;
8811 while (next)
8813 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8815 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8816 if (is_pattern_stmt_p (stmt_info))
8817 next = STMT_VINFO_RELATED_STMT (stmt_info);
8818 /* Free the attached stmt_vec_info and remove the stmt. */
8819 next_si = gsi_for_stmt (next);
8820 unlink_stmt_vdef (next);
8821 gsi_remove (&next_si, true);
8822 release_defs (next);
8823 free_stmt_vec_info (next);
8824 next = tmp;
8829 /* Function new_stmt_vec_info.
8831 Create and initialize a new stmt_vec_info struct for STMT. */
8833 stmt_vec_info
8834 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8836 stmt_vec_info res;
8837 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8839 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8840 STMT_VINFO_STMT (res) = stmt;
8841 res->vinfo = vinfo;
8842 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8843 STMT_VINFO_LIVE_P (res) = false;
8844 STMT_VINFO_VECTYPE (res) = NULL;
8845 STMT_VINFO_VEC_STMT (res) = NULL;
8846 STMT_VINFO_VECTORIZABLE (res) = true;
8847 STMT_VINFO_IN_PATTERN_P (res) = false;
8848 STMT_VINFO_RELATED_STMT (res) = NULL;
8849 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8850 STMT_VINFO_DATA_REF (res) = NULL;
8851 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8852 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8854 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8855 STMT_VINFO_DR_OFFSET (res) = NULL;
8856 STMT_VINFO_DR_INIT (res) = NULL;
8857 STMT_VINFO_DR_STEP (res) = NULL;
8858 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8860 if (gimple_code (stmt) == GIMPLE_PHI
8861 && is_loop_header_bb_p (gimple_bb (stmt)))
8862 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8863 else
8864 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8866 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8867 STMT_SLP_TYPE (res) = loop_vect;
8868 STMT_VINFO_NUM_SLP_USES (res) = 0;
8870 GROUP_FIRST_ELEMENT (res) = NULL;
8871 GROUP_NEXT_ELEMENT (res) = NULL;
8872 GROUP_SIZE (res) = 0;
8873 GROUP_STORE_COUNT (res) = 0;
8874 GROUP_GAP (res) = 0;
8875 GROUP_SAME_DR_STMT (res) = NULL;
8877 return res;
8881 /* Create a hash table for stmt_vec_info. */
8883 void
8884 init_stmt_vec_info_vec (void)
8886 gcc_assert (!stmt_vec_info_vec.exists ());
8887 stmt_vec_info_vec.create (50);
8891 /* Free hash table for stmt_vec_info. */
8893 void
8894 free_stmt_vec_info_vec (void)
8896 unsigned int i;
8897 stmt_vec_info info;
8898 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8899 if (info != NULL)
8900 free_stmt_vec_info (STMT_VINFO_STMT (info));
8901 gcc_assert (stmt_vec_info_vec.exists ());
8902 stmt_vec_info_vec.release ();
8906 /* Free stmt vectorization related info. */
8908 void
8909 free_stmt_vec_info (gimple *stmt)
8911 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8913 if (!stmt_info)
8914 return;
8916 /* Check if this statement has a related "pattern stmt"
8917 (introduced by the vectorizer during the pattern recognition
8918 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8919 too. */
8920 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8922 stmt_vec_info patt_info
8923 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8924 if (patt_info)
8926 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8927 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8928 gimple_set_bb (patt_stmt, NULL);
8929 tree lhs = gimple_get_lhs (patt_stmt);
8930 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8931 release_ssa_name (lhs);
8932 if (seq)
8934 gimple_stmt_iterator si;
8935 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8937 gimple *seq_stmt = gsi_stmt (si);
8938 gimple_set_bb (seq_stmt, NULL);
8939 lhs = gimple_get_lhs (seq_stmt);
8940 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8941 release_ssa_name (lhs);
8942 free_stmt_vec_info (seq_stmt);
8945 free_stmt_vec_info (patt_stmt);
8949 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8950 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8951 set_vinfo_for_stmt (stmt, NULL);
8952 free (stmt_info);
8956 /* Function get_vectype_for_scalar_type_and_size.
8958 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8959 by the target. */
8961 static tree
8962 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8964 tree orig_scalar_type = scalar_type;
8965 machine_mode inner_mode = TYPE_MODE (scalar_type);
8966 machine_mode simd_mode;
8967 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8968 int nunits;
8969 tree vectype;
8971 if (nbytes == 0)
8972 return NULL_TREE;
8974 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8975 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8976 return NULL_TREE;
8978 /* For vector types of elements whose mode precision doesn't
8979 match their types precision we use a element type of mode
8980 precision. The vectorization routines will have to make sure
8981 they support the proper result truncation/extension.
8982 We also make sure to build vector types with INTEGER_TYPE
8983 component type only. */
8984 if (INTEGRAL_TYPE_P (scalar_type)
8985 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8986 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8987 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8988 TYPE_UNSIGNED (scalar_type));
8990 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8991 When the component mode passes the above test simply use a type
8992 corresponding to that mode. The theory is that any use that
8993 would cause problems with this will disable vectorization anyway. */
8994 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8995 && !INTEGRAL_TYPE_P (scalar_type))
8996 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8998 /* We can't build a vector type of elements with alignment bigger than
8999 their size. */
9000 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9001 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9002 TYPE_UNSIGNED (scalar_type));
9004 /* If we felt back to using the mode fail if there was
9005 no scalar type for it. */
9006 if (scalar_type == NULL_TREE)
9007 return NULL_TREE;
9009 /* If no size was supplied use the mode the target prefers. Otherwise
9010 lookup a vector mode of the specified size. */
9011 if (size == 0)
9012 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9013 else
9014 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9015 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9016 if (nunits <= 1)
9017 return NULL_TREE;
9019 vectype = build_vector_type (scalar_type, nunits);
9021 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9022 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9023 return NULL_TREE;
9025 /* Re-attach the address-space qualifier if we canonicalized the scalar
9026 type. */
9027 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9028 return build_qualified_type
9029 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9031 return vectype;
9034 unsigned int current_vector_size;
9036 /* Function get_vectype_for_scalar_type.
9038 Returns the vector type corresponding to SCALAR_TYPE as supported
9039 by the target. */
9041 tree
9042 get_vectype_for_scalar_type (tree scalar_type)
9044 tree vectype;
9045 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9046 current_vector_size);
9047 if (vectype
9048 && current_vector_size == 0)
9049 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9050 return vectype;
9053 /* Function get_mask_type_for_scalar_type.
9055 Returns the mask type corresponding to a result of comparison
9056 of vectors of specified SCALAR_TYPE as supported by target. */
9058 tree
9059 get_mask_type_for_scalar_type (tree scalar_type)
9061 tree vectype = get_vectype_for_scalar_type (scalar_type);
9063 if (!vectype)
9064 return NULL;
9066 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9067 current_vector_size);
9070 /* Function get_same_sized_vectype
9072 Returns a vector type corresponding to SCALAR_TYPE of size
9073 VECTOR_TYPE if supported by the target. */
9075 tree
9076 get_same_sized_vectype (tree scalar_type, tree vector_type)
9078 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9079 return build_same_sized_truth_vector_type (vector_type);
9081 return get_vectype_for_scalar_type_and_size
9082 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9085 /* Function vect_is_simple_use.
9087 Input:
9088 VINFO - the vect info of the loop or basic block that is being vectorized.
9089 OPERAND - operand in the loop or bb.
9090 Output:
9091 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9092 DT - the type of definition
9094 Returns whether a stmt with OPERAND can be vectorized.
9095 For loops, supportable operands are constants, loop invariants, and operands
9096 that are defined by the current iteration of the loop. Unsupportable
9097 operands are those that are defined by a previous iteration of the loop (as
9098 is the case in reduction/induction computations).
9099 For basic blocks, supportable operands are constants and bb invariants.
9100 For now, operands defined outside the basic block are not supported. */
9102 bool
9103 vect_is_simple_use (tree operand, vec_info *vinfo,
9104 gimple **def_stmt, enum vect_def_type *dt)
9106 *def_stmt = NULL;
9107 *dt = vect_unknown_def_type;
9109 if (dump_enabled_p ())
9111 dump_printf_loc (MSG_NOTE, vect_location,
9112 "vect_is_simple_use: operand ");
9113 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9114 dump_printf (MSG_NOTE, "\n");
9117 if (CONSTANT_CLASS_P (operand))
9119 *dt = vect_constant_def;
9120 return true;
9123 if (is_gimple_min_invariant (operand))
9125 *dt = vect_external_def;
9126 return true;
9129 if (TREE_CODE (operand) != SSA_NAME)
9131 if (dump_enabled_p ())
9132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9133 "not ssa-name.\n");
9134 return false;
9137 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9139 *dt = vect_external_def;
9140 return true;
9143 *def_stmt = SSA_NAME_DEF_STMT (operand);
9144 if (dump_enabled_p ())
9146 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9147 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9150 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9151 *dt = vect_external_def;
9152 else
9154 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9155 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9158 if (dump_enabled_p ())
9160 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9161 switch (*dt)
9163 case vect_uninitialized_def:
9164 dump_printf (MSG_NOTE, "uninitialized\n");
9165 break;
9166 case vect_constant_def:
9167 dump_printf (MSG_NOTE, "constant\n");
9168 break;
9169 case vect_external_def:
9170 dump_printf (MSG_NOTE, "external\n");
9171 break;
9172 case vect_internal_def:
9173 dump_printf (MSG_NOTE, "internal\n");
9174 break;
9175 case vect_induction_def:
9176 dump_printf (MSG_NOTE, "induction\n");
9177 break;
9178 case vect_reduction_def:
9179 dump_printf (MSG_NOTE, "reduction\n");
9180 break;
9181 case vect_double_reduction_def:
9182 dump_printf (MSG_NOTE, "double reduction\n");
9183 break;
9184 case vect_nested_cycle:
9185 dump_printf (MSG_NOTE, "nested cycle\n");
9186 break;
9187 case vect_unknown_def_type:
9188 dump_printf (MSG_NOTE, "unknown\n");
9189 break;
9193 if (*dt == vect_unknown_def_type)
9195 if (dump_enabled_p ())
9196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9197 "Unsupported pattern.\n");
9198 return false;
9201 switch (gimple_code (*def_stmt))
9203 case GIMPLE_PHI:
9204 case GIMPLE_ASSIGN:
9205 case GIMPLE_CALL:
9206 break;
9207 default:
9208 if (dump_enabled_p ())
9209 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9210 "unsupported defining stmt:\n");
9211 return false;
9214 return true;
9217 /* Function vect_is_simple_use.
9219 Same as vect_is_simple_use but also determines the vector operand
9220 type of OPERAND and stores it to *VECTYPE. If the definition of
9221 OPERAND is vect_uninitialized_def, vect_constant_def or
9222 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9223 is responsible to compute the best suited vector type for the
9224 scalar operand. */
9226 bool
9227 vect_is_simple_use (tree operand, vec_info *vinfo,
9228 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9230 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9231 return false;
9233 /* Now get a vector type if the def is internal, otherwise supply
9234 NULL_TREE and leave it up to the caller to figure out a proper
9235 type for the use stmt. */
9236 if (*dt == vect_internal_def
9237 || *dt == vect_induction_def
9238 || *dt == vect_reduction_def
9239 || *dt == vect_double_reduction_def
9240 || *dt == vect_nested_cycle)
9242 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9244 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9245 && !STMT_VINFO_RELEVANT (stmt_info)
9246 && !STMT_VINFO_LIVE_P (stmt_info))
9247 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9249 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9250 gcc_assert (*vectype != NULL_TREE);
9252 else if (*dt == vect_uninitialized_def
9253 || *dt == vect_constant_def
9254 || *dt == vect_external_def)
9255 *vectype = NULL_TREE;
9256 else
9257 gcc_unreachable ();
9259 return true;
9263 /* Function supportable_widening_operation
9265 Check whether an operation represented by the code CODE is a
9266 widening operation that is supported by the target platform in
9267 vector form (i.e., when operating on arguments of type VECTYPE_IN
9268 producing a result of type VECTYPE_OUT).
9270 Widening operations we currently support are NOP (CONVERT), FLOAT
9271 and WIDEN_MULT. This function checks if these operations are supported
9272 by the target platform either directly (via vector tree-codes), or via
9273 target builtins.
9275 Output:
9276 - CODE1 and CODE2 are codes of vector operations to be used when
9277 vectorizing the operation, if available.
9278 - MULTI_STEP_CVT determines the number of required intermediate steps in
9279 case of multi-step conversion (like char->short->int - in that case
9280 MULTI_STEP_CVT will be 1).
9281 - INTERM_TYPES contains the intermediate type required to perform the
9282 widening operation (short in the above example). */
9284 bool
9285 supportable_widening_operation (enum tree_code code, gimple *stmt,
9286 tree vectype_out, tree vectype_in,
9287 enum tree_code *code1, enum tree_code *code2,
9288 int *multi_step_cvt,
9289 vec<tree> *interm_types)
9291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9292 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9293 struct loop *vect_loop = NULL;
9294 machine_mode vec_mode;
9295 enum insn_code icode1, icode2;
9296 optab optab1, optab2;
9297 tree vectype = vectype_in;
9298 tree wide_vectype = vectype_out;
9299 enum tree_code c1, c2;
9300 int i;
9301 tree prev_type, intermediate_type;
9302 machine_mode intermediate_mode, prev_mode;
9303 optab optab3, optab4;
9305 *multi_step_cvt = 0;
9306 if (loop_info)
9307 vect_loop = LOOP_VINFO_LOOP (loop_info);
9309 switch (code)
9311 case WIDEN_MULT_EXPR:
9312 /* The result of a vectorized widening operation usually requires
9313 two vectors (because the widened results do not fit into one vector).
9314 The generated vector results would normally be expected to be
9315 generated in the same order as in the original scalar computation,
9316 i.e. if 8 results are generated in each vector iteration, they are
9317 to be organized as follows:
9318 vect1: [res1,res2,res3,res4],
9319 vect2: [res5,res6,res7,res8].
9321 However, in the special case that the result of the widening
9322 operation is used in a reduction computation only, the order doesn't
9323 matter (because when vectorizing a reduction we change the order of
9324 the computation). Some targets can take advantage of this and
9325 generate more efficient code. For example, targets like Altivec,
9326 that support widen_mult using a sequence of {mult_even,mult_odd}
9327 generate the following vectors:
9328 vect1: [res1,res3,res5,res7],
9329 vect2: [res2,res4,res6,res8].
9331 When vectorizing outer-loops, we execute the inner-loop sequentially
9332 (each vectorized inner-loop iteration contributes to VF outer-loop
9333 iterations in parallel). We therefore don't allow to change the
9334 order of the computation in the inner-loop during outer-loop
9335 vectorization. */
9336 /* TODO: Another case in which order doesn't *really* matter is when we
9337 widen and then contract again, e.g. (short)((int)x * y >> 8).
9338 Normally, pack_trunc performs an even/odd permute, whereas the
9339 repack from an even/odd expansion would be an interleave, which
9340 would be significantly simpler for e.g. AVX2. */
9341 /* In any case, in order to avoid duplicating the code below, recurse
9342 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9343 are properly set up for the caller. If we fail, we'll continue with
9344 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9345 if (vect_loop
9346 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9347 && !nested_in_vect_loop_p (vect_loop, stmt)
9348 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9349 stmt, vectype_out, vectype_in,
9350 code1, code2, multi_step_cvt,
9351 interm_types))
9353 /* Elements in a vector with vect_used_by_reduction property cannot
9354 be reordered if the use chain with this property does not have the
9355 same operation. One such an example is s += a * b, where elements
9356 in a and b cannot be reordered. Here we check if the vector defined
9357 by STMT is only directly used in the reduction statement. */
9358 tree lhs = gimple_assign_lhs (stmt);
9359 use_operand_p dummy;
9360 gimple *use_stmt;
9361 stmt_vec_info use_stmt_info = NULL;
9362 if (single_imm_use (lhs, &dummy, &use_stmt)
9363 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9364 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9365 return true;
9367 c1 = VEC_WIDEN_MULT_LO_EXPR;
9368 c2 = VEC_WIDEN_MULT_HI_EXPR;
9369 break;
9371 case DOT_PROD_EXPR:
9372 c1 = DOT_PROD_EXPR;
9373 c2 = DOT_PROD_EXPR;
9374 break;
9376 case SAD_EXPR:
9377 c1 = SAD_EXPR;
9378 c2 = SAD_EXPR;
9379 break;
9381 case VEC_WIDEN_MULT_EVEN_EXPR:
9382 /* Support the recursion induced just above. */
9383 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9384 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9385 break;
9387 case WIDEN_LSHIFT_EXPR:
9388 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9389 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9390 break;
9392 CASE_CONVERT:
9393 c1 = VEC_UNPACK_LO_EXPR;
9394 c2 = VEC_UNPACK_HI_EXPR;
9395 break;
9397 case FLOAT_EXPR:
9398 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9399 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9400 break;
9402 case FIX_TRUNC_EXPR:
9403 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9404 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9405 computing the operation. */
9406 return false;
9408 default:
9409 gcc_unreachable ();
9412 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9413 std::swap (c1, c2);
9415 if (code == FIX_TRUNC_EXPR)
9417 /* The signedness is determined from output operand. */
9418 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9419 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9421 else
9423 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9424 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9427 if (!optab1 || !optab2)
9428 return false;
9430 vec_mode = TYPE_MODE (vectype);
9431 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9432 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9433 return false;
9435 *code1 = c1;
9436 *code2 = c2;
9438 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9439 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9440 /* For scalar masks we may have different boolean
9441 vector types having the same QImode. Thus we
9442 add additional check for elements number. */
9443 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9444 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9445 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9447 /* Check if it's a multi-step conversion that can be done using intermediate
9448 types. */
9450 prev_type = vectype;
9451 prev_mode = vec_mode;
9453 if (!CONVERT_EXPR_CODE_P (code))
9454 return false;
9456 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9457 intermediate steps in promotion sequence. We try
9458 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9459 not. */
9460 interm_types->create (MAX_INTERM_CVT_STEPS);
9461 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9463 intermediate_mode = insn_data[icode1].operand[0].mode;
9464 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9466 intermediate_type
9467 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9468 current_vector_size);
9469 if (intermediate_mode != TYPE_MODE (intermediate_type))
9470 return false;
9472 else
9473 intermediate_type
9474 = lang_hooks.types.type_for_mode (intermediate_mode,
9475 TYPE_UNSIGNED (prev_type));
9477 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9478 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9480 if (!optab3 || !optab4
9481 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9482 || insn_data[icode1].operand[0].mode != intermediate_mode
9483 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9484 || insn_data[icode2].operand[0].mode != intermediate_mode
9485 || ((icode1 = optab_handler (optab3, intermediate_mode))
9486 == CODE_FOR_nothing)
9487 || ((icode2 = optab_handler (optab4, intermediate_mode))
9488 == CODE_FOR_nothing))
9489 break;
9491 interm_types->quick_push (intermediate_type);
9492 (*multi_step_cvt)++;
9494 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9495 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9496 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9497 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9498 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9500 prev_type = intermediate_type;
9501 prev_mode = intermediate_mode;
9504 interm_types->release ();
9505 return false;
9509 /* Function supportable_narrowing_operation
9511 Check whether an operation represented by the code CODE is a
9512 narrowing operation that is supported by the target platform in
9513 vector form (i.e., when operating on arguments of type VECTYPE_IN
9514 and producing a result of type VECTYPE_OUT).
9516 Narrowing operations we currently support are NOP (CONVERT) and
9517 FIX_TRUNC. This function checks if these operations are supported by
9518 the target platform directly via vector tree-codes.
9520 Output:
9521 - CODE1 is the code of a vector operation to be used when
9522 vectorizing the operation, if available.
9523 - MULTI_STEP_CVT determines the number of required intermediate steps in
9524 case of multi-step conversion (like int->short->char - in that case
9525 MULTI_STEP_CVT will be 1).
9526 - INTERM_TYPES contains the intermediate type required to perform the
9527 narrowing operation (short in the above example). */
9529 bool
9530 supportable_narrowing_operation (enum tree_code code,
9531 tree vectype_out, tree vectype_in,
9532 enum tree_code *code1, int *multi_step_cvt,
9533 vec<tree> *interm_types)
9535 machine_mode vec_mode;
9536 enum insn_code icode1;
9537 optab optab1, interm_optab;
9538 tree vectype = vectype_in;
9539 tree narrow_vectype = vectype_out;
9540 enum tree_code c1;
9541 tree intermediate_type, prev_type;
9542 machine_mode intermediate_mode, prev_mode;
9543 int i;
9544 bool uns;
9546 *multi_step_cvt = 0;
9547 switch (code)
9549 CASE_CONVERT:
9550 c1 = VEC_PACK_TRUNC_EXPR;
9551 break;
9553 case FIX_TRUNC_EXPR:
9554 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9555 break;
9557 case FLOAT_EXPR:
9558 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9559 tree code and optabs used for computing the operation. */
9560 return false;
9562 default:
9563 gcc_unreachable ();
9566 if (code == FIX_TRUNC_EXPR)
9567 /* The signedness is determined from output operand. */
9568 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9569 else
9570 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9572 if (!optab1)
9573 return false;
9575 vec_mode = TYPE_MODE (vectype);
9576 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9577 return false;
9579 *code1 = c1;
9581 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9582 /* For scalar masks we may have different boolean
9583 vector types having the same QImode. Thus we
9584 add additional check for elements number. */
9585 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9586 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9587 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9589 /* Check if it's a multi-step conversion that can be done using intermediate
9590 types. */
9591 prev_mode = vec_mode;
9592 prev_type = vectype;
9593 if (code == FIX_TRUNC_EXPR)
9594 uns = TYPE_UNSIGNED (vectype_out);
9595 else
9596 uns = TYPE_UNSIGNED (vectype);
9598 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9599 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9600 costly than signed. */
9601 if (code == FIX_TRUNC_EXPR && uns)
9603 enum insn_code icode2;
9605 intermediate_type
9606 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9607 interm_optab
9608 = optab_for_tree_code (c1, intermediate_type, optab_default);
9609 if (interm_optab != unknown_optab
9610 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9611 && insn_data[icode1].operand[0].mode
9612 == insn_data[icode2].operand[0].mode)
9614 uns = false;
9615 optab1 = interm_optab;
9616 icode1 = icode2;
9620 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9621 intermediate steps in promotion sequence. We try
9622 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9623 interm_types->create (MAX_INTERM_CVT_STEPS);
9624 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9626 intermediate_mode = insn_data[icode1].operand[0].mode;
9627 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9629 intermediate_type
9630 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9631 current_vector_size);
9632 if (intermediate_mode != TYPE_MODE (intermediate_type))
9633 return false;
9635 else
9636 intermediate_type
9637 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9638 interm_optab
9639 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9640 optab_default);
9641 if (!interm_optab
9642 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9643 || insn_data[icode1].operand[0].mode != intermediate_mode
9644 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9645 == CODE_FOR_nothing))
9646 break;
9648 interm_types->quick_push (intermediate_type);
9649 (*multi_step_cvt)++;
9651 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9652 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9653 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9654 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9656 prev_mode = intermediate_mode;
9657 prev_type = intermediate_type;
9658 optab1 = interm_optab;
9661 interm_types->release ();
9662 return false;