1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
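/* For example (illustrative only): in a loop, the read of b[i] in
   a[i] = b[i] is classified as VLS_LOAD and the store to a[i] as VLS_STORE,
   whereas the store in a[i] = 5 is VLS_STORE_INVARIANT because the stored
   value is loop-invariant.  */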
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
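/* For example, a caller that is still costing speculatively might do,
   roughly:

     stmt_vector_for_cost cost_vec;
     cost_vec.create (0);
     unsigned estimate = record_stmt_cost (&cost_vec, 2, vector_stmt,
                                           stmt_info, 0, vect_body);

   which pushes two vector_stmt entries for later add_stmt_cost calls and
   returns 2 * builtin_vectorization_cost (vector_stmt, vectype, 0) as a
   preliminary estimate.  Passing a NULL cost vector instead feeds the cost
   straight to the target via add_stmt_cost.  */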
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
175 tree mem_ref, alias_ptr_type;
177 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
186 /* Function vect_mark_relevant.
188 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
190 static void
191 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
192 enum vect_relevant relevant, bool live_p)
194 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
195 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
196 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
197 gimple *pattern_stmt;
199 if (dump_enabled_p ())
201 dump_printf_loc (MSG_NOTE, vect_location,
202 "mark relevant %d, live %d: ", relevant, live_p);
203 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
206 /* If this stmt is an original stmt in a pattern, we might need to mark its
207 related pattern stmt instead of the original stmt. However, such stmts
208 may have their own uses that are not in any pattern; in such cases the
209 stmt itself should be marked. */
210 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
212 /* This is the last stmt in a sequence that was detected as a
213 pattern that can potentially be vectorized. Don't mark the stmt
214 as relevant/live because it's not going to be vectorized.
215 Instead mark the pattern-stmt that replaces it. */
217 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
219 if (dump_enabled_p ())
220 dump_printf_loc (MSG_NOTE, vect_location,
221 "last stmt in pattern. don't mark"
222 " relevant/live.\n");
223 stmt_info = vinfo_for_stmt (pattern_stmt);
224 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
225 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
226 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
227 stmt = pattern_stmt;
230 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
231 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
232 STMT_VINFO_RELEVANT (stmt_info) = relevant;
234 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
235 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237 if (dump_enabled_p ())
238 dump_printf_loc (MSG_NOTE, vect_location,
239 "already marked relevant/live.\n");
240 return;
243 worklist->safe_push (stmt);
247 /* Function is_simple_and_all_uses_invariant
249 Return true if STMT is simple and all uses of it are invariant. */
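/* For example, for the assignment x = a + b inside a loop, where a and b
   are defined before the loop, every operand use is an external or constant
   def and the function returns true; a stmt such as c[i] = x uses the
   loop-internal defs of x and i and so returns false.  */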
251 bool
252 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
254 tree op;
255 gimple *def_stmt;
256 ssa_op_iter iter;
258 if (!is_gimple_assign (stmt))
259 return false;
261 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
263 enum vect_def_type dt = vect_uninitialized_def;
265 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
267 if (dump_enabled_p ())
268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
269 "use not simple.\n");
270 return false;
273 if (dt != vect_external_def && dt != vect_constant_def)
274 return false;
276 return true;
279 /* Function vect_stmt_relevant_p.
281 Return true if STMT, in the loop represented by LOOP_VINFO, is
282 "relevant for vectorization".
284 A stmt is considered "relevant for vectorization" if:
285 - it has uses outside the loop.
286 - it has vdefs (it alters memory).
287 - it is a control stmt in the loop (other than the exit condition).
289 CHECKME: what other side effects would the vectorizer allow? */
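/* For example, in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- has a vdef, hence relevant
         s = s + b[i];      <-- live if s is used after the loop
       }

   the store is relevant because it alters memory, and the final value of s
   is live because it is used outside the loop through the loop-closed
   exit phi.  */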
291 static bool
292 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
293 enum vect_relevant *relevant, bool *live_p)
295 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
296 ssa_op_iter op_iter;
297 imm_use_iterator imm_iter;
298 use_operand_p use_p;
299 def_operand_p def_p;
301 *relevant = vect_unused_in_scope;
302 *live_p = false;
304 /* cond stmt other than loop exit cond. */
305 if (is_ctrl_stmt (stmt)
306 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
307 != loop_exit_ctrl_vec_info_type)
308 *relevant = vect_used_in_scope;
310 /* changing memory. */
311 if (gimple_code (stmt) != GIMPLE_PHI)
312 if (gimple_vdef (stmt)
313 && !gimple_clobber_p (stmt))
315 if (dump_enabled_p ())
316 dump_printf_loc (MSG_NOTE, vect_location,
317 "vec_stmt_relevant_p: stmt has vdefs.\n");
318 *relevant = vect_used_in_scope;
321 /* uses outside the loop. */
322 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
324 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 basic_block bb = gimple_bb (USE_STMT (use_p));
327 if (!flow_bb_inside_loop_p (loop, bb))
329 if (dump_enabled_p ())
330 dump_printf_loc (MSG_NOTE, vect_location,
331 "vec_stmt_relevant_p: used out of loop.\n");
333 if (is_gimple_debug (USE_STMT (use_p)))
334 continue;
336 /* We expect all such uses to be in the loop exit phis
337 (because of loop-closed SSA form).  */
338 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
339 gcc_assert (bb == single_exit (loop)->dest);
341 *live_p = true;
346 if (*live_p && *relevant == vect_unused_in_scope
347 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
349 if (dump_enabled_p ())
350 dump_printf_loc (MSG_NOTE, vect_location,
351 "vec_stmt_relevant_p: stmt live but not relevant.\n");
352 *relevant = vect_used_only_live;
355 return (*live_p || *relevant);
359 /* Function exist_non_indexing_operands_for_use_p
361 USE is one of the uses attached to STMT. Check if USE is
362 used in STMT for anything other than indexing an array. */
364 static bool
365 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
367 tree operand;
368 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info))
374 return true;
376 /* STMT has a data_ref. FORNOW this means that it's one of
377 the following forms:
378 -1- ARRAY_REF = var
379 -2- var = ARRAY_REF
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
384 for array indexing.
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 if (!gimple_assign_copy_p (stmt))
391 if (is_gimple_call (stmt)
392 && gimple_call_internal_p (stmt))
393 switch (gimple_call_internal_fn (stmt))
395 case IFN_MASK_STORE:
396 operand = gimple_call_arg (stmt, 3);
397 if (operand == use)
398 return true;
399 /* FALLTHRU */
400 case IFN_MASK_LOAD:
401 operand = gimple_call_arg (stmt, 2);
402 if (operand == use)
403 return true;
404 break;
405 default:
406 break;
408 return false;
411 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (stmt);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
417 if (operand == use)
418 return true;
420 return false;
425 /* Function process_use.
427 Inputs:
428 - a USE in STMT in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
435 Outputs:
436 Generally, RELEVANT is used to define the relevance info of the DEF_STMT
437 of this USE:
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
445 skip DEF_STMT because it has already been processed.
446 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
447 be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
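/* For example, in a loop that copies x[i] = y[i], the induction variable i
   is used only to compute the addresses of the data references;
   exist_non_indexing_operands_for_use_p returns false for it, so
   process_use leaves the relevance of i's defining stmt unchanged
   (case 1 above).  */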
451 static bool
452 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<gimple *> *worklist,
454 bool force)
456 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
457 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458 stmt_vec_info dstmt_vinfo;
459 basic_block bb, def_bb;
460 gimple *def_stmt;
461 enum vect_def_type dt;
463 /* case 1: we are only interested in uses that need to be vectorized. Uses
464 that are used for address computation are not considered relevant. */
465 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
466 return true;
468 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472 "not vectorized: unsupported use in stmt.\n");
473 return false;
476 if (!def_stmt || gimple_nop_p (def_stmt))
477 return true;
479 def_bb = gimple_bb (def_stmt);
480 if (!flow_bb_inside_loop_p (loop, def_bb))
482 if (dump_enabled_p ())
483 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
484 return true;
487 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
488 DEF_STMT must have already been processed, because this should be the
489 only way that STMT, which is a reduction-phi, was put in the worklist,
490 as there should be no other uses for DEF_STMT in the loop. So we just
491 check that everything is as expected, and we are done. */
492 dstmt_vinfo = vinfo_for_stmt (def_stmt);
493 bb = gimple_bb (stmt);
494 if (gimple_code (stmt) == GIMPLE_PHI
495 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
496 && gimple_code (def_stmt) != GIMPLE_PHI
497 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
498 && bb->loop_father == def_bb->loop_father)
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_NOTE, vect_location,
502 "reduc-stmt defining reduc-phi in the same nest.\n");
503 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
504 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
505 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
506 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
507 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
508 return true;
511 /* case 3a: outer-loop stmt defining an inner-loop stmt:
512 outer-loop-header-bb:
513 d = def_stmt
514 inner-loop:
515 stmt # use (d)
516 outer-loop-tail-bb:
517 ... */
518 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
520 if (dump_enabled_p ())
521 dump_printf_loc (MSG_NOTE, vect_location,
522 "outer-loop def-stmt defining inner-loop stmt.\n");
524 switch (relevant)
526 case vect_unused_in_scope:
527 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
528 vect_used_in_scope : vect_unused_in_scope;
529 break;
531 case vect_used_in_outer_by_reduction:
532 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
533 relevant = vect_used_by_reduction;
534 break;
536 case vect_used_in_outer:
537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 relevant = vect_used_in_scope;
539 break;
541 case vect_used_in_scope:
542 break;
544 default:
545 gcc_unreachable ();
549 /* case 3b: inner-loop stmt defining an outer-loop stmt:
550 outer-loop-header-bb:
552 inner-loop:
553 d = def_stmt
554 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 stmt # use (d) */
556 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
558 if (dump_enabled_p ())
559 dump_printf_loc (MSG_NOTE, vect_location,
560 "inner-loop def-stmt defining outer-loop stmt.\n");
562 switch (relevant)
564 case vect_unused_in_scope:
565 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
566 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
567 vect_used_in_outer_by_reduction : vect_unused_in_scope;
568 break;
570 case vect_used_by_reduction:
571 case vect_used_only_live:
572 relevant = vect_used_in_outer_by_reduction;
573 break;
575 case vect_used_in_scope:
576 relevant = vect_used_in_outer;
577 break;
579 default:
580 gcc_unreachable ();
584 vect_mark_relevant (worklist, def_stmt, relevant, false);
585 return true;
589 /* Function vect_mark_stmts_to_be_vectorized.
591 Not all stmts in the loop need to be vectorized. For example:
593 for i...
594 for j...
595 1. T0 = i + j
596 2. T1 = a[T0]
598 3. j = j + 1
600 Stmts 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
603 This pass detects such stmts. */
605 bool
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
608 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
609 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
610 unsigned int nbbs = loop->num_nodes;
611 gimple_stmt_iterator si;
612 gimple *stmt;
613 unsigned int i;
614 stmt_vec_info stmt_vinfo;
615 basic_block bb;
616 gimple *phi;
617 bool live_p;
618 enum vect_relevant relevant;
620 if (dump_enabled_p ())
621 dump_printf_loc (MSG_NOTE, vect_location,
622 "=== vect_mark_stmts_to_be_vectorized ===\n");
624 auto_vec<gimple *, 64> worklist;
626 /* 1. Init worklist. */
627 for (i = 0; i < nbbs; i++)
629 bb = bbs[i];
630 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
632 phi = gsi_stmt (si);
633 if (dump_enabled_p ())
635 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
636 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
639 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
640 vect_mark_relevant (&worklist, phi, relevant, live_p);
642 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
644 stmt = gsi_stmt (si);
645 if (dump_enabled_p ())
647 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
648 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
651 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
652 vect_mark_relevant (&worklist, stmt, relevant, live_p);
656 /* 2. Process_worklist */
657 while (worklist.length () > 0)
659 use_operand_p use_p;
660 ssa_op_iter iter;
662 stmt = worklist.pop ();
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
669 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
670 (DEF_STMT) as relevant/irrelevant according to the relevance property
671 of STMT. */
672 stmt_vinfo = vinfo_for_stmt (stmt);
673 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
675 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
676 propagated as is to the DEF_STMTs of its USEs.
678 One exception is when STMT has been identified as defining a reduction
679 variable; in this case we set the relevance to vect_used_by_reduction.
680 This is because we distinguish between two kinds of relevant stmts -
681 those that are used by a reduction computation, and those that are
682 (also) used by a regular computation. This allows us later on to
683 identify stmts that are used solely by a reduction, and therefore the
684 order of the results that they produce does not have to be kept. */
686 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
688 case vect_reduction_def:
689 gcc_assert (relevant != vect_unused_in_scope);
690 if (relevant != vect_unused_in_scope
691 && relevant != vect_used_in_scope
692 && relevant != vect_used_by_reduction
693 && relevant != vect_used_only_live)
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
697 "unsupported use of reduction.\n");
698 return false;
700 break;
702 case vect_nested_cycle:
703 if (relevant != vect_unused_in_scope
704 && relevant != vect_used_in_outer_by_reduction
705 && relevant != vect_used_in_outer)
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
709 "unsupported use of nested cycle.\n");
711 return false;
713 break;
715 case vect_double_reduction_def:
716 if (relevant != vect_unused_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "unsupported use of double reduction.\n");
724 return false;
726 break;
728 default:
729 break;
732 if (is_pattern_stmt_p (stmt_vinfo))
734 /* Pattern statements are not inserted into the code, so
735 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
736 have to scan the RHS or function arguments instead. */
737 if (is_gimple_assign (stmt))
739 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
740 tree op = gimple_assign_rhs1 (stmt);
742 i = 1;
743 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
745 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
746 relevant, &worklist, false)
747 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
748 relevant, &worklist, false))
749 return false;
750 i = 2;
752 for (; i < gimple_num_ops (stmt); i++)
754 op = gimple_op (stmt, i);
755 if (TREE_CODE (op) == SSA_NAME
756 && !process_use (stmt, op, loop_vinfo, relevant,
757 &worklist, false))
758 return false;
761 else if (is_gimple_call (stmt))
763 for (i = 0; i < gimple_call_num_args (stmt); i++)
765 tree arg = gimple_call_arg (stmt, i);
766 if (!process_use (stmt, arg, loop_vinfo, relevant,
767 &worklist, false))
768 return false;
772 else
773 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
775 tree op = USE_FROM_PTR (use_p);
776 if (!process_use (stmt, op, loop_vinfo, relevant,
777 &worklist, false))
778 return false;
781 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
783 gather_scatter_info gs_info;
784 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
785 gcc_unreachable ();
786 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
787 &worklist, true))
788 return false;
790 } /* while worklist */
792 return true;
796 /* Function vect_model_simple_cost.
798 Models cost for simple operations, i.e. those that only emit ncopies of a
799 single op. Right now, this does not account for multiple insns that could
800 be generated for the single vector op. We will handle that shortly. */
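/* For example, for a stmt like a_i = b_i + 7 vectorized with NCOPIES == 2
   and dt = { vect_internal_def, vect_constant_def }, this records one
   vector_stmt prologue cost for the invariant operand and two vector_stmt
   body costs, and reports them in the dump as prologue_cost and
   inside_cost.  */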
802 void
803 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
804 enum vect_def_type *dt,
805 stmt_vector_for_cost *prologue_cost_vec,
806 stmt_vector_for_cost *body_cost_vec)
808 int i;
809 int inside_cost = 0, prologue_cost = 0;
811 /* The SLP costs were already calculated during SLP tree build. */
812 if (PURE_SLP_STMT (stmt_info))
813 return;
815 /* FORNOW: Assuming maximum 2 args per stmts. */
816 for (i = 0; i < 2; i++)
817 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
818 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
819 stmt_info, 0, vect_prologue);
821 /* Pass the inside-of-loop statements to the target-specific cost model. */
822 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
823 stmt_info, 0, vect_body);
825 if (dump_enabled_p ())
826 dump_printf_loc (MSG_NOTE, vect_location,
827 "vect_model_simple_cost: inside_cost = %d, "
828 "prologue_cost = %d .\n", inside_cost, prologue_cost);
832 /* Model cost for type demotion and promotion operations. PWR is normally
833 zero for single-step promotions and demotions. It will be one if
834 two-step promotion/demotion is required, and so on. Each additional
835 step doubles the number of instructions required. */
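/* For example, a two-step demotion (PWR == 1) iterates i = 0, 1 below with
   tmp == i, accounting for vect_pow2 (0) + vect_pow2 (1) == 1 + 2 == 3
   vec_promote_demote operations; the corresponding two-step promotion uses
   tmp == i + 1 and accounts for 2 + 4 == 6.  */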
837 static void
838 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
839 enum vect_def_type *dt, int pwr)
841 int i, tmp;
842 int inside_cost = 0, prologue_cost = 0;
843 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
844 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
845 void *target_cost_data;
847 /* The SLP costs were already calculated during SLP tree build. */
848 if (PURE_SLP_STMT (stmt_info))
849 return;
851 if (loop_vinfo)
852 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
853 else
854 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
856 for (i = 0; i < pwr + 1; i++)
858 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
859 (i + 1) : i;
860 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
861 vec_promote_demote, stmt_info, 0,
862 vect_body);
865 /* FORNOW: Assuming maximum 2 args per stmts. */
866 for (i = 0; i < 2; i++)
867 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
868 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
869 stmt_info, 0, vect_prologue);
871 if (dump_enabled_p ())
872 dump_printf_loc (MSG_NOTE, vect_location,
873 "vect_model_promotion_demotion_cost: inside_cost = %d, "
874 "prologue_cost = %d .\n", inside_cost, prologue_cost);
877 /* Function vect_model_store_cost
879 Models cost for stores. In the case of grouped accesses, one access
880 has the overhead of the grouped access attributed to it. */
882 void
883 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
884 vect_memory_access_type memory_access_type,
885 enum vect_def_type dt, slp_tree slp_node,
886 stmt_vector_for_cost *prologue_cost_vec,
887 stmt_vector_for_cost *body_cost_vec)
889 unsigned int inside_cost = 0, prologue_cost = 0;
890 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
891 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
892 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
894 if (dt == vect_constant_def || dt == vect_external_def)
895 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
896 stmt_info, 0, vect_prologue);
898 /* Grouped stores update all elements in the group at once,
899 so we want the DR for the first statement. */
900 if (!slp_node && grouped_access_p)
902 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
903 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
906 /* True if we should include any once-per-group costs as well as
907 the cost of the statement itself. For SLP we only get called
908 once per group anyhow. */
909 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
911 /* We assume that the cost of a single store-lanes instruction is
912 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
913 access is instead being provided by a permute-and-store operation,
914 include the cost of the permutes. */
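/* For example, with GROUP_SIZE == 4 and NCOPIES == 2 the permute cost below
   is counted as 2 * ceil_log2 (4) * 4 == 16 vec_perm operations.  */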
915 if (first_stmt_p
916 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
918 /* Uses high and low interleave operations, or shuffle operations, for
919 each needed permute.  */
920 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
921 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
922 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
923 stmt_info, 0, vect_body);
925 if (dump_enabled_p ())
926 dump_printf_loc (MSG_NOTE, vect_location,
927 "vect_model_store_cost: strided group_size = %d .\n",
928 group_size);
931 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
932 /* Costs of the stores. */
933 if (memory_access_type == VMAT_ELEMENTWISE)
934 /* N scalar stores plus extracting the elements. */
935 inside_cost += record_stmt_cost (body_cost_vec,
936 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
937 scalar_store, stmt_info, 0, vect_body);
938 else
939 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
941 if (memory_access_type == VMAT_ELEMENTWISE
942 || memory_access_type == VMAT_STRIDED_SLP)
943 inside_cost += record_stmt_cost (body_cost_vec,
944 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
945 vec_to_scalar, stmt_info, 0, vect_body);
947 if (dump_enabled_p ())
948 dump_printf_loc (MSG_NOTE, vect_location,
949 "vect_model_store_cost: inside_cost = %d, "
950 "prologue_cost = %d .\n", inside_cost, prologue_cost);
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (struct data_reference *dr, int ncopies,
957 unsigned int *inside_cost,
958 stmt_vector_for_cost *body_cost_vec)
960 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
961 gimple *stmt = DR_STMT (dr);
962 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
964 switch (alignment_support_scheme)
966 case dr_aligned:
968 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
969 vector_store, stmt_info, 0,
970 vect_body);
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE, vect_location,
974 "vect_model_store_cost: aligned.\n");
975 break;
978 case dr_unaligned_supported:
980 /* Here, we assign an additional cost for the unaligned store. */
981 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
982 unaligned_store, stmt_info,
983 DR_MISALIGNMENT (dr), vect_body);
984 if (dump_enabled_p ())
985 dump_printf_loc (MSG_NOTE, vect_location,
986 "vect_model_store_cost: unaligned supported by "
987 "hardware.\n");
988 break;
991 case dr_unaligned_unsupported:
993 *inside_cost = VECT_MAX_COST;
995 if (dump_enabled_p ())
996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
997 "vect_model_store_cost: unsupported access.\n");
998 break;
1001 default:
1002 gcc_unreachable ();
1007 /* Function vect_model_load_cost
1009 Models cost for loads. In the case of grouped accesses, one access has
1010 the overhead of the grouped access attributed to it. Since unaligned
1011 accesses are supported for loads, we also account for the costs of the
1012 access scheme chosen. */
1014 void
1015 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1016 vect_memory_access_type memory_access_type,
1017 slp_tree slp_node,
1018 stmt_vector_for_cost *prologue_cost_vec,
1019 stmt_vector_for_cost *body_cost_vec)
1021 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1022 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1023 unsigned int inside_cost = 0, prologue_cost = 0;
1024 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1026 /* Grouped loads read all elements in the group at once,
1027 so we want the DR for the first statement. */
1028 if (!slp_node && grouped_access_p)
1030 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1031 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1034 /* True if we should include any once-per-group costs as well as
1035 the cost of the statement itself. For SLP we only get called
1036 once per group anyhow. */
1037 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1039 /* We assume that the cost of a single load-lanes instruction is
1040 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1041 access is instead being provided by a load-and-permute operation,
1042 include the cost of the permutes. */
1043 if (first_stmt_p
1044 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1046 /* Uses even and odd extract operations, or shuffle operations, for
1047 each needed permute.  */
1048 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1049 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1050 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1051 stmt_info, 0, vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_load_cost: strided group_size = %d .\n",
1056 group_size);
1059 /* The loads themselves. */
1060 if (memory_access_type == VMAT_ELEMENTWISE)
1062 /* N scalar loads plus gathering them into a vector. */
1063 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1064 inside_cost += record_stmt_cost (body_cost_vec,
1065 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1066 scalar_load, stmt_info, 0, vect_body);
1068 else
1069 vect_get_load_cost (dr, ncopies, first_stmt_p,
1070 &inside_cost, &prologue_cost,
1071 prologue_cost_vec, body_cost_vec, true);
1072 if (memory_access_type == VMAT_ELEMENTWISE
1073 || memory_access_type == VMAT_STRIDED_SLP)
1074 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1075 stmt_info, 0, vect_body);
1077 if (dump_enabled_p ())
1078 dump_printf_loc (MSG_NOTE, vect_location,
1079 "vect_model_load_cost: inside_cost = %d, "
1080 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1084 /* Calculate cost of DR's memory access. */
1085 void
1086 vect_get_load_cost (struct data_reference *dr, int ncopies,
1087 bool add_realign_cost, unsigned int *inside_cost,
1088 unsigned int *prologue_cost,
1089 stmt_vector_for_cost *prologue_cost_vec,
1090 stmt_vector_for_cost *body_cost_vec,
1091 bool record_prologue_costs)
1093 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1094 gimple *stmt = DR_STMT (dr);
1095 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1097 switch (alignment_support_scheme)
1099 case dr_aligned:
1101 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1102 stmt_info, 0, vect_body);
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_NOTE, vect_location,
1106 "vect_model_load_cost: aligned.\n");
1108 break;
1110 case dr_unaligned_supported:
1112 /* Here, we assign an additional cost for the unaligned load. */
1113 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1114 unaligned_load, stmt_info,
1115 DR_MISALIGNMENT (dr), vect_body);
1117 if (dump_enabled_p ())
1118 dump_printf_loc (MSG_NOTE, vect_location,
1119 "vect_model_load_cost: unaligned supported by "
1120 "hardware.\n");
1122 break;
1124 case dr_explicit_realign:
1126 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1127 vector_load, stmt_info, 0, vect_body);
1128 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1129 vec_perm, stmt_info, 0, vect_body);
1131 /* FIXME: If the misalignment remains fixed across the iterations of
1132 the containing loop, the following cost should be added to the
1133 prologue costs. */
1134 if (targetm.vectorize.builtin_mask_for_load)
1135 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1136 stmt_info, 0, vect_body);
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE, vect_location,
1140 "vect_model_load_cost: explicit realign\n");
1142 break;
1144 case dr_explicit_realign_optimized:
1146 if (dump_enabled_p ())
1147 dump_printf_loc (MSG_NOTE, vect_location,
1148 "vect_model_load_cost: unaligned software "
1149 "pipelined.\n");
1151 /* An unaligned software pipeline has a load of an address, an initial
1152 load, and possibly a mask operation to "prime" the loop. However,
1153 if this is an access in a group of loads, which provides grouped
1154 access, then the above cost should only be considered for one
1155 access in the group. Inside the loop, there is a load op
1156 and a realignment op. */
1158 if (add_realign_cost && record_prologue_costs)
1160 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1161 vector_stmt, stmt_info,
1162 0, vect_prologue);
1163 if (targetm.vectorize.builtin_mask_for_load)
1164 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1165 vector_stmt, stmt_info,
1166 0, vect_prologue);
1169 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1170 stmt_info, 0, vect_body);
1171 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1172 stmt_info, 0, vect_body);
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: explicit realign optimized"
1177 "\n");
1179 break;
1182 case dr_unaligned_unsupported:
1184 *inside_cost = VECT_MAX_COST;
1186 if (dump_enabled_p ())
1187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1188 "vect_model_load_cost: unsupported access.\n");
1189 break;
1192 default:
1193 gcc_unreachable ();
1197 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1198 the loop preheader for the vectorized stmt STMT. */
1200 static void
1201 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1203 if (gsi)
1204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1205 else
1207 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1210 if (loop_vinfo)
1212 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1213 basic_block new_bb;
1214 edge pe;
1216 if (nested_in_vect_loop_p (loop, stmt))
1217 loop = loop->inner;
1219 pe = loop_preheader_edge (loop);
1220 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1221 gcc_assert (!new_bb);
1223 else
1225 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1226 basic_block bb;
1227 gimple_stmt_iterator gsi_bb_start;
1229 gcc_assert (bb_vinfo);
1230 bb = BB_VINFO_BB (bb_vinfo);
1231 gsi_bb_start = gsi_after_labels (bb);
1232 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1236 if (dump_enabled_p ())
1238 dump_printf_loc (MSG_NOTE, vect_location,
1239 "created new init_stmt: ");
1240 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1244 /* Function vect_init_vector.
1246 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1247 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1248 vector type a vector with all elements equal to VAL is created first.
1249 Place the initialization at BSI if it is not NULL. Otherwise, place the
1250 initialization at the loop preheader.
1251 Return the DEF of INIT_STMT.
1252 It will be used in the vectorization of STMT. */
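/* For example, initializing a V4SI vector from the scalar constant 5 with
   GSI == NULL emits something like

     vect_cst__7 = { 5, 5, 5, 5 };

   in the loop preheader and returns the SSA name vect_cst__7 (the name is
   only illustrative).  */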
1254 tree
1255 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1257 gimple *init_stmt;
1258 tree new_temp;
1260 /* We abuse this function to push something to an SSA name with initial 'val'. */
1261 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1263 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1264 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1266 /* A scalar boolean value should be transformed into an
1267 all-zeros or all-ones value before building a vector. */
1268 if (VECTOR_BOOLEAN_TYPE_P (type))
1270 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1271 tree false_val = build_zero_cst (TREE_TYPE (type));
1273 if (CONSTANT_CLASS_P (val))
1274 val = integer_zerop (val) ? false_val : true_val;
1275 else
1277 new_temp = make_ssa_name (TREE_TYPE (type));
1278 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1279 val, true_val, false_val);
1280 vect_init_vector_1 (stmt, init_stmt, gsi);
1281 val = new_temp;
1284 else if (CONSTANT_CLASS_P (val))
1285 val = fold_convert (TREE_TYPE (type), val);
1286 else
1288 new_temp = make_ssa_name (TREE_TYPE (type));
1289 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1290 init_stmt = gimple_build_assign (new_temp,
1291 fold_build1 (VIEW_CONVERT_EXPR,
1292 TREE_TYPE (type),
1293 val));
1294 else
1295 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1296 vect_init_vector_1 (stmt, init_stmt, gsi);
1297 val = new_temp;
1300 val = build_vector_from_val (type, val);
1303 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1304 init_stmt = gimple_build_assign (new_temp, val);
1305 vect_init_vector_1 (stmt, init_stmt, gsi);
1306 return new_temp;
1309 /* Function vect_get_vec_def_for_operand_1.
1311 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1312 DT that will be used in the vectorized stmt. */
1314 tree
1315 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1317 tree vec_oprnd;
1318 gimple *vec_stmt;
1319 stmt_vec_info def_stmt_info = NULL;
1321 switch (dt)
1323 /* operand is a constant or a loop invariant. */
1324 case vect_constant_def:
1325 case vect_external_def:
1326 /* Code should use vect_get_vec_def_for_operand. */
1327 gcc_unreachable ();
1329 /* operand is defined inside the loop. */
1330 case vect_internal_def:
1332 /* Get the def from the vectorized stmt. */
1333 def_stmt_info = vinfo_for_stmt (def_stmt);
1335 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1336 /* Get vectorized pattern statement. */
1337 if (!vec_stmt
1338 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1339 && !STMT_VINFO_RELEVANT (def_stmt_info))
1340 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1341 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1342 gcc_assert (vec_stmt);
1343 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1344 vec_oprnd = PHI_RESULT (vec_stmt);
1345 else if (is_gimple_call (vec_stmt))
1346 vec_oprnd = gimple_call_lhs (vec_stmt);
1347 else
1348 vec_oprnd = gimple_assign_lhs (vec_stmt);
1349 return vec_oprnd;
1352 /* operand is defined by a loop header phi - reduction */
1353 case vect_reduction_def:
1354 case vect_double_reduction_def:
1355 case vect_nested_cycle:
1356 /* Code should use get_initial_def_for_reduction. */
1357 gcc_unreachable ();
1359 /* operand is defined by loop-header phi - induction. */
1360 case vect_induction_def:
1362 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1364 /* Get the def from the vectorized stmt. */
1365 def_stmt_info = vinfo_for_stmt (def_stmt);
1366 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1367 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1368 vec_oprnd = PHI_RESULT (vec_stmt);
1369 else
1370 vec_oprnd = gimple_get_lhs (vec_stmt);
1371 return vec_oprnd;
1374 default:
1375 gcc_unreachable ();
1380 /* Function vect_get_vec_def_for_operand.
1382 OP is an operand in STMT. This function returns a (vector) def that will be
1383 used in the vectorized stmt for STMT.
1385 In the case that OP is an SSA_NAME which is defined in the loop, then
1386 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1388 In case OP is an invariant or constant, a new stmt that creates a vector def
1389 needs to be introduced. VECTYPE may be used to specify a required type for
1390 vector invariant. */
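/* For example, when vectorizing z_i = x_i + 3, the call for operand x_i
   returns the lhs of the vectorized stmt recorded in STMT_VINFO_VEC_STMT of
   x_i's definition, while the call for operand 3 creates (via
   vect_init_vector) a new invariant vector such as { 3, 3, 3, 3 } and
   returns its def.  */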
1392 tree
1393 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1395 gimple *def_stmt;
1396 enum vect_def_type dt;
1397 bool is_simple_use;
1398 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1399 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1401 if (dump_enabled_p ())
1403 dump_printf_loc (MSG_NOTE, vect_location,
1404 "vect_get_vec_def_for_operand: ");
1405 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1406 dump_printf (MSG_NOTE, "\n");
1409 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1410 gcc_assert (is_simple_use);
1411 if (def_stmt && dump_enabled_p ())
1413 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1414 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1417 if (dt == vect_constant_def || dt == vect_external_def)
1419 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1420 tree vector_type;
1422 if (vectype)
1423 vector_type = vectype;
1424 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1425 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1426 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1427 else
1428 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1430 gcc_assert (vector_type);
1431 return vect_init_vector (stmt, op, vector_type, NULL);
1433 else
1434 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1438 /* Function vect_get_vec_def_for_stmt_copy
1440 Return a vector-def for an operand. This function is used when the
1441 vectorized stmt to be created (by the caller to this function) is a "copy"
1442 created in case the vectorized result cannot fit in one vector, and several
1443 copies of the vector-stmt are required. In this case the vector-def is
1444 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1445 of the stmt that defines VEC_OPRND.
1446 DT is the type of the vector def VEC_OPRND.
1448 Context:
1449 In case the vectorization factor (VF) is bigger than the number
1450 of elements that can fit in a vectype (nunits), we have to generate
1451 more than one vector stmt to vectorize the scalar stmt. This situation
1452 arises when there are multiple data-types operated upon in the loop; the
1453 smallest data-type determines the VF, and as a result, when vectorizing
1454 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1455 vector stmt (each computing a vector of 'nunits' results, and together
1456 computing 'VF' results in each iteration). This function is called when
1457 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1458 which VF=16 and nunits=4, so the number of copies required is 4):
1460 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1462 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1463 VS1.1: vx.1 = memref1 VS1.2
1464 VS1.2: vx.2 = memref2 VS1.3
1465 VS1.3: vx.3 = memref3
1467 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1468 VSnew.1: vz1 = vx.1 + ... VSnew.2
1469 VSnew.2: vz2 = vx.2 + ... VSnew.3
1470 VSnew.3: vz3 = vx.3 + ...
1472 The vectorization of S1 is explained in vectorizable_load.
1473 The vectorization of S2:
1474 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1475 the function 'vect_get_vec_def_for_operand' is called to
1476 get the relevant vector-def for each operand of S2. For operand x it
1477 returns the vector-def 'vx.0'.
1479 To create the remaining copies of the vector-stmt (VSnew.j), this
1480 function is called to get the relevant vector-def for each operand. It is
1481 obtained from the respective VS1.j stmt, which is recorded in the
1482 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1484 For example, to obtain the vector-def 'vx.1' in order to create the
1485 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1486 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1487 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1488 and return its def ('vx.1').
1489 Overall, to create the above sequence this function will be called 3 times:
1490 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1491 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1492 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1494 tree
1495 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1497 gimple *vec_stmt_for_operand;
1498 stmt_vec_info def_stmt_info;
1500 /* Do nothing; can reuse same def. */
1501 if (dt == vect_external_def || dt == vect_constant_def)
1502 return vec_oprnd;
1504 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1505 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1506 gcc_assert (def_stmt_info);
1507 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1508 gcc_assert (vec_stmt_for_operand);
1509 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1510 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1511 else
1512 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1513 return vec_oprnd;
1517 /* Get vectorized definitions for the operands to create a copy of an original
1518 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1520 static void
1521 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1522 vec<tree> *vec_oprnds0,
1523 vec<tree> *vec_oprnds1)
1525 tree vec_oprnd = vec_oprnds0->pop ();
1527 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1528 vec_oprnds0->quick_push (vec_oprnd);
1530 if (vec_oprnds1 && vec_oprnds1->length ())
1532 vec_oprnd = vec_oprnds1->pop ();
1533 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1534 vec_oprnds1->quick_push (vec_oprnd);
1539 /* Get vectorized definitions for OP0 and OP1.
1540 REDUC_INDEX is the index of the reduction operand in case of a reduction,
1541 and -1 otherwise. */
1543 void
1544 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1545 vec<tree> *vec_oprnds0,
1546 vec<tree> *vec_oprnds1,
1547 slp_tree slp_node, int reduc_index)
1549 if (slp_node)
1551 int nops = (op1 == NULL_TREE) ? 1 : 2;
1552 auto_vec<tree> ops (nops);
1553 auto_vec<vec<tree> > vec_defs (nops);
1555 ops.quick_push (op0);
1556 if (op1)
1557 ops.quick_push (op1);
1559 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1561 *vec_oprnds0 = vec_defs[0];
1562 if (op1)
1563 *vec_oprnds1 = vec_defs[1];
1565 else
1567 tree vec_oprnd;
1569 vec_oprnds0->create (1);
1570 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1571 vec_oprnds0->quick_push (vec_oprnd);
1573 if (op1)
1575 vec_oprnds1->create (1);
1576 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1577 vec_oprnds1->quick_push (vec_oprnd);
1583 /* Function vect_finish_stmt_generation.
1585 Insert a new stmt. */
1587 void
1588 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1589 gimple_stmt_iterator *gsi)
1591 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1592 vec_info *vinfo = stmt_info->vinfo;
1594 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1596 if (!gsi_end_p (*gsi)
1597 && gimple_has_mem_ops (vec_stmt))
1599 gimple *at_stmt = gsi_stmt (*gsi);
1600 tree vuse = gimple_vuse (at_stmt);
1601 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1603 tree vdef = gimple_vdef (at_stmt);
1604 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1605 /* If we have an SSA vuse and insert a store, update virtual
1606 SSA form to avoid triggering the renamer. Do so only
1607 if we can easily see all uses - which is what almost always
1608 happens with the way vectorized stmts are inserted. */
1609 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1610 && ((is_gimple_assign (vec_stmt)
1611 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1612 || (is_gimple_call (vec_stmt)
1613 && !(gimple_call_flags (vec_stmt)
1614 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1616 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1617 gimple_set_vdef (vec_stmt, new_vdef);
1618 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1622 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1624 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1626 if (dump_enabled_p ())
1628 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1629 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1632 gimple_set_location (vec_stmt, gimple_location (stmt));
1634 /* While EH edges will generally prevent vectorization, stmt might
1635 e.g. be in a must-not-throw region. Ensure newly created stmts
1636 that could throw are part of the same region. */
1637 int lp_nr = lookup_stmt_eh_lp (stmt);
1638 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1639 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1642 /* We want to vectorize a call to combined function CFN with function
1643 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1644 as the types of all inputs. Check whether this is possible using
1645 an internal function, returning its code if so or IFN_LAST if not. */
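/* For example, for a call to the sqrt () builtin vectorized with V2DF
   vectors, associated_internal_fn maps the builtin to IFN_SQRT; if the
   target provides the corresponding optab pattern (e.g. sqrtv2df2),
   direct_internal_fn_supported_p succeeds and IFN_SQRT is returned,
   otherwise IFN_LAST.  */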
1647 static internal_fn
1648 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1649 tree vectype_out, tree vectype_in)
1651 internal_fn ifn;
1652 if (internal_fn_p (cfn))
1653 ifn = as_internal_fn (cfn);
1654 else
1655 ifn = associated_internal_fn (fndecl);
1656 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1658 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1659 if (info.vectorizable)
1661 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1662 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1663 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1664 OPTIMIZE_FOR_SPEED))
1665 return ifn;
1668 return IFN_LAST;
1672 static tree permute_vec_elements (tree, tree, tree, gimple *,
1673 gimple_stmt_iterator *);
1675 /* STMT is a non-strided load or store, meaning that it accesses
1676 elements with a known constant step. Return -1 if that step
1677 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1679 static int
1680 compare_step_with_zero (gimple *stmt)
1682 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1683 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1684 tree step;
1685 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1686 step = STMT_VINFO_DR_STEP (stmt_info);
1687 else
1688 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1689 return tree_int_cst_compare (step, size_zero_node);
1692 /* If the target supports a permute mask that reverses the elements in
1693 a vector of type VECTYPE, return that mask, otherwise return null. */
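/* For example, for a four-element vector the selector built below is
   { 3, 2, 1, 0 }, i.e. element i of the result is element nunits - 1 - i
   of the input.  */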
1695 static tree
1696 perm_mask_for_reverse (tree vectype)
1698 int i, nunits;
1699 unsigned char *sel;
1701 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1702 sel = XALLOCAVEC (unsigned char, nunits);
1704 for (i = 0; i < nunits; ++i)
1705 sel[i] = nunits - 1 - i;
1707 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1708 return NULL_TREE;
1709 return vect_gen_perm_mask_checked (vectype, sel);
1712 /* A subroutine of get_load_store_type, with a subset of the same
1713 arguments. Handle the case where STMT is part of a grouped load
1714 or store.
1716 For stores, the statements in the group are all consecutive
1717 and there is no gap at the end. For loads, the statements in the
1718 group might not be consecutive; there can be gaps between statements
1719 as well as at the end. */
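/* For example, if the loop reads both a[2*i] and a[2*i+1], the two loads
   form a group of size 2 with no gap, whereas a loop reading only a[2*i]
   gives a single-element group with a one-element gap at the end; in the
   latter case the vector loads may run past the last scalar element, which
   is what the overrun/peeling-for-gaps logic below guards against.  */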
1721 static bool
1722 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1723 vec_load_store_type vls_type,
1724 vect_memory_access_type *memory_access_type)
1726 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1727 vec_info *vinfo = stmt_info->vinfo;
1728 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1729 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1730 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1731 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1732 bool single_element_p = (stmt == first_stmt
1733 && !GROUP_NEXT_ELEMENT (stmt_info));
1734 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1735 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1737 /* True if the vectorized statements would access beyond the last
1738 statement in the group. */
1739 bool overrun_p = false;
1741 /* True if we can cope with such overrun by peeling for gaps, so that
1742 there is at least one final scalar iteration after the vector loop. */
1743 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1745 /* There can only be a gap at the end of the group if the stride is
1746 known at compile time. */
1747 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1749 /* Stores can't yet have gaps. */
1750 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1752 if (slp)
1754 if (STMT_VINFO_STRIDED_P (stmt_info))
1756 /* Try to use consecutive accesses of GROUP_SIZE elements,
1757 separated by the stride, until we have a complete vector.
1758 Fall back to scalar accesses if that isn't possible. */
1759 if (nunits % group_size == 0)
1760 *memory_access_type = VMAT_STRIDED_SLP;
1761 else
1762 *memory_access_type = VMAT_ELEMENTWISE;
1764 else
1766 overrun_p = loop_vinfo && gap != 0;
1767 if (overrun_p && vls_type != VLS_LOAD)
1769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1770 "Grouped store with gaps requires"
1771 " non-consecutive accesses\n");
1772 return false;
1774 if (overrun_p && !can_overrun_p)
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 "Peeling for outer loop is not supported\n");
1779 return false;
1781 *memory_access_type = VMAT_CONTIGUOUS;
1784 else
1786 /* We can always handle this case using elementwise accesses,
1787 but see if something more efficient is available. */
1788 *memory_access_type = VMAT_ELEMENTWISE;
1790 /* If there is a gap at the end of the group then these optimizations
1791 would access excess elements in the last iteration. */
1792 bool would_overrun_p = (gap != 0);
1793 if (!STMT_VINFO_STRIDED_P (stmt_info)
1794 && (can_overrun_p || !would_overrun_p)
1795 && compare_step_with_zero (stmt) > 0)
1797 /* First try using LOAD/STORE_LANES. */
1798 if (vls_type == VLS_LOAD
1799 ? vect_load_lanes_supported (vectype, group_size)
1800 : vect_store_lanes_supported (vectype, group_size))
1802 *memory_access_type = VMAT_LOAD_STORE_LANES;
1803 overrun_p = would_overrun_p;
1806 /* If that fails, try using permuting loads. */
1807 if (*memory_access_type == VMAT_ELEMENTWISE
1808 && (vls_type == VLS_LOAD
1809 ? vect_grouped_load_supported (vectype, single_element_p,
1810 group_size)
1811 : vect_grouped_store_supported (vectype, group_size)))
1813 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1814 overrun_p = would_overrun_p;
1819 if (vls_type != VLS_LOAD && first_stmt == stmt)
1821 /* STMT is the leader of the group. Check the operands of all the
1822 stmts of the group. */
1823 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1824 while (next_stmt)
1826 gcc_assert (gimple_assign_single_p (next_stmt));
1827 tree op = gimple_assign_rhs1 (next_stmt);
1828 gimple *def_stmt;
1829 enum vect_def_type dt;
1830 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1832 if (dump_enabled_p ())
1833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1834 "use not simple.\n");
1835 return false;
1837 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1841 if (overrun_p)
1843 gcc_assert (can_overrun_p);
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "Data access with gaps requires scalar "
1847 "epilogue loop\n");
1848 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1851 return true;
1854 /* A subroutine of get_load_store_type, with a subset of the same
1855 arguments. Handle the case where STMT is a load or store that
1856 accesses consecutive elements with a negative step. */
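/* Editorial illustration (assumed example): a loop such as

     for (i = n - 1; i >= 0; i--)
       sum += a[i];

   reads A with a step of minus one element.  If only a single vector
   copy is needed, the access is aligned or unaligned accesses are
   supported, and perm_mask_for_reverse found a usable reversal mask,
   the result below is VMAT_CONTIGUOUS_REVERSE: load a contiguous
   vector and reverse its lanes.  Storing an invariant value needs no
   reversal and is classified as VMAT_CONTIGUOUS_DOWN instead.  */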
1858 static vect_memory_access_type
1859 get_negative_load_store_type (gimple *stmt, tree vectype,
1860 vec_load_store_type vls_type,
1861 unsigned int ncopies)
1863 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1864 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1865 dr_alignment_support alignment_support_scheme;
1867 if (ncopies > 1)
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1871 "multiple types with negative step.\n");
1872 return VMAT_ELEMENTWISE;
1875 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1876 if (alignment_support_scheme != dr_aligned
1877 && alignment_support_scheme != dr_unaligned_supported)
1879 if (dump_enabled_p ())
1880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1881 "negative step but alignment required.\n");
1882 return VMAT_ELEMENTWISE;
1885 if (vls_type == VLS_STORE_INVARIANT)
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE, vect_location,
1889 "negative step with invariant source;"
1890 " no permute needed.\n");
1891 return VMAT_CONTIGUOUS_DOWN;
1894 if (!perm_mask_for_reverse (vectype))
1896 if (dump_enabled_p ())
1897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1898 "negative step and reversing not supported.\n");
1899 return VMAT_ELEMENTWISE;
1902 return VMAT_CONTIGUOUS_REVERSE;
1905 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1906 if there is a memory access type that the vectorized form can use,
1907 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1908 or scatters, fill in GS_INFO accordingly.
1910 SLP says whether we're performing SLP rather than loop vectorization.
1911 VECTYPE is the vector type that the vectorized statements will use.
1912 NCOPIES is the number of vector statements that will be needed. */
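/* Editorial sketch (assumed examples): for a non-grouped, non-strided
   access the step of the data reference decides the classification
   below, roughly

     b[i] = a[i];       step > 0   VMAT_CONTIGUOUS
     b[i] = a[0];       step == 0  VMAT_INVARIANT (loads only)
     b[i] = a[n - i];   step < 0   get_negative_load_store_type

   while gather/scatter, grouped and variable-strided accesses are
   dispatched to their own handling first.  */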
1914 static bool
1915 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1916 vec_load_store_type vls_type, unsigned int ncopies,
1917 vect_memory_access_type *memory_access_type,
1918 gather_scatter_info *gs_info)
1920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1921 vec_info *vinfo = stmt_info->vinfo;
1922 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1923 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1925 *memory_access_type = VMAT_GATHER_SCATTER;
1926 gimple *def_stmt;
1927 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1928 gcc_unreachable ();
1929 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1930 &gs_info->offset_dt,
1931 &gs_info->offset_vectype))
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "%s index use not simple.\n",
1936 vls_type == VLS_LOAD ? "gather" : "scatter");
1937 return false;
1940 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1942 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1943 memory_access_type))
1944 return false;
1946 else if (STMT_VINFO_STRIDED_P (stmt_info))
1948 gcc_assert (!slp);
1949 *memory_access_type = VMAT_ELEMENTWISE;
1951 else
1953 int cmp = compare_step_with_zero (stmt);
1954 if (cmp < 0)
1955 *memory_access_type = get_negative_load_store_type
1956 (stmt, vectype, vls_type, ncopies);
1957 else if (cmp == 0)
1959 gcc_assert (vls_type == VLS_LOAD);
1960 *memory_access_type = VMAT_INVARIANT;
1962 else
1963 *memory_access_type = VMAT_CONTIGUOUS;
1966 /* FIXME: At the moment the cost model seems to underestimate the
1967 cost of using elementwise accesses. This check preserves the
1968 traditional behavior until that can be fixed. */
1969 if (*memory_access_type == VMAT_ELEMENTWISE
1970 && !STMT_VINFO_STRIDED_P (stmt_info))
1972 if (dump_enabled_p ())
1973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1974 "not falling back to elementwise accesses\n");
1975 return false;
1977 return true;
1980 /* Function vectorizable_mask_load_store.
1982 Check if STMT performs a conditional load or store that can be vectorized.
1983 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1984 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1985 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
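/* Editorial illustration (assumed example): conditional accesses such
   as

     for (i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i];

   are if-converted into IFN_MASK_LOAD / IFN_MASK_STORE internal calls,
   which is the form handled here; the mask operand is the vectorized
   condition and must have a vector boolean type with the same number
   of lanes as the data vector.  */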
1987 static bool
1988 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1989 gimple **vec_stmt, slp_tree slp_node)
1991 tree vec_dest = NULL;
1992 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1993 stmt_vec_info prev_stmt_info;
1994 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1995 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1996 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1997 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1999 tree rhs_vectype = NULL_TREE;
2000 tree mask_vectype;
2001 tree elem_type;
2002 gimple *new_stmt;
2003 tree dummy;
2004 tree dataref_ptr = NULL_TREE;
2005 gimple *ptr_incr;
2006 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2007 int ncopies;
2008 int i, j;
2009 bool inv_p;
2010 gather_scatter_info gs_info;
2011 vec_load_store_type vls_type;
2012 tree mask;
2013 gimple *def_stmt;
2014 enum vect_def_type dt;
2016 if (slp_node != NULL)
2017 return false;
2019 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2020 gcc_assert (ncopies >= 1);
2022 mask = gimple_call_arg (stmt, 2);
2024 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
2025 return false;
2027 /* FORNOW. This restriction should be relaxed. */
2028 if (nested_in_vect_loop && ncopies > 1)
2030 if (dump_enabled_p ())
2031 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2032 "multiple types in nested loop.");
2033 return false;
2036 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2037 return false;
2039 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2040 && ! vec_stmt)
2041 return false;
2043 if (!STMT_VINFO_DATA_REF (stmt_info))
2044 return false;
2046 elem_type = TREE_TYPE (vectype);
2048 if (TREE_CODE (mask) != SSA_NAME)
2049 return false;
2051 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2052 return false;
2054 if (!mask_vectype)
2055 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2057 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2058 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2059 return false;
2061 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2063 tree rhs = gimple_call_arg (stmt, 3);
2064 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2065 return false;
2066 if (dt == vect_constant_def || dt == vect_external_def)
2067 vls_type = VLS_STORE_INVARIANT;
2068 else
2069 vls_type = VLS_STORE;
2071 else
2072 vls_type = VLS_LOAD;
2074 vect_memory_access_type memory_access_type;
2075 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2076 &memory_access_type, &gs_info))
2077 return false;
2079 if (memory_access_type == VMAT_GATHER_SCATTER)
2081 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2082 tree masktype
2083 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2084 if (TREE_CODE (masktype) == INTEGER_TYPE)
2086 if (dump_enabled_p ())
2087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2088 "masked gather with integer mask not supported.");
2089 return false;
2092 else if (memory_access_type != VMAT_CONTIGUOUS)
2094 if (dump_enabled_p ())
2095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2096 "unsupported access type for masked %s.\n",
2097 vls_type == VLS_LOAD ? "load" : "store");
2098 return false;
2100 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2101 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2102 TYPE_MODE (mask_vectype),
2103 vls_type == VLS_LOAD)
2104 || (rhs_vectype
2105 && !useless_type_conversion_p (vectype, rhs_vectype)))
2106 return false;
2108 if (!vec_stmt) /* transformation not required. */
2110 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2111 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2112 if (vls_type == VLS_LOAD)
2113 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2114 NULL, NULL, NULL);
2115 else
2116 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2117 dt, NULL, NULL, NULL);
2118 return true;
2120 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2122 /** Transform. **/
2124 if (memory_access_type == VMAT_GATHER_SCATTER)
2126 tree vec_oprnd0 = NULL_TREE, op;
2127 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2128 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2129 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2130 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2131 tree mask_perm_mask = NULL_TREE;
2132 edge pe = loop_preheader_edge (loop);
2133 gimple_seq seq;
2134 basic_block new_bb;
2135 enum { NARROW, NONE, WIDEN } modifier;
2136 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2138 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2139 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2140 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2141 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2142 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2143 scaletype = TREE_VALUE (arglist);
2144 gcc_checking_assert (types_compatible_p (srctype, rettype)
2145 && types_compatible_p (srctype, masktype));
2147 if (nunits == gather_off_nunits)
2148 modifier = NONE;
2149 else if (nunits == gather_off_nunits / 2)
2151 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2152 modifier = WIDEN;
2154 for (i = 0; i < gather_off_nunits; ++i)
2155 sel[i] = i | nunits;
2157 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2159 else if (nunits == gather_off_nunits * 2)
2161 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2162 modifier = NARROW;
2164 for (i = 0; i < nunits; ++i)
2165 sel[i] = i < gather_off_nunits
2166 ? i : i + nunits - gather_off_nunits;
2168 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2169 ncopies *= 2;
2170 for (i = 0; i < nunits; ++i)
2171 sel[i] = i | gather_off_nunits;
2172 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2174 else
2175 gcc_unreachable ();
2177 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2179 ptr = fold_convert (ptrtype, gs_info.base);
2180 if (!is_gimple_min_invariant (ptr))
2182 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2183 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2184 gcc_assert (!new_bb);
2187 scale = build_int_cst (scaletype, gs_info.scale);
2189 prev_stmt_info = NULL;
2190 for (j = 0; j < ncopies; ++j)
2192 if (modifier == WIDEN && (j & 1))
2193 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2194 perm_mask, stmt, gsi);
2195 else if (j == 0)
2196 op = vec_oprnd0
2197 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2198 else
2199 op = vec_oprnd0
2200 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2202 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2204 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2205 == TYPE_VECTOR_SUBPARTS (idxtype));
2206 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2207 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2208 new_stmt
2209 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2210 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2211 op = var;
2214 if (mask_perm_mask && (j & 1))
2215 mask_op = permute_vec_elements (mask_op, mask_op,
2216 mask_perm_mask, stmt, gsi);
2217 else
2219 if (j == 0)
2220 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2221 else
2223 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2224 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2227 mask_op = vec_mask;
2228 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2230 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2231 == TYPE_VECTOR_SUBPARTS (masktype));
2232 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2233 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2234 new_stmt
2235 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2236 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2237 mask_op = var;
2241 new_stmt
2242 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2243 scale);
2245 if (!useless_type_conversion_p (vectype, rettype))
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2248 == TYPE_VECTOR_SUBPARTS (rettype));
2249 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2250 gimple_call_set_lhs (new_stmt, op);
2251 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2252 var = make_ssa_name (vec_dest);
2253 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2254 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2256 else
2258 var = make_ssa_name (vec_dest, new_stmt);
2259 gimple_call_set_lhs (new_stmt, var);
2262 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2264 if (modifier == NARROW)
2266 if ((j & 1) == 0)
2268 prev_res = var;
2269 continue;
2271 var = permute_vec_elements (prev_res, var,
2272 perm_mask, stmt, gsi);
2273 new_stmt = SSA_NAME_DEF_STMT (var);
2276 if (prev_stmt_info == NULL)
2277 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2278 else
2279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2280 prev_stmt_info = vinfo_for_stmt (new_stmt);
2283 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2284 from the IL. */
2285 if (STMT_VINFO_RELATED_STMT (stmt_info))
2287 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2288 stmt_info = vinfo_for_stmt (stmt);
2290 tree lhs = gimple_call_lhs (stmt);
2291 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2292 set_vinfo_for_stmt (new_stmt, stmt_info);
2293 set_vinfo_for_stmt (stmt, NULL);
2294 STMT_VINFO_STMT (stmt_info) = new_stmt;
2295 gsi_replace (gsi, new_stmt, true);
2296 return true;
2298 else if (vls_type != VLS_LOAD)
2300 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2301 prev_stmt_info = NULL;
2302 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2303 for (i = 0; i < ncopies; i++)
2305 unsigned align, misalign;
2307 if (i == 0)
2309 tree rhs = gimple_call_arg (stmt, 3);
2310 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2311 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2312 /* We should have caught mismatched types earlier. */
2313 gcc_assert (useless_type_conversion_p (vectype,
2314 TREE_TYPE (vec_rhs)));
2315 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2316 NULL_TREE, &dummy, gsi,
2317 &ptr_incr, false, &inv_p);
2318 gcc_assert (!inv_p);
2320 else
2322 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2323 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2324 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2325 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2326 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2327 TYPE_SIZE_UNIT (vectype));
2330 align = TYPE_ALIGN_UNIT (vectype);
2331 if (aligned_access_p (dr))
2332 misalign = 0;
2333 else if (DR_MISALIGNMENT (dr) == -1)
2335 align = TYPE_ALIGN_UNIT (elem_type);
2336 misalign = 0;
2338 else
2339 misalign = DR_MISALIGNMENT (dr);
2340 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2341 misalign);
2342 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2343 misalign ? misalign & -misalign : align);
2344 new_stmt
2345 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2346 ptr, vec_mask, vec_rhs);
2347 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2348 if (i == 0)
2349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2350 else
2351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2352 prev_stmt_info = vinfo_for_stmt (new_stmt);
2355 else
2357 tree vec_mask = NULL_TREE;
2358 prev_stmt_info = NULL;
2359 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2360 for (i = 0; i < ncopies; i++)
2362 unsigned align, misalign;
2364 if (i == 0)
2366 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2367 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2368 NULL_TREE, &dummy, gsi,
2369 &ptr_incr, false, &inv_p);
2370 gcc_assert (!inv_p);
2372 else
2374 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2375 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2376 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2377 TYPE_SIZE_UNIT (vectype));
2380 align = TYPE_ALIGN_UNIT (vectype);
2381 if (aligned_access_p (dr))
2382 misalign = 0;
2383 else if (DR_MISALIGNMENT (dr) == -1)
2385 align = TYPE_ALIGN_UNIT (elem_type);
2386 misalign = 0;
2388 else
2389 misalign = DR_MISALIGNMENT (dr);
2390 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2391 misalign);
2392 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2393 misalign ? misalign & -misalign : align);
2394 new_stmt
2395 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2396 ptr, vec_mask);
2397 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2398 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2399 if (i == 0)
2400 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2401 else
2402 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2403 prev_stmt_info = vinfo_for_stmt (new_stmt);
2407 if (vls_type == VLS_LOAD)
2409 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2410 from the IL. */
2411 if (STMT_VINFO_RELATED_STMT (stmt_info))
2413 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2414 stmt_info = vinfo_for_stmt (stmt);
2416 tree lhs = gimple_call_lhs (stmt);
2417 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2418 set_vinfo_for_stmt (new_stmt, stmt_info);
2419 set_vinfo_for_stmt (stmt, NULL);
2420 STMT_VINFO_STMT (stmt_info) = new_stmt;
2421 gsi_replace (gsi, new_stmt, true);
2424 return true;
2427 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2428 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2429 in a single step. On success, store the binary pack code in
2430 *CONVERT_CODE. */
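/* Editorial note (assumed types): narrowing a vector of int to a
   vector of short is a single pack step, e.g.

     *convert_code == VEC_PACK_TRUNC_EXPR, multi_step_cvt == 0

   whereas narrowing int to char needs an intermediate type, so
   multi_step_cvt is nonzero and the function returns false.  */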
2432 static bool
2433 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2434 tree_code *convert_code)
2436 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2437 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2438 return false;
2440 tree_code code;
2441 int multi_step_cvt = 0;
2442 auto_vec <tree, 8> interm_types;
2443 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2444 &code, &multi_step_cvt,
2445 &interm_types)
2446 || multi_step_cvt)
2447 return false;
2449 *convert_code = code;
2450 return true;
2453 /* Function vectorizable_call.
2455 Check if GS performs a function call that can be vectorized.
2456 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2457 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2458 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
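/* Editorial illustration (assumed example): a call such as

     for (i = 0; i < n; i++)
       b[i] = sqrtf (a[i]);

   can be vectorized either through an internal function, when the
   target supports the equivalent vector operation, or through a
   target built-in returned by
   targetm.vectorize.builtin_vectorized_function.  Calls that touch
   memory or whose arguments have differing types are rejected
   below.  */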
2460 static bool
2461 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2462 slp_tree slp_node)
2464 gcall *stmt;
2465 tree vec_dest;
2466 tree scalar_dest;
2467 tree op, type;
2468 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2469 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2470 tree vectype_out, vectype_in;
2471 int nunits_in;
2472 int nunits_out;
2473 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2474 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2475 vec_info *vinfo = stmt_info->vinfo;
2476 tree fndecl, new_temp, rhs_type;
2477 gimple *def_stmt;
2478 enum vect_def_type dt[3]
2479 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2480 gimple *new_stmt = NULL;
2481 int ncopies, j;
2482 vec<tree> vargs = vNULL;
2483 enum { NARROW, NONE, WIDEN } modifier;
2484 size_t i, nargs;
2485 tree lhs;
2487 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2488 return false;
2490 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2491 && ! vec_stmt)
2492 return false;
2494 /* Is GS a vectorizable call? */
2495 stmt = dyn_cast <gcall *> (gs);
2496 if (!stmt)
2497 return false;
2499 if (gimple_call_internal_p (stmt)
2500 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2501 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2502 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2503 slp_node);
2505 if (gimple_call_lhs (stmt) == NULL_TREE
2506 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2507 return false;
2509 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2511 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2513 /* Process function arguments. */
2514 rhs_type = NULL_TREE;
2515 vectype_in = NULL_TREE;
2516 nargs = gimple_call_num_args (stmt);
2518 /* Bail out if the function has more than three arguments; we do not have
2519 interesting builtin functions to vectorize with more than two arguments
2520 except for fma. Calls with no arguments are not handled either. */
2521 if (nargs == 0 || nargs > 3)
2522 return false;
2524 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2525 if (gimple_call_internal_p (stmt)
2526 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2528 nargs = 0;
2529 rhs_type = unsigned_type_node;
2532 for (i = 0; i < nargs; i++)
2534 tree opvectype;
2536 op = gimple_call_arg (stmt, i);
2538 /* We can only handle calls with arguments of the same type. */
2539 if (rhs_type
2540 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2542 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2544 "argument types differ.\n");
2545 return false;
2547 if (!rhs_type)
2548 rhs_type = TREE_TYPE (op);
2550 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 "use not simple.\n");
2555 return false;
2558 if (!vectype_in)
2559 vectype_in = opvectype;
2560 else if (opvectype
2561 && opvectype != vectype_in)
2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565 "argument vector types differ.\n");
2566 return false;
2569 /* If all arguments are external or constant defs use a vector type with
2570 the same size as the output vector type. */
2571 if (!vectype_in)
2572 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2573 if (vec_stmt)
2574 gcc_assert (vectype_in);
2575 if (!vectype_in)
2577 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2580 "no vectype for scalar type ");
2581 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2582 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2585 return false;
2588 /* FORNOW */
2589 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2590 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2591 if (nunits_in == nunits_out / 2)
2592 modifier = NARROW;
2593 else if (nunits_out == nunits_in)
2594 modifier = NONE;
2595 else if (nunits_out == nunits_in / 2)
2596 modifier = WIDEN;
2597 else
2598 return false;
2600 /* We only handle functions that do not read or clobber memory. */
2601 if (gimple_vuse (stmt))
2603 if (dump_enabled_p ())
2604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2605 "function reads from or writes to memory.\n");
2606 return false;
2609 /* For now, we only vectorize functions if a target specific builtin
2610 is available. TODO -- in some cases, it might be profitable to
2611 insert the calls for pieces of the vector, in order to be able
2612 to vectorize other operations in the loop. */
2613 fndecl = NULL_TREE;
2614 internal_fn ifn = IFN_LAST;
2615 combined_fn cfn = gimple_call_combined_fn (stmt);
2616 tree callee = gimple_call_fndecl (stmt);
2618 /* First try using an internal function. */
2619 tree_code convert_code = ERROR_MARK;
2620 if (cfn != CFN_LAST
2621 && (modifier == NONE
2622 || (modifier == NARROW
2623 && simple_integer_narrowing (vectype_out, vectype_in,
2624 &convert_code))))
2625 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2626 vectype_in);
2628 /* If that fails, try asking for a target-specific built-in function. */
2629 if (ifn == IFN_LAST)
2631 if (cfn != CFN_LAST)
2632 fndecl = targetm.vectorize.builtin_vectorized_function
2633 (cfn, vectype_out, vectype_in);
2634 else
2635 fndecl = targetm.vectorize.builtin_md_vectorized_function
2636 (callee, vectype_out, vectype_in);
2639 if (ifn == IFN_LAST && !fndecl)
2641 if (cfn == CFN_GOMP_SIMD_LANE
2642 && !slp_node
2643 && loop_vinfo
2644 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2645 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2646 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2647 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2649 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2650 { 0, 1, 2, ... vf - 1 } vector. */
2651 gcc_assert (nargs == 0);
2653 else
2655 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2657 "function is not vectorizable.\n");
2658 return false;
2662 if (slp_node)
2663 ncopies = 1;
2664 else if (modifier == NARROW && ifn == IFN_LAST)
2665 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2666 else
2667 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2669 /* Sanity check: make sure that at least one copy of the vectorized stmt
2670 needs to be generated. */
2671 gcc_assert (ncopies >= 1);
2673 if (!vec_stmt) /* transformation not required. */
2675 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2676 if (dump_enabled_p ())
2677 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2678 "\n");
2679 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2680 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2681 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2682 vec_promote_demote, stmt_info, 0, vect_body);
2684 return true;
2687 /** Transform. **/
2689 if (dump_enabled_p ())
2690 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2692 /* Handle def. */
2693 scalar_dest = gimple_call_lhs (stmt);
2694 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2696 prev_stmt_info = NULL;
2697 if (modifier == NONE || ifn != IFN_LAST)
2699 tree prev_res = NULL_TREE;
2700 for (j = 0; j < ncopies; ++j)
2702 /* Build argument list for the vectorized call. */
2703 if (j == 0)
2704 vargs.create (nargs);
2705 else
2706 vargs.truncate (0);
2708 if (slp_node)
2710 auto_vec<vec<tree> > vec_defs (nargs);
2711 vec<tree> vec_oprnds0;
2713 for (i = 0; i < nargs; i++)
2714 vargs.quick_push (gimple_call_arg (stmt, i));
2715 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2716 vec_oprnds0 = vec_defs[0];
2718 /* Arguments are ready. Create the new vector stmt. */
2719 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2721 size_t k;
2722 for (k = 0; k < nargs; k++)
2724 vec<tree> vec_oprndsk = vec_defs[k];
2725 vargs[k] = vec_oprndsk[i];
2727 if (modifier == NARROW)
2729 tree half_res = make_ssa_name (vectype_in);
2730 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2731 gimple_call_set_lhs (new_stmt, half_res);
2732 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2733 if ((i & 1) == 0)
2735 prev_res = half_res;
2736 continue;
2738 new_temp = make_ssa_name (vec_dest);
2739 new_stmt = gimple_build_assign (new_temp, convert_code,
2740 prev_res, half_res);
2742 else
2744 if (ifn != IFN_LAST)
2745 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2746 else
2747 new_stmt = gimple_build_call_vec (fndecl, vargs);
2748 new_temp = make_ssa_name (vec_dest, new_stmt);
2749 gimple_call_set_lhs (new_stmt, new_temp);
2751 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2752 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2755 for (i = 0; i < nargs; i++)
2757 vec<tree> vec_oprndsi = vec_defs[i];
2758 vec_oprndsi.release ();
2760 continue;
2763 for (i = 0; i < nargs; i++)
2765 op = gimple_call_arg (stmt, i);
2766 if (j == 0)
2767 vec_oprnd0
2768 = vect_get_vec_def_for_operand (op, stmt);
2769 else
2771 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2772 vec_oprnd0
2773 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2776 vargs.quick_push (vec_oprnd0);
2779 if (gimple_call_internal_p (stmt)
2780 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2782 tree *v = XALLOCAVEC (tree, nunits_out);
2783 int k;
2784 for (k = 0; k < nunits_out; ++k)
2785 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2786 tree cst = build_vector (vectype_out, v);
2787 tree new_var
2788 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2789 gimple *init_stmt = gimple_build_assign (new_var, cst);
2790 vect_init_vector_1 (stmt, init_stmt, NULL);
2791 new_temp = make_ssa_name (vec_dest);
2792 new_stmt = gimple_build_assign (new_temp, new_var);
2794 else if (modifier == NARROW)
2796 tree half_res = make_ssa_name (vectype_in);
2797 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2798 gimple_call_set_lhs (new_stmt, half_res);
2799 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2800 if ((j & 1) == 0)
2802 prev_res = half_res;
2803 continue;
2805 new_temp = make_ssa_name (vec_dest);
2806 new_stmt = gimple_build_assign (new_temp, convert_code,
2807 prev_res, half_res);
2809 else
2811 if (ifn != IFN_LAST)
2812 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2813 else
2814 new_stmt = gimple_build_call_vec (fndecl, vargs);
2815 new_temp = make_ssa_name (vec_dest, new_stmt);
2816 gimple_call_set_lhs (new_stmt, new_temp);
2818 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2820 if (j == (modifier == NARROW ? 1 : 0))
2821 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2822 else
2823 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2825 prev_stmt_info = vinfo_for_stmt (new_stmt);
2828 else if (modifier == NARROW)
2830 for (j = 0; j < ncopies; ++j)
2832 /* Build argument list for the vectorized call. */
2833 if (j == 0)
2834 vargs.create (nargs * 2);
2835 else
2836 vargs.truncate (0);
2838 if (slp_node)
2840 auto_vec<vec<tree> > vec_defs (nargs);
2841 vec<tree> vec_oprnds0;
2843 for (i = 0; i < nargs; i++)
2844 vargs.quick_push (gimple_call_arg (stmt, i));
2845 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2846 vec_oprnds0 = vec_defs[0];
2848 /* Arguments are ready. Create the new vector stmt. */
2849 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2851 size_t k;
2852 vargs.truncate (0);
2853 for (k = 0; k < nargs; k++)
2855 vec<tree> vec_oprndsk = vec_defs[k];
2856 vargs.quick_push (vec_oprndsk[i]);
2857 vargs.quick_push (vec_oprndsk[i + 1]);
2859 if (ifn != IFN_LAST)
2860 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2861 else
2862 new_stmt = gimple_build_call_vec (fndecl, vargs);
2863 new_temp = make_ssa_name (vec_dest, new_stmt);
2864 gimple_call_set_lhs (new_stmt, new_temp);
2865 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2866 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2869 for (i = 0; i < nargs; i++)
2871 vec<tree> vec_oprndsi = vec_defs[i];
2872 vec_oprndsi.release ();
2874 continue;
2877 for (i = 0; i < nargs; i++)
2879 op = gimple_call_arg (stmt, i);
2880 if (j == 0)
2882 vec_oprnd0
2883 = vect_get_vec_def_for_operand (op, stmt);
2884 vec_oprnd1
2885 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2887 else
2889 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2890 vec_oprnd0
2891 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2892 vec_oprnd1
2893 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2896 vargs.quick_push (vec_oprnd0);
2897 vargs.quick_push (vec_oprnd1);
2900 new_stmt = gimple_build_call_vec (fndecl, vargs);
2901 new_temp = make_ssa_name (vec_dest, new_stmt);
2902 gimple_call_set_lhs (new_stmt, new_temp);
2903 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2905 if (j == 0)
2906 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2907 else
2908 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2910 prev_stmt_info = vinfo_for_stmt (new_stmt);
2913 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2915 else
2916 /* No current target implements this case. */
2917 return false;
2919 vargs.release ();
2921 /* The call in STMT might prevent it from being removed in dce.
2922 We cannot remove it here, however, because of the way the SSA name
2923 it defines is mapped to the new definition. So just replace the
2924 rhs of the statement with something harmless. */
2926 if (slp_node)
2927 return true;
2929 type = TREE_TYPE (scalar_dest);
2930 if (is_pattern_stmt_p (stmt_info))
2931 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2932 else
2933 lhs = gimple_call_lhs (stmt);
2935 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2936 set_vinfo_for_stmt (new_stmt, stmt_info);
2937 set_vinfo_for_stmt (stmt, NULL);
2938 STMT_VINFO_STMT (stmt_info) = new_stmt;
2939 gsi_replace (gsi, new_stmt, false);
2941 return true;
2945 struct simd_call_arg_info
2947 tree vectype;
2948 tree op;
2949 enum vect_def_type dt;
2950 HOST_WIDE_INT linear_step;
2951 unsigned int align;
2952 bool simd_lane_linear;
2955 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2956 is linear within simd lane (but not within whole loop), note it in
2957 *ARGINFO. */
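/* Editorial illustration (assumed SSA names): the shape recognized
   below is an invariant base plus a multiple of the simd lane number,
   e.g.

     _1 = GOMP_SIMD_LANE (simduid.0_5);
     _2 = _1 * 4;
     op_3 = &buf + _2;

   in which case the base (&buf) and the per-lane step (4) are
   recorded in *ARGINFO.  */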
2959 static void
2960 vect_simd_lane_linear (tree op, struct loop *loop,
2961 struct simd_call_arg_info *arginfo)
2963 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2965 if (!is_gimple_assign (def_stmt)
2966 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2967 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2968 return;
2970 tree base = gimple_assign_rhs1 (def_stmt);
2971 HOST_WIDE_INT linear_step = 0;
2972 tree v = gimple_assign_rhs2 (def_stmt);
2973 while (TREE_CODE (v) == SSA_NAME)
2975 tree t;
2976 def_stmt = SSA_NAME_DEF_STMT (v);
2977 if (is_gimple_assign (def_stmt))
2978 switch (gimple_assign_rhs_code (def_stmt))
2980 case PLUS_EXPR:
2981 t = gimple_assign_rhs2 (def_stmt);
2982 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2983 return;
2984 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2985 v = gimple_assign_rhs1 (def_stmt);
2986 continue;
2987 case MULT_EXPR:
2988 t = gimple_assign_rhs2 (def_stmt);
2989 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2990 return;
2991 linear_step = tree_to_shwi (t);
2992 v = gimple_assign_rhs1 (def_stmt);
2993 continue;
2994 CASE_CONVERT:
2995 t = gimple_assign_rhs1 (def_stmt);
2996 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2997 || (TYPE_PRECISION (TREE_TYPE (v))
2998 < TYPE_PRECISION (TREE_TYPE (t))))
2999 return;
3000 if (!linear_step)
3001 linear_step = 1;
3002 v = t;
3003 continue;
3004 default:
3005 return;
3007 else if (is_gimple_call (def_stmt)
3008 && gimple_call_internal_p (def_stmt)
3009 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
3010 && loop->simduid
3011 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3012 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3013 == loop->simduid))
3015 if (!linear_step)
3016 linear_step = 1;
3017 arginfo->linear_step = linear_step;
3018 arginfo->op = base;
3019 arginfo->simd_lane_linear = true;
3020 return;
3025 /* Function vectorizable_simd_clone_call.
3027 Check if STMT performs a function call that can be vectorized
3028 by calling a simd clone of the function.
3029 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3030 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3031 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
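/* Editorial illustration (assumed example): for

     #pragma omp declare simd
     float f (float x, int n);

   called as f (a[i], 4) in a vectorizable loop, the analysis below
   scans the available simd clones of F, scores them against the
   vectorization factor, the uniform/linear properties of the
   arguments, their alignment and target usability, and replaces the
   scalar call with calls to the best-scoring clone.  */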
3033 static bool
3034 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3035 gimple **vec_stmt, slp_tree slp_node)
3037 tree vec_dest;
3038 tree scalar_dest;
3039 tree op, type;
3040 tree vec_oprnd0 = NULL_TREE;
3041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3042 tree vectype;
3043 unsigned int nunits;
3044 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3045 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3046 vec_info *vinfo = stmt_info->vinfo;
3047 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3048 tree fndecl, new_temp;
3049 gimple *def_stmt;
3050 gimple *new_stmt = NULL;
3051 int ncopies, j;
3052 auto_vec<simd_call_arg_info> arginfo;
3053 vec<tree> vargs = vNULL;
3054 size_t i, nargs;
3055 tree lhs, rtype, ratype;
3056 vec<constructor_elt, va_gc> *ret_ctor_elts;
3058 /* Is STMT a vectorizable call? */
3059 if (!is_gimple_call (stmt))
3060 return false;
3062 fndecl = gimple_call_fndecl (stmt);
3063 if (fndecl == NULL_TREE)
3064 return false;
3066 struct cgraph_node *node = cgraph_node::get (fndecl);
3067 if (node == NULL || node->simd_clones == NULL)
3068 return false;
3070 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3071 return false;
3073 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3074 && ! vec_stmt)
3075 return false;
3077 if (gimple_call_lhs (stmt)
3078 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3079 return false;
3081 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3083 vectype = STMT_VINFO_VECTYPE (stmt_info);
3085 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3086 return false;
3088 /* FORNOW */
3089 if (slp_node)
3090 return false;
3092 /* Process function arguments. */
3093 nargs = gimple_call_num_args (stmt);
3095 /* Bail out if the function has zero arguments. */
3096 if (nargs == 0)
3097 return false;
3099 arginfo.reserve (nargs, true);
3101 for (i = 0; i < nargs; i++)
3103 simd_call_arg_info thisarginfo;
3104 affine_iv iv;
3106 thisarginfo.linear_step = 0;
3107 thisarginfo.align = 0;
3108 thisarginfo.op = NULL_TREE;
3109 thisarginfo.simd_lane_linear = false;
3111 op = gimple_call_arg (stmt, i);
3112 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3113 &thisarginfo.vectype)
3114 || thisarginfo.dt == vect_uninitialized_def)
3116 if (dump_enabled_p ())
3117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3118 "use not simple.\n");
3119 return false;
3122 if (thisarginfo.dt == vect_constant_def
3123 || thisarginfo.dt == vect_external_def)
3124 gcc_assert (thisarginfo.vectype == NULL_TREE);
3125 else
3126 gcc_assert (thisarginfo.vectype != NULL_TREE);
3128 /* For linear arguments, the analyze phase should have saved
3129 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3130 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3131 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3133 gcc_assert (vec_stmt);
3134 thisarginfo.linear_step
3135 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3136 thisarginfo.op
3137 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3138 thisarginfo.simd_lane_linear
3139 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3140 == boolean_true_node);
3141 /* If the loop has been peeled for alignment, we need to adjust it. */
3142 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3143 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3144 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3146 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3147 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3148 tree opt = TREE_TYPE (thisarginfo.op);
3149 bias = fold_convert (TREE_TYPE (step), bias);
3150 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3151 thisarginfo.op
3152 = fold_build2 (POINTER_TYPE_P (opt)
3153 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3154 thisarginfo.op, bias);
3157 else if (!vec_stmt
3158 && thisarginfo.dt != vect_constant_def
3159 && thisarginfo.dt != vect_external_def
3160 && loop_vinfo
3161 && TREE_CODE (op) == SSA_NAME
3162 && simple_iv (loop, loop_containing_stmt (stmt), op,
3163 &iv, false)
3164 && tree_fits_shwi_p (iv.step))
3166 thisarginfo.linear_step = tree_to_shwi (iv.step);
3167 thisarginfo.op = iv.base;
3169 else if ((thisarginfo.dt == vect_constant_def
3170 || thisarginfo.dt == vect_external_def)
3171 && POINTER_TYPE_P (TREE_TYPE (op)))
3172 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3173 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3174 linear too. */
3175 if (POINTER_TYPE_P (TREE_TYPE (op))
3176 && !thisarginfo.linear_step
3177 && !vec_stmt
3178 && thisarginfo.dt != vect_constant_def
3179 && thisarginfo.dt != vect_external_def
3180 && loop_vinfo
3181 && !slp_node
3182 && TREE_CODE (op) == SSA_NAME)
3183 vect_simd_lane_linear (op, loop, &thisarginfo);
3185 arginfo.quick_push (thisarginfo);
3188 unsigned int badness = 0;
3189 struct cgraph_node *bestn = NULL;
3190 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3191 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3192 else
3193 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3194 n = n->simdclone->next_clone)
3196 unsigned int this_badness = 0;
3197 if (n->simdclone->simdlen
3198 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3199 || n->simdclone->nargs != nargs)
3200 continue;
3201 if (n->simdclone->simdlen
3202 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3203 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3204 - exact_log2 (n->simdclone->simdlen)) * 1024;
3205 if (n->simdclone->inbranch)
3206 this_badness += 2048;
3207 int target_badness = targetm.simd_clone.usable (n);
3208 if (target_badness < 0)
3209 continue;
3210 this_badness += target_badness * 512;
3211 /* FORNOW: Have to add code to add the mask argument. */
3212 if (n->simdclone->inbranch)
3213 continue;
3214 for (i = 0; i < nargs; i++)
3216 switch (n->simdclone->args[i].arg_type)
3218 case SIMD_CLONE_ARG_TYPE_VECTOR:
3219 if (!useless_type_conversion_p
3220 (n->simdclone->args[i].orig_type,
3221 TREE_TYPE (gimple_call_arg (stmt, i))))
3222 i = -1;
3223 else if (arginfo[i].dt == vect_constant_def
3224 || arginfo[i].dt == vect_external_def
3225 || arginfo[i].linear_step)
3226 this_badness += 64;
3227 break;
3228 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3229 if (arginfo[i].dt != vect_constant_def
3230 && arginfo[i].dt != vect_external_def)
3231 i = -1;
3232 break;
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3235 if (arginfo[i].dt == vect_constant_def
3236 || arginfo[i].dt == vect_external_def
3237 || (arginfo[i].linear_step
3238 != n->simdclone->args[i].linear_step))
3239 i = -1;
3240 break;
3241 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3242 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3243 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3244 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3245 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3246 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3247 /* FORNOW */
3248 i = -1;
3249 break;
3250 case SIMD_CLONE_ARG_TYPE_MASK:
3251 gcc_unreachable ();
3253 if (i == (size_t) -1)
3254 break;
3255 if (n->simdclone->args[i].alignment > arginfo[i].align)
3257 i = -1;
3258 break;
3260 if (arginfo[i].align)
3261 this_badness += (exact_log2 (arginfo[i].align)
3262 - exact_log2 (n->simdclone->args[i].alignment));
3264 if (i == (size_t) -1)
3265 continue;
3266 if (bestn == NULL || this_badness < badness)
3268 bestn = n;
3269 badness = this_badness;
3273 if (bestn == NULL)
3274 return false;
3276 for (i = 0; i < nargs; i++)
3277 if ((arginfo[i].dt == vect_constant_def
3278 || arginfo[i].dt == vect_external_def)
3279 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3281 arginfo[i].vectype
3282 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3283 i)));
3284 if (arginfo[i].vectype == NULL
3285 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3286 > bestn->simdclone->simdlen))
3287 return false;
3290 fndecl = bestn->decl;
3291 nunits = bestn->simdclone->simdlen;
3292 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3294 /* If the function isn't const, only allow it in simd loops where the
3295 user has asserted that at least nunits consecutive iterations can be
3296 performed using SIMD instructions. */
3297 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3298 && gimple_vuse (stmt))
3299 return false;
3301 /* Sanity check: make sure that at least one copy of the vectorized stmt
3302 needs to be generated. */
3303 gcc_assert (ncopies >= 1);
3305 if (!vec_stmt) /* transformation not required. */
3307 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3308 for (i = 0; i < nargs; i++)
3309 if ((bestn->simdclone->args[i].arg_type
3310 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3311 || (bestn->simdclone->args[i].arg_type
3312 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3314 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3315 + 1);
3316 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3317 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3318 ? size_type_node : TREE_TYPE (arginfo[i].op);
3319 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3320 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3321 tree sll = arginfo[i].simd_lane_linear
3322 ? boolean_true_node : boolean_false_node;
3323 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3325 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3326 if (dump_enabled_p ())
3327 dump_printf_loc (MSG_NOTE, vect_location,
3328 "=== vectorizable_simd_clone_call ===\n");
3329 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3330 return true;
3333 /** Transform. **/
3335 if (dump_enabled_p ())
3336 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3338 /* Handle def. */
3339 scalar_dest = gimple_call_lhs (stmt);
3340 vec_dest = NULL_TREE;
3341 rtype = NULL_TREE;
3342 ratype = NULL_TREE;
3343 if (scalar_dest)
3345 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3346 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3347 if (TREE_CODE (rtype) == ARRAY_TYPE)
3349 ratype = rtype;
3350 rtype = TREE_TYPE (ratype);
3354 prev_stmt_info = NULL;
3355 for (j = 0; j < ncopies; ++j)
3357 /* Build argument list for the vectorized call. */
3358 if (j == 0)
3359 vargs.create (nargs);
3360 else
3361 vargs.truncate (0);
3363 for (i = 0; i < nargs; i++)
3365 unsigned int k, l, m, o;
3366 tree atype;
3367 op = gimple_call_arg (stmt, i);
3368 switch (bestn->simdclone->args[i].arg_type)
3370 case SIMD_CLONE_ARG_TYPE_VECTOR:
3371 atype = bestn->simdclone->args[i].vector_type;
3372 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3373 for (m = j * o; m < (j + 1) * o; m++)
3375 if (TYPE_VECTOR_SUBPARTS (atype)
3376 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3378 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3379 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3380 / TYPE_VECTOR_SUBPARTS (atype));
3381 gcc_assert ((k & (k - 1)) == 0);
3382 if (m == 0)
3383 vec_oprnd0
3384 = vect_get_vec_def_for_operand (op, stmt);
3385 else
3387 vec_oprnd0 = arginfo[i].op;
3388 if ((m & (k - 1)) == 0)
3389 vec_oprnd0
3390 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3391 vec_oprnd0);
3393 arginfo[i].op = vec_oprnd0;
3394 vec_oprnd0
3395 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3396 size_int (prec),
3397 bitsize_int ((m & (k - 1)) * prec));
3398 new_stmt
3399 = gimple_build_assign (make_ssa_name (atype),
3400 vec_oprnd0);
3401 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3402 vargs.safe_push (gimple_assign_lhs (new_stmt));
3404 else
3406 k = (TYPE_VECTOR_SUBPARTS (atype)
3407 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3408 gcc_assert ((k & (k - 1)) == 0);
3409 vec<constructor_elt, va_gc> *ctor_elts;
3410 if (k != 1)
3411 vec_alloc (ctor_elts, k);
3412 else
3413 ctor_elts = NULL;
3414 for (l = 0; l < k; l++)
3416 if (m == 0 && l == 0)
3417 vec_oprnd0
3418 = vect_get_vec_def_for_operand (op, stmt);
3419 else
3420 vec_oprnd0
3421 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3422 arginfo[i].op);
3423 arginfo[i].op = vec_oprnd0;
3424 if (k == 1)
3425 break;
3426 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3427 vec_oprnd0);
3429 if (k == 1)
3430 vargs.safe_push (vec_oprnd0);
3431 else
3433 vec_oprnd0 = build_constructor (atype, ctor_elts);
3434 new_stmt
3435 = gimple_build_assign (make_ssa_name (atype),
3436 vec_oprnd0);
3437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3438 vargs.safe_push (gimple_assign_lhs (new_stmt));
3442 break;
3443 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3444 vargs.safe_push (op);
3445 break;
3446 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3447 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3448 if (j == 0)
3450 gimple_seq stmts;
3451 arginfo[i].op
3452 = force_gimple_operand (arginfo[i].op, &stmts, true,
3453 NULL_TREE);
3454 if (stmts != NULL)
3456 basic_block new_bb;
3457 edge pe = loop_preheader_edge (loop);
3458 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3459 gcc_assert (!new_bb);
3461 if (arginfo[i].simd_lane_linear)
3463 vargs.safe_push (arginfo[i].op);
3464 break;
3466 tree phi_res = copy_ssa_name (op);
3467 gphi *new_phi = create_phi_node (phi_res, loop->header);
3468 set_vinfo_for_stmt (new_phi,
3469 new_stmt_vec_info (new_phi, loop_vinfo));
3470 add_phi_arg (new_phi, arginfo[i].op,
3471 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3472 enum tree_code code
3473 = POINTER_TYPE_P (TREE_TYPE (op))
3474 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3475 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3476 ? sizetype : TREE_TYPE (op);
3477 widest_int cst
3478 = wi::mul (bestn->simdclone->args[i].linear_step,
3479 ncopies * nunits);
3480 tree tcst = wide_int_to_tree (type, cst);
3481 tree phi_arg = copy_ssa_name (op);
3482 new_stmt
3483 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3484 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3485 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3486 set_vinfo_for_stmt (new_stmt,
3487 new_stmt_vec_info (new_stmt, loop_vinfo));
3488 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3489 UNKNOWN_LOCATION);
3490 arginfo[i].op = phi_res;
3491 vargs.safe_push (phi_res);
3493 else
3495 enum tree_code code
3496 = POINTER_TYPE_P (TREE_TYPE (op))
3497 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3498 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3499 ? sizetype : TREE_TYPE (op);
3500 widest_int cst
3501 = wi::mul (bestn->simdclone->args[i].linear_step,
3502 j * nunits);
3503 tree tcst = wide_int_to_tree (type, cst);
3504 new_temp = make_ssa_name (TREE_TYPE (op));
3505 new_stmt = gimple_build_assign (new_temp, code,
3506 arginfo[i].op, tcst);
3507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3508 vargs.safe_push (new_temp);
3510 break;
3511 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3512 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3513 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3514 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3515 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3516 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3517 default:
3518 gcc_unreachable ();
3522 new_stmt = gimple_build_call_vec (fndecl, vargs);
3523 if (vec_dest)
3525 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3526 if (ratype)
3527 new_temp = create_tmp_var (ratype);
3528 else if (TYPE_VECTOR_SUBPARTS (vectype)
3529 == TYPE_VECTOR_SUBPARTS (rtype))
3530 new_temp = make_ssa_name (vec_dest, new_stmt);
3531 else
3532 new_temp = make_ssa_name (rtype, new_stmt);
3533 gimple_call_set_lhs (new_stmt, new_temp);
3535 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3537 if (vec_dest)
3539 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3541 unsigned int k, l;
3542 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3543 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3544 gcc_assert ((k & (k - 1)) == 0);
3545 for (l = 0; l < k; l++)
3547 tree t;
3548 if (ratype)
3550 t = build_fold_addr_expr (new_temp);
3551 t = build2 (MEM_REF, vectype, t,
3552 build_int_cst (TREE_TYPE (t),
3553 l * prec / BITS_PER_UNIT));
3555 else
3556 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3557 size_int (prec), bitsize_int (l * prec));
3558 new_stmt
3559 = gimple_build_assign (make_ssa_name (vectype), t);
3560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3561 if (j == 0 && l == 0)
3562 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3563 else
3564 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3566 prev_stmt_info = vinfo_for_stmt (new_stmt);
3569 if (ratype)
3571 tree clobber = build_constructor (ratype, NULL);
3572 TREE_THIS_VOLATILE (clobber) = 1;
3573 new_stmt = gimple_build_assign (new_temp, clobber);
3574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3576 continue;
3578 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3580 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3581 / TYPE_VECTOR_SUBPARTS (rtype));
3582 gcc_assert ((k & (k - 1)) == 0);
3583 if ((j & (k - 1)) == 0)
3584 vec_alloc (ret_ctor_elts, k);
3585 if (ratype)
3587 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3588 for (m = 0; m < o; m++)
3590 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3591 size_int (m), NULL_TREE, NULL_TREE);
3592 new_stmt
3593 = gimple_build_assign (make_ssa_name (rtype), tem);
3594 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3595 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3596 gimple_assign_lhs (new_stmt));
3598 tree clobber = build_constructor (ratype, NULL);
3599 TREE_THIS_VOLATILE (clobber) = 1;
3600 new_stmt = gimple_build_assign (new_temp, clobber);
3601 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3603 else
3604 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3605 if ((j & (k - 1)) != k - 1)
3606 continue;
3607 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3608 new_stmt
3609 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3612 if ((unsigned) j == k - 1)
3613 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3614 else
3615 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3617 prev_stmt_info = vinfo_for_stmt (new_stmt);
3618 continue;
3620 else if (ratype)
3622 tree t = build_fold_addr_expr (new_temp);
3623 t = build2 (MEM_REF, vectype, t,
3624 build_int_cst (TREE_TYPE (t), 0));
3625 new_stmt
3626 = gimple_build_assign (make_ssa_name (vec_dest), t);
3627 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3628 tree clobber = build_constructor (ratype, NULL);
3629 TREE_THIS_VOLATILE (clobber) = 1;
3630 vect_finish_stmt_generation (stmt,
3631 gimple_build_assign (new_temp,
3632 clobber), gsi);
3636 if (j == 0)
3637 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3638 else
3639 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3641 prev_stmt_info = vinfo_for_stmt (new_stmt);
3644 vargs.release ();
3646 /* The call in STMT might prevent it from being removed in dce.
3647 We cannot remove it here, however, because of the way the SSA name
3648 it defines is mapped to the new definition. So just replace the
3649 rhs of the statement with something harmless. */
3651 if (slp_node)
3652 return true;
3654 if (scalar_dest)
3656 type = TREE_TYPE (scalar_dest);
3657 if (is_pattern_stmt_p (stmt_info))
3658 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3659 else
3660 lhs = gimple_call_lhs (stmt);
3661 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3663 else
3664 new_stmt = gimple_build_nop ();
3665 set_vinfo_for_stmt (new_stmt, stmt_info);
3666 set_vinfo_for_stmt (stmt, NULL);
3667 STMT_VINFO_STMT (stmt_info) = new_stmt;
3668 gsi_replace (gsi, new_stmt, true);
3669 unlink_stmt_vdef (stmt);
3671 return true;
3675 /* Function vect_gen_widened_results_half
3677 Create a vector stmt whose code, number of arguments, and result
3678 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3679 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3680 If CODE is a CALL_EXPR, this means that a call to DECL
3681 needs to be created (DECL is a function decl of a target builtin).
3682 STMT is the original scalar stmt that we are vectorizing. */
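/* An illustrative sketch, not part of the original sources: assuming a
   target with 128-bit vectors, widening a V8HI operand vx to V4SI is
   done in two halves, one call to this function per half, e.g.

     half 1:  vlo = VEC_UNPACK_LO_EXPR <vx>    (CODE == code1)
     half 2:  vhi = VEC_UNPACK_HI_EXPR <vx>    (CODE == code2)

   vect_create_vectorized_promotion_stmts below invokes this function
   twice per input vector, once for each half.  */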
3684 static gimple *
3685 vect_gen_widened_results_half (enum tree_code code,
3686 tree decl,
3687 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3688 tree vec_dest, gimple_stmt_iterator *gsi,
3689 gimple *stmt)
3691 gimple *new_stmt;
3692 tree new_temp;
3694 /* Generate half of the widened result: */
3695 if (code == CALL_EXPR)
3697 /* Target specific support */
3698 if (op_type == binary_op)
3699 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3700 else
3701 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3702 new_temp = make_ssa_name (vec_dest, new_stmt);
3703 gimple_call_set_lhs (new_stmt, new_temp);
3705 else
3707 /* Generic support */
3708 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3709 if (op_type != binary_op)
3710 vec_oprnd1 = NULL;
3711 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3712 new_temp = make_ssa_name (vec_dest, new_stmt);
3713 gimple_assign_set_lhs (new_stmt, new_temp);
3715 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3717 return new_stmt;
3721 /* Get vectorized definitions for loop-based vectorization. For the first
3722 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3723 the scalar operand), and for the rest we get a copy with
3724 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3725 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3726 The vectors are collected into VEC_OPRNDS. */
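/* A hedged example (the vx names are illustrative, not from this file):
   when called for a scalar operand X with MULTI_STEP_CVT == 1, the
   recursion collects four defs into VEC_OPRNDS:

     vx0 = vect_get_vec_def_for_operand (x)       <- from the scalar
     vx1 = vect_get_vec_def_for_stmt_copy (vx0)
     vx2 = vect_get_vec_def_for_stmt_copy (vx1)
     vx3 = vect_get_vec_def_for_stmt_copy (vx2)

   i.e. VEC_OPRNDS ends up as {vx0, vx1, vx2, vx3}.  */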
3728 static void
3729 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3730 vec<tree> *vec_oprnds, int multi_step_cvt)
3732 tree vec_oprnd;
3734 /* Get first vector operand. */
3735 /* All the vector operands except the very first one (that is, the scalar
3736 operand) are stmt copies. */
3737 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3738 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3739 else
3740 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3742 vec_oprnds->quick_push (vec_oprnd);
3744 /* Get second vector operand. */
3745 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3746 vec_oprnds->quick_push (vec_oprnd);
3748 *oprnd = vec_oprnd;
3750 /* For conversion in multiple steps, continue to get operands
3751 recursively. */
3752 if (multi_step_cvt)
3753 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3757 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3758 For multi-step conversions store the resulting vectors and call the function
3759 recursively. */
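/* For illustration only (modes and names are assumptions): demoting
   int to short with 128-bit vectors packs pairs of V4SI operands into
   one V8HI result per iteration, e.g.

     vres0 = VEC_PACK_TRUNC_EXPR <vop0, vop1>
     vres1 = VEC_PACK_TRUNC_EXPR <vop2, vop3>

   which is why the loop below consumes VEC_OPRNDS two entries at a
   time.  */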
3761 static void
3762 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3763 int multi_step_cvt, gimple *stmt,
3764 vec<tree> vec_dsts,
3765 gimple_stmt_iterator *gsi,
3766 slp_tree slp_node, enum tree_code code,
3767 stmt_vec_info *prev_stmt_info)
3769 unsigned int i;
3770 tree vop0, vop1, new_tmp, vec_dest;
3771 gimple *new_stmt;
3772 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3774 vec_dest = vec_dsts.pop ();
3776 for (i = 0; i < vec_oprnds->length (); i += 2)
3778 /* Create demotion operation. */
3779 vop0 = (*vec_oprnds)[i];
3780 vop1 = (*vec_oprnds)[i + 1];
3781 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3782 new_tmp = make_ssa_name (vec_dest, new_stmt);
3783 gimple_assign_set_lhs (new_stmt, new_tmp);
3784 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3786 if (multi_step_cvt)
3787 /* Store the resulting vector for next recursive call. */
3788 (*vec_oprnds)[i/2] = new_tmp;
3789 else
3791 /* This is the last step of the conversion sequence. Store the
3792 vectors in SLP_NODE or in the vector info of the scalar statement
3793 (or in the STMT_VINFO_RELATED_STMT chain). */
3794 if (slp_node)
3795 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3796 else
3798 if (!*prev_stmt_info)
3799 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3800 else
3801 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3803 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3808 /* For multi-step demotion operations we first generate demotion operations
3809 from the source type to the intermediate types, and then combine the
3810 results (stored in VEC_OPRNDS) with a demotion operation to the
3811 destination type. */
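/* A concrete sketch under assumed 128-bit modes: for an int -> char
   demotion the first level packs pairs of V4SI into V8HI, and the
   recursive call then packs pairs of V8HI into V16QI with
   VEC_PACK_TRUNC_EXPR, halving the number of entries in VEC_OPRNDS at
   each level.  */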
3812 if (multi_step_cvt)
3814 /* At each level of recursion we have half of the operands we had at the
3815 previous level. */
3816 vec_oprnds->truncate ((i+1)/2);
3817 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3818 stmt, vec_dsts, gsi, slp_node,
3819 VEC_PACK_TRUNC_EXPR,
3820 prev_stmt_info);
3823 vec_dsts.quick_push (vec_dest);
3827 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3828 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3829 the resulting vectors and call the function recursively. */
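/* Illustrative example with assumed modes: promoting V16QI operands
   towards V4SI doubles the vector count on each call, e.g. one V16QI
   input vx yields

     level 1:  vlo = VEC_UNPACK_LO_EXPR <vx>, vhi = VEC_UNPACK_HI_EXPR <vx>
               (two V8HI vectors)
     level 0:  one lo/hi pair of V4SI vectors per V8HI above
               (four V4SI vectors)

   so VEC_OPRNDS0 is rewritten in place with twice as many entries each
   time (see the release/assign at the end of the function).  */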
3831 static void
3832 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3833 vec<tree> *vec_oprnds1,
3834 gimple *stmt, tree vec_dest,
3835 gimple_stmt_iterator *gsi,
3836 enum tree_code code1,
3837 enum tree_code code2, tree decl1,
3838 tree decl2, int op_type)
3840 int i;
3841 tree vop0, vop1, new_tmp1, new_tmp2;
3842 gimple *new_stmt1, *new_stmt2;
3843 vec<tree> vec_tmp = vNULL;
3845 vec_tmp.create (vec_oprnds0->length () * 2);
3846 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3848 if (op_type == binary_op)
3849 vop1 = (*vec_oprnds1)[i];
3850 else
3851 vop1 = NULL_TREE;
3853 /* Generate the two halves of the promotion operation. */
3854 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3855 op_type, vec_dest, gsi, stmt);
3856 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3857 op_type, vec_dest, gsi, stmt);
3858 if (is_gimple_call (new_stmt1))
3860 new_tmp1 = gimple_call_lhs (new_stmt1);
3861 new_tmp2 = gimple_call_lhs (new_stmt2);
3863 else
3865 new_tmp1 = gimple_assign_lhs (new_stmt1);
3866 new_tmp2 = gimple_assign_lhs (new_stmt2);
3869 /* Store the results for the next step. */
3870 vec_tmp.quick_push (new_tmp1);
3871 vec_tmp.quick_push (new_tmp2);
3874 vec_oprnds0->release ();
3875 *vec_oprnds0 = vec_tmp;
3879 /* Check if STMT performs a conversion operation that can be vectorized.
3880 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3881 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3882 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
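/* For illustration (the types are assumptions, not from this file),
   the MODIFIER used below is chosen by comparing the number of vector
   elements on the two sides of the conversion:

     int   -> float    V4SI -> V4SF    equal counts        -> NONE
     short -> int      V8HI -> V4SI    more in than out    -> WIDEN
     int   -> short    V4SI -> V8HI    more out than in    -> NARROW  */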
3884 static bool
3885 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3886 gimple **vec_stmt, slp_tree slp_node)
3888 tree vec_dest;
3889 tree scalar_dest;
3890 tree op0, op1 = NULL_TREE;
3891 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3892 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3893 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3894 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3895 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3896 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3897 tree new_temp;
3898 gimple *def_stmt;
3899 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3900 gimple *new_stmt = NULL;
3901 stmt_vec_info prev_stmt_info;
3902 int nunits_in;
3903 int nunits_out;
3904 tree vectype_out, vectype_in;
3905 int ncopies, i, j;
3906 tree lhs_type, rhs_type;
3907 enum { NARROW, NONE, WIDEN } modifier;
3908 vec<tree> vec_oprnds0 = vNULL;
3909 vec<tree> vec_oprnds1 = vNULL;
3910 tree vop0;
3911 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3912 vec_info *vinfo = stmt_info->vinfo;
3913 int multi_step_cvt = 0;
3914 vec<tree> interm_types = vNULL;
3915 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3916 int op_type;
3917 machine_mode rhs_mode;
3918 unsigned short fltsz;
3920 /* Is STMT a vectorizable conversion? */
3922 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3923 return false;
3925 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3926 && ! vec_stmt)
3927 return false;
3929 if (!is_gimple_assign (stmt))
3930 return false;
3932 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3933 return false;
3935 code = gimple_assign_rhs_code (stmt);
3936 if (!CONVERT_EXPR_CODE_P (code)
3937 && code != FIX_TRUNC_EXPR
3938 && code != FLOAT_EXPR
3939 && code != WIDEN_MULT_EXPR
3940 && code != WIDEN_LSHIFT_EXPR)
3941 return false;
3943 op_type = TREE_CODE_LENGTH (code);
3945 /* Check types of lhs and rhs. */
3946 scalar_dest = gimple_assign_lhs (stmt);
3947 lhs_type = TREE_TYPE (scalar_dest);
3948 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3950 op0 = gimple_assign_rhs1 (stmt);
3951 rhs_type = TREE_TYPE (op0);
3953 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3954 && !((INTEGRAL_TYPE_P (lhs_type)
3955 && INTEGRAL_TYPE_P (rhs_type))
3956 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3957 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3958 return false;
3960 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3961 && ((INTEGRAL_TYPE_P (lhs_type)
3962 && (TYPE_PRECISION (lhs_type)
3963 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3964 || (INTEGRAL_TYPE_P (rhs_type)
3965 && (TYPE_PRECISION (rhs_type)
3966 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3968 if (dump_enabled_p ())
3969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3970 "type conversion to/from bit-precision unsupported."
3971 "\n");
3972 return false;
3975 /* Check the operands of the operation. */
3976 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3978 if (dump_enabled_p ())
3979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3980 "use not simple.\n");
3981 return false;
3983 if (op_type == binary_op)
3985 bool ok;
3987 op1 = gimple_assign_rhs2 (stmt);
3988 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3989 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3990 OP1. */
3991 if (CONSTANT_CLASS_P (op0))
3992 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3993 else
3994 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3996 if (!ok)
3998 if (dump_enabled_p ())
3999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4000 "use not simple.\n");
4001 return false;
4005 /* If op0 is an external or constant def, use a vector type of
4006 the same size as the output vector type. */
4007 if (!vectype_in)
4008 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4009 if (vec_stmt)
4010 gcc_assert (vectype_in);
4011 if (!vectype_in)
4013 if (dump_enabled_p ())
4015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4016 "no vectype for scalar type ");
4017 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4018 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4021 return false;
4024 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4025 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4027 if (dump_enabled_p ())
4029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4030 "can't convert between boolean and non "
4031 "boolean vectors");
4032 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4033 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4036 return false;
4039 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4040 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4041 if (nunits_in < nunits_out)
4042 modifier = NARROW;
4043 else if (nunits_out == nunits_in)
4044 modifier = NONE;
4045 else
4046 modifier = WIDEN;
4048 /* Multiple types in SLP are handled by creating the appropriate number of
4049 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4050 case of SLP. */
4051 if (slp_node)
4052 ncopies = 1;
4053 else if (modifier == NARROW)
4054 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4055 else
4056 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4058 /* Sanity check: make sure that at least one copy of the vectorized stmt
4059 needs to be generated. */
4060 gcc_assert (ncopies >= 1);
4062 /* Supportable by target? */
4063 switch (modifier)
4065 case NONE:
4066 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4067 return false;
4068 if (supportable_convert_operation (code, vectype_out, vectype_in,
4069 &decl1, &code1))
4070 break;
4071 /* FALLTHRU */
4072 unsupported:
4073 if (dump_enabled_p ())
4074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4075 "conversion not supported by target.\n");
4076 return false;
4078 case WIDEN:
4079 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4080 &code1, &code2, &multi_step_cvt,
4081 &interm_types))
4083 /* Binary widening operation can only be supported directly by the
4084 architecture. */
4085 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4086 break;
4089 if (code != FLOAT_EXPR
4090 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4091 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4092 goto unsupported;
4094 rhs_mode = TYPE_MODE (rhs_type);
4095 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4096 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4097 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4098 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4100 cvt_type
4101 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4102 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4103 if (cvt_type == NULL_TREE)
4104 goto unsupported;
4106 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4108 if (!supportable_convert_operation (code, vectype_out,
4109 cvt_type, &decl1, &codecvt1))
4110 goto unsupported;
4112 else if (!supportable_widening_operation (code, stmt, vectype_out,
4113 cvt_type, &codecvt1,
4114 &codecvt2, &multi_step_cvt,
4115 &interm_types))
4116 continue;
4117 else
4118 gcc_assert (multi_step_cvt == 0);
4120 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4121 vectype_in, &code1, &code2,
4122 &multi_step_cvt, &interm_types))
4123 break;
4126 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4127 goto unsupported;
4129 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4130 codecvt2 = ERROR_MARK;
4131 else
4133 multi_step_cvt++;
4134 interm_types.safe_push (cvt_type);
4135 cvt_type = NULL_TREE;
4137 break;
4139 case NARROW:
4140 gcc_assert (op_type == unary_op);
4141 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4142 &code1, &multi_step_cvt,
4143 &interm_types))
4144 break;
4146 if (code != FIX_TRUNC_EXPR
4147 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4148 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4149 goto unsupported;
4151 rhs_mode = TYPE_MODE (rhs_type);
4152 cvt_type
4153 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4154 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4155 if (cvt_type == NULL_TREE)
4156 goto unsupported;
4157 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4158 &decl1, &codecvt1))
4159 goto unsupported;
4160 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4161 &code1, &multi_step_cvt,
4162 &interm_types))
4163 break;
4164 goto unsupported;
4166 default:
4167 gcc_unreachable ();
4170 if (!vec_stmt) /* transformation not required. */
4172 if (dump_enabled_p ())
4173 dump_printf_loc (MSG_NOTE, vect_location,
4174 "=== vectorizable_conversion ===\n");
4175 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4177 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4178 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4180 else if (modifier == NARROW)
4182 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4183 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4185 else
4187 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4188 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4190 interm_types.release ();
4191 return true;
4194 /** Transform. **/
4195 if (dump_enabled_p ())
4196 dump_printf_loc (MSG_NOTE, vect_location,
4197 "transform conversion. ncopies = %d.\n", ncopies);
4199 if (op_type == binary_op)
4201 if (CONSTANT_CLASS_P (op0))
4202 op0 = fold_convert (TREE_TYPE (op1), op0);
4203 else if (CONSTANT_CLASS_P (op1))
4204 op1 = fold_convert (TREE_TYPE (op0), op1);
4207 /* In case of multi-step conversion, we first generate conversion operations
4208 to the intermediate types, and then from those types to the final one.
4209 We create vector destinations for the intermediate types (TYPES) received
4210 from supportable_*_operation, and store them in the correct order
4211 for future use in vect_create_vectorized_*_stmts (). */
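/* Hedged example with assumed types: for a two-step char -> int
   promotion VEC_DSTS ends up as { V4SI dest, V8HI dest }, i.e. the
   final type at index 0, so the WIDEN loop below can walk it from the
   back (the type closest to the source) down to index 0.  */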
4212 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4213 vec_dest = vect_create_destination_var (scalar_dest,
4214 (cvt_type && modifier == WIDEN)
4215 ? cvt_type : vectype_out);
4216 vec_dsts.quick_push (vec_dest);
4218 if (multi_step_cvt)
4220 for (i = interm_types.length () - 1;
4221 interm_types.iterate (i, &intermediate_type); i--)
4223 vec_dest = vect_create_destination_var (scalar_dest,
4224 intermediate_type);
4225 vec_dsts.quick_push (vec_dest);
4229 if (cvt_type)
4230 vec_dest = vect_create_destination_var (scalar_dest,
4231 modifier == WIDEN
4232 ? vectype_out : cvt_type);
4234 if (!slp_node)
4236 if (modifier == WIDEN)
4238 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4239 if (op_type == binary_op)
4240 vec_oprnds1.create (1);
4242 else if (modifier == NARROW)
4243 vec_oprnds0.create (
4244 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4246 else if (code == WIDEN_LSHIFT_EXPR)
4247 vec_oprnds1.create (slp_node->vec_stmts_size);
4249 last_oprnd = op0;
4250 prev_stmt_info = NULL;
4251 switch (modifier)
4253 case NONE:
4254 for (j = 0; j < ncopies; j++)
4256 if (j == 0)
4257 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4258 -1);
4259 else
4260 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4262 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4264 /* Arguments are ready, create the new vector stmt. */
4265 if (code1 == CALL_EXPR)
4267 new_stmt = gimple_build_call (decl1, 1, vop0);
4268 new_temp = make_ssa_name (vec_dest, new_stmt);
4269 gimple_call_set_lhs (new_stmt, new_temp);
4271 else
4273 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4274 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4275 new_temp = make_ssa_name (vec_dest, new_stmt);
4276 gimple_assign_set_lhs (new_stmt, new_temp);
4279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4280 if (slp_node)
4281 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4282 else
4284 if (!prev_stmt_info)
4285 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4286 else
4287 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4288 prev_stmt_info = vinfo_for_stmt (new_stmt);
4292 break;
4294 case WIDEN:
4295 /* In case the vectorization factor (VF) is bigger than the number
4296 of elements that we can fit in a vectype (nunits), we have to
4297 generate more than one vector stmt, i.e., we need to "unroll"
4298 the vector stmt by a factor VF/nunits. */
4299 for (j = 0; j < ncopies; j++)
4301 /* Handle uses. */
4302 if (j == 0)
4304 if (slp_node)
4306 if (code == WIDEN_LSHIFT_EXPR)
4308 unsigned int k;
4310 vec_oprnd1 = op1;
4311 /* Store vec_oprnd1 for every vector stmt to be created
4312 for SLP_NODE. We check during the analysis that all
4313 the shift arguments are the same. */
4314 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4315 vec_oprnds1.quick_push (vec_oprnd1);
4317 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4318 slp_node, -1);
4320 else
4321 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4322 &vec_oprnds1, slp_node, -1);
4324 else
4326 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4327 vec_oprnds0.quick_push (vec_oprnd0);
4328 if (op_type == binary_op)
4330 if (code == WIDEN_LSHIFT_EXPR)
4331 vec_oprnd1 = op1;
4332 else
4333 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4334 vec_oprnds1.quick_push (vec_oprnd1);
4338 else
4340 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4341 vec_oprnds0.truncate (0);
4342 vec_oprnds0.quick_push (vec_oprnd0);
4343 if (op_type == binary_op)
4345 if (code == WIDEN_LSHIFT_EXPR)
4346 vec_oprnd1 = op1;
4347 else
4348 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4349 vec_oprnd1);
4350 vec_oprnds1.truncate (0);
4351 vec_oprnds1.quick_push (vec_oprnd1);
4355 /* Arguments are ready. Create the new vector stmts. */
4356 for (i = multi_step_cvt; i >= 0; i--)
4358 tree this_dest = vec_dsts[i];
4359 enum tree_code c1 = code1, c2 = code2;
4360 if (i == 0 && codecvt2 != ERROR_MARK)
4362 c1 = codecvt1;
4363 c2 = codecvt2;
4365 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4366 &vec_oprnds1,
4367 stmt, this_dest, gsi,
4368 c1, c2, decl1, decl2,
4369 op_type);
4372 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4374 if (cvt_type)
4376 if (codecvt1 == CALL_EXPR)
4378 new_stmt = gimple_build_call (decl1, 1, vop0);
4379 new_temp = make_ssa_name (vec_dest, new_stmt);
4380 gimple_call_set_lhs (new_stmt, new_temp);
4382 else
4384 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4385 new_temp = make_ssa_name (vec_dest);
4386 new_stmt = gimple_build_assign (new_temp, codecvt1,
4387 vop0);
4390 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4392 else
4393 new_stmt = SSA_NAME_DEF_STMT (vop0);
4395 if (slp_node)
4396 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4397 else
4399 if (!prev_stmt_info)
4400 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4401 else
4402 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4403 prev_stmt_info = vinfo_for_stmt (new_stmt);
4408 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4409 break;
4411 case NARROW:
4412 /* In case the vectorization factor (VF) is bigger than the number
4413 of elements that we can fit in a vectype (nunits), we have to
4414 generate more than one vector stmt, i.e., we need to "unroll"
4415 the vector stmt by a factor VF/nunits. */
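/* A minimal sketch with assumed modes: narrowing int -> short with
   VF == 8 gives ncopies = VF / nunits_out = 1, and each copy packs two
   V4SI defs into one V8HI result via VEC_PACK_TRUNC_EXPR.  */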
4416 for (j = 0; j < ncopies; j++)
4418 /* Handle uses. */
4419 if (slp_node)
4420 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4421 slp_node, -1);
4422 else
4424 vec_oprnds0.truncate (0);
4425 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4426 vect_pow2 (multi_step_cvt) - 1);
4429 /* Arguments are ready. Create the new vector stmts. */
4430 if (cvt_type)
4431 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4433 if (codecvt1 == CALL_EXPR)
4435 new_stmt = gimple_build_call (decl1, 1, vop0);
4436 new_temp = make_ssa_name (vec_dest, new_stmt);
4437 gimple_call_set_lhs (new_stmt, new_temp);
4439 else
4441 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4442 new_temp = make_ssa_name (vec_dest);
4443 new_stmt = gimple_build_assign (new_temp, codecvt1,
4444 vop0);
4447 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4448 vec_oprnds0[i] = new_temp;
4451 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4452 stmt, vec_dsts, gsi,
4453 slp_node, code1,
4454 &prev_stmt_info);
4457 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4458 break;
4461 vec_oprnds0.release ();
4462 vec_oprnds1.release ();
4463 interm_types.release ();
4465 return true;
4469 /* Function vectorizable_assignment.
4471 Check if STMT performs an assignment (copy) that can be vectorized.
4472 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4473 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4474 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
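/* For illustration (types are assumptions): this handles plain copies
   such as  y_1 = x_2,  PAREN_EXPR, and conversions that change neither
   the number of elements nor the vector size, e.g. a 32-bit
   int -> unsigned int NOP_EXPR; the transform emits a VIEW_CONVERT_EXPR
   (or a direct copy) of the whole vector operand.  */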
4476 static bool
4477 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4478 gimple **vec_stmt, slp_tree slp_node)
4480 tree vec_dest;
4481 tree scalar_dest;
4482 tree op;
4483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4484 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4485 tree new_temp;
4486 gimple *def_stmt;
4487 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4488 int ncopies;
4489 int i, j;
4490 vec<tree> vec_oprnds = vNULL;
4491 tree vop;
4492 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4493 vec_info *vinfo = stmt_info->vinfo;
4494 gimple *new_stmt = NULL;
4495 stmt_vec_info prev_stmt_info = NULL;
4496 enum tree_code code;
4497 tree vectype_in;
4499 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4500 return false;
4502 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4503 && ! vec_stmt)
4504 return false;
4506 /* Is vectorizable assignment? */
4507 if (!is_gimple_assign (stmt))
4508 return false;
4510 scalar_dest = gimple_assign_lhs (stmt);
4511 if (TREE_CODE (scalar_dest) != SSA_NAME)
4512 return false;
4514 code = gimple_assign_rhs_code (stmt);
4515 if (gimple_assign_single_p (stmt)
4516 || code == PAREN_EXPR
4517 || CONVERT_EXPR_CODE_P (code))
4518 op = gimple_assign_rhs1 (stmt);
4519 else
4520 return false;
4522 if (code == VIEW_CONVERT_EXPR)
4523 op = TREE_OPERAND (op, 0);
4525 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4526 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4528 /* Multiple types in SLP are handled by creating the appropriate number of
4529 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4530 case of SLP. */
4531 if (slp_node)
4532 ncopies = 1;
4533 else
4534 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4536 gcc_assert (ncopies >= 1);
4538 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4540 if (dump_enabled_p ())
4541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4542 "use not simple.\n");
4543 return false;
4546 /* We can handle NOP_EXPR conversions that do not change the number
4547 of elements or the vector size. */
4548 if ((CONVERT_EXPR_CODE_P (code)
4549 || code == VIEW_CONVERT_EXPR)
4550 && (!vectype_in
4551 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4552 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4553 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4554 return false;
4556 /* We do not handle bit-precision changes. */
4557 if ((CONVERT_EXPR_CODE_P (code)
4558 || code == VIEW_CONVERT_EXPR)
4559 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4560 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4561 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4562 || ((TYPE_PRECISION (TREE_TYPE (op))
4563 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4564 /* But a conversion that does not change the bit-pattern is ok. */
4565 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4566 > TYPE_PRECISION (TREE_TYPE (op)))
4567 && TYPE_UNSIGNED (TREE_TYPE (op)))
4568 /* Conversion between boolean types of different sizes is
4569 a simple assignment in case their vectypes are the same
4570 boolean vector type. */
4571 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4572 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4574 if (dump_enabled_p ())
4575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4576 "type conversion to/from bit-precision "
4577 "unsupported.\n");
4578 return false;
4581 if (!vec_stmt) /* transformation not required. */
4583 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4584 if (dump_enabled_p ())
4585 dump_printf_loc (MSG_NOTE, vect_location,
4586 "=== vectorizable_assignment ===\n");
4587 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4588 return true;
4591 /** Transform. **/
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4595 /* Handle def. */
4596 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4598 /* Handle use. */
4599 for (j = 0; j < ncopies; j++)
4601 /* Handle uses. */
4602 if (j == 0)
4603 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4604 else
4605 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4607 /* Arguments are ready. Create the new vector stmt. */
4608 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4610 if (CONVERT_EXPR_CODE_P (code)
4611 || code == VIEW_CONVERT_EXPR)
4612 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4613 new_stmt = gimple_build_assign (vec_dest, vop);
4614 new_temp = make_ssa_name (vec_dest, new_stmt);
4615 gimple_assign_set_lhs (new_stmt, new_temp);
4616 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4617 if (slp_node)
4618 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4621 if (slp_node)
4622 continue;
4624 if (j == 0)
4625 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4626 else
4627 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4629 prev_stmt_info = vinfo_for_stmt (new_stmt);
4632 vec_oprnds.release ();
4633 return true;
4637 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4638 either as a shift by a scalar or as a shift by a vector. */
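/* A hedged usage sketch (the call site shown is hypothetical): a caller
   can ask, e.g.,

     vect_supportable_shift (RSHIFT_EXPR, short_integer_type_node)

   before introducing such a shift; the function returns true when
   either the scalar-amount or the vector-amount shift optab is
   implemented for the corresponding vectype.  */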
4640 bool
4641 vect_supportable_shift (enum tree_code code, tree scalar_type)
4644 machine_mode vec_mode;
4645 optab optab;
4646 int icode;
4647 tree vectype;
4649 vectype = get_vectype_for_scalar_type (scalar_type);
4650 if (!vectype)
4651 return false;
4653 optab = optab_for_tree_code (code, vectype, optab_scalar);
4654 if (!optab
4655 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4657 optab = optab_for_tree_code (code, vectype, optab_vector);
4658 if (!optab
4659 || (optab_handler (optab, TYPE_MODE (vectype))
4660 == CODE_FOR_nothing))
4661 return false;
4664 vec_mode = TYPE_MODE (vectype);
4665 icode = (int) optab_handler (optab, vec_mode);
4666 if (icode == CODE_FOR_nothing)
4667 return false;
4669 return true;
4673 /* Function vectorizable_shift.
4675 Check if STMT performs a shift operation that can be vectorized.
4676 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4677 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4678 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4680 static bool
4681 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4682 gimple **vec_stmt, slp_tree slp_node)
4684 tree vec_dest;
4685 tree scalar_dest;
4686 tree op0, op1 = NULL;
4687 tree vec_oprnd1 = NULL_TREE;
4688 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4689 tree vectype;
4690 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4691 enum tree_code code;
4692 machine_mode vec_mode;
4693 tree new_temp;
4694 optab optab;
4695 int icode;
4696 machine_mode optab_op2_mode;
4697 gimple *def_stmt;
4698 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4699 gimple *new_stmt = NULL;
4700 stmt_vec_info prev_stmt_info;
4701 int nunits_in;
4702 int nunits_out;
4703 tree vectype_out;
4704 tree op1_vectype;
4705 int ncopies;
4706 int j, i;
4707 vec<tree> vec_oprnds0 = vNULL;
4708 vec<tree> vec_oprnds1 = vNULL;
4709 tree vop0, vop1;
4710 unsigned int k;
4711 bool scalar_shift_arg = true;
4712 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4713 vec_info *vinfo = stmt_info->vinfo;
4714 int vf;
4716 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4717 return false;
4719 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4720 && ! vec_stmt)
4721 return false;
4723 /* Is STMT a vectorizable shift operation? */
4724 if (!is_gimple_assign (stmt))
4725 return false;
4727 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4728 return false;
4730 code = gimple_assign_rhs_code (stmt);
4732 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4733 || code == RROTATE_EXPR))
4734 return false;
4736 scalar_dest = gimple_assign_lhs (stmt);
4737 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4738 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4739 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4741 if (dump_enabled_p ())
4742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4743 "bit-precision shifts not supported.\n");
4744 return false;
4747 op0 = gimple_assign_rhs1 (stmt);
4748 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4750 if (dump_enabled_p ())
4751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4752 "use not simple.\n");
4753 return false;
4755 /* If op0 is an external or constant def use a vector type with
4756 the same size as the output vector type. */
4757 if (!vectype)
4758 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4759 if (vec_stmt)
4760 gcc_assert (vectype);
4761 if (!vectype)
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4765 "no vectype for scalar type\n");
4766 return false;
4769 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4770 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4771 if (nunits_out != nunits_in)
4772 return false;
4774 op1 = gimple_assign_rhs2 (stmt);
4775 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4777 if (dump_enabled_p ())
4778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4779 "use not simple.\n");
4780 return false;
4783 if (loop_vinfo)
4784 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4785 else
4786 vf = 1;
4788 /* Multiple types in SLP are handled by creating the appropriate number of
4789 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4790 case of SLP. */
4791 if (slp_node)
4792 ncopies = 1;
4793 else
4794 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4796 gcc_assert (ncopies >= 1);
4798 /* Determine whether the shift amount is a vector or a scalar. If the
4799 shift/rotate amount is a vector, use the vector/vector shift optabs. */
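/* Hedged examples (not from this file): a shift such as  x[i] << 3  or
   x[i] << n  with loop-invariant n keeps a scalar shift argument,
   whereas  x[i] << y[i]  needs the vector/vector form because each
   element has its own count.  */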
4801 if ((dt[1] == vect_internal_def
4802 || dt[1] == vect_induction_def)
4803 && !slp_node)
4804 scalar_shift_arg = false;
4805 else if (dt[1] == vect_constant_def
4806 || dt[1] == vect_external_def
4807 || dt[1] == vect_internal_def)
4809 /* In SLP, we need to check whether the shift count is the same in
4810 all the stmts; in loops, if it is a constant or invariant, it is
4811 always a scalar shift. */
4812 if (slp_node)
4814 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4815 gimple *slpstmt;
4817 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4818 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4819 scalar_shift_arg = false;
4822 /* If the shift amount is computed by a pattern stmt, we cannot
4823 use the scalar amount directly; thus give up and use a vector
4824 shift. */
4825 if (dt[1] == vect_internal_def)
4827 gimple *def = SSA_NAME_DEF_STMT (op1);
4828 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4829 scalar_shift_arg = false;
4832 else
4834 if (dump_enabled_p ())
4835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4836 "operand mode requires invariant argument.\n");
4837 return false;
4840 /* Vector shifted by vector. */
4841 if (!scalar_shift_arg)
4843 optab = optab_for_tree_code (code, vectype, optab_vector);
4844 if (dump_enabled_p ())
4845 dump_printf_loc (MSG_NOTE, vect_location,
4846 "vector/vector shift/rotate found.\n");
4848 if (!op1_vectype)
4849 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4850 if (op1_vectype == NULL_TREE
4851 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4853 if (dump_enabled_p ())
4854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4855 "unusable type for last operand in"
4856 " vector/vector shift/rotate.\n");
4857 return false;
4860 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
4861 whether it has a vector-shifted-by-vector insn. */
4862 else
4864 optab = optab_for_tree_code (code, vectype, optab_scalar);
4865 if (optab
4866 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4868 if (dump_enabled_p ())
4869 dump_printf_loc (MSG_NOTE, vect_location,
4870 "vector/scalar shift/rotate found.\n");
4872 else
4874 optab = optab_for_tree_code (code, vectype, optab_vector);
4875 if (optab
4876 && (optab_handler (optab, TYPE_MODE (vectype))
4877 != CODE_FOR_nothing))
4879 scalar_shift_arg = false;
4881 if (dump_enabled_p ())
4882 dump_printf_loc (MSG_NOTE, vect_location,
4883 "vector/vector shift/rotate found.\n");
4885 /* Unlike the other binary operators, shifts/rotates take an int
4886 rhs rather than one of the same type as the lhs, so make sure
4887 the scalar has the right type if we are dealing with vectors of
4888 long long/long/short/char. */
4889 if (dt[1] == vect_constant_def)
4890 op1 = fold_convert (TREE_TYPE (vectype), op1);
4891 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4892 TREE_TYPE (op1)))
4894 if (slp_node
4895 && TYPE_MODE (TREE_TYPE (vectype))
4896 != TYPE_MODE (TREE_TYPE (op1)))
4898 if (dump_enabled_p ())
4899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4900 "unusable type for last operand in"
4901 " vector/vector shift/rotate.\n");
4902 return false;
4904 if (vec_stmt && !slp_node)
4906 op1 = fold_convert (TREE_TYPE (vectype), op1);
4907 op1 = vect_init_vector (stmt, op1,
4908 TREE_TYPE (vectype), NULL);
4915 /* Supportable by target? */
4916 if (!optab)
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 "no optab.\n");
4921 return false;
4923 vec_mode = TYPE_MODE (vectype);
4924 icode = (int) optab_handler (optab, vec_mode);
4925 if (icode == CODE_FOR_nothing)
4927 if (dump_enabled_p ())
4928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4929 "op not supported by target.\n");
4930 /* Check only during analysis. */
4931 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4932 || (vf < vect_min_worthwhile_factor (code)
4933 && !vec_stmt))
4934 return false;
4935 if (dump_enabled_p ())
4936 dump_printf_loc (MSG_NOTE, vect_location,
4937 "proceeding using word mode.\n");
4940 /* Worthwhile without SIMD support? Check only during analysis. */
4941 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4942 && vf < vect_min_worthwhile_factor (code)
4943 && !vec_stmt)
4945 if (dump_enabled_p ())
4946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4947 "not worthwhile without SIMD support.\n");
4948 return false;
4951 if (!vec_stmt) /* transformation not required. */
4953 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4954 if (dump_enabled_p ())
4955 dump_printf_loc (MSG_NOTE, vect_location,
4956 "=== vectorizable_shift ===\n");
4957 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4958 return true;
4961 /** Transform. **/
4963 if (dump_enabled_p ())
4964 dump_printf_loc (MSG_NOTE, vect_location,
4965 "transform binary/unary operation.\n");
4967 /* Handle def. */
4968 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4970 prev_stmt_info = NULL;
4971 for (j = 0; j < ncopies; j++)
4973 /* Handle uses. */
4974 if (j == 0)
4976 if (scalar_shift_arg)
4978 /* Vector shl and shr insn patterns can be defined with scalar
4979 operand 2 (shift operand). In this case, use constant or loop
4980 invariant op1 directly, without extending it to vector mode
4981 first. */
4982 optab_op2_mode = insn_data[icode].operand[2].mode;
4983 if (!VECTOR_MODE_P (optab_op2_mode))
4985 if (dump_enabled_p ())
4986 dump_printf_loc (MSG_NOTE, vect_location,
4987 "operand 1 using scalar mode.\n");
4988 vec_oprnd1 = op1;
4989 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4990 vec_oprnds1.quick_push (vec_oprnd1);
4991 if (slp_node)
4993 /* Store vec_oprnd1 for every vector stmt to be created
4994 for SLP_NODE. We check during the analysis that all
4995 the shift arguments are the same.
4996 TODO: Allow different constants for different vector
4997 stmts generated for an SLP instance. */
4998 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4999 vec_oprnds1.quick_push (vec_oprnd1);
5004 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5005 (a special case for certain kinds of vector shifts); otherwise,
5006 operand 1 should be of a vector type (the usual case). */
5007 if (vec_oprnd1)
5008 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5009 slp_node, -1);
5010 else
5011 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5012 slp_node, -1);
5014 else
5015 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5017 /* Arguments are ready. Create the new vector stmt. */
5018 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5020 vop1 = vec_oprnds1[i];
5021 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5022 new_temp = make_ssa_name (vec_dest, new_stmt);
5023 gimple_assign_set_lhs (new_stmt, new_temp);
5024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5025 if (slp_node)
5026 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5029 if (slp_node)
5030 continue;
5032 if (j == 0)
5033 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5034 else
5035 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5036 prev_stmt_info = vinfo_for_stmt (new_stmt);
5039 vec_oprnds0.release ();
5040 vec_oprnds1.release ();
5042 return true;
5046 /* Function vectorizable_operation.
5048 Check if STMT performs a binary, unary or ternary operation that can
5049 be vectorized.
5050 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5051 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5052 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5054 static bool
5055 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5056 gimple **vec_stmt, slp_tree slp_node)
5058 tree vec_dest;
5059 tree scalar_dest;
5060 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5062 tree vectype;
5063 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5064 enum tree_code code;
5065 machine_mode vec_mode;
5066 tree new_temp;
5067 int op_type;
5068 optab optab;
5069 bool target_support_p;
5070 gimple *def_stmt;
5071 enum vect_def_type dt[3]
5072 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5073 gimple *new_stmt = NULL;
5074 stmt_vec_info prev_stmt_info;
5075 int nunits_in;
5076 int nunits_out;
5077 tree vectype_out;
5078 int ncopies;
5079 int j, i;
5080 vec<tree> vec_oprnds0 = vNULL;
5081 vec<tree> vec_oprnds1 = vNULL;
5082 vec<tree> vec_oprnds2 = vNULL;
5083 tree vop0, vop1, vop2;
5084 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5085 vec_info *vinfo = stmt_info->vinfo;
5086 int vf;
5088 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5089 return false;
5091 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5092 && ! vec_stmt)
5093 return false;
5095 /* Is STMT a vectorizable binary/unary operation? */
5096 if (!is_gimple_assign (stmt))
5097 return false;
5099 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5100 return false;
5102 code = gimple_assign_rhs_code (stmt);
5104 /* For pointer addition, we should use the normal plus for
5105 the vector addition. */
5106 if (code == POINTER_PLUS_EXPR)
5107 code = PLUS_EXPR;
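/* E.g. (illustrative only) a pointer increment such as  p_2 = p_1 + 16
   is vectorized as an ordinary element-wise PLUS_EXPR, typically on a
   vector of pointer-sized integers; there is no separate vector
   pointer-plus operation.  */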
5109 /* Support only unary, binary, and ternary operations. */
5110 op_type = TREE_CODE_LENGTH (code);
5111 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "num. args = %d (not unary/binary/ternary op).\n",
5116 op_type);
5117 return false;
5120 scalar_dest = gimple_assign_lhs (stmt);
5121 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5123 /* Most operations cannot handle bit-precision types without extra
5124 truncations. */
5125 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5126 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5127 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5128 /* Exceptions are bitwise binary operations. */
5129 && code != BIT_IOR_EXPR
5130 && code != BIT_XOR_EXPR
5131 && code != BIT_AND_EXPR)
5133 if (dump_enabled_p ())
5134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5135 "bit-precision arithmetic not supported.\n");
5136 return false;
5139 op0 = gimple_assign_rhs1 (stmt);
5140 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5142 if (dump_enabled_p ())
5143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5144 "use not simple.\n");
5145 return false;
5147 /* If op0 is an external or constant def use a vector type with
5148 the same size as the output vector type. */
5149 if (!vectype)
5151 /* For a boolean type we cannot determine the vectype from an
5152 invariant value (we don't know whether it is a vector of
5153 booleans or a vector of integers). We use the output
5154 vectype because operations on booleans don't change the
5155 type. */
5156 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
5158 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
5160 if (dump_enabled_p ())
5161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5162 "not supported operation on bool value.\n");
5163 return false;
5165 vectype = vectype_out;
5167 else
5168 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5170 if (vec_stmt)
5171 gcc_assert (vectype);
5172 if (!vectype)
5174 if (dump_enabled_p ())
5176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5177 "no vectype for scalar type ");
5178 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5179 TREE_TYPE (op0));
5180 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5183 return false;
5186 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5187 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5188 if (nunits_out != nunits_in)
5189 return false;
5191 if (op_type == binary_op || op_type == ternary_op)
5193 op1 = gimple_assign_rhs2 (stmt);
5194 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5196 if (dump_enabled_p ())
5197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5198 "use not simple.\n");
5199 return false;
5202 if (op_type == ternary_op)
5204 op2 = gimple_assign_rhs3 (stmt);
5205 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5207 if (dump_enabled_p ())
5208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5209 "use not simple.\n");
5210 return false;
5214 if (loop_vinfo)
5215 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5216 else
5217 vf = 1;
5219 /* Multiple types in SLP are handled by creating the appropriate number of
5220 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5221 case of SLP. */
5222 if (slp_node)
5223 ncopies = 1;
5224 else
5225 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5227 gcc_assert (ncopies >= 1);
5229 /* Shifts are handled in vectorizable_shift (). */
5230 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5231 || code == RROTATE_EXPR)
5232 return false;
5234 /* Supportable by target? */
5236 vec_mode = TYPE_MODE (vectype);
5237 if (code == MULT_HIGHPART_EXPR)
5238 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5239 else
5241 optab = optab_for_tree_code (code, vectype, optab_default);
5242 if (!optab)
5244 if (dump_enabled_p ())
5245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5246 "no optab.\n");
5247 return false;
5249 target_support_p = (optab_handler (optab, vec_mode)
5250 != CODE_FOR_nothing);
5253 if (!target_support_p)
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5257 "op not supported by target.\n");
5258 /* Check only during analysis. */
5259 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5260 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5261 return false;
5262 if (dump_enabled_p ())
5263 dump_printf_loc (MSG_NOTE, vect_location,
5264 "proceeding using word mode.\n");
5267 /* Worthwhile without SIMD support? Check only during analysis. */
5268 if (!VECTOR_MODE_P (vec_mode)
5269 && !vec_stmt
5270 && vf < vect_min_worthwhile_factor (code))
5272 if (dump_enabled_p ())
5273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5274 "not worthwhile without SIMD support.\n");
5275 return false;
5278 if (!vec_stmt) /* transformation not required. */
5280 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5281 if (dump_enabled_p ())
5282 dump_printf_loc (MSG_NOTE, vect_location,
5283 "=== vectorizable_operation ===\n");
5284 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5285 return true;
5288 /** Transform. **/
5290 if (dump_enabled_p ())
5291 dump_printf_loc (MSG_NOTE, vect_location,
5292 "transform binary/unary operation.\n");
5294 /* Handle def. */
5295 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5297 /* In case the vectorization factor (VF) is bigger than the number
5298 of elements that we can fit in a vectype (nunits), we have to generate
5299 more than one vector stmt, i.e., we need to "unroll" the
5300 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5301 from one copy of the vector stmt to the next, in the field
5302 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5303 stages to find the correct vector defs to be used when vectorizing
5304 stmts that use the defs of the current stmt. The example below
5305 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5306 we need to create 4 vectorized stmts):
5308 before vectorization:
5309 RELATED_STMT VEC_STMT
5310 S1: x = memref - -
5311 S2: z = x + 1 - -
5313 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5314 there):
5315 RELATED_STMT VEC_STMT
5316 VS1_0: vx0 = memref0 VS1_1 -
5317 VS1_1: vx1 = memref1 VS1_2 -
5318 VS1_2: vx2 = memref2 VS1_3 -
5319 VS1_3: vx3 = memref3 - -
5320 S1: x = load - VS1_0
5321 S2: z = x + 1 - -
5323 step2: vectorize stmt S2 (done here):
5324 To vectorize stmt S2 we first need to find the relevant vector
5325 def for the first operand 'x'. This is, as usual, obtained from
5326 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5327 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5328 relevant vector def 'vx0'. Having found 'vx0' we can generate
5329 the vector stmt VS2_0, and as usual, record it in the
5330 STMT_VINFO_VEC_STMT of stmt S2.
5331 When creating the second copy (VS2_1), we obtain the relevant vector
5332 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5333 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5334 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5335 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5336 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5337 chain of stmts and pointers:
5338 RELATED_STMT VEC_STMT
5339 VS1_0: vx0 = memref0 VS1_1 -
5340 VS1_1: vx1 = memref1 VS1_2 -
5341 VS1_2: vx2 = memref2 VS1_3 -
5342 VS1_3: vx3 = memref3 - -
5343 S1: x = load - VS1_0
5344 VS2_0: vz0 = vx0 + v1 VS2_1 -
5345 VS2_1: vz1 = vx1 + v1 VS2_2 -
5346 VS2_2: vz2 = vx2 + v1 VS2_3 -
5347 VS2_3: vz3 = vx3 + v1 - -
5348 S2: z = x + 1 - VS2_0 */
5350 prev_stmt_info = NULL;
5351 for (j = 0; j < ncopies; j++)
5353 /* Handle uses. */
5354 if (j == 0)
5356 if (op_type == binary_op || op_type == ternary_op)
5357 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5358 slp_node, -1);
5359 else
5360 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5361 slp_node, -1);
5362 if (op_type == ternary_op)
5363 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5364 slp_node, -1);
5366 else
5368 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5369 if (op_type == ternary_op)
5371 tree vec_oprnd = vec_oprnds2.pop ();
5372 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5373 vec_oprnd));
5377 /* Arguments are ready. Create the new vector stmt. */
5378 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5380 vop1 = ((op_type == binary_op || op_type == ternary_op)
5381 ? vec_oprnds1[i] : NULL_TREE);
5382 vop2 = ((op_type == ternary_op)
5383 ? vec_oprnds2[i] : NULL_TREE);
5384 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5385 new_temp = make_ssa_name (vec_dest, new_stmt);
5386 gimple_assign_set_lhs (new_stmt, new_temp);
5387 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5388 if (slp_node)
5389 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5392 if (slp_node)
5393 continue;
5395 if (j == 0)
5396 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5397 else
5398 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5399 prev_stmt_info = vinfo_for_stmt (new_stmt);
5402 vec_oprnds0.release ();
5403 vec_oprnds1.release ();
5404 vec_oprnds2.release ();
5406 return true;
5409 /* A helper function to ensure data reference DR's base alignment
5410 for STMT_INFO. */
5412 static void
5413 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5415 if (!dr->aux)
5416 return;
5418 if (DR_VECT_AUX (dr)->base_misaligned)
5420 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5421 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5423 if (decl_in_symtab_p (base_decl))
5424 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5425 else
5427 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5428 DECL_USER_ALIGN (base_decl) = 1;
5430 DR_VECT_AUX (dr)->base_misaligned = false;
5435 /* Function vectorizable_store.
5437 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5438 can be vectorized.
5439 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5440 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5441 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
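/* Illustrative forms this function handles (arrays and types are
   assumptions): a contiguous store  a[i] = x_1  emitted as full-vector
   stores, an invariant store  a[i] = 0  (VLS_STORE_INVARIANT, the rhs
   is broadcast into a vector once), and grouped/strided or scatter
   forms as classified by get_load_store_type below.  */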
5443 static bool
5444 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5445 slp_tree slp_node)
5447 tree scalar_dest;
5448 tree data_ref;
5449 tree op;
5450 tree vec_oprnd = NULL_TREE;
5451 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5452 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5453 tree elem_type;
5454 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5455 struct loop *loop = NULL;
5456 machine_mode vec_mode;
5457 tree dummy;
5458 enum dr_alignment_support alignment_support_scheme;
5459 gimple *def_stmt;
5460 enum vect_def_type dt;
5461 stmt_vec_info prev_stmt_info = NULL;
5462 tree dataref_ptr = NULL_TREE;
5463 tree dataref_offset = NULL_TREE;
5464 gimple *ptr_incr = NULL;
5465 int ncopies;
5466 int j;
5467 gimple *next_stmt, *first_stmt;
5468 bool grouped_store;
5469 unsigned int group_size, i;
5470 vec<tree> oprnds = vNULL;
5471 vec<tree> result_chain = vNULL;
5472 bool inv_p;
5473 tree offset = NULL_TREE;
5474 vec<tree> vec_oprnds = vNULL;
5475 bool slp = (slp_node != NULL);
5476 unsigned int vec_num;
5477 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5478 vec_info *vinfo = stmt_info->vinfo;
5479 tree aggr_type;
5480 gather_scatter_info gs_info;
5481 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5482 gimple *new_stmt;
5483 int vf;
5484 vec_load_store_type vls_type;
5486 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5487 return false;
5489 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5490 && ! vec_stmt)
5491 return false;
5493 /* Is vectorizable store? */
5495 if (!is_gimple_assign (stmt))
5496 return false;
5498 scalar_dest = gimple_assign_lhs (stmt);
5499 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5500 && is_pattern_stmt_p (stmt_info))
5501 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5502 if (TREE_CODE (scalar_dest) != ARRAY_REF
5503 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5504 && TREE_CODE (scalar_dest) != INDIRECT_REF
5505 && TREE_CODE (scalar_dest) != COMPONENT_REF
5506 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5507 && TREE_CODE (scalar_dest) != REALPART_EXPR
5508 && TREE_CODE (scalar_dest) != MEM_REF)
5509 return false;
5511 /* Cannot have hybrid store SLP -- that would mean storing to the
5512 same location twice. */
5513 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5515 gcc_assert (gimple_assign_single_p (stmt));
5517 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5518 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5520 if (loop_vinfo)
5522 loop = LOOP_VINFO_LOOP (loop_vinfo);
5523 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5525 else
5526 vf = 1;
5528 /* Multiple types in SLP are handled by creating the appropriate number of
5529 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5530 case of SLP. */
5531 if (slp)
5532 ncopies = 1;
5533 else
5534 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5536 gcc_assert (ncopies >= 1);
5538 /* FORNOW. This restriction should be relaxed. */
5539 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5541 if (dump_enabled_p ())
5542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5543 "multiple types in nested loop.\n");
5544 return false;
5547 op = gimple_assign_rhs1 (stmt);
5549 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5551 if (dump_enabled_p ())
5552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5553 "use not simple.\n");
5554 return false;
5557 if (dt == vect_constant_def || dt == vect_external_def)
5558 vls_type = VLS_STORE_INVARIANT;
5559 else
5560 vls_type = VLS_STORE;
5562 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5563 return false;
5565 elem_type = TREE_TYPE (vectype);
5566 vec_mode = TYPE_MODE (vectype);
5568 /* FORNOW. In some cases we can vectorize even if the data type is not
5569 supported (e.g. array initialization with 0). */
5570 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5571 return false;
5573 if (!STMT_VINFO_DATA_REF (stmt_info))
5574 return false;
5576 vect_memory_access_type memory_access_type;
5577 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5578 &memory_access_type, &gs_info))
5579 return false;
5581 if (!vec_stmt) /* transformation not required. */
5583 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5584 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5585 /* The SLP costs are calculated during SLP analysis. */
5586 if (!PURE_SLP_STMT (stmt_info))
5587 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5588 NULL, NULL, NULL);
5589 return true;
5591 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5593 /** Transform. **/
5595 ensure_base_align (stmt_info, dr);
5597 if (memory_access_type == VMAT_GATHER_SCATTER)
5599 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5600 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5601 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5602 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5603 edge pe = loop_preheader_edge (loop);
5604 gimple_seq seq;
5605 basic_block new_bb;
5606 enum { NARROW, NONE, WIDEN } modifier;
5607 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
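/* The offset vector and the data vector may have different numbers of
   elements.  If the offset vector has twice as many elements (WIDEN),
   the same vector of offsets serves two consecutive copies and its
   upper half is brought into place by a permutation on the odd copies.
   If the data vector has twice as many elements (NARROW), NCOPIES is
   doubled and it is the data operand that is permuted on the odd
   copies instead.  */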
5609 if (nunits == (unsigned int) scatter_off_nunits)
5610 modifier = NONE;
5611 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5613 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5614 modifier = WIDEN;
5616 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5617 sel[i] = i | nunits;
5619 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5620 gcc_assert (perm_mask != NULL_TREE);
5622 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5624 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5625 modifier = NARROW;
5627 for (i = 0; i < (unsigned int) nunits; ++i)
5628 sel[i] = i | scatter_off_nunits;
5630 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5631 gcc_assert (perm_mask != NULL_TREE);
5632 ncopies *= 2;
5634 else
5635 gcc_unreachable ();
5637 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5638 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5639 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5640 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5641 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5642 scaletype = TREE_VALUE (arglist);
5644 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5645 && TREE_CODE (rettype) == VOID_TYPE);
5647 ptr = fold_convert (ptrtype, gs_info.base);
5648 if (!is_gimple_min_invariant (ptr))
5650 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5651 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5652 gcc_assert (!new_bb);
5655 /* Currently we support only unconditional scatter stores,
5656 so mask should be all ones. */
5657 mask = build_int_cst (masktype, -1);
5658 mask = vect_init_vector (stmt, mask, masktype, NULL);
5660 scale = build_int_cst (scaletype, gs_info.scale);
5662 prev_stmt_info = NULL;
5663 for (j = 0; j < ncopies; ++j)
5665 if (j == 0)
5667 src = vec_oprnd1
5668 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5669 op = vec_oprnd0
5670 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5672 else if (modifier != NONE && (j & 1))
5674 if (modifier == WIDEN)
5676 src = vec_oprnd1
5677 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5678 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5679 stmt, gsi);
5681 else if (modifier == NARROW)
5683 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5684 stmt, gsi);
5685 op = vec_oprnd0
5686 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5687 vec_oprnd0);
5689 else
5690 gcc_unreachable ();
5692 else
5694 src = vec_oprnd1
5695 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5696 op = vec_oprnd0
5697 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5698 vec_oprnd0);
5701 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5703 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5704 == TYPE_VECTOR_SUBPARTS (srctype));
5705 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5706 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5707 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5708 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5709 src = var;
5712 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5714 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5715 == TYPE_VECTOR_SUBPARTS (idxtype));
5716 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5717 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5718 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5719 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5720 op = var;
5723 new_stmt
5724 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5728 if (prev_stmt_info == NULL)
5729 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5730 else
5731 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5732 prev_stmt_info = vinfo_for_stmt (new_stmt);
5734 return true;
5737 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5738 if (grouped_store)
5740 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5741 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5742 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5744 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5746 /* FORNOW */
5747 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5749 /* We vectorize all the stmts of the interleaving group when we
5750 reach the last stmt in the group. */
5751 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5752 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5753 && !slp)
5755 *vec_stmt = NULL;
5756 return true;
5759 if (slp)
5761 grouped_store = false;
5762 /* VEC_NUM is the number of vect stmts to be created for this
5763 group. */
5764 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5765 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5766 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5767 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5768 op = gimple_assign_rhs1 (first_stmt);
5770 else
5771 /* VEC_NUM is the number of vect stmts to be created for this
5772 group. */
5773 vec_num = group_size;
5775 else
5777 first_stmt = stmt;
5778 first_dr = dr;
5779 group_size = vec_num = 1;
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_NOTE, vect_location,
5784 "transform store. ncopies = %d\n", ncopies);
5786 if (memory_access_type == VMAT_ELEMENTWISE
5787 || memory_access_type == VMAT_STRIDED_SLP)
5789 gimple_stmt_iterator incr_gsi;
5790 bool insert_after;
5791 gimple *incr;
5792 tree offvar;
5793 tree ivstep;
5794 tree running_off;
5795 gimple_seq stmts = NULL;
5796 tree stride_base, stride_step, alias_off;
5797 tree vec_oprnd;
5798 unsigned int g;
5800 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5802 stride_base
5803 = fold_build_pointer_plus
5804 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5805 size_binop (PLUS_EXPR,
5806 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5807 convert_to_ptrofftype (DR_INIT (first_dr))));
5808 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5810 /* For a store with loop-invariant (but other than power-of-2)
5811 stride (i.e. not a grouped access) like so:
5813 for (i = 0; i < n; i += stride)
5814 array[i] = ...;
5816 we generate a new induction variable and new stores from
5817 the components of the (vectorized) rhs:
5819 for (j = 0; ; j += VF*stride)
5820 vectemp = ...;
5821 tmp1 = vectemp[0];
5822 array[j] = tmp1;
5823 tmp2 = vectemp[1];
5824 array[j + stride] = tmp2;
5828 unsigned nstores = nunits;
5829 unsigned lnel = 1;
5830 tree ltype = elem_type;
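/* NSTORES is the number of pieces extracted and stored per vector
   stmt, LNEL the number of group elements each piece covers and LTYPE
   the type of each piece.  The defaults above store every scalar
   element individually; for SLP they are adjusted below so that, for
   example, with nunits == 4 and group_size == 2 each vector stmt is
   split into two two-element stores.  */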
5831 if (slp)
5833 if (group_size < nunits
5834 && nunits % group_size == 0)
5836 nstores = nunits / group_size;
5837 lnel = group_size;
5838 ltype = build_vector_type (elem_type, group_size);
5840 else if (group_size >= nunits
5841 && group_size % nunits == 0)
5843 nstores = 1;
5844 lnel = nunits;
5845 ltype = vectype;
5847 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5848 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5851 ivstep = stride_step;
5852 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5853 build_int_cst (TREE_TYPE (ivstep), vf));
5855 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5857 create_iv (stride_base, ivstep, NULL,
5858 loop, &incr_gsi, insert_after,
5859 &offvar, NULL);
5860 incr = gsi_stmt (incr_gsi);
5861 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5863 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5864 if (stmts)
5865 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5867 prev_stmt_info = NULL;
5868 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5869 next_stmt = first_stmt;
5870 for (g = 0; g < group_size; g++)
5872 running_off = offvar;
5873 if (g)
5875 tree size = TYPE_SIZE_UNIT (ltype);
5876 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5877 size);
5878 tree newoff = copy_ssa_name (running_off, NULL);
5879 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5880 running_off, pos);
5881 vect_finish_stmt_generation (stmt, incr, gsi);
5882 running_off = newoff;
5884 unsigned int group_el = 0;
5885 unsigned HOST_WIDE_INT
5886 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
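/* GROUP_EL counts how many group elements have been stored since
   RUNNING_OFF was last advanced; once a whole group has been emitted
   (or after every element for a non-SLP strided store) RUNNING_OFF is
   bumped by STRIDE_STEP.  */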
5887 for (j = 0; j < ncopies; j++)
5889 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5890 and first_stmt == stmt. */
5891 if (j == 0)
5893 if (slp)
5895 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5896 slp_node, -1);
5897 vec_oprnd = vec_oprnds[0];
5899 else
5901 gcc_assert (gimple_assign_single_p (next_stmt));
5902 op = gimple_assign_rhs1 (next_stmt);
5903 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5906 else
5908 if (slp)
5909 vec_oprnd = vec_oprnds[j];
5910 else
5912 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5913 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5917 for (i = 0; i < nstores; i++)
5919 tree newref, newoff;
5920 gimple *incr, *assign;
5921 tree size = TYPE_SIZE (ltype);
5922 /* Extract the i'th component. */
5923 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5924 bitsize_int (i), size);
5925 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5926 size, pos);
5928 elem = force_gimple_operand_gsi (gsi, elem, true,
5929 NULL_TREE, true,
5930 GSI_SAME_STMT);
5932 tree this_off = build_int_cst (TREE_TYPE (alias_off),
5933 group_el * elsz);
5934 newref = build2 (MEM_REF, ltype,
5935 running_off, this_off);
5937 /* And store it to *running_off. */
5938 assign = gimple_build_assign (newref, elem);
5939 vect_finish_stmt_generation (stmt, assign, gsi);
5941 group_el += lnel;
5942 if (! slp
5943 || group_el == group_size)
5945 newoff = copy_ssa_name (running_off, NULL);
5946 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5947 running_off, stride_step);
5948 vect_finish_stmt_generation (stmt, incr, gsi);
5950 running_off = newoff;
5951 group_el = 0;
5953 if (g == group_size - 1
5954 && !slp)
5956 if (j == 0 && i == 0)
5957 STMT_VINFO_VEC_STMT (stmt_info)
5958 = *vec_stmt = assign;
5959 else
5960 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5961 prev_stmt_info = vinfo_for_stmt (assign);
5965 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5966 if (slp)
5967 break;
5969 return true;
5972 auto_vec<tree> dr_chain (group_size);
5973 oprnds.create (group_size);
5975 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5976 gcc_assert (alignment_support_scheme);
5977 /* Targets with store-lane instructions must not require explicit
5978 realignment. */
5979 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
5980 || alignment_support_scheme == dr_aligned
5981 || alignment_support_scheme == dr_unaligned_supported);
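/* For a negative-step access the generated vector reference must also
   cover the nunits - 1 elements below the address of the scalar
   access, so the data-ref pointer created below is biased
   accordingly.  */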
5983 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
5984 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
5985 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5987 if (memory_access_type == VMAT_LOAD_STORE_LANES)
5988 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5989 else
5990 aggr_type = vectype;
5992 /* In case the vectorization factor (VF) is bigger than the number
5993 of elements that we can fit in a vectype (nunits), we have to generate
5994 more than one vector stmt, i.e. we need to "unroll" the
5995 vector stmt by a factor VF/nunits. For more details see the
5996 documentation of vect_get_vec_def_for_stmt_copy. */
5998 /* In case of interleaving (non-unit grouped access):
6000 S1: &base + 2 = x2
6001 S2: &base = x0
6002 S3: &base + 1 = x1
6003 S4: &base + 3 = x3
6005 We create vectorized stores starting from base address (the access of the
6006 first stmt in the chain (S2 in the above example), when the last store stmt
6007 of the chain (S4) is reached:
6009 VS1: &base = vx2
6010 VS2: &base + vec_size*1 = vx0
6011 VS3: &base + vec_size*2 = vx1
6012 VS4: &base + vec_size*3 = vx3
6014 Then permutation statements are generated:
6016 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6017 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6020 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6021 (the order of the data-refs in the output of vect_permute_store_chain
6022 corresponds to the order of scalar stmts in the interleaving chain - see
6023 the documentation of vect_permute_store_chain()).
6025 In case of both multiple types and interleaving, the vector stores and
6026 permutation stmts above are created for every copy. The result vector stmts are
6027 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6028 STMT_VINFO_RELATED_STMT for the next copies.
6031 prev_stmt_info = NULL;
6032 for (j = 0; j < ncopies; j++)
6035 if (j == 0)
6037 if (slp)
6039 /* Get vectorized arguments for SLP_NODE. */
6040 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6041 NULL, slp_node, -1);
6043 vec_oprnd = vec_oprnds[0];
6045 else
6047 /* For interleaved stores we collect vectorized defs for all the
6048 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6049 used as an input to vect_permute_store_chain(), and OPRNDS as
6050 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6052 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6053 OPRNDS are of size 1. */
6054 next_stmt = first_stmt;
6055 for (i = 0; i < group_size; i++)
6057 /* Since gaps are not supported for interleaved stores,
6058 GROUP_SIZE is the exact number of stmts in the chain.
6059 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6060 there is no interleaving, GROUP_SIZE is 1, and only one
6061 iteration of the loop will be executed. */
6062 gcc_assert (next_stmt
6063 && gimple_assign_single_p (next_stmt));
6064 op = gimple_assign_rhs1 (next_stmt);
6066 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6067 dr_chain.quick_push (vec_oprnd);
6068 oprnds.quick_push (vec_oprnd);
6069 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6073 /* We should have caught mismatched types earlier. */
6074 gcc_assert (useless_type_conversion_p (vectype,
6075 TREE_TYPE (vec_oprnd)));
6076 bool simd_lane_access_p
6077 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6078 if (simd_lane_access_p
6079 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6080 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6081 && integer_zerop (DR_OFFSET (first_dr))
6082 && integer_zerop (DR_INIT (first_dr))
6083 && alias_sets_conflict_p (get_alias_set (aggr_type),
6084 get_alias_set (DR_REF (first_dr))))
6086 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6087 dataref_offset = build_int_cst (reference_alias_ptr_type
6088 (DR_REF (first_dr)), 0);
6089 inv_p = false;
6091 else
6092 dataref_ptr
6093 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6094 simd_lane_access_p ? loop : NULL,
6095 offset, &dummy, gsi, &ptr_incr,
6096 simd_lane_access_p, &inv_p);
6097 gcc_assert (bb_vinfo || !inv_p);
6099 else
6101 /* For interleaved stores we created vectorized defs for all the
6102 defs stored in OPRNDS in the previous iteration (previous copy).
6103 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6104 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6105 next copy.
6106 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6107 OPRNDS are of size 1. */
6108 for (i = 0; i < group_size; i++)
6110 op = oprnds[i];
6111 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6112 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6113 dr_chain[i] = vec_oprnd;
6114 oprnds[i] = vec_oprnd;
6116 if (dataref_offset)
6117 dataref_offset
6118 = int_const_binop (PLUS_EXPR, dataref_offset,
6119 TYPE_SIZE_UNIT (aggr_type));
6120 else
6121 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6122 TYPE_SIZE_UNIT (aggr_type));
6125 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6127 tree vec_array;
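/* With store-lanes the target instruction (e.g. AArch64 st2/st3/st4)
   interleaves the VEC_NUM vectors itself, so the interleaving
   permutation done by vect_permute_store_chain in the other branch is
   not needed here.  */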
6129 /* Combine all the vectors into an array. */
6130 vec_array = create_vector_array (vectype, vec_num);
6131 for (i = 0; i < vec_num; i++)
6133 vec_oprnd = dr_chain[i];
6134 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6137 /* Emit:
6138 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6139 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6140 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6141 gimple_call_set_lhs (new_stmt, data_ref);
6142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6144 else
6146 new_stmt = NULL;
6147 if (grouped_store)
6149 if (j == 0)
6150 result_chain.create (group_size);
6151 /* Permute. */
6152 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6153 &result_chain);
6156 next_stmt = first_stmt;
6157 for (i = 0; i < vec_num; i++)
6159 unsigned align, misalign;
6161 if (i > 0)
6162 /* Bump the vector pointer. */
6163 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6164 stmt, NULL_TREE);
6166 if (slp)
6167 vec_oprnd = vec_oprnds[i];
6168 else if (grouped_store)
6169 /* For grouped stores vectorized defs are interleaved in
6170 vect_permute_store_chain(). */
6171 vec_oprnd = result_chain[i];
6173 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
6174 dataref_ptr,
6175 dataref_offset
6176 ? dataref_offset
6177 : build_int_cst (reference_alias_ptr_type
6178 (DR_REF (first_dr)), 0));
6179 align = TYPE_ALIGN_UNIT (vectype);
6180 if (aligned_access_p (first_dr))
6181 misalign = 0;
6182 else if (DR_MISALIGNMENT (first_dr) == -1)
6184 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6185 align = TYPE_ALIGN_UNIT (elem_type);
6186 else
6187 align = get_object_alignment (DR_REF (first_dr))
6188 / BITS_PER_UNIT;
6189 misalign = 0;
6190 TREE_TYPE (data_ref)
6191 = build_aligned_type (TREE_TYPE (data_ref),
6192 align * BITS_PER_UNIT);
6194 else
6196 TREE_TYPE (data_ref)
6197 = build_aligned_type (TREE_TYPE (data_ref),
6198 TYPE_ALIGN (elem_type));
6199 misalign = DR_MISALIGNMENT (first_dr);
6201 if (dataref_offset == NULL_TREE
6202 && TREE_CODE (dataref_ptr) == SSA_NAME)
6203 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6204 misalign);
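/* A reverse (negative step) store must lay the vector elements out in
   the opposite order, so permute the operand with a reversing mask
   ({ nunits-1, ..., 1, 0 }) before storing it.  */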
6206 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6208 tree perm_mask = perm_mask_for_reverse (vectype);
6209 tree perm_dest
6210 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6211 vectype);
6212 tree new_temp = make_ssa_name (perm_dest);
6214 /* Generate the permute statement. */
6215 gimple *perm_stmt
6216 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6217 vec_oprnd, perm_mask);
6218 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6220 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6221 vec_oprnd = new_temp;
6224 /* Arguments are ready. Create the new vector stmt. */
6225 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6226 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6228 if (slp)
6229 continue;
6231 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6232 if (!next_stmt)
6233 break;
6236 if (!slp)
6238 if (j == 0)
6239 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6240 else
6241 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6242 prev_stmt_info = vinfo_for_stmt (new_stmt);
6246 oprnds.release ();
6247 result_chain.release ();
6248 vec_oprnds.release ();
6250 return true;
6253 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6254 VECTOR_CST mask. No checks are made that the target platform supports the
6255 mask, so callers may wish to test can_vec_perm_p separately, or use
6256 vect_gen_perm_mask_checked. */
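/* For example, with two four-element input vectors a and b, the mask
   built from SEL = {0, 4, 1, 5} makes the VEC_PERM_EXPR produce the
   first two elements of each input interleaved: {a0, b0, a1, b1}.  */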
6258 tree
6259 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6261 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6262 int i, nunits;
6264 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6266 mask_elt_type = lang_hooks.types.type_for_mode
6267 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6268 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6270 mask_elts = XALLOCAVEC (tree, nunits);
6271 for (i = nunits - 1; i >= 0; i--)
6272 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6273 mask_vec = build_vector (mask_type, mask_elts);
6275 return mask_vec;
6278 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6279 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6281 tree
6282 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6284 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6285 return vect_gen_perm_mask_any (vectype, sel);
6288 /* Given vector variables X and Y that were generated for the scalar
6289 STMT, generate instructions to permute the vector elements of X and Y
6290 using permutation mask MASK_VEC, insert them at *GSI, and return the
6291 permuted vector variable. */
6293 static tree
6294 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6295 gimple_stmt_iterator *gsi)
6297 tree vectype = TREE_TYPE (x);
6298 tree perm_dest, data_ref;
6299 gimple *perm_stmt;
6301 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6302 data_ref = make_ssa_name (perm_dest);
6304 /* Generate the permute statement. */
6305 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6306 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6308 return data_ref;
6311 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6312 inserting them on the loop's preheader edge. Returns true if we
6313 were successful in doing so (and thus STMT can then be moved),
6314 otherwise returns false. */
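/* Only a single level of hoisting is attempted: a feeding definition
   inside LOOP may itself only use values defined outside of LOOP (and
   may not be a PHI), otherwise we give up.  */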
6316 static bool
6317 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6319 ssa_op_iter i;
6320 tree op;
6321 bool any = false;
6323 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6325 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6326 if (!gimple_nop_p (def_stmt)
6327 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6329 /* Make sure we don't need to recurse. While we could do
6330 so in simple cases, when there are more complex use webs
6331 we don't have an easy way to preserve stmt order to fulfil
6332 dependencies within them. */
6333 tree op2;
6334 ssa_op_iter i2;
6335 if (gimple_code (def_stmt) == GIMPLE_PHI)
6336 return false;
6337 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6339 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6340 if (!gimple_nop_p (def_stmt2)
6341 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6342 return false;
6344 any = true;
6348 if (!any)
6349 return true;
6351 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6353 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6354 if (!gimple_nop_p (def_stmt)
6355 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6357 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6358 gsi_remove (&gsi, false);
6359 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6363 return true;
6366 /* vectorizable_load.
6368 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6369 can be vectorized.
6370 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6371 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6372 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6374 static bool
6375 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6376 slp_tree slp_node, slp_instance slp_node_instance)
6378 tree scalar_dest;
6379 tree vec_dest = NULL;
6380 tree data_ref = NULL;
6381 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6382 stmt_vec_info prev_stmt_info;
6383 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6384 struct loop *loop = NULL;
6385 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6386 bool nested_in_vect_loop = false;
6387 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6388 tree elem_type;
6389 tree new_temp;
6390 machine_mode mode;
6391 gimple *new_stmt = NULL;
6392 tree dummy;
6393 enum dr_alignment_support alignment_support_scheme;
6394 tree dataref_ptr = NULL_TREE;
6395 tree dataref_offset = NULL_TREE;
6396 gimple *ptr_incr = NULL;
6397 int ncopies;
6398 int i, j, group_size = -1, group_gap_adj;
6399 tree msq = NULL_TREE, lsq;
6400 tree offset = NULL_TREE;
6401 tree byte_offset = NULL_TREE;
6402 tree realignment_token = NULL_TREE;
6403 gphi *phi = NULL;
6404 vec<tree> dr_chain = vNULL;
6405 bool grouped_load = false;
6406 gimple *first_stmt;
6407 gimple *first_stmt_for_drptr = NULL;
6408 bool inv_p;
6409 bool compute_in_loop = false;
6410 struct loop *at_loop;
6411 int vec_num;
6412 bool slp = (slp_node != NULL);
6413 bool slp_perm = false;
6414 enum tree_code code;
6415 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6416 int vf;
6417 tree aggr_type;
6418 gather_scatter_info gs_info;
6419 vec_info *vinfo = stmt_info->vinfo;
6421 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6422 return false;
6424 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6425 && ! vec_stmt)
6426 return false;
6428 /* Is vectorizable load? */
6429 if (!is_gimple_assign (stmt))
6430 return false;
6432 scalar_dest = gimple_assign_lhs (stmt);
6433 if (TREE_CODE (scalar_dest) != SSA_NAME)
6434 return false;
6436 code = gimple_assign_rhs_code (stmt);
6437 if (code != ARRAY_REF
6438 && code != BIT_FIELD_REF
6439 && code != INDIRECT_REF
6440 && code != COMPONENT_REF
6441 && code != IMAGPART_EXPR
6442 && code != REALPART_EXPR
6443 && code != MEM_REF
6444 && TREE_CODE_CLASS (code) != tcc_declaration)
6445 return false;
6447 if (!STMT_VINFO_DATA_REF (stmt_info))
6448 return false;
6450 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6451 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6453 if (loop_vinfo)
6455 loop = LOOP_VINFO_LOOP (loop_vinfo);
6456 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6457 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6459 else
6460 vf = 1;
6462 /* Multiple types in SLP are handled by creating the appropriate number of
6463 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6464 case of SLP. */
6465 if (slp)
6466 ncopies = 1;
6467 else
6468 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6470 gcc_assert (ncopies >= 1);
6472 /* FORNOW. This restriction should be relaxed. */
6473 if (nested_in_vect_loop && ncopies > 1)
6475 if (dump_enabled_p ())
6476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6477 "multiple types in nested loop.\n");
6478 return false;
6481 /* Invalidate assumptions made by dependence analysis when vectorization
6482 on the unrolled body effectively re-orders stmts. */
6483 if (ncopies > 1
6484 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6485 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6486 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6490 "cannot perform implicit CSE when unrolling "
6491 "with negative dependence distance\n");
6492 return false;
6495 elem_type = TREE_TYPE (vectype);
6496 mode = TYPE_MODE (vectype);
6498 /* FORNOW. In some cases we can vectorize even if the data type is not
6499 supported (e.g. data copies). */
6500 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6502 if (dump_enabled_p ())
6503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6504 "Aligned load, but unsupported type.\n");
6505 return false;
6508 /* Check if the load is a part of an interleaving chain. */
6509 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6511 grouped_load = true;
6512 /* FORNOW */
6513 gcc_assert (!nested_in_vect_loop);
6514 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6516 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6517 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6519 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6520 slp_perm = true;
6522 /* ??? The following is overly pessimistic (as is the loop
6523 case above) when we can statically determine that the excess
6524 elements loaded are within the bounds of a decl that is accessed.
6525 Likewise, for BB vectorization, using masked loads is a possibility. */
6526 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6529 "BB vectorization with gaps at the end of a load "
6530 "is not supported\n");
6531 return false;
6534 /* Invalidate assumptions made by dependence analysis when vectorization
6535 on the unrolled body effectively re-orders stmts. */
6536 if (!PURE_SLP_STMT (stmt_info)
6537 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6538 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6539 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6541 if (dump_enabled_p ())
6542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6543 "cannot perform implicit CSE when performing "
6544 "group loads with negative dependence distance\n");
6545 return false;
6548 /* Similarly when the stmt is a load that is both part of a SLP
6549 instance and a loop vectorized stmt via the same-dr mechanism
6550 we have to give up. */
6551 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6552 && (STMT_SLP_TYPE (stmt_info)
6553 != STMT_SLP_TYPE (vinfo_for_stmt
6554 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6556 if (dump_enabled_p ())
6557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6558 "conflicting SLP types for CSEd load\n");
6559 return false;
6563 vect_memory_access_type memory_access_type;
6564 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6565 &memory_access_type, &gs_info))
6566 return false;
6568 if (!vec_stmt) /* transformation not required. */
6570 if (!slp)
6571 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6572 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6573 /* The SLP costs are calculated during SLP analysis. */
6574 if (!PURE_SLP_STMT (stmt_info))
6575 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6576 NULL, NULL, NULL);
6577 return true;
6580 if (!slp)
6581 gcc_assert (memory_access_type
6582 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6584 if (dump_enabled_p ())
6585 dump_printf_loc (MSG_NOTE, vect_location,
6586 "transform load. ncopies = %d\n", ncopies);
6588 /** Transform. **/
6590 ensure_base_align (stmt_info, dr);
6592 if (memory_access_type == VMAT_GATHER_SCATTER)
6594 tree vec_oprnd0 = NULL_TREE, op;
6595 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6596 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6597 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6598 edge pe = loop_preheader_edge (loop);
6599 gimple_seq seq;
6600 basic_block new_bb;
6601 enum { NARROW, NONE, WIDEN } modifier;
6602 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
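/* As for scatter stores, the offset vector and the data vector may
   differ in element count: for WIDEN one offset vector serves two
   copies, with its upper half permuted into place on the odd copies,
   while for NARROW two gather results are combined into one vector of
   VECTYPE by the permutation, doubling NCOPIES.  */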
6604 if (nunits == gather_off_nunits)
6605 modifier = NONE;
6606 else if (nunits == gather_off_nunits / 2)
6608 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6609 modifier = WIDEN;
6611 for (i = 0; i < gather_off_nunits; ++i)
6612 sel[i] = i | nunits;
6614 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6616 else if (nunits == gather_off_nunits * 2)
6618 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6619 modifier = NARROW;
6621 for (i = 0; i < nunits; ++i)
6622 sel[i] = i < gather_off_nunits
6623 ? i : i + nunits - gather_off_nunits;
6625 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6626 ncopies *= 2;
6628 else
6629 gcc_unreachable ();
6631 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6632 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6633 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6634 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6635 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6636 scaletype = TREE_VALUE (arglist);
6637 gcc_checking_assert (types_compatible_p (srctype, rettype));
6639 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6641 ptr = fold_convert (ptrtype, gs_info.base);
6642 if (!is_gimple_min_invariant (ptr))
6644 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6645 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6646 gcc_assert (!new_bb);
6649 /* Currently we support only unconditional gather loads,
6650 so mask should be all ones. */
6651 if (TREE_CODE (masktype) == INTEGER_TYPE)
6652 mask = build_int_cst (masktype, -1);
6653 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6655 mask = build_int_cst (TREE_TYPE (masktype), -1);
6656 mask = build_vector_from_val (masktype, mask);
6657 mask = vect_init_vector (stmt, mask, masktype, NULL);
6659 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6661 REAL_VALUE_TYPE r;
6662 long tmp[6];
6663 for (j = 0; j < 6; ++j)
6664 tmp[j] = -1;
6665 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6666 mask = build_real (TREE_TYPE (masktype), r);
6667 mask = build_vector_from_val (masktype, mask);
6668 mask = vect_init_vector (stmt, mask, masktype, NULL);
6670 else
6671 gcc_unreachable ();
6673 scale = build_int_cst (scaletype, gs_info.scale);
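/* The MERGE operand supplies the values for lanes whose mask bit is
   clear; since the mask is all ones it only needs to have the right
   type, so an all-zero vector of the gather's return type is used.  */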
6675 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6676 merge = build_int_cst (TREE_TYPE (rettype), 0);
6677 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6679 REAL_VALUE_TYPE r;
6680 long tmp[6];
6681 for (j = 0; j < 6; ++j)
6682 tmp[j] = 0;
6683 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6684 merge = build_real (TREE_TYPE (rettype), r);
6686 else
6687 gcc_unreachable ();
6688 merge = build_vector_from_val (rettype, merge);
6689 merge = vect_init_vector (stmt, merge, rettype, NULL);
6691 prev_stmt_info = NULL;
6692 for (j = 0; j < ncopies; ++j)
6694 if (modifier == WIDEN && (j & 1))
6695 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6696 perm_mask, stmt, gsi);
6697 else if (j == 0)
6698 op = vec_oprnd0
6699 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6700 else
6701 op = vec_oprnd0
6702 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6704 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6706 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6707 == TYPE_VECTOR_SUBPARTS (idxtype));
6708 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6709 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6710 new_stmt
6711 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6713 op = var;
6716 new_stmt
6717 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6719 if (!useless_type_conversion_p (vectype, rettype))
6721 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6722 == TYPE_VECTOR_SUBPARTS (rettype));
6723 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6724 gimple_call_set_lhs (new_stmt, op);
6725 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6726 var = make_ssa_name (vec_dest);
6727 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6728 new_stmt
6729 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6731 else
6733 var = make_ssa_name (vec_dest, new_stmt);
6734 gimple_call_set_lhs (new_stmt, var);
6737 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6739 if (modifier == NARROW)
6741 if ((j & 1) == 0)
6743 prev_res = var;
6744 continue;
6746 var = permute_vec_elements (prev_res, var,
6747 perm_mask, stmt, gsi);
6748 new_stmt = SSA_NAME_DEF_STMT (var);
6751 if (prev_stmt_info == NULL)
6752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6753 else
6754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6755 prev_stmt_info = vinfo_for_stmt (new_stmt);
6757 return true;
6760 if (memory_access_type == VMAT_ELEMENTWISE
6761 || memory_access_type == VMAT_STRIDED_SLP)
6763 gimple_stmt_iterator incr_gsi;
6764 bool insert_after;
6765 gimple *incr;
6766 tree offvar;
6767 tree ivstep;
6768 tree running_off;
6769 vec<constructor_elt, va_gc> *v = NULL;
6770 gimple_seq stmts = NULL;
6771 tree stride_base, stride_step, alias_off;
6773 gcc_assert (!nested_in_vect_loop);
6775 if (slp && grouped_load)
6776 first_dr = STMT_VINFO_DATA_REF
6777 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6778 else
6779 first_dr = dr;
6781 stride_base
6782 = fold_build_pointer_plus
6783 (DR_BASE_ADDRESS (first_dr),
6784 size_binop (PLUS_EXPR,
6785 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6786 convert_to_ptrofftype (DR_INIT (first_dr))));
6787 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6789 /* For a load with loop-invariant (but other than power-of-2)
6790 stride (i.e. not a grouped access) like so:
6792 for (i = 0; i < n; i += stride)
6793 ... = array[i];
6795 we generate a new induction variable and new accesses to
6796 form a new vector (or vectors, depending on ncopies):
6798 for (j = 0; ; j += VF*stride)
6799 tmp1 = array[j];
6800 tmp2 = array[j + stride];
6802 vectemp = {tmp1, tmp2, ...}
6805 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6806 build_int_cst (TREE_TYPE (stride_step), vf));
6808 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6810 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6811 loop, &incr_gsi, insert_after,
6812 &offvar, NULL);
6813 incr = gsi_stmt (incr_gsi);
6814 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6816 stride_step = force_gimple_operand (unshare_expr (stride_step),
6817 &stmts, true, NULL_TREE);
6818 if (stmts)
6819 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6821 prev_stmt_info = NULL;
6822 running_off = offvar;
6823 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
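/* As for strided stores, NLOADS is the number of loads per vector
   stmt, LNEL the number of group elements each load covers and LTYPE
   the type loaded; VMAT_STRIDED_SLP adjusts them below so that whole
   group rows are loaded with a single (sub-)vector load where
   possible.  */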
6824 int nloads = nunits;
6825 int lnel = 1;
6826 tree ltype = TREE_TYPE (vectype);
6827 auto_vec<tree> dr_chain;
6828 if (memory_access_type == VMAT_STRIDED_SLP)
6830 nloads = nunits / group_size;
6831 if (group_size < nunits)
6833 lnel = group_size;
6834 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6836 else
6838 lnel = nunits;
6839 ltype = vectype;
6841 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6843 if (slp)
6845 /* For SLP permutation support we need to load the whole group,
6846 not only the number of vector stmts the permutation result
6847 fits in. */
6848 if (slp_perm)
6850 ncopies = (group_size * vf + nunits - 1) / nunits;
6851 dr_chain.create (ncopies);
6853 else
6854 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6856 int group_el = 0;
6857 unsigned HOST_WIDE_INT
6858 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6859 for (j = 0; j < ncopies; j++)
6861 if (nloads > 1)
6862 vec_alloc (v, nloads);
6863 for (i = 0; i < nloads; i++)
6865 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6866 group_el * elsz);
6867 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6868 build2 (MEM_REF, ltype,
6869 running_off, this_off));
6870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6871 if (nloads > 1)
6872 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
6873 gimple_assign_lhs (new_stmt));
6875 group_el += lnel;
6876 if (! slp
6877 || group_el == group_size)
6879 tree newoff = copy_ssa_name (running_off);
6880 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6881 running_off, stride_step);
6882 vect_finish_stmt_generation (stmt, incr, gsi);
6884 running_off = newoff;
6885 group_el = 0;
6888 if (nloads > 1)
6890 tree vec_inv = build_constructor (vectype, v);
6891 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6892 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6895 if (slp)
6897 if (slp_perm)
6898 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6899 else
6900 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6902 else
6904 if (j == 0)
6905 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6906 else
6907 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6908 prev_stmt_info = vinfo_for_stmt (new_stmt);
6911 if (slp_perm)
6912 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6913 slp_node_instance, false);
6914 return true;
6917 if (grouped_load)
6919 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6920 /* For SLP vectorization we directly vectorize a subchain
6921 without permutation. */
6922 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6923 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6924 /* For BB vectorization always use the first stmt to base
6925 the data ref pointer on. */
6926 if (bb_vinfo)
6927 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6929 /* Check if the chain of loads is already vectorized. */
6930 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6931 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6932 ??? But we can only do so if there is exactly one vector stmt
6933 as we have no way to get at the rest. Leave the CSE
6934 opportunity alone.
6935 ??? With the group load eventually participating
6936 in multiple different permutations (having multiple
6937 slp nodes which refer to the same group) the CSE
6938 is even wrong code. See PR56270. */
6939 && !slp)
6941 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6942 return true;
6944 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6945 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6946 group_gap_adj = 0;
6948 /* VEC_NUM is the number of vect stmts to be created for this group. */
6949 if (slp)
6951 grouped_load = false;
6952 /* For SLP permutation support we need to load the whole group,
6953 not only the number of vector stmts the permutation result
6954 fits in. */
6955 if (slp_perm)
6956 vec_num = (group_size * vf + nunits - 1) / nunits;
6957 else
6958 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6959 group_gap_adj = vf * group_size - nunits * vec_num;
6961 else
6962 vec_num = group_size;
6964 else
6966 first_stmt = stmt;
6967 first_dr = dr;
6968 group_size = vec_num = 1;
6969 group_gap_adj = 0;
6972 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6973 gcc_assert (alignment_support_scheme);
6974 /* Targets with load-lane instructions must not require explicit
6975 realignment. */
6976 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6977 || alignment_support_scheme == dr_aligned
6978 || alignment_support_scheme == dr_unaligned_supported);
6980 /* In case the vectorization factor (VF) is bigger than the number
6981 of elements that we can fit in a vectype (nunits), we have to generate
6982 more than one vector stmt, i.e. we need to "unroll" the
6983 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6984 from one copy of the vector stmt to the next, in the field
6985 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6986 stages to find the correct vector defs to be used when vectorizing
6987 stmts that use the defs of the current stmt. The example below
6988 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6989 need to create 4 vectorized stmts):
6991 before vectorization:
6992 RELATED_STMT VEC_STMT
6993 S1: x = memref - -
6994 S2: z = x + 1 - -
6996 step 1: vectorize stmt S1:
6997 We first create the vector stmt VS1_0, and, as usual, record a
6998 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6999 Next, we create the vector stmt VS1_1, and record a pointer to
7000 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7001 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7002 stmts and pointers:
7003 RELATED_STMT VEC_STMT
7004 VS1_0: vx0 = memref0 VS1_1 -
7005 VS1_1: vx1 = memref1 VS1_2 -
7006 VS1_2: vx2 = memref2 VS1_3 -
7007 VS1_3: vx3 = memref3 - -
7008 S1: x = load - VS1_0
7009 S2: z = x + 1 - -
7011 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7012 information we recorded in the RELATED_STMT field is used to vectorize
7013 stmt S2. */
7015 /* In case of interleaving (non-unit grouped access):
7017 S1: x2 = &base + 2
7018 S2: x0 = &base
7019 S3: x1 = &base + 1
7020 S4: x3 = &base + 3
7022 Vectorized loads are created in the order of memory accesses
7023 starting from the access of the first stmt of the chain:
7025 VS1: vx0 = &base
7026 VS2: vx1 = &base + vec_size*1
7027 VS3: vx3 = &base + vec_size*2
7028 VS4: vx4 = &base + vec_size*3
7030 Then permutation statements are generated:
7032 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7033 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7036 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7037 (the order of the data-refs in the output of vect_permute_load_chain
7038 corresponds to the order of scalar stmts in the interleaving chain - see
7039 the documentation of vect_permute_load_chain()).
7040 The generation of permutation stmts and recording them in
7041 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7043 In case of both multiple types and interleaving, the vector loads and
7044 permutation stmts above are created for every copy. The result vector
7045 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7046 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7048 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7049 on a target that supports unaligned accesses (dr_unaligned_supported)
7050 we generate the following code:
7051 p = initial_addr;
7052 indx = 0;
7053 loop {
7054 p = p + indx * vectype_size;
7055 vec_dest = *(p);
7056 indx = indx + 1;
7059 Otherwise, the data reference is potentially unaligned on a target that
7060 does not support unaligned accesses (dr_explicit_realign_optimized) -
7061 then generate the following code, in which the data in each iteration is
7062 obtained by two vector loads, one from the previous iteration, and one
7063 from the current iteration:
7064 p1 = initial_addr;
7065 msq_init = *(floor(p1))
7066 p2 = initial_addr + VS - 1;
7067 realignment_token = call target_builtin;
7068 indx = 0;
7069 loop {
7070 p2 = p2 + indx * vectype_size
7071 lsq = *(floor(p2))
7072 vec_dest = realign_load (msq, lsq, realignment_token)
7073 indx = indx + 1;
7074 msq = lsq;
7075 } */
7077 /* If the misalignment remains the same throughout the execution of the
7078 loop, we can create the init_addr and permutation mask at the loop
7079 preheader. Otherwise, it needs to be created inside the loop.
7080 This can only occur when vectorizing memory accesses in the inner-loop
7081 nested within an outer-loop that is being vectorized. */
7083 if (nested_in_vect_loop
7084 && (TREE_INT_CST_LOW (DR_STEP (dr))
7085 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7087 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7088 compute_in_loop = true;
7091 if ((alignment_support_scheme == dr_explicit_realign_optimized
7092 || alignment_support_scheme == dr_explicit_realign)
7093 && !compute_in_loop)
7095 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7096 alignment_support_scheme, NULL_TREE,
7097 &at_loop);
7098 if (alignment_support_scheme == dr_explicit_realign_optimized)
7100 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7101 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7102 size_one_node);
7105 else
7106 at_loop = loop;
7108 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7109 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7111 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7112 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7113 else
7114 aggr_type = vectype;
7116 prev_stmt_info = NULL;
7117 for (j = 0; j < ncopies; j++)
7119 /* 1. Create the vector or array pointer update chain. */
7120 if (j == 0)
7122 bool simd_lane_access_p
7123 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7124 if (simd_lane_access_p
7125 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7126 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7127 && integer_zerop (DR_OFFSET (first_dr))
7128 && integer_zerop (DR_INIT (first_dr))
7129 && alias_sets_conflict_p (get_alias_set (aggr_type),
7130 get_alias_set (DR_REF (first_dr)))
7131 && (alignment_support_scheme == dr_aligned
7132 || alignment_support_scheme == dr_unaligned_supported))
7134 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7135 dataref_offset = build_int_cst (reference_alias_ptr_type
7136 (DR_REF (first_dr)), 0);
7137 inv_p = false;
7139 else if (first_stmt_for_drptr
7140 && first_stmt != first_stmt_for_drptr)
7142 dataref_ptr
7143 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7144 at_loop, offset, &dummy, gsi,
7145 &ptr_incr, simd_lane_access_p,
7146 &inv_p, byte_offset);
7147 /* Adjust the pointer by the difference to first_stmt. */
7148 data_reference_p ptrdr
7149 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7150 tree diff = fold_convert (sizetype,
7151 size_binop (MINUS_EXPR,
7152 DR_INIT (first_dr),
7153 DR_INIT (ptrdr)));
7154 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7155 stmt, diff);
7157 else
7158 dataref_ptr
7159 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7160 offset, &dummy, gsi, &ptr_incr,
7161 simd_lane_access_p, &inv_p,
7162 byte_offset);
7164 else if (dataref_offset)
7165 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7166 TYPE_SIZE_UNIT (aggr_type));
7167 else
7168 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7169 TYPE_SIZE_UNIT (aggr_type));
7171 if (grouped_load || slp_perm)
7172 dr_chain.create (vec_num);
7174 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7176 tree vec_array;
7178 vec_array = create_vector_array (vectype, vec_num);
7180 /* Emit:
7181 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7182 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7183 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7184 gimple_call_set_lhs (new_stmt, vec_array);
7185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7187 /* Extract each vector into an SSA_NAME. */
7188 for (i = 0; i < vec_num; i++)
7190 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7191 vec_array, i);
7192 dr_chain.quick_push (new_temp);
7195 /* Record the mapping between SSA_NAMEs and statements. */
7196 vect_record_grouped_load_vectors (stmt, dr_chain);
7198 else
7200 for (i = 0; i < vec_num; i++)
7202 if (i > 0)
7203 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7204 stmt, NULL_TREE);
7206 /* 2. Create the vector-load in the loop. */
7207 switch (alignment_support_scheme)
7209 case dr_aligned:
7210 case dr_unaligned_supported:
7212 unsigned int align, misalign;
7214 data_ref
7215 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7216 dataref_offset
7217 ? dataref_offset
7218 : build_int_cst (reference_alias_ptr_type
7219 (DR_REF (first_dr)), 0));
7220 align = TYPE_ALIGN_UNIT (vectype);
7221 if (alignment_support_scheme == dr_aligned)
7223 gcc_assert (aligned_access_p (first_dr));
7224 misalign = 0;
7226 else if (DR_MISALIGNMENT (first_dr) == -1)
7228 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7229 align = TYPE_ALIGN_UNIT (elem_type);
7230 else
7231 align = (get_object_alignment (DR_REF (first_dr))
7232 / BITS_PER_UNIT);
7233 misalign = 0;
7234 TREE_TYPE (data_ref)
7235 = build_aligned_type (TREE_TYPE (data_ref),
7236 align * BITS_PER_UNIT);
7238 else
7240 TREE_TYPE (data_ref)
7241 = build_aligned_type (TREE_TYPE (data_ref),
7242 TYPE_ALIGN (elem_type));
7243 misalign = DR_MISALIGNMENT (first_dr);
7245 if (dataref_offset == NULL_TREE
7246 && TREE_CODE (dataref_ptr) == SSA_NAME)
7247 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7248 align, misalign);
7249 break;
7251 case dr_explicit_realign:
7253 tree ptr, bump;
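/* dr_explicit_realign loads both aligned vectors in the loop body:
   MSQ from the data-ref address rounded down to a vector boundary and
   LSQ from the last byte of the access rounded down likewise; the two
   are combined with REALIGN_LOAD further below.  In the _optimized
   variant MSQ is instead carried over from the previous iteration via
   a PHI set up in the preheader.  */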
7255 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7257 if (compute_in_loop)
7258 msq = vect_setup_realignment (first_stmt, gsi,
7259 &realignment_token,
7260 dr_explicit_realign,
7261 dataref_ptr, NULL);
7263 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7264 ptr = copy_ssa_name (dataref_ptr);
7265 else
7266 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7267 new_stmt = gimple_build_assign
7268 (ptr, BIT_AND_EXPR, dataref_ptr,
7269 build_int_cst
7270 (TREE_TYPE (dataref_ptr),
7271 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7272 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7273 data_ref
7274 = build2 (MEM_REF, vectype, ptr,
7275 build_int_cst (reference_alias_ptr_type
7276 (DR_REF (first_dr)), 0));
7277 vec_dest = vect_create_destination_var (scalar_dest,
7278 vectype);
7279 new_stmt = gimple_build_assign (vec_dest, data_ref);
7280 new_temp = make_ssa_name (vec_dest, new_stmt);
7281 gimple_assign_set_lhs (new_stmt, new_temp);
7282 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7283 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7284 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7285 msq = new_temp;
7287 bump = size_binop (MULT_EXPR, vs,
7288 TYPE_SIZE_UNIT (elem_type));
7289 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7290 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7291 new_stmt = gimple_build_assign
7292 (NULL_TREE, BIT_AND_EXPR, ptr,
7293 build_int_cst
7294 (TREE_TYPE (ptr),
7295 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7296 ptr = copy_ssa_name (ptr, new_stmt);
7297 gimple_assign_set_lhs (new_stmt, ptr);
7298 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7299 data_ref
7300 = build2 (MEM_REF, vectype, ptr,
7301 build_int_cst (reference_alias_ptr_type
7302 (DR_REF (first_dr)), 0));
7303 break;
7305 case dr_explicit_realign_optimized:
7306 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7307 new_temp = copy_ssa_name (dataref_ptr);
7308 else
7309 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7310 new_stmt = gimple_build_assign
7311 (new_temp, BIT_AND_EXPR, dataref_ptr,
7312 build_int_cst
7313 (TREE_TYPE (dataref_ptr),
7314 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7315 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7316 data_ref
7317 = build2 (MEM_REF, vectype, new_temp,
7318 build_int_cst (reference_alias_ptr_type
7319 (DR_REF (first_dr)), 0));
7320 break;
7321 default:
7322 gcc_unreachable ();
7324 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7325 new_stmt = gimple_build_assign (vec_dest, data_ref);
7326 new_temp = make_ssa_name (vec_dest, new_stmt);
7327 gimple_assign_set_lhs (new_stmt, new_temp);
7328 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7330 /* 3. Handle explicit realignment if necessary/supported.
7331 Create in loop:
7332 vec_dest = realign_load (msq, lsq, realignment_token) */
7333 if (alignment_support_scheme == dr_explicit_realign_optimized
7334 || alignment_support_scheme == dr_explicit_realign)
7336 lsq = gimple_assign_lhs (new_stmt);
7337 if (!realignment_token)
7338 realignment_token = dataref_ptr;
7339 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7340 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7341 msq, lsq, realignment_token);
7342 new_temp = make_ssa_name (vec_dest, new_stmt);
7343 gimple_assign_set_lhs (new_stmt, new_temp);
7344 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7346 if (alignment_support_scheme == dr_explicit_realign_optimized)
7348 gcc_assert (phi);
7349 if (i == vec_num - 1 && j == ncopies - 1)
7350 add_phi_arg (phi, lsq,
7351 loop_latch_edge (containing_loop),
7352 UNKNOWN_LOCATION);
7353 msq = lsq;
7357 /* 4. Handle invariant-load. */
7358 if (inv_p && !bb_vinfo)
7360 gcc_assert (!grouped_load);
7361 /* If we have versioned for aliasing or the loop doesn't
7362 have any data dependencies that would preclude this,
7363 then we are sure this is a loop invariant load and
7364 thus we can insert it on the preheader edge. */
7365 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7366 && !nested_in_vect_loop
7367 && hoist_defs_of_uses (stmt, loop))
7369 if (dump_enabled_p ())
7371 dump_printf_loc (MSG_NOTE, vect_location,
7372 "hoisting out of the vectorized "
7373 "loop: ");
7374 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7376 tree tem = copy_ssa_name (scalar_dest);
7377 gsi_insert_on_edge_immediate
7378 (loop_preheader_edge (loop),
7379 gimple_build_assign (tem,
7380 unshare_expr
7381 (gimple_assign_rhs1 (stmt))));
7382 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7383 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7384 set_vinfo_for_stmt (new_stmt,
7385 new_stmt_vec_info (new_stmt, vinfo));
7387 else
7389 gimple_stmt_iterator gsi2 = *gsi;
7390 gsi_next (&gsi2);
7391 new_temp = vect_init_vector (stmt, scalar_dest,
7392 vectype, &gsi2);
7393 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7397 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7399 tree perm_mask = perm_mask_for_reverse (vectype);
7400 new_temp = permute_vec_elements (new_temp, new_temp,
7401 perm_mask, stmt, gsi);
7402 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7405 /* Collect vector loads and later create their permutation in
7406 vect_transform_grouped_load (). */
7407 if (grouped_load || slp_perm)
7408 dr_chain.quick_push (new_temp);
7410 /* Store vector loads in the corresponding SLP_NODE. */
7411 if (slp && !slp_perm)
7412 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7414 /* Bump the vector pointer to account for a gap or for excess
7415 elements loaded for a permuted SLP load. */
7416 if (group_gap_adj != 0)
7418 bool ovf;
7419 tree bump
7420 = wide_int_to_tree (sizetype,
7421 wi::smul (TYPE_SIZE_UNIT (elem_type),
7422 group_gap_adj, &ovf));
7423 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7424 stmt, bump);
7428 if (slp && !slp_perm)
7429 continue;
7431 if (slp_perm)
7433 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7434 slp_node_instance, false))
7436 dr_chain.release ();
7437 return false;
7440 else
7442 if (grouped_load)
7444 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7445 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7446 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7448 else
7450 if (j == 0)
7451 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7452 else
7453 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7454 prev_stmt_info = vinfo_for_stmt (new_stmt);
7457 dr_chain.release ();
7460 return true;
7463 /* Function vect_is_simple_cond.
7465 Input:
7466 LOOP - the loop that is being vectorized.
7467 COND - Condition that is checked for simple use.
7469 Output:
7470 *COMP_VECTYPE - the vector type for the comparison.
7472 Returns whether a COND can be vectorized. Checks whether
7473 condition operands are supportable using vect_is_simple_use. */
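/* Editorial example (a sketch, not from the original sources): for
   "x_5 = a_1 < b_2 ? c_3 : d_4;" the COND checked here is "a_1 < b_2";
   it is simple because both operands are SSA names (or invariants) that
   vect_is_simple_use accepts.  A bare boolean SSA name used as a mask,
   e.g. "x_5 = m_1 ? c_3 : d_4;", is accepted as well.  The SSA names are
   hypothetical.  */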
7475 static bool
7476 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7478 tree lhs, rhs;
7479 enum vect_def_type dt;
7480 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7482 /* Mask case. */
7483 if (TREE_CODE (cond) == SSA_NAME
7484 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7486 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7487 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7488 &dt, comp_vectype)
7489 || !*comp_vectype
7490 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7491 return false;
7492 return true;
7495 if (!COMPARISON_CLASS_P (cond))
7496 return false;
7498 lhs = TREE_OPERAND (cond, 0);
7499 rhs = TREE_OPERAND (cond, 1);
7501 if (TREE_CODE (lhs) == SSA_NAME)
7503 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7504 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7505 return false;
7507 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7508 && TREE_CODE (lhs) != FIXED_CST)
7509 return false;
7511 if (TREE_CODE (rhs) == SSA_NAME)
7513 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7514 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7515 return false;
7517 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7518 && TREE_CODE (rhs) != FIXED_CST)
7519 return false;
7521 if (vectype1 && vectype2
7522 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7523 return false;
7525 *comp_vectype = vectype1 ? vectype1 : vectype2;
7526 return true;
7529 /* vectorizable_condition.
7531 Check if STMT is a conditional modify expression that can be vectorized.
7532 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7533 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7534 at GSI.
7536 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7537 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7538 the else clause if it is 2).
7540 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
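/* Editorial sketch (not from the original sources): a scalar statement
   such as
     x_5 = a_1 < b_2 ? c_3 : d_4;
   is vectorized below into roughly
     vcmp_6 = va_1 < vb_2;
     vx_7 = VEC_COND_EXPR <vcmp_6, vc_3, vd_4>;
   where the v*_N names are hypothetical vector SSA names.  */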
7542 bool
7543 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7544 gimple **vec_stmt, tree reduc_def, int reduc_index,
7545 slp_tree slp_node)
7547 tree scalar_dest = NULL_TREE;
7548 tree vec_dest = NULL_TREE;
7549 tree cond_expr, then_clause, else_clause;
7550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7551 tree comp_vectype = NULL_TREE;
7552 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7553 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7554 tree vec_compare;
7555 tree new_temp;
7556 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7557 enum vect_def_type dt, dts[4];
7558 int ncopies;
7559 enum tree_code code;
7560 stmt_vec_info prev_stmt_info = NULL;
7561 int i, j;
7562 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7563 vec<tree> vec_oprnds0 = vNULL;
7564 vec<tree> vec_oprnds1 = vNULL;
7565 vec<tree> vec_oprnds2 = vNULL;
7566 vec<tree> vec_oprnds3 = vNULL;
7567 tree vec_cmp_type;
7568 bool masked = false;
7570 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7571 return false;
7573 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7575 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7576 return false;
7578 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7579 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7580 && reduc_def))
7581 return false;
7583 /* FORNOW: not yet supported. */
7584 if (STMT_VINFO_LIVE_P (stmt_info))
7586 if (dump_enabled_p ())
7587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7588 "value used after loop.\n");
7589 return false;
7593 /* Is this a vectorizable conditional operation? */
7594 if (!is_gimple_assign (stmt))
7595 return false;
7597 code = gimple_assign_rhs_code (stmt);
7599 if (code != COND_EXPR)
7600 return false;
7602 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7603 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7604 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7606 if (slp_node)
7607 ncopies = 1;
7608 else
7609 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7611 gcc_assert (ncopies >= 1);
7612 if (reduc_index && ncopies > 1)
7613 return false; /* FORNOW */
7615 cond_expr = gimple_assign_rhs1 (stmt);
7616 then_clause = gimple_assign_rhs2 (stmt);
7617 else_clause = gimple_assign_rhs3 (stmt);
7619 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7620 || !comp_vectype)
7621 return false;
7623 gimple *def_stmt;
7624 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7625 &vectype1))
7626 return false;
7627 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7628 &vectype2))
7629 return false;
7631 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7632 return false;
7634 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7635 return false;
7637 masked = !COMPARISON_CLASS_P (cond_expr);
7638 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7640 if (vec_cmp_type == NULL_TREE)
7641 return false;
7643 if (!vec_stmt)
7645 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7646 return expand_vec_cond_expr_p (vectype, comp_vectype);
7649 /* Transform. */
7651 if (!slp_node)
7653 vec_oprnds0.create (1);
7654 vec_oprnds1.create (1);
7655 vec_oprnds2.create (1);
7656 vec_oprnds3.create (1);
7659 /* Handle def. */
7660 scalar_dest = gimple_assign_lhs (stmt);
7661 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7663 /* Handle cond expr. */
7664 for (j = 0; j < ncopies; j++)
7666 gassign *new_stmt = NULL;
7667 if (j == 0)
7669 if (slp_node)
7671 auto_vec<tree, 4> ops;
7672 auto_vec<vec<tree>, 4> vec_defs;
7674 if (masked)
7675 ops.safe_push (cond_expr);
7676 else
7678 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7679 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7681 ops.safe_push (then_clause);
7682 ops.safe_push (else_clause);
7683 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7684 vec_oprnds3 = vec_defs.pop ();
7685 vec_oprnds2 = vec_defs.pop ();
7686 if (!masked)
7687 vec_oprnds1 = vec_defs.pop ();
7688 vec_oprnds0 = vec_defs.pop ();
7690 else
7692 gimple *gtemp;
7693 if (masked)
7695 vec_cond_lhs
7696 = vect_get_vec_def_for_operand (cond_expr, stmt,
7697 comp_vectype);
7698 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7699 &gtemp, &dts[0]);
7701 else
7703 vec_cond_lhs =
7704 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7705 stmt, comp_vectype);
7706 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7707 loop_vinfo, &gtemp, &dts[0]);
7709 vec_cond_rhs =
7710 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7711 stmt, comp_vectype);
7712 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7713 loop_vinfo, &gtemp, &dts[1]);
7715 if (reduc_index == 1)
7716 vec_then_clause = reduc_def;
7717 else
7719 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7720 stmt);
7721 vect_is_simple_use (then_clause, loop_vinfo,
7722 &gtemp, &dts[2]);
7724 if (reduc_index == 2)
7725 vec_else_clause = reduc_def;
7726 else
7728 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7729 stmt);
7730 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7734 else
7736 vec_cond_lhs
7737 = vect_get_vec_def_for_stmt_copy (dts[0],
7738 vec_oprnds0.pop ());
7739 if (!masked)
7740 vec_cond_rhs
7741 = vect_get_vec_def_for_stmt_copy (dts[1],
7742 vec_oprnds1.pop ());
7744 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7745 vec_oprnds2.pop ());
7746 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7747 vec_oprnds3.pop ());
7750 if (!slp_node)
7752 vec_oprnds0.quick_push (vec_cond_lhs);
7753 if (!masked)
7754 vec_oprnds1.quick_push (vec_cond_rhs);
7755 vec_oprnds2.quick_push (vec_then_clause);
7756 vec_oprnds3.quick_push (vec_else_clause);
7759 /* Arguments are ready. Create the new vector stmt. */
7760 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7762 vec_then_clause = vec_oprnds2[i];
7763 vec_else_clause = vec_oprnds3[i];
7765 if (masked)
7766 vec_compare = vec_cond_lhs;
7767 else
7769 vec_cond_rhs = vec_oprnds1[i];
7770 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7771 vec_cond_lhs, vec_cond_rhs);
7773 new_temp = make_ssa_name (vec_dest);
7774 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7775 vec_compare, vec_then_clause,
7776 vec_else_clause);
7777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7778 if (slp_node)
7779 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7782 if (slp_node)
7783 continue;
7785 if (j == 0)
7786 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7787 else
7788 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7790 prev_stmt_info = vinfo_for_stmt (new_stmt);
7793 vec_oprnds0.release ();
7794 vec_oprnds1.release ();
7795 vec_oprnds2.release ();
7796 vec_oprnds3.release ();
7798 return true;
7801 /* vectorizable_comparison.
7803 Check if STMT is a comparison expression that can be vectorized.
7804 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7805 comparison, put it in VEC_STMT, and insert it at GSI.
7807 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
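/* Editorial sketch (not from the original sources): a scalar mask
   definition such as
     m_3 = a_1 < b_2;
   is vectorized below into a vector comparison
     vm_4 = va_1 < vb_2;
   whose result has a vector boolean (mask) type; the SSA names are
   hypothetical.  */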
7809 static bool
7810 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7811 gimple **vec_stmt, tree reduc_def,
7812 slp_tree slp_node)
7814 tree lhs, rhs1, rhs2;
7815 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7816 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7817 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7818 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7819 tree new_temp;
7820 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7821 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7822 unsigned nunits;
7823 int ncopies;
7824 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7825 stmt_vec_info prev_stmt_info = NULL;
7826 int i, j;
7827 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7828 vec<tree> vec_oprnds0 = vNULL;
7829 vec<tree> vec_oprnds1 = vNULL;
7830 gimple *def_stmt;
7831 tree mask_type;
7832 tree mask;
7834 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7835 return false;
7837 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7838 return false;
7840 mask_type = vectype;
7841 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7843 if (slp_node)
7844 ncopies = 1;
7845 else
7846 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7848 gcc_assert (ncopies >= 1);
7849 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7850 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7851 && reduc_def))
7852 return false;
7854 if (STMT_VINFO_LIVE_P (stmt_info))
7856 if (dump_enabled_p ())
7857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7858 "value used after loop.\n");
7859 return false;
7862 if (!is_gimple_assign (stmt))
7863 return false;
7865 code = gimple_assign_rhs_code (stmt);
7867 if (TREE_CODE_CLASS (code) != tcc_comparison)
7868 return false;
7870 rhs1 = gimple_assign_rhs1 (stmt);
7871 rhs2 = gimple_assign_rhs2 (stmt);
7873 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7874 &dts[0], &vectype1))
7875 return false;
7877 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7878 &dts[1], &vectype2))
7879 return false;
7881 if (vectype1 && vectype2
7882 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7883 return false;
7885 vectype = vectype1 ? vectype1 : vectype2;
7887 /* Invariant comparison. */
7888 if (!vectype)
7890 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7891 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7892 return false;
7894 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7895 return false;
7897 /* Can't compare mask and non-mask types. */
7898 if (vectype1 && vectype2
7899 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
7900 return false;
7902 /* Boolean values may have another representation in vectors
7903 and therefore we prefer bit operations over comparison for
7904 them (which also works for scalar masks). We store opcodes
7905 to use in bitop1 and bitop2. The statement is vectorized as
7906 BITOP2 (rhs1 BITOP1 rhs2) or
7907 rhs1 BITOP2 (BITOP1 rhs2)
7908 depending on the arity of bitop1 and bitop2. */
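  /* Editorial illustration (not part of the original comment): for
     one-bit boolean operands the mapping chosen below amounts to
        a >  b  ->  a & ~b
        a >= b  ->  a | ~b
        a <  b  ->  b & ~a
        a <= b  ->  b | ~a
        a == b  ->  ~(a ^ b)
        a != b  ->  a ^ b  */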
7909 if (VECTOR_BOOLEAN_TYPE_P (vectype))
7911 if (code == GT_EXPR)
7913 bitop1 = BIT_NOT_EXPR;
7914 bitop2 = BIT_AND_EXPR;
7916 else if (code == GE_EXPR)
7918 bitop1 = BIT_NOT_EXPR;
7919 bitop2 = BIT_IOR_EXPR;
7921 else if (code == LT_EXPR)
7923 bitop1 = BIT_NOT_EXPR;
7924 bitop2 = BIT_AND_EXPR;
7925 std::swap (rhs1, rhs2);
7926 std::swap (dts[0], dts[1]);
7928 else if (code == LE_EXPR)
7930 bitop1 = BIT_NOT_EXPR;
7931 bitop2 = BIT_IOR_EXPR;
7932 std::swap (rhs1, rhs2);
7933 std::swap (dts[0], dts[1]);
7935 else
7937 bitop1 = BIT_XOR_EXPR;
7938 if (code == EQ_EXPR)
7939 bitop2 = BIT_NOT_EXPR;
7943 if (!vec_stmt)
7945 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7946 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
7947 dts, NULL, NULL);
7948 if (bitop1 == NOP_EXPR)
7949 return expand_vec_cmp_expr_p (vectype, mask_type);
7950 else
7952 machine_mode mode = TYPE_MODE (vectype);
7953 optab optab;
7955 optab = optab_for_tree_code (bitop1, vectype, optab_default);
7956 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7957 return false;
7959 if (bitop2 != NOP_EXPR)
7961 optab = optab_for_tree_code (bitop2, vectype, optab_default);
7962 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7963 return false;
7965 return true;
7969 /* Transform. */
7970 if (!slp_node)
7972 vec_oprnds0.create (1);
7973 vec_oprnds1.create (1);
7976 /* Handle def. */
7977 lhs = gimple_assign_lhs (stmt);
7978 mask = vect_create_destination_var (lhs, mask_type);
7980 /* Handle cmp expr. */
7981 for (j = 0; j < ncopies; j++)
7983 gassign *new_stmt = NULL;
7984 if (j == 0)
7986 if (slp_node)
7988 auto_vec<tree, 2> ops;
7989 auto_vec<vec<tree>, 2> vec_defs;
7991 ops.safe_push (rhs1);
7992 ops.safe_push (rhs2);
7993 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7994 vec_oprnds1 = vec_defs.pop ();
7995 vec_oprnds0 = vec_defs.pop ();
7997 else
7999 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8000 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8003 else
8005 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8006 vec_oprnds0.pop ());
8007 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8008 vec_oprnds1.pop ());
8011 if (!slp_node)
8013 vec_oprnds0.quick_push (vec_rhs1);
8014 vec_oprnds1.quick_push (vec_rhs2);
8017 /* Arguments are ready. Create the new vector stmt. */
8018 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8020 vec_rhs2 = vec_oprnds1[i];
8022 new_temp = make_ssa_name (mask);
8023 if (bitop1 == NOP_EXPR)
8025 new_stmt = gimple_build_assign (new_temp, code,
8026 vec_rhs1, vec_rhs2);
8027 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8029 else
8031 if (bitop1 == BIT_NOT_EXPR)
8032 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8033 else
8034 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8035 vec_rhs2);
8036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8037 if (bitop2 != NOP_EXPR)
8039 tree res = make_ssa_name (mask);
8040 if (bitop2 == BIT_NOT_EXPR)
8041 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8042 else
8043 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8044 new_temp);
8045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8048 if (slp_node)
8049 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8052 if (slp_node)
8053 continue;
8055 if (j == 0)
8056 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8057 else
8058 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8060 prev_stmt_info = vinfo_for_stmt (new_stmt);
8063 vec_oprnds0.release ();
8064 vec_oprnds1.release ();
8066 return true;
8069 /* Make sure the statement is vectorizable. */
8071 bool
8072 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8074 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8075 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8076 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8077 bool ok;
8078 tree scalar_type, vectype;
8079 gimple *pattern_stmt;
8080 gimple_seq pattern_def_seq;
8082 if (dump_enabled_p ())
8084 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8085 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8088 if (gimple_has_volatile_ops (stmt))
8090 if (dump_enabled_p ())
8091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8092 "not vectorized: stmt has volatile operands\n");
8094 return false;
8097 /* Skip stmts that do not need to be vectorized. In loops this is expected
8098 to include:
8099 - the COND_EXPR which is the loop exit condition
8100 - any LABEL_EXPRs in the loop
8101 - computations that are used only for array indexing or loop control.
8102 In basic blocks we only analyze statements that are part of some SLP
8103 instance; therefore, all the statements are relevant.
8105 A pattern statement needs to be analyzed instead of the original statement
8106 if the original statement is not relevant. Otherwise, we analyze both
8107 statements. In basic blocks we are called from some SLP instance
8108 traversal; there we don't analyze pattern stmts instead, because the
8109 pattern stmts will already be part of the SLP instance. */
8111 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8112 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8113 && !STMT_VINFO_LIVE_P (stmt_info))
8115 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8116 && pattern_stmt
8117 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8118 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8120 /* Analyze PATTERN_STMT instead of the original stmt. */
8121 stmt = pattern_stmt;
8122 stmt_info = vinfo_for_stmt (pattern_stmt);
8123 if (dump_enabled_p ())
8125 dump_printf_loc (MSG_NOTE, vect_location,
8126 "==> examining pattern statement: ");
8127 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8130 else
8132 if (dump_enabled_p ())
8133 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8135 return true;
8138 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8139 && node == NULL
8140 && pattern_stmt
8141 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8142 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8144 /* Analyze PATTERN_STMT too. */
8145 if (dump_enabled_p ())
8147 dump_printf_loc (MSG_NOTE, vect_location,
8148 "==> examining pattern statement: ");
8149 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8152 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8153 return false;
8156 if (is_pattern_stmt_p (stmt_info)
8157 && node == NULL
8158 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8160 gimple_stmt_iterator si;
8162 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8164 gimple *pattern_def_stmt = gsi_stmt (si);
8165 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8166 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8168 /* Analyze def stmt of STMT if it's a pattern stmt. */
8169 if (dump_enabled_p ())
8171 dump_printf_loc (MSG_NOTE, vect_location,
8172 "==> examining pattern def statement: ");
8173 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8176 if (!vect_analyze_stmt (pattern_def_stmt,
8177 need_to_vectorize, node))
8178 return false;
8183 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8185 case vect_internal_def:
8186 break;
8188 case vect_reduction_def:
8189 case vect_nested_cycle:
8190 gcc_assert (!bb_vinfo
8191 && (relevance == vect_used_in_outer
8192 || relevance == vect_used_in_outer_by_reduction
8193 || relevance == vect_used_by_reduction
8194 || relevance == vect_unused_in_scope
8195 || relevance == vect_used_only_live));
8196 break;
8198 case vect_induction_def:
8199 case vect_constant_def:
8200 case vect_external_def:
8201 case vect_unknown_def_type:
8202 default:
8203 gcc_unreachable ();
8206 if (bb_vinfo)
8208 gcc_assert (PURE_SLP_STMT (stmt_info));
8210 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8211 if (dump_enabled_p ())
8213 dump_printf_loc (MSG_NOTE, vect_location,
8214 "get vectype for scalar type: ");
8215 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8216 dump_printf (MSG_NOTE, "\n");
8219 vectype = get_vectype_for_scalar_type (scalar_type);
8220 if (!vectype)
8222 if (dump_enabled_p ())
8224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8225 "not SLPed: unsupported data-type ");
8226 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8227 scalar_type);
8228 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8230 return false;
8233 if (dump_enabled_p ())
8235 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8236 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8237 dump_printf (MSG_NOTE, "\n");
8240 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8243 if (STMT_VINFO_RELEVANT_P (stmt_info))
8245 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8246 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8247 || (is_gimple_call (stmt)
8248 && gimple_call_lhs (stmt) == NULL_TREE));
8249 *need_to_vectorize = true;
8252 if (PURE_SLP_STMT (stmt_info) && !node)
8254 dump_printf_loc (MSG_NOTE, vect_location,
8255 "handled only by SLP analysis\n");
8256 return true;
8259 ok = true;
8260 if (!bb_vinfo
8261 && (STMT_VINFO_RELEVANT_P (stmt_info)
8262 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8263 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8264 || vectorizable_conversion (stmt, NULL, NULL, node)
8265 || vectorizable_shift (stmt, NULL, NULL, node)
8266 || vectorizable_operation (stmt, NULL, NULL, node)
8267 || vectorizable_assignment (stmt, NULL, NULL, node)
8268 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8269 || vectorizable_call (stmt, NULL, NULL, node)
8270 || vectorizable_store (stmt, NULL, NULL, node)
8271 || vectorizable_reduction (stmt, NULL, NULL, node)
8272 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8273 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8274 else
8276 if (bb_vinfo)
8277 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8278 || vectorizable_conversion (stmt, NULL, NULL, node)
8279 || vectorizable_shift (stmt, NULL, NULL, node)
8280 || vectorizable_operation (stmt, NULL, NULL, node)
8281 || vectorizable_assignment (stmt, NULL, NULL, node)
8282 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8283 || vectorizable_call (stmt, NULL, NULL, node)
8284 || vectorizable_store (stmt, NULL, NULL, node)
8285 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8286 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8289 if (!ok)
8291 if (dump_enabled_p ())
8293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8294 "not vectorized: relevant stmt not ");
8295 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8296 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8299 return false;
8302 if (bb_vinfo)
8303 return true;
8305 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
8306 need extra handling, except for vectorizable reductions. */
8307 if (STMT_VINFO_LIVE_P (stmt_info)
8308 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8309 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8311 if (!ok)
8313 if (dump_enabled_p ())
8315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8316 "not vectorized: live stmt not ");
8317 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8318 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8321 return false;
8324 return true;
8328 /* Function vect_transform_stmt.
8330 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8332 bool
8333 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8334 bool *grouped_store, slp_tree slp_node,
8335 slp_instance slp_node_instance)
8337 bool is_store = false;
8338 gimple *vec_stmt = NULL;
8339 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8340 bool done;
8342 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8343 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8345 switch (STMT_VINFO_TYPE (stmt_info))
8347 case type_demotion_vec_info_type:
8348 case type_promotion_vec_info_type:
8349 case type_conversion_vec_info_type:
8350 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8351 gcc_assert (done);
8352 break;
8354 case induc_vec_info_type:
8355 gcc_assert (!slp_node);
8356 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8357 gcc_assert (done);
8358 break;
8360 case shift_vec_info_type:
8361 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8362 gcc_assert (done);
8363 break;
8365 case op_vec_info_type:
8366 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8367 gcc_assert (done);
8368 break;
8370 case assignment_vec_info_type:
8371 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8372 gcc_assert (done);
8373 break;
8375 case load_vec_info_type:
8376 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8377 slp_node_instance);
8378 gcc_assert (done);
8379 break;
8381 case store_vec_info_type:
8382 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8383 gcc_assert (done);
8384 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8386 /* In case of interleaving, the whole chain is vectorized when the
8387 last store in the chain is reached. Store stmts before the last
8388 one are skipped, and their vec_stmt_info shouldn't be freed
8389 meanwhile. */
8390 *grouped_store = true;
8391 if (STMT_VINFO_VEC_STMT (stmt_info))
8392 is_store = true;
8394 else
8395 is_store = true;
8396 break;
8398 case condition_vec_info_type:
8399 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8400 gcc_assert (done);
8401 break;
8403 case comparison_vec_info_type:
8404 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8405 gcc_assert (done);
8406 break;
8408 case call_vec_info_type:
8409 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8410 stmt = gsi_stmt (*gsi);
8411 if (is_gimple_call (stmt)
8412 && gimple_call_internal_p (stmt)
8413 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8414 is_store = true;
8415 break;
8417 case call_simd_clone_vec_info_type:
8418 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8419 stmt = gsi_stmt (*gsi);
8420 break;
8422 case reduc_vec_info_type:
8423 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8424 gcc_assert (done);
8425 break;
8427 default:
8428 if (!STMT_VINFO_LIVE_P (stmt_info))
8430 if (dump_enabled_p ())
8431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8432 "stmt not supported.\n");
8433 gcc_unreachable ();
8437 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8438 This would break hybrid SLP vectorization. */
8439 if (slp_node)
8440 gcc_assert (!vec_stmt
8441 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8443 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8444 is being vectorized, but outside the immediately enclosing loop. */
8445 if (vec_stmt
8446 && STMT_VINFO_LOOP_VINFO (stmt_info)
8447 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8448 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8449 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8450 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8451 || STMT_VINFO_RELEVANT (stmt_info) ==
8452 vect_used_in_outer_by_reduction))
8454 struct loop *innerloop = LOOP_VINFO_LOOP (
8455 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8456 imm_use_iterator imm_iter;
8457 use_operand_p use_p;
8458 tree scalar_dest;
8459 gimple *exit_phi;
8461 if (dump_enabled_p ())
8462 dump_printf_loc (MSG_NOTE, vect_location,
8463 "Record the vdef for outer-loop vectorization.\n");
8465 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8466 (to be used when vectorizing outer-loop stmts that use the DEF of
8467 STMT). */
8468 if (gimple_code (stmt) == GIMPLE_PHI)
8469 scalar_dest = PHI_RESULT (stmt);
8470 else
8471 scalar_dest = gimple_assign_lhs (stmt);
8473 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8475 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8477 exit_phi = USE_STMT (use_p);
8478 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8483 /* Handle stmts whose DEF is used outside the loop-nest that is
8484 being vectorized. */
8485 if (slp_node)
8487 gimple *slp_stmt;
8488 int i;
8489 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8491 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8492 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8493 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8495 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8496 &vec_stmt);
8497 gcc_assert (done);
8501 else if (STMT_VINFO_LIVE_P (stmt_info)
8502 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8504 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8505 gcc_assert (done);
8508 if (vec_stmt)
8509 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8511 return is_store;
8515 /* Remove a group of stores (for SLP or interleaving), free their
8516 stmt_vec_info. */
8518 void
8519 vect_remove_stores (gimple *first_stmt)
8521 gimple *next = first_stmt;
8522 gimple *tmp;
8523 gimple_stmt_iterator next_si;
8525 while (next)
8527 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8529 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8530 if (is_pattern_stmt_p (stmt_info))
8531 next = STMT_VINFO_RELATED_STMT (stmt_info);
8532 /* Free the attached stmt_vec_info and remove the stmt. */
8533 next_si = gsi_for_stmt (next);
8534 unlink_stmt_vdef (next);
8535 gsi_remove (&next_si, true);
8536 release_defs (next);
8537 free_stmt_vec_info (next);
8538 next = tmp;
8543 /* Function new_stmt_vec_info.
8545 Create and initialize a new stmt_vec_info struct for STMT. */
8547 stmt_vec_info
8548 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8550 stmt_vec_info res;
8551 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8553 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8554 STMT_VINFO_STMT (res) = stmt;
8555 res->vinfo = vinfo;
8556 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8557 STMT_VINFO_LIVE_P (res) = false;
8558 STMT_VINFO_VECTYPE (res) = NULL;
8559 STMT_VINFO_VEC_STMT (res) = NULL;
8560 STMT_VINFO_VECTORIZABLE (res) = true;
8561 STMT_VINFO_IN_PATTERN_P (res) = false;
8562 STMT_VINFO_RELATED_STMT (res) = NULL;
8563 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8564 STMT_VINFO_DATA_REF (res) = NULL;
8565 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8567 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8568 STMT_VINFO_DR_OFFSET (res) = NULL;
8569 STMT_VINFO_DR_INIT (res) = NULL;
8570 STMT_VINFO_DR_STEP (res) = NULL;
8571 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8573 if (gimple_code (stmt) == GIMPLE_PHI
8574 && is_loop_header_bb_p (gimple_bb (stmt)))
8575 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8576 else
8577 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8579 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8580 STMT_SLP_TYPE (res) = loop_vect;
8581 STMT_VINFO_NUM_SLP_USES (res) = 0;
8583 GROUP_FIRST_ELEMENT (res) = NULL;
8584 GROUP_NEXT_ELEMENT (res) = NULL;
8585 GROUP_SIZE (res) = 0;
8586 GROUP_STORE_COUNT (res) = 0;
8587 GROUP_GAP (res) = 0;
8588 GROUP_SAME_DR_STMT (res) = NULL;
8590 return res;
8594 /* Create a hash table for stmt_vec_info. */
8596 void
8597 init_stmt_vec_info_vec (void)
8599 gcc_assert (!stmt_vec_info_vec.exists ());
8600 stmt_vec_info_vec.create (50);
8604 /* Free hash table for stmt_vec_info. */
8606 void
8607 free_stmt_vec_info_vec (void)
8609 unsigned int i;
8610 stmt_vec_info info;
8611 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8612 if (info != NULL)
8613 free_stmt_vec_info (STMT_VINFO_STMT (info));
8614 gcc_assert (stmt_vec_info_vec.exists ());
8615 stmt_vec_info_vec.release ();
8619 /* Free stmt vectorization related info. */
8621 void
8622 free_stmt_vec_info (gimple *stmt)
8624 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8626 if (!stmt_info)
8627 return;
8629 /* Check if this statement has a related "pattern stmt"
8630 (introduced by the vectorizer during the pattern recognition
8631 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8632 too. */
8633 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8635 stmt_vec_info patt_info
8636 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8637 if (patt_info)
8639 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8640 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8641 gimple_set_bb (patt_stmt, NULL);
8642 tree lhs = gimple_get_lhs (patt_stmt);
8643 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8644 release_ssa_name (lhs);
8645 if (seq)
8647 gimple_stmt_iterator si;
8648 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8650 gimple *seq_stmt = gsi_stmt (si);
8651 gimple_set_bb (seq_stmt, NULL);
8652 lhs = gimple_get_lhs (seq_stmt);
8653 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8654 release_ssa_name (lhs);
8655 free_stmt_vec_info (seq_stmt);
8658 free_stmt_vec_info (patt_stmt);
8662 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8663 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8664 set_vinfo_for_stmt (stmt, NULL);
8665 free (stmt_info);
8669 /* Function get_vectype_for_scalar_type_and_size.
8671 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8672 by the target. */
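/* Editorial example (a sketch assuming a 128-bit vector target, not from
   the original sources): for SCALAR_TYPE == int and SIZE == 0 this yields
   "vector(4) int" via the target's preferred SIMD mode; for SIZE == 32
   (bytes) it would yield "vector(8) int" if such a vector mode exists,
   and NULL_TREE otherwise.  */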
8674 static tree
8675 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8677 machine_mode inner_mode = TYPE_MODE (scalar_type);
8678 machine_mode simd_mode;
8679 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8680 int nunits;
8681 tree vectype;
8683 if (nbytes == 0)
8684 return NULL_TREE;
8686 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8687 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8688 return NULL_TREE;
8690 /* For vector types of elements whose mode precision doesn't
8691 match their type's precision, we use an element type of mode
8692 precision. The vectorization routines will have to make sure
8693 they support the proper result truncation/extension.
8694 We also make sure to build vector types with INTEGER_TYPE
8695 component type only. */
8696 if (INTEGRAL_TYPE_P (scalar_type)
8697 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8698 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8699 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8700 TYPE_UNSIGNED (scalar_type));
8702 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8703 When the component mode passes the above test, simply use a type
8704 corresponding to that mode. The theory is that any use that
8705 would cause problems with this will disable vectorization anyway. */
8706 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8707 && !INTEGRAL_TYPE_P (scalar_type))
8708 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8710 /* We can't build a vector type of elements with alignment bigger than
8711 their size. */
8712 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8713 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8714 TYPE_UNSIGNED (scalar_type));
8716 /* If we fell back to using the mode, fail if there was
8717 no scalar type for it. */
8718 if (scalar_type == NULL_TREE)
8719 return NULL_TREE;
8721 /* If no size was supplied, use the mode the target prefers. Otherwise
8722 lookup a vector mode of the specified size. */
8723 if (size == 0)
8724 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8725 else
8726 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8727 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8728 if (nunits <= 1)
8729 return NULL_TREE;
8731 vectype = build_vector_type (scalar_type, nunits);
8733 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8734 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8735 return NULL_TREE;
8737 return vectype;
8740 unsigned int current_vector_size;
8742 /* Function get_vectype_for_scalar_type.
8744 Returns the vector type corresponding to SCALAR_TYPE as supported
8745 by the target. */
8747 tree
8748 get_vectype_for_scalar_type (tree scalar_type)
8750 tree vectype;
8751 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8752 current_vector_size);
8753 if (vectype
8754 && current_vector_size == 0)
8755 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8756 return vectype;
8759 /* Function get_mask_type_for_scalar_type.
8761 Returns the mask type corresponding to a result of comparison
8762 of vectors of specified SCALAR_TYPE as supported by target. */
8764 tree
8765 get_mask_type_for_scalar_type (tree scalar_type)
8767 tree vectype = get_vectype_for_scalar_type (scalar_type);
8769 if (!vectype)
8770 return NULL;
8772 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8773 current_vector_size);
8776 /* Function get_same_sized_vectype
8778 Returns a vector type corresponding to SCALAR_TYPE of size
8779 VECTOR_TYPE if supported by the target. */
8781 tree
8782 get_same_sized_vectype (tree scalar_type, tree vector_type)
8784 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8785 return build_same_sized_truth_vector_type (vector_type);
8787 return get_vectype_for_scalar_type_and_size
8788 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8791 /* Function vect_is_simple_use.
8793 Input:
8794 VINFO - the vect info of the loop or basic block that is being vectorized.
8795 OPERAND - operand in the loop or bb.
8796 Output:
8797 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8798 DT - the type of definition
8800 Returns whether a stmt with OPERAND can be vectorized.
8801 For loops, supportable operands are constants, loop invariants, and operands
8802 that are defined by the current iteration of the loop. Unsupportable
8803 operands are those that are defined by a previous iteration of the loop (as
8804 is the case in reduction/induction computations).
8805 For basic blocks, supportable operands are constants and bb invariants.
8806 For now, operands defined outside the basic block are not supported. */
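/* Editorial example (not from the original sources): for a use "x_1 + 4"
   inside the vectorized region, the constant 4 classifies as
   vect_constant_def, an SSA name defined before the region classifies as
   vect_external_def, and an SSA name defined by a statement inside the
   region takes the def type recorded in that statement's stmt_vec_info
   (typically vect_internal_def).  */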
8808 bool
8809 vect_is_simple_use (tree operand, vec_info *vinfo,
8810 gimple **def_stmt, enum vect_def_type *dt)
8812 *def_stmt = NULL;
8813 *dt = vect_unknown_def_type;
8815 if (dump_enabled_p ())
8817 dump_printf_loc (MSG_NOTE, vect_location,
8818 "vect_is_simple_use: operand ");
8819 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8820 dump_printf (MSG_NOTE, "\n");
8823 if (CONSTANT_CLASS_P (operand))
8825 *dt = vect_constant_def;
8826 return true;
8829 if (is_gimple_min_invariant (operand))
8831 *dt = vect_external_def;
8832 return true;
8835 if (TREE_CODE (operand) != SSA_NAME)
8837 if (dump_enabled_p ())
8838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8839 "not ssa-name.\n");
8840 return false;
8843 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8845 *dt = vect_external_def;
8846 return true;
8849 *def_stmt = SSA_NAME_DEF_STMT (operand);
8850 if (dump_enabled_p ())
8852 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8853 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8856 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8857 *dt = vect_external_def;
8858 else
8860 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8861 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8864 if (dump_enabled_p ())
8866 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8867 switch (*dt)
8869 case vect_uninitialized_def:
8870 dump_printf (MSG_NOTE, "uninitialized\n");
8871 break;
8872 case vect_constant_def:
8873 dump_printf (MSG_NOTE, "constant\n");
8874 break;
8875 case vect_external_def:
8876 dump_printf (MSG_NOTE, "external\n");
8877 break;
8878 case vect_internal_def:
8879 dump_printf (MSG_NOTE, "internal\n");
8880 break;
8881 case vect_induction_def:
8882 dump_printf (MSG_NOTE, "induction\n");
8883 break;
8884 case vect_reduction_def:
8885 dump_printf (MSG_NOTE, "reduction\n");
8886 break;
8887 case vect_double_reduction_def:
8888 dump_printf (MSG_NOTE, "double reduction\n");
8889 break;
8890 case vect_nested_cycle:
8891 dump_printf (MSG_NOTE, "nested cycle\n");
8892 break;
8893 case vect_unknown_def_type:
8894 dump_printf (MSG_NOTE, "unknown\n");
8895 break;
8899 if (*dt == vect_unknown_def_type)
8901 if (dump_enabled_p ())
8902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8903 "Unsupported pattern.\n");
8904 return false;
8907 switch (gimple_code (*def_stmt))
8909 case GIMPLE_PHI:
8910 case GIMPLE_ASSIGN:
8911 case GIMPLE_CALL:
8912 break;
8913 default:
8914 if (dump_enabled_p ())
8915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8916 "unsupported defining stmt:\n");
8917 return false;
8920 return true;
8923 /* Function vect_is_simple_use.
8925 Same as vect_is_simple_use but also determines the vector operand
8926 type of OPERAND and stores it to *VECTYPE. If the definition of
8927 OPERAND is vect_uninitialized_def, vect_constant_def or
8928 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8929 is responsible for computing the best-suited vector type for the
8930 scalar operand. */
8932 bool
8933 vect_is_simple_use (tree operand, vec_info *vinfo,
8934 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8936 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8937 return false;
8939 /* Now get a vector type if the def is internal, otherwise supply
8940 NULL_TREE and leave it up to the caller to figure out a proper
8941 type for the use stmt. */
8942 if (*dt == vect_internal_def
8943 || *dt == vect_induction_def
8944 || *dt == vect_reduction_def
8945 || *dt == vect_double_reduction_def
8946 || *dt == vect_nested_cycle)
8948 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8950 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8951 && !STMT_VINFO_RELEVANT (stmt_info)
8952 && !STMT_VINFO_LIVE_P (stmt_info))
8953 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8955 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8956 gcc_assert (*vectype != NULL_TREE);
8958 else if (*dt == vect_uninitialized_def
8959 || *dt == vect_constant_def
8960 || *dt == vect_external_def)
8961 *vectype = NULL_TREE;
8962 else
8963 gcc_unreachable ();
8965 return true;
8969 /* Function supportable_widening_operation
8971 Check whether an operation represented by the code CODE is a
8972 widening operation that is supported by the target platform in
8973 vector form (i.e., when operating on arguments of type VECTYPE_IN
8974 producing a result of type VECTYPE_OUT).
8976 Widening operations we currently support are NOP (CONVERT), FLOAT
8977 and WIDEN_MULT. This function checks if these operations are supported
8978 by the target platform either directly (via vector tree-codes), or via
8979 target builtins.
8981 Output:
8982 - CODE1 and CODE2 are codes of vector operations to be used when
8983 vectorizing the operation, if available.
8984 - MULTI_STEP_CVT determines the number of required intermediate steps in
8985 case of multi-step conversion (like char->short->int - in that case
8986 MULTI_STEP_CVT will be 1).
8987 - INTERM_TYPES contains the intermediate type required to perform the
8988 widening operation (short in the above example). */
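/* Editorial example (a sketch assuming 128-bit vectors, not from the
   original sources): widening a NOP conversion from char to int uses
   CODE1/CODE2 = VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (possibly swapped
   on big-endian targets), with short recorded in INTERM_TYPES and
   MULTI_STEP_CVT == 1: one V16QI input is unpacked into two V8HI
   vectors, each of which is unpacked again into two V4SI vectors.  */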
8990 bool
8991 supportable_widening_operation (enum tree_code code, gimple *stmt,
8992 tree vectype_out, tree vectype_in,
8993 enum tree_code *code1, enum tree_code *code2,
8994 int *multi_step_cvt,
8995 vec<tree> *interm_types)
8997 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8998 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8999 struct loop *vect_loop = NULL;
9000 machine_mode vec_mode;
9001 enum insn_code icode1, icode2;
9002 optab optab1, optab2;
9003 tree vectype = vectype_in;
9004 tree wide_vectype = vectype_out;
9005 enum tree_code c1, c2;
9006 int i;
9007 tree prev_type, intermediate_type;
9008 machine_mode intermediate_mode, prev_mode;
9009 optab optab3, optab4;
9011 *multi_step_cvt = 0;
9012 if (loop_info)
9013 vect_loop = LOOP_VINFO_LOOP (loop_info);
9015 switch (code)
9017 case WIDEN_MULT_EXPR:
9018 /* The result of a vectorized widening operation usually requires
9019 two vectors (because the widened results do not fit into one vector).
9020 The generated vector results would normally be expected to be
9021 generated in the same order as in the original scalar computation,
9022 i.e. if 8 results are generated in each vector iteration, they are
9023 to be organized as follows:
9024 vect1: [res1,res2,res3,res4],
9025 vect2: [res5,res6,res7,res8].
9027 However, in the special case that the result of the widening
9028 operation is used in a reduction computation only, the order doesn't
9029 matter (because when vectorizing a reduction we change the order of
9030 the computation). Some targets can take advantage of this and
9031 generate more efficient code. For example, targets like Altivec,
9032 that support widen_mult using a sequence of {mult_even,mult_odd}
9033 generate the following vectors:
9034 vect1: [res1,res3,res5,res7],
9035 vect2: [res2,res4,res6,res8].
9037 When vectorizing outer-loops, we execute the inner-loop sequentially
9038 (each vectorized inner-loop iteration contributes to VF outer-loop
9039 iterations in parallel). We therefore don't allow changing the
9040 order of the computation in the inner-loop during outer-loop
9041 vectorization. */
9042 /* TODO: Another case in which order doesn't *really* matter is when we
9043 widen and then contract again, e.g. (short)((int)x * y >> 8).
9044 Normally, pack_trunc performs an even/odd permute, whereas the
9045 repack from an even/odd expansion would be an interleave, which
9046 would be significantly simpler for e.g. AVX2. */
9047 /* In any case, in order to avoid duplicating the code below, recurse
9048 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9049 are properly set up for the caller. If we fail, we'll continue with
9050 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9051 if (vect_loop
9052 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9053 && !nested_in_vect_loop_p (vect_loop, stmt)
9054 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9055 stmt, vectype_out, vectype_in,
9056 code1, code2, multi_step_cvt,
9057 interm_types))
9059 /* Elements in a vector with the vect_used_by_reduction property cannot
9060 be reordered if the use chain with this property does not have the
9061 same operation. One such example is s += a * b, where elements
9062 in a and b cannot be reordered. Here we check if the vector defined
9063 by STMT is only directly used in the reduction statement. */
9064 tree lhs = gimple_assign_lhs (stmt);
9065 use_operand_p dummy;
9066 gimple *use_stmt;
9067 stmt_vec_info use_stmt_info = NULL;
9068 if (single_imm_use (lhs, &dummy, &use_stmt)
9069 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9070 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9071 return true;
9073 c1 = VEC_WIDEN_MULT_LO_EXPR;
9074 c2 = VEC_WIDEN_MULT_HI_EXPR;
9075 break;
9077 case DOT_PROD_EXPR:
9078 c1 = DOT_PROD_EXPR;
9079 c2 = DOT_PROD_EXPR;
9080 break;
9082 case SAD_EXPR:
9083 c1 = SAD_EXPR;
9084 c2 = SAD_EXPR;
9085 break;
9087 case VEC_WIDEN_MULT_EVEN_EXPR:
9088 /* Support the recursion induced just above. */
9089 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9090 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9091 break;
9093 case WIDEN_LSHIFT_EXPR:
9094 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9095 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9096 break;
9098 CASE_CONVERT:
9099 c1 = VEC_UNPACK_LO_EXPR;
9100 c2 = VEC_UNPACK_HI_EXPR;
9101 break;
9103 case FLOAT_EXPR:
9104 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9105 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9106 break;
9108 case FIX_TRUNC_EXPR:
9109 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9110 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9111 computing the operation. */
9112 return false;
9114 default:
9115 gcc_unreachable ();
9118 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9119 std::swap (c1, c2);
9121 if (code == FIX_TRUNC_EXPR)
9123 /* The signedness is determined from the output operand. */
9124 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9125 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9127 else
9129 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9130 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9133 if (!optab1 || !optab2)
9134 return false;
9136 vec_mode = TYPE_MODE (vectype);
9137 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9138 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9139 return false;
9141 *code1 = c1;
9142 *code2 = c2;
9144 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9145 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9146 /* For scalar masks we may have different boolean
9147 vector types having the same QImode. Thus we
9148 add an additional check on the number of elements. */
9149 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9150 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9151 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9153 /* Check if it's a multi-step conversion that can be done using intermediate
9154 types. */
9156 prev_type = vectype;
9157 prev_mode = vec_mode;
9159 if (!CONVERT_EXPR_CODE_P (code))
9160 return false;
9162 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9163 intermediate steps in the promotion sequence. We try
9164 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9165 not. */
9166 interm_types->create (MAX_INTERM_CVT_STEPS);
9167 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9169 intermediate_mode = insn_data[icode1].operand[0].mode;
9170 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9172 intermediate_type
9173 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9174 current_vector_size);
9175 if (intermediate_mode != TYPE_MODE (intermediate_type))
9176 return false;
9178 else
9179 intermediate_type
9180 = lang_hooks.types.type_for_mode (intermediate_mode,
9181 TYPE_UNSIGNED (prev_type));
9183 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9184 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9186 if (!optab3 || !optab4
9187 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9188 || insn_data[icode1].operand[0].mode != intermediate_mode
9189 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9190 || insn_data[icode2].operand[0].mode != intermediate_mode
9191 || ((icode1 = optab_handler (optab3, intermediate_mode))
9192 == CODE_FOR_nothing)
9193 || ((icode2 = optab_handler (optab4, intermediate_mode))
9194 == CODE_FOR_nothing))
9195 break;
9197 interm_types->quick_push (intermediate_type);
9198 (*multi_step_cvt)++;
9200 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9201 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9202 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9203 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9204 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9206 prev_type = intermediate_type;
9207 prev_mode = intermediate_mode;
9210 interm_types->release ();
9211 return false;
9215 /* Function supportable_narrowing_operation
9217 Check whether an operation represented by the code CODE is a
9218 narrowing operation that is supported by the target platform in
9219 vector form (i.e., when operating on arguments of type VECTYPE_IN
9220 and producing a result of type VECTYPE_OUT).
9222 Narrowing operations we currently support are NOP (CONVERT) and
9223 FIX_TRUNC. This function checks if these operations are supported by
9224 the target platform directly via vector tree-codes.
9226 Output:
9227 - CODE1 is the code of a vector operation to be used when
9228 vectorizing the operation, if available.
9229 - MULTI_STEP_CVT determines the number of required intermediate steps in
9230 case of multi-step conversion (like int->short->char - in that case
9231 MULTI_STEP_CVT will be 1).
9232 - INTERM_TYPES contains the intermediate type required to perform the
9233 narrowing operation (short in the above example). */
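/* Editorial example (a sketch assuming 128-bit vectors, not from the
   original sources): narrowing a NOP conversion from int to char uses
   CODE1 = VEC_PACK_TRUNC_EXPR, with short recorded in INTERM_TYPES and
   MULTI_STEP_CVT == 1: pairs of V4SI inputs are packed into V8HI
   vectors, and pairs of those are packed into one V16QI result.  */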
9235 bool
9236 supportable_narrowing_operation (enum tree_code code,
9237 tree vectype_out, tree vectype_in,
9238 enum tree_code *code1, int *multi_step_cvt,
9239 vec<tree> *interm_types)
9241 machine_mode vec_mode;
9242 enum insn_code icode1;
9243 optab optab1, interm_optab;
9244 tree vectype = vectype_in;
9245 tree narrow_vectype = vectype_out;
9246 enum tree_code c1;
9247 tree intermediate_type, prev_type;
9248 machine_mode intermediate_mode, prev_mode;
9249 int i;
9250 bool uns;
9252 *multi_step_cvt = 0;
9253 switch (code)
9255 CASE_CONVERT:
9256 c1 = VEC_PACK_TRUNC_EXPR;
9257 break;
9259 case FIX_TRUNC_EXPR:
9260 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9261 break;
9263 case FLOAT_EXPR:
9264 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9265 tree code and optabs used for computing the operation. */
9266 return false;
9268 default:
9269 gcc_unreachable ();
9272 if (code == FIX_TRUNC_EXPR)
9273 /* The signedness is determined from the output operand. */
9274 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9275 else
9276 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9278 if (!optab1)
9279 return false;
9281 vec_mode = TYPE_MODE (vectype);
9282 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9283 return false;
9285 *code1 = c1;
9287 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9288 /* For scalar masks we may have different boolean
9289 vector types having the same QImode. Thus we
9290 add an additional check on the number of elements. */
9291 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9292 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9293 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9295 /* Check if it's a multi-step conversion that can be done using intermediate
9296 types. */
9297 prev_mode = vec_mode;
9298 prev_type = vectype;
9299 if (code == FIX_TRUNC_EXPR)
9300 uns = TYPE_UNSIGNED (vectype_out);
9301 else
9302 uns = TYPE_UNSIGNED (vectype);
9304 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9305 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9306 costly than signed. */
9307 if (code == FIX_TRUNC_EXPR && uns)
9309 enum insn_code icode2;
9311 intermediate_type
9312 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9313 interm_optab
9314 = optab_for_tree_code (c1, intermediate_type, optab_default);
9315 if (interm_optab != unknown_optab
9316 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9317 && insn_data[icode1].operand[0].mode
9318 == insn_data[icode2].operand[0].mode)
9320 uns = false;
9321 optab1 = interm_optab;
9322 icode1 = icode2;
9326 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9327 intermediate steps in the narrowing sequence. We try
9328 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9329 interm_types->create (MAX_INTERM_CVT_STEPS);
9330 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9332 intermediate_mode = insn_data[icode1].operand[0].mode;
9333 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9335 intermediate_type
9336 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9337 current_vector_size);
9338 if (intermediate_mode != TYPE_MODE (intermediate_type))
9339 return false;
9341 else
9342 intermediate_type
9343 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9344 interm_optab
9345 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9346 optab_default);
9347 if (!interm_optab
9348 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9349 || insn_data[icode1].operand[0].mode != intermediate_mode
9350 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9351 == CODE_FOR_nothing))
9352 break;
9354 interm_types->quick_push (intermediate_type);
9355 (*multi_step_cvt)++;
9357 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9358 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9359 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9360 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9362 prev_mode = intermediate_mode;
9363 prev_type = intermediate_type;
9364 optab1 = interm_optab;
9367 interm_types->release ();
9368 return false;