gcc/tree-vect-stmts.c (official-gcc.git)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
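/* For example, within a vectorized loop, "x = a[i]" is a VLS_LOAD,
   "a[i] = x + 1" (with x computed inside the loop) is a VLS_STORE, and
   "a[i] = c" (with c invariant in the loop) is a VLS_STORE_INVARIANT.  */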
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
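/* For example, for N = 2 this emits an assignment along the lines of
   "vect_x_3 = ARRAY[2];" and returns the new SSA name
   (the exact temporary name is illustrative only).  */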
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
 207    may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
 280    Return true if STMT, in the loop represented by LOOP_VINFO, is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
 286    - it is a control stmt in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
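/* For example, a store "a[i] = x" is relevant because it has a vdef,
   while a sum whose final value is read after the loop is "live"
   because its result is used outside the loop (in the loop-exit phi).  */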
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
 336    (because of loop closed form)  */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
 375    /* STMT has a data_ref. FORNOW this means that it's one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
 424 /* Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 444    skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
582 /* We are also not interested in uses on loop PHI backedges that are
583 inductions. Otherwise we'll needlessly vectorize the IV increment
584 and cause hybrid SLP for SLP inductions. */
585 else if (gimple_code (stmt) == GIMPLE_PHI
586 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
587 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
588 == use))
590 if (dump_enabled_p ())
591 dump_printf_loc (MSG_NOTE, vect_location,
592 "induction value on backedge.\n");
593 return true;
597 vect_mark_relevant (worklist, def_stmt, relevant, false);
598 return true;
602 /* Function vect_mark_stmts_to_be_vectorized.
604 Not all stmts in the loop need to be vectorized. For example:
606 for i...
607 for j...
608 1. T0 = i + j
609 2. T1 = a[T0]
611 3. j = j + 1
613 Stmt 1 and 3 do not need to be vectorized, because loop control and
614 addressing of vectorized data-refs are handled differently.
616 This pass detects such stmts. */
618 bool
619 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
621 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
622 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
623 unsigned int nbbs = loop->num_nodes;
624 gimple_stmt_iterator si;
625 gimple *stmt;
626 unsigned int i;
627 stmt_vec_info stmt_vinfo;
628 basic_block bb;
629 gimple *phi;
630 bool live_p;
631 enum vect_relevant relevant;
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location,
635 "=== vect_mark_stmts_to_be_vectorized ===\n");
637 auto_vec<gimple *, 64> worklist;
639 /* 1. Init worklist. */
640 for (i = 0; i < nbbs; i++)
642 bb = bbs[i];
643 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
645 phi = gsi_stmt (si);
646 if (dump_enabled_p ())
648 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
649 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
652 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
653 vect_mark_relevant (&worklist, phi, relevant, live_p);
655 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
657 stmt = gsi_stmt (si);
658 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
664 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
665 vect_mark_relevant (&worklist, stmt, relevant, live_p);
669 /* 2. Process_worklist */
670 while (worklist.length () > 0)
672 use_operand_p use_p;
673 ssa_op_iter iter;
675 stmt = worklist.pop ();
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
682 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
683 (DEF_STMT) as relevant/irrelevant according to the relevance property
684 of STMT. */
685 stmt_vinfo = vinfo_for_stmt (stmt);
686 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
688 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
689 propagated as is to the DEF_STMTs of its USEs.
691 One exception is when STMT has been identified as defining a reduction
692 variable; in this case we set the relevance to vect_used_by_reduction.
693 This is because we distinguish between two kinds of relevant stmts -
694 those that are used by a reduction computation, and those that are
695 (also) used by a regular computation. This allows us later on to
696 identify stmts that are used solely by a reduction, and therefore the
697 order of the results that they produce does not have to be kept. */
699 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
701 case vect_reduction_def:
702 gcc_assert (relevant != vect_unused_in_scope);
703 if (relevant != vect_unused_in_scope
704 && relevant != vect_used_in_scope
705 && relevant != vect_used_by_reduction
706 && relevant != vect_used_only_live)
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
710 "unsupported use of reduction.\n");
711 return false;
713 break;
715 case vect_nested_cycle:
716 if (relevant != vect_unused_in_scope
717 && relevant != vect_used_in_outer_by_reduction
718 && relevant != vect_used_in_outer)
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "unsupported use of nested cycle.\n");
724 return false;
726 break;
728 case vect_double_reduction_def:
729 if (relevant != vect_unused_in_scope
730 && relevant != vect_used_by_reduction
731 && relevant != vect_used_only_live)
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of double reduction.\n");
737 return false;
739 break;
741 default:
742 break;
745 if (is_pattern_stmt_p (stmt_vinfo))
747 /* Pattern statements are not inserted into the code, so
748 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
749 have to scan the RHS or function arguments instead. */
750 if (is_gimple_assign (stmt))
752 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
753 tree op = gimple_assign_rhs1 (stmt);
755 i = 1;
756 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
758 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
759 relevant, &worklist, false)
760 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
761 relevant, &worklist, false))
762 return false;
763 i = 2;
765 for (; i < gimple_num_ops (stmt); i++)
767 op = gimple_op (stmt, i);
768 if (TREE_CODE (op) == SSA_NAME
769 && !process_use (stmt, op, loop_vinfo, relevant,
770 &worklist, false))
771 return false;
774 else if (is_gimple_call (stmt))
776 for (i = 0; i < gimple_call_num_args (stmt); i++)
778 tree arg = gimple_call_arg (stmt, i);
779 if (!process_use (stmt, arg, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
785 else
786 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
788 tree op = USE_FROM_PTR (use_p);
789 if (!process_use (stmt, op, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
794 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
796 gather_scatter_info gs_info;
797 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
798 gcc_unreachable ();
799 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
800 &worklist, true))
801 return false;
803 } /* while worklist */
805 return true;
809 /* Function vect_model_simple_cost.
811 Models cost for simple operations, i.e. those that only emit ncopies of a
812 single op. Right now, this does not account for multiple insns that could
813 be generated for the single vector op. We will handle that shortly. */
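/* For example, with NCOPIES = 2 and one constant operand, this records
   one scalar_to_vec cost in the prologue (to broadcast the constant)
   and two vector_stmt costs in the loop body.  */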
815 void
816 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
817 enum vect_def_type *dt,
818 int ndts,
819 stmt_vector_for_cost *prologue_cost_vec,
820 stmt_vector_for_cost *body_cost_vec)
822 int i;
823 int inside_cost = 0, prologue_cost = 0;
825 /* The SLP costs were already calculated during SLP tree build. */
826 if (PURE_SLP_STMT (stmt_info))
827 return;
 829   /* Cost the "broadcast" of a scalar operand into a vector operand.
830 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
831 cost model. */
832 for (i = 0; i < ndts; i++)
833 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
834 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
835 stmt_info, 0, vect_prologue);
837 /* Pass the inside-of-loop statements to the target-specific cost model. */
838 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
839 stmt_info, 0, vect_body);
841 if (dump_enabled_p ())
842 dump_printf_loc (MSG_NOTE, vect_location,
843 "vect_model_simple_cost: inside_cost = %d, "
844 "prologue_cost = %d .\n", inside_cost, prologue_cost);
848 /* Model cost for type demotion and promotion operations. PWR is normally
849 zero for single-step promotions and demotions. It will be one if
850 two-step promotion/demotion is required, and so on. Each additional
851 step doubles the number of instructions required. */
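/* For example, with PWR = 1 (a two-step operation) the loop below adds
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3 vec_promote_demote costs
   for a demotion, and vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 for
   a promotion.  */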
853 static void
854 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
855 enum vect_def_type *dt, int pwr)
857 int i, tmp;
858 int inside_cost = 0, prologue_cost = 0;
859 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
860 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
861 void *target_cost_data;
863 /* The SLP costs were already calculated during SLP tree build. */
864 if (PURE_SLP_STMT (stmt_info))
865 return;
867 if (loop_vinfo)
868 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
869 else
870 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 for (i = 0; i < pwr + 1; i++)
874 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
875 (i + 1) : i;
876 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
877 vec_promote_demote, stmt_info, 0,
878 vect_body);
 881   /* FORNOW: Assuming maximum 2 args per stmt. */
882 for (i = 0; i < 2; i++)
883 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
884 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
885 stmt_info, 0, vect_prologue);
887 if (dump_enabled_p ())
888 dump_printf_loc (MSG_NOTE, vect_location,
889 "vect_model_promotion_demotion_cost: inside_cost = %d, "
890 "prologue_cost = %d .\n", inside_cost, prologue_cost);
893 /* Function vect_model_store_cost
895 Models cost for stores. In the case of grouped accesses, one access
896 has the overhead of the grouped access attributed to it. */
898 void
899 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
900 vect_memory_access_type memory_access_type,
901 enum vect_def_type dt, slp_tree slp_node,
902 stmt_vector_for_cost *prologue_cost_vec,
903 stmt_vector_for_cost *body_cost_vec)
905 unsigned int inside_cost = 0, prologue_cost = 0;
906 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
907 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
908 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910 if (dt == vect_constant_def || dt == vect_external_def)
911 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
912 stmt_info, 0, vect_prologue);
914 /* Grouped stores update all elements in the group at once,
915 so we want the DR for the first statement. */
916 if (!slp_node && grouped_access_p)
918 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
919 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
922 /* True if we should include any once-per-group costs as well as
923 the cost of the statement itself. For SLP we only get called
924 once per group anyhow. */
925 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927 /* We assume that the cost of a single store-lanes instruction is
928 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
929 access is instead being provided by a permute-and-store operation,
930 include the cost of the permutes. */
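      /* For example, interleaving a group of 4 stores with NCOPIES = 1 costs
	 1 * ceil_log2 (4) * 4 = 8 vec_perm operations below.  */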
931 if (first_stmt_p
932 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 934       /* Uses high and low interleave or shuffle operations for each
935 needed permute. */
936 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
937 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
938 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
939 stmt_info, 0, vect_body);
941 if (dump_enabled_p ())
942 dump_printf_loc (MSG_NOTE, vect_location,
943 "vect_model_store_cost: strided group_size = %d .\n",
944 group_size);
947 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
948 /* Costs of the stores. */
949 if (memory_access_type == VMAT_ELEMENTWISE
950 || memory_access_type == VMAT_GATHER_SCATTER)
951 /* N scalar stores plus extracting the elements. */
952 inside_cost += record_stmt_cost (body_cost_vec,
953 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
954 scalar_store, stmt_info, 0, vect_body);
955 else
956 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_STRIDED_SLP)
960 inside_cost += record_stmt_cost (body_cost_vec,
961 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
962 vec_to_scalar, stmt_info, 0, vect_body);
964 if (dump_enabled_p ())
965 dump_printf_loc (MSG_NOTE, vect_location,
966 "vect_model_store_cost: inside_cost = %d, "
967 "prologue_cost = %d .\n", inside_cost, prologue_cost);
971 /* Calculate cost of DR's memory access. */
972 void
973 vect_get_store_cost (struct data_reference *dr, int ncopies,
974 unsigned int *inside_cost,
975 stmt_vector_for_cost *body_cost_vec)
977 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
978 gimple *stmt = DR_STMT (dr);
979 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
981 switch (alignment_support_scheme)
983 case dr_aligned:
985 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
986 vector_store, stmt_info, 0,
987 vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: aligned.\n");
992 break;
995 case dr_unaligned_supported:
997 /* Here, we assign an additional cost for the unaligned store. */
998 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
999 unaligned_store, stmt_info,
1000 DR_MISALIGNMENT (dr), vect_body);
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: unaligned supported by "
1004 "hardware.\n");
1005 break;
1008 case dr_unaligned_unsupported:
1010 *inside_cost = VECT_MAX_COST;
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1014 "vect_model_store_cost: unsupported access.\n");
1015 break;
1018 default:
1019 gcc_unreachable ();
1024 /* Function vect_model_load_cost
1026 Models cost for loads. In the case of grouped accesses, one access has
1027 the overhead of the grouped access attributed to it. Since unaligned
1028 accesses are supported for loads, we also account for the costs of the
1029 access scheme chosen. */
1031 void
1032 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1033 vect_memory_access_type memory_access_type,
1034 slp_tree slp_node,
1035 stmt_vector_for_cost *prologue_cost_vec,
1036 stmt_vector_for_cost *body_cost_vec)
1038 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1040 unsigned int inside_cost = 0, prologue_cost = 0;
1041 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1043 /* Grouped loads read all elements in the group at once,
1044 so we want the DR for the first statement. */
1045 if (!slp_node && grouped_access_p)
1047 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1048 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1051 /* True if we should include any once-per-group costs as well as
1052 the cost of the statement itself. For SLP we only get called
1053 once per group anyhow. */
1054 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1056 /* We assume that the cost of a single load-lanes instruction is
1057 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1058 access is instead being provided by a load-and-permute operation,
1059 include the cost of the permutes. */
1060 if (first_stmt_p
1061 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1063       /* Uses even and odd extract operations or shuffle operations
1064 	 for each needed permute.  */
1065 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1066 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1067 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1068 stmt_info, 0, vect_body);
1070 if (dump_enabled_p ())
1071 dump_printf_loc (MSG_NOTE, vect_location,
1072 "vect_model_load_cost: strided group_size = %d .\n",
1073 group_size);
1076 /* The loads themselves. */
1077 if (memory_access_type == VMAT_ELEMENTWISE
1078 || memory_access_type == VMAT_GATHER_SCATTER)
1080 /* N scalar loads plus gathering them into a vector. */
1081 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1082 inside_cost += record_stmt_cost (body_cost_vec,
1083 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1084 scalar_load, stmt_info, 0, vect_body);
1086 else
1087 vect_get_load_cost (dr, ncopies, first_stmt_p,
1088 &inside_cost, &prologue_cost,
1089 prologue_cost_vec, body_cost_vec, true);
1090 if (memory_access_type == VMAT_ELEMENTWISE
1091 || memory_access_type == VMAT_STRIDED_SLP)
1092 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1093 stmt_info, 0, vect_body);
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_NOTE, vect_location,
1097 "vect_model_load_cost: inside_cost = %d, "
1098 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1102 /* Calculate cost of DR's memory access. */
1103 void
1104 vect_get_load_cost (struct data_reference *dr, int ncopies,
1105 bool add_realign_cost, unsigned int *inside_cost,
1106 unsigned int *prologue_cost,
1107 stmt_vector_for_cost *prologue_cost_vec,
1108 stmt_vector_for_cost *body_cost_vec,
1109 bool record_prologue_costs)
1111 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1112 gimple *stmt = DR_STMT (dr);
1113 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1115 switch (alignment_support_scheme)
1117 case dr_aligned:
1119 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1120 stmt_info, 0, vect_body);
1122 if (dump_enabled_p ())
1123 dump_printf_loc (MSG_NOTE, vect_location,
1124 "vect_model_load_cost: aligned.\n");
1126 break;
1128 case dr_unaligned_supported:
1130 /* Here, we assign an additional cost for the unaligned load. */
1131 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1132 unaligned_load, stmt_info,
1133 DR_MISALIGNMENT (dr), vect_body);
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_NOTE, vect_location,
1137 "vect_model_load_cost: unaligned supported by "
1138 "hardware.\n");
1140 break;
1142 case dr_explicit_realign:
1144 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1145 vector_load, stmt_info, 0, vect_body);
1146 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1147 vec_perm, stmt_info, 0, vect_body);
1149 /* FIXME: If the misalignment remains fixed across the iterations of
1150 the containing loop, the following cost should be added to the
1151 prologue costs. */
1152 if (targetm.vectorize.builtin_mask_for_load)
1153 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1154 stmt_info, 0, vect_body);
1156 if (dump_enabled_p ())
1157 dump_printf_loc (MSG_NOTE, vect_location,
1158 "vect_model_load_cost: explicit realign\n");
1160 break;
1162 case dr_explicit_realign_optimized:
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE, vect_location,
1166 "vect_model_load_cost: unaligned software "
1167 "pipelined.\n");
1169 /* Unaligned software pipeline has a load of an address, an initial
1170 load, and possibly a mask operation to "prime" the loop. However,
1171 if this is an access in a group of loads, which provide grouped
1172 access, then the above cost should only be considered for one
1173 access in the group. Inside the loop, there is a load op
1174 and a realignment op. */
1176 if (add_realign_cost && record_prologue_costs)
1178 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1179 vector_stmt, stmt_info,
1180 0, vect_prologue);
1181 if (targetm.vectorize.builtin_mask_for_load)
1182 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1183 vector_stmt, stmt_info,
1184 0, vect_prologue);
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1189 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1190 stmt_info, 0, vect_body);
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: explicit realign optimized"
1195 "\n");
1197 break;
1200 case dr_unaligned_unsupported:
1202 *inside_cost = VECT_MAX_COST;
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1206 "vect_model_load_cost: unsupported access.\n");
1207 break;
1210 default:
1211 gcc_unreachable ();
1215 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1216 the loop preheader for the vectorized stmt STMT. */
1218 static void
1219 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1221 if (gsi)
1222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1223 else
1225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1226 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228 if (loop_vinfo)
1230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1231 basic_block new_bb;
1232 edge pe;
1234 if (nested_in_vect_loop_p (loop, stmt))
1235 loop = loop->inner;
1237 pe = loop_preheader_edge (loop);
1238 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1239 gcc_assert (!new_bb);
1241 else
1243 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1244 basic_block bb;
1245 gimple_stmt_iterator gsi_bb_start;
1247 gcc_assert (bb_vinfo);
1248 bb = BB_VINFO_BB (bb_vinfo);
1249 gsi_bb_start = gsi_after_labels (bb);
1250 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1254 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "created new init_stmt: ");
1258 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1262 /* Function vect_init_vector.
1264 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1265 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1266    vector type, a vector with all elements equal to VAL is created first.
1267 Place the initialization at BSI if it is not NULL. Otherwise, place the
1268 initialization at the loop preheader.
1269 Return the DEF of INIT_STMT.
1270 It will be used in the vectorization of STMT. */
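/* For example, given the scalar constant 5 and a four-element integer
   vector TYPE, this emits an init stmt along the lines of
   "cst_1 = { 5, 5, 5, 5 };" in the loop preheader (when GSI is NULL)
   and returns its result (the temporary name is illustrative only).  */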
1272 tree
1273 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1275 gimple *init_stmt;
1276 tree new_temp;
1278   /* We abuse this function to push something to an SSA name with initial 'val'. */
1279 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1281 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1282 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1284 /* Scalar boolean value should be transformed into
1285 all zeros or all ones value before building a vector. */
1286 if (VECTOR_BOOLEAN_TYPE_P (type))
1288 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1289 tree false_val = build_zero_cst (TREE_TYPE (type));
1291 if (CONSTANT_CLASS_P (val))
1292 val = integer_zerop (val) ? false_val : true_val;
1293 else
1295 new_temp = make_ssa_name (TREE_TYPE (type));
1296 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1297 val, true_val, false_val);
1298 vect_init_vector_1 (stmt, init_stmt, gsi);
1299 val = new_temp;
1302 else if (CONSTANT_CLASS_P (val))
1303 val = fold_convert (TREE_TYPE (type), val);
1304 else
1306 new_temp = make_ssa_name (TREE_TYPE (type));
1307 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1308 init_stmt = gimple_build_assign (new_temp,
1309 fold_build1 (VIEW_CONVERT_EXPR,
1310 TREE_TYPE (type),
1311 val));
1312 else
1313 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1314 vect_init_vector_1 (stmt, init_stmt, gsi);
1315 val = new_temp;
1318 val = build_vector_from_val (type, val);
1321 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1322 init_stmt = gimple_build_assign (new_temp, val);
1323 vect_init_vector_1 (stmt, init_stmt, gsi);
1324 return new_temp;
1327 /* Function vect_get_vec_def_for_operand_1.
1329 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1330 DT that will be used in the vectorized stmt. */
1332 tree
1333 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1335 tree vec_oprnd;
1336 gimple *vec_stmt;
1337 stmt_vec_info def_stmt_info = NULL;
1339 switch (dt)
1341 /* operand is a constant or a loop invariant. */
1342 case vect_constant_def:
1343 case vect_external_def:
1344 /* Code should use vect_get_vec_def_for_operand. */
1345 gcc_unreachable ();
1347 /* operand is defined inside the loop. */
1348 case vect_internal_def:
1350 /* Get the def from the vectorized stmt. */
1351 def_stmt_info = vinfo_for_stmt (def_stmt);
1353 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1354 /* Get vectorized pattern statement. */
1355 if (!vec_stmt
1356 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1357 && !STMT_VINFO_RELEVANT (def_stmt_info))
1358 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1359 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1360 gcc_assert (vec_stmt);
1361 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1362 vec_oprnd = PHI_RESULT (vec_stmt);
1363 else if (is_gimple_call (vec_stmt))
1364 vec_oprnd = gimple_call_lhs (vec_stmt);
1365 else
1366 vec_oprnd = gimple_assign_lhs (vec_stmt);
1367 return vec_oprnd;
1370 /* operand is defined by a loop header phi - reduction */
1371 case vect_reduction_def:
1372 case vect_double_reduction_def:
1373 case vect_nested_cycle:
1374 /* Code should use get_initial_def_for_reduction. */
1375 gcc_unreachable ();
1377 /* operand is defined by loop-header phi - induction. */
1378 case vect_induction_def:
1380 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1382 /* Get the def from the vectorized stmt. */
1383 def_stmt_info = vinfo_for_stmt (def_stmt);
1384 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1385 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1386 vec_oprnd = PHI_RESULT (vec_stmt);
1387 else
1388 vec_oprnd = gimple_get_lhs (vec_stmt);
1389 return vec_oprnd;
1392 default:
1393 gcc_unreachable ();
1398 /* Function vect_get_vec_def_for_operand.
1400 OP is an operand in STMT. This function returns a (vector) def that will be
1401 used in the vectorized stmt for STMT.
1403 In the case that OP is an SSA_NAME which is defined in the loop, then
1404 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1406 In case OP is an invariant or constant, a new stmt that creates a vector def
1407 needs to be introduced. VECTYPE may be used to specify a required type for
1408 vector invariant. */
1410 tree
1411 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1413 gimple *def_stmt;
1414 enum vect_def_type dt;
1415 bool is_simple_use;
1416 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1417 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1419 if (dump_enabled_p ())
1421 dump_printf_loc (MSG_NOTE, vect_location,
1422 "vect_get_vec_def_for_operand: ");
1423 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1424 dump_printf (MSG_NOTE, "\n");
1427 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1428 gcc_assert (is_simple_use);
1429 if (def_stmt && dump_enabled_p ())
1431 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1432 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1435 if (dt == vect_constant_def || dt == vect_external_def)
1437 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1438 tree vector_type;
1440 if (vectype)
1441 vector_type = vectype;
1442 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1443 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1444 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1445 else
1446 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1448 gcc_assert (vector_type);
1449 return vect_init_vector (stmt, op, vector_type, NULL);
1451 else
1452 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1456 /* Function vect_get_vec_def_for_stmt_copy
1458 Return a vector-def for an operand. This function is used when the
1459 vectorized stmt to be created (by the caller to this function) is a "copy"
1460 created in case the vectorized result cannot fit in one vector, and several
1461 copies of the vector-stmt are required. In this case the vector-def is
1462 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1463 of the stmt that defines VEC_OPRND.
1464 DT is the type of the vector def VEC_OPRND.
1466 Context:
1467 In case the vectorization factor (VF) is bigger than the number
1468 of elements that can fit in a vectype (nunits), we have to generate
1469 more than one vector stmt to vectorize the scalar stmt. This situation
1470 arises when there are multiple data-types operated upon in the loop; the
1471 smallest data-type determines the VF, and as a result, when vectorizing
1472 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1473 vector stmt (each computing a vector of 'nunits' results, and together
1474 computing 'VF' results in each iteration). This function is called when
1475 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1476 which VF=16 and nunits=4, so the number of copies required is 4):
1478 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1480 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1481 VS1.1: vx.1 = memref1 VS1.2
1482 VS1.2: vx.2 = memref2 VS1.3
1483 VS1.3: vx.3 = memref3
1485 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1486 VSnew.1: vz1 = vx.1 + ... VSnew.2
1487 VSnew.2: vz2 = vx.2 + ... VSnew.3
1488 VSnew.3: vz3 = vx.3 + ...
1490 The vectorization of S1 is explained in vectorizable_load.
1491 The vectorization of S2:
1492 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1493 the function 'vect_get_vec_def_for_operand' is called to
1494 get the relevant vector-def for each operand of S2. For operand x it
1495 returns the vector-def 'vx.0'.
1497 To create the remaining copies of the vector-stmt (VSnew.j), this
1498 function is called to get the relevant vector-def for each operand. It is
1499 obtained from the respective VS1.j stmt, which is recorded in the
1500 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1502 For example, to obtain the vector-def 'vx.1' in order to create the
1503 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1504 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1505 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1506 and return its def ('vx.1').
1507 Overall, to create the above sequence this function will be called 3 times:
1508 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1509 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1510 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1512 tree
1513 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1515 gimple *vec_stmt_for_operand;
1516 stmt_vec_info def_stmt_info;
1518 /* Do nothing; can reuse same def. */
1519 if (dt == vect_external_def || dt == vect_constant_def )
1520 return vec_oprnd;
1522 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1523 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1524 gcc_assert (def_stmt_info);
1525 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1526 gcc_assert (vec_stmt_for_operand);
1527 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1528 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1529 else
1530 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1531 return vec_oprnd;
1535 /* Get vectorized definitions for the operands to create a copy of an original
1536 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1538 static void
1539 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1540 vec<tree> *vec_oprnds0,
1541 vec<tree> *vec_oprnds1)
1543 tree vec_oprnd = vec_oprnds0->pop ();
1545 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1546 vec_oprnds0->quick_push (vec_oprnd);
1548 if (vec_oprnds1 && vec_oprnds1->length ())
1550 vec_oprnd = vec_oprnds1->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1552 vec_oprnds1->quick_push (vec_oprnd);
1557 /* Get vectorized definitions for OP0 and OP1.
1558 REDUC_INDEX is the index of reduction operand in case of reduction,
1559 and -1 otherwise. */
1561 void
1562 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1563 vec<tree> *vec_oprnds0,
1564 vec<tree> *vec_oprnds1,
1565 slp_tree slp_node, int reduc_index)
1567 if (slp_node)
1569 int nops = (op1 == NULL_TREE) ? 1 : 2;
1570 auto_vec<tree> ops (nops);
1571 auto_vec<vec<tree> > vec_defs (nops);
1573 ops.quick_push (op0);
1574 if (op1)
1575 ops.quick_push (op1);
1577 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1579 *vec_oprnds0 = vec_defs[0];
1580 if (op1)
1581 *vec_oprnds1 = vec_defs[1];
1583 else
1585 tree vec_oprnd;
1587 vec_oprnds0->create (1);
1588 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1589 vec_oprnds0->quick_push (vec_oprnd);
1591 if (op1)
1593 vec_oprnds1->create (1);
1594 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1595 vec_oprnds1->quick_push (vec_oprnd);
1601 /* Function vect_finish_stmt_generation.
1603 Insert a new stmt. */
1605 void
1606 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1607 gimple_stmt_iterator *gsi)
1609 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1610 vec_info *vinfo = stmt_info->vinfo;
1612 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1614 if (!gsi_end_p (*gsi)
1615 && gimple_has_mem_ops (vec_stmt))
1617 gimple *at_stmt = gsi_stmt (*gsi);
1618 tree vuse = gimple_vuse (at_stmt);
1619 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1621 tree vdef = gimple_vdef (at_stmt);
1622 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1623 /* If we have an SSA vuse and insert a store, update virtual
1624 SSA form to avoid triggering the renamer. Do so only
1625 if we can easily see all uses - which is what almost always
1626 happens with the way vectorized stmts are inserted. */
1627 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1628 && ((is_gimple_assign (vec_stmt)
1629 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1630 || (is_gimple_call (vec_stmt)
1631 && !(gimple_call_flags (vec_stmt)
1632 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1634 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1635 gimple_set_vdef (vec_stmt, new_vdef);
1636 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1640 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1642 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1644 if (dump_enabled_p ())
1646 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1650 gimple_set_location (vec_stmt, gimple_location (stmt));
1652 /* While EH edges will generally prevent vectorization, stmt might
1653 e.g. be in a must-not-throw region. Ensure newly created stmts
1654 that could throw are part of the same region. */
1655 int lp_nr = lookup_stmt_eh_lp (stmt);
1656 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1657 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1660 /* We want to vectorize a call to combined function CFN with function
1661 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1662 as the types of all inputs. Check whether this is possible using
1663 an internal function, returning its code if so or IFN_LAST if not. */
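/* For example, a call to sqrtf can be vectorized via IFN_SQRT on a
   four-float vector type, provided the target implements the
   corresponding optab for that vector mode.  */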
1665 static internal_fn
1666 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1667 tree vectype_out, tree vectype_in)
1669 internal_fn ifn;
1670 if (internal_fn_p (cfn))
1671 ifn = as_internal_fn (cfn);
1672 else
1673 ifn = associated_internal_fn (fndecl);
1674 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1676 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1677 if (info.vectorizable)
1679 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1680 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1681 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1682 OPTIMIZE_FOR_SPEED))
1683 return ifn;
1686 return IFN_LAST;
1690 static tree permute_vec_elements (tree, tree, tree, gimple *,
1691 gimple_stmt_iterator *);
1693 /* STMT is a non-strided load or store, meaning that it accesses
1694 elements with a known constant step. Return -1 if that step
1695 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1697 static int
1698 compare_step_with_zero (gimple *stmt)
1700 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1701 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1702 tree step;
1703 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1704 step = STMT_VINFO_DR_STEP (stmt_info);
1705 else
1706 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1707 return tree_int_cst_compare (step, size_zero_node);
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
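/* For example, for a four-element vector the selector built below is
   { 3, 2, 1, 0 }.  */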
1713 static tree
1714 perm_mask_for_reverse (tree vectype)
1716 int i, nunits;
1717 unsigned char *sel;
1719 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1720 sel = XALLOCAVEC (unsigned char, nunits);
1722 for (i = 0; i < nunits; ++i)
1723 sel[i] = nunits - 1 - i;
1725 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1726 return NULL_TREE;
1727 return vect_gen_perm_mask_checked (vectype, sel);
1730 /* A subroutine of get_load_store_type, with a subset of the same
1731 arguments. Handle the case where STMT is part of a grouped load
1732 or store.
1734 For stores, the statements in the group are all consecutive
1735 and there is no gap at the end. For loads, the statements in the
1736 group might not be consecutive; there can be gaps between statements
1737 as well as at the end. */
1739 static bool
1740 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1741 vec_load_store_type vls_type,
1742 vect_memory_access_type *memory_access_type)
1744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1745 vec_info *vinfo = stmt_info->vinfo;
1746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1747 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1748 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1749 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1750 bool single_element_p = (stmt == first_stmt
1751 && !GROUP_NEXT_ELEMENT (stmt_info));
1752 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1753 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1755 /* True if the vectorized statements would access beyond the last
1756 statement in the group. */
1757 bool overrun_p = false;
1759 /* True if we can cope with such overrun by peeling for gaps, so that
1760 there is at least one final scalar iteration after the vector loop. */
1761 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1763 /* There can only be a gap at the end of the group if the stride is
1764 known at compile time. */
1765 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1767 /* Stores can't yet have gaps. */
1768 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1770 if (slp)
1772 if (STMT_VINFO_STRIDED_P (stmt_info))
1774 /* Try to use consecutive accesses of GROUP_SIZE elements,
1775 separated by the stride, until we have a complete vector.
1776 Fall back to scalar accesses if that isn't possible. */
1777 if (nunits % group_size == 0)
1778 *memory_access_type = VMAT_STRIDED_SLP;
1779 else
1780 *memory_access_type = VMAT_ELEMENTWISE;
1782 else
1784 overrun_p = loop_vinfo && gap != 0;
1785 if (overrun_p && vls_type != VLS_LOAD)
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 "Grouped store with gaps requires"
1789 " non-consecutive accesses\n");
1790 return false;
1792 /* If the access is aligned, an overrun is fine. */
1793 if (overrun_p
1794 && aligned_access_p
1795 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1796 overrun_p = false;
1797 if (overrun_p && !can_overrun_p)
1799 if (dump_enabled_p ())
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "Peeling for outer loop is not supported\n");
1802 return false;
1804 *memory_access_type = VMAT_CONTIGUOUS;
1807 else
1809 /* We can always handle this case using elementwise accesses,
1810 but see if something more efficient is available. */
1811 *memory_access_type = VMAT_ELEMENTWISE;
1813 /* If there is a gap at the end of the group then these optimizations
1814 would access excess elements in the last iteration. */
1815 bool would_overrun_p = (gap != 0);
1816 /* If the access is aligned, an overrun is fine, but only if the
1817 overrun is not inside an unused vector (i.e. if the gap is as large
1818 as or larger than a vector). */
1819 if (would_overrun_p
1820 && gap < nunits
1821 && aligned_access_p
1822 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1823 would_overrun_p = false;
1824 if (!STMT_VINFO_STRIDED_P (stmt_info)
1825 && (can_overrun_p || !would_overrun_p)
1826 && compare_step_with_zero (stmt) > 0)
1828 /* First try using LOAD/STORE_LANES. */
1829 if (vls_type == VLS_LOAD
1830 ? vect_load_lanes_supported (vectype, group_size)
1831 : vect_store_lanes_supported (vectype, group_size))
1833 *memory_access_type = VMAT_LOAD_STORE_LANES;
1834 overrun_p = would_overrun_p;
1837 /* If that fails, try using permuting loads. */
1838 if (*memory_access_type == VMAT_ELEMENTWISE
1839 && (vls_type == VLS_LOAD
1840 ? vect_grouped_load_supported (vectype, single_element_p,
1841 group_size)
1842 : vect_grouped_store_supported (vectype, group_size)))
1844 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1845 overrun_p = would_overrun_p;
1850 if (vls_type != VLS_LOAD && first_stmt == stmt)
1852 /* STMT is the leader of the group. Check the operands of all the
1853 stmts of the group. */
1854 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1855 while (next_stmt)
1857 gcc_assert (gimple_assign_single_p (next_stmt));
1858 tree op = gimple_assign_rhs1 (next_stmt);
1859 gimple *def_stmt;
1860 enum vect_def_type dt;
1861 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1865 "use not simple.\n");
1866 return false;
1868 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1872 if (overrun_p)
1874 gcc_assert (can_overrun_p);
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1877 "Data access with gaps requires scalar "
1878 "epilogue loop\n");
1879 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1882 return true;
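/* Editor's note: a hypothetical example of the non-SLP grouped case handled
   above.  Assuming a scalar loop such as

     for (i = 0; i < n; i++)
       {
         x = a[2 * i];        // first statement of the load group
         y = a[2 * i + 1];    // GROUP_SIZE == 2, no gap at the end
       }

   with a positive step and no gap, the preference order implemented above
   is VMAT_LOAD_STORE_LANES if the target supports load/store-lanes for this
   vector type and group size, then VMAT_CONTIGUOUS_PERMUTE, and
   VMAT_ELEMENTWISE as the fallback.  */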
1885 /* A subroutine of get_load_store_type, with a subset of the same
1886 arguments. Handle the case where STMT is a load or store that
1887 accesses consecutive elements with a negative step. */
1889 static vect_memory_access_type
1890 get_negative_load_store_type (gimple *stmt, tree vectype,
1891 vec_load_store_type vls_type,
1892 unsigned int ncopies)
1894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1895 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1896 dr_alignment_support alignment_support_scheme;
1898 if (ncopies > 1)
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1902 "multiple types with negative step.\n");
1903 return VMAT_ELEMENTWISE;
1906 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1907 if (alignment_support_scheme != dr_aligned
1908 && alignment_support_scheme != dr_unaligned_supported)
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 "negative step but alignment required.\n");
1913 return VMAT_ELEMENTWISE;
1916 if (vls_type == VLS_STORE_INVARIANT)
1918 if (dump_enabled_p ())
1919 dump_printf_loc (MSG_NOTE, vect_location,
1920 "negative step with invariant source;"
1921 " no permute needed.\n");
1922 return VMAT_CONTIGUOUS_DOWN;
1925 if (!perm_mask_for_reverse (vectype))
1927 if (dump_enabled_p ())
1928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1929 "negative step and reversing not supported.\n");
1930 return VMAT_ELEMENTWISE;
1933 return VMAT_CONTIGUOUS_REVERSE;
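/* Editor's note (illustrative): a negative step typically comes from a
   scalar access such as a[n - 1 - i] inside a forward-counting loop.  For a
   single-copy, sufficiently aligned access the memory reference is handled
   as a normal contiguous vector access whose lanes are then reversed with
   the mask from perm_mask_for_reverse (VMAT_CONTIGUOUS_REVERSE); a store of
   an invariant value needs no reverse (VMAT_CONTIGUOUS_DOWN).  */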
1936 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1937 if there is a memory access type that the vectorized form can use,
1938 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1939 or scatters, fill in GS_INFO accordingly.
1941 SLP says whether we're performing SLP rather than loop vectorization.
1942 VECTYPE is the vector type that the vectorized statements will use.
1943 NCOPIES is the number of vector statements that will be needed. */
1945 static bool
1946 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1947 vec_load_store_type vls_type, unsigned int ncopies,
1948 vect_memory_access_type *memory_access_type,
1949 gather_scatter_info *gs_info)
1951 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1952 vec_info *vinfo = stmt_info->vinfo;
1953 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1954 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1956 *memory_access_type = VMAT_GATHER_SCATTER;
1957 gimple *def_stmt;
1958 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1959 gcc_unreachable ();
1960 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1961 &gs_info->offset_dt,
1962 &gs_info->offset_vectype))
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "%s index use not simple.\n",
1967 vls_type == VLS_LOAD ? "gather" : "scatter");
1968 return false;
1971 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1973 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1974 memory_access_type))
1975 return false;
1977 else if (STMT_VINFO_STRIDED_P (stmt_info))
1979 gcc_assert (!slp);
1980 *memory_access_type = VMAT_ELEMENTWISE;
1982 else
1984 int cmp = compare_step_with_zero (stmt);
1985 if (cmp < 0)
1986 *memory_access_type = get_negative_load_store_type
1987 (stmt, vectype, vls_type, ncopies);
1988 else if (cmp == 0)
1990 gcc_assert (vls_type == VLS_LOAD);
1991 *memory_access_type = VMAT_INVARIANT;
1993 else
1994 *memory_access_type = VMAT_CONTIGUOUS;
1997 /* FIXME: At the moment the cost model seems to underestimate the
1998 cost of using elementwise accesses. This check preserves the
1999 traditional behavior until that can be fixed. */
2000 if (*memory_access_type == VMAT_ELEMENTWISE
2001 && !STMT_VINFO_STRIDED_P (stmt_info))
2003 if (dump_enabled_p ())
2004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2005 "not falling back to elementwise accesses\n");
2006 return false;
2008 return true;
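/* Editor's note: summarizing the classification above -- gather/scatter
   accesses become VMAT_GATHER_SCATTER, grouped accesses are dispatched to
   get_group_load_store_type, non-grouped strided accesses become
   VMAT_ELEMENTWISE, and otherwise the sign of the step decides: e.g. a load
   from a[j] with loop-invariant j has step zero and is VMAT_INVARIANT,
   while a unit-stride a[i] is VMAT_CONTIGUOUS and a negative step goes to
   get_negative_load_store_type.  */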
2011 /* Function vectorizable_mask_load_store.
2013 Check if STMT performs a conditional load or store that can be vectorized.
2014 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2015 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2016 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2018 static bool
2019 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2020 gimple **vec_stmt, slp_tree slp_node)
2022 tree vec_dest = NULL;
2023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2024 stmt_vec_info prev_stmt_info;
2025 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2026 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2027 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2028 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2029 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2030 tree rhs_vectype = NULL_TREE;
2031 tree mask_vectype;
2032 tree elem_type;
2033 gimple *new_stmt;
2034 tree dummy;
2035 tree dataref_ptr = NULL_TREE;
2036 gimple *ptr_incr;
2037 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2038 int ncopies;
2039 int i, j;
2040 bool inv_p;
2041 gather_scatter_info gs_info;
2042 vec_load_store_type vls_type;
2043 tree mask;
2044 gimple *def_stmt;
2045 enum vect_def_type dt;
2047 if (slp_node != NULL)
2048 return false;
2050 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2051 gcc_assert (ncopies >= 1);
2053 mask = gimple_call_arg (stmt, 2);
2055 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2056 return false;
2058 /* FORNOW. This restriction should be relaxed. */
2059 if (nested_in_vect_loop && ncopies > 1)
2061 if (dump_enabled_p ())
2062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2063 "multiple types in nested loop.");
2064 return false;
2067 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2068 return false;
2070 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2071 && ! vec_stmt)
2072 return false;
2074 if (!STMT_VINFO_DATA_REF (stmt_info))
2075 return false;
2077 elem_type = TREE_TYPE (vectype);
2079 if (TREE_CODE (mask) != SSA_NAME)
2080 return false;
2082 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2083 return false;
2085 if (!mask_vectype)
2086 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2088 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2089 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2090 return false;
2092 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2094 tree rhs = gimple_call_arg (stmt, 3);
2095 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2096 return false;
2097 if (dt == vect_constant_def || dt == vect_external_def)
2098 vls_type = VLS_STORE_INVARIANT;
2099 else
2100 vls_type = VLS_STORE;
2102 else
2103 vls_type = VLS_LOAD;
2105 vect_memory_access_type memory_access_type;
2106 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2107 &memory_access_type, &gs_info))
2108 return false;
2110 if (memory_access_type == VMAT_GATHER_SCATTER)
2112 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2113 tree masktype
2114 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2115 if (TREE_CODE (masktype) == INTEGER_TYPE)
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2119 "masked gather with integer mask not supported.");
2120 return false;
2123 else if (memory_access_type != VMAT_CONTIGUOUS)
2125 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2127 "unsupported access type for masked %s.\n",
2128 vls_type == VLS_LOAD ? "load" : "store");
2129 return false;
2131 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2132 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2133 TYPE_MODE (mask_vectype),
2134 vls_type == VLS_LOAD)
2135 || (rhs_vectype
2136 && !useless_type_conversion_p (vectype, rhs_vectype)))
2137 return false;
2139 if (!vec_stmt) /* transformation not required. */
2141 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2142 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2143 if (vls_type == VLS_LOAD)
2144 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2145 NULL, NULL, NULL);
2146 else
2147 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2148 dt, NULL, NULL, NULL);
2149 return true;
2151 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2153 /* Transform. */
2155 if (memory_access_type == VMAT_GATHER_SCATTER)
2157 tree vec_oprnd0 = NULL_TREE, op;
2158 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2159 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2160 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2161 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2162 tree mask_perm_mask = NULL_TREE;
2163 edge pe = loop_preheader_edge (loop);
2164 gimple_seq seq;
2165 basic_block new_bb;
2166 enum { NARROW, NONE, WIDEN } modifier;
2167 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2169 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2170 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2171 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2172 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2173 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2174 scaletype = TREE_VALUE (arglist);
2175 gcc_checking_assert (types_compatible_p (srctype, rettype)
2176 && types_compatible_p (srctype, masktype));
2178 if (nunits == gather_off_nunits)
2179 modifier = NONE;
2180 else if (nunits == gather_off_nunits / 2)
2182 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2183 modifier = WIDEN;
2185 for (i = 0; i < gather_off_nunits; ++i)
2186 sel[i] = i | nunits;
2188 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2190 else if (nunits == gather_off_nunits * 2)
2192 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2193 modifier = NARROW;
2195 for (i = 0; i < nunits; ++i)
2196 sel[i] = i < gather_off_nunits
2197 ? i : i + nunits - gather_off_nunits;
2199 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2200 ncopies *= 2;
2201 for (i = 0; i < nunits; ++i)
2202 sel[i] = i | gather_off_nunits;
2203 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2205 else
2206 gcc_unreachable ();
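/* Editor's note (illustrative): with NUNITS == 4 and GATHER_OFF_NUNITS == 8
   (the WIDEN case above), the selector built from i | nunits is
   { 4, 5, 6, 7, 4, 5, 6, 7 }, so the permute applied on odd-numbered copies
   moves the upper half of the offset vector into the low lanes.  In the
   NARROW case the number of copies is doubled and pairs of half-width
   gather results are later combined with PERM_MASK.  */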
2208 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2210 ptr = fold_convert (ptrtype, gs_info.base);
2211 if (!is_gimple_min_invariant (ptr))
2213 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2214 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2215 gcc_assert (!new_bb);
2218 scale = build_int_cst (scaletype, gs_info.scale);
2220 prev_stmt_info = NULL;
2221 for (j = 0; j < ncopies; ++j)
2223 if (modifier == WIDEN && (j & 1))
2224 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2225 perm_mask, stmt, gsi);
2226 else if (j == 0)
2227 op = vec_oprnd0
2228 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2229 else
2230 op = vec_oprnd0
2231 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2233 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2235 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2236 == TYPE_VECTOR_SUBPARTS (idxtype));
2237 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2238 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2239 new_stmt
2240 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2241 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2242 op = var;
2245 if (mask_perm_mask && (j & 1))
2246 mask_op = permute_vec_elements (mask_op, mask_op,
2247 mask_perm_mask, stmt, gsi);
2248 else
2250 if (j == 0)
2251 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2252 else
2254 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2255 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2258 mask_op = vec_mask;
2259 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2261 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2262 == TYPE_VECTOR_SUBPARTS (masktype));
2263 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2264 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2265 new_stmt
2266 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2267 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2268 mask_op = var;
2272 new_stmt
2273 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2274 scale);
2276 if (!useless_type_conversion_p (vectype, rettype))
2278 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2279 == TYPE_VECTOR_SUBPARTS (rettype));
2280 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2281 gimple_call_set_lhs (new_stmt, op);
2282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2283 var = make_ssa_name (vec_dest);
2284 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2285 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2287 else
2289 var = make_ssa_name (vec_dest, new_stmt);
2290 gimple_call_set_lhs (new_stmt, var);
2293 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2295 if (modifier == NARROW)
2297 if ((j & 1) == 0)
2299 prev_res = var;
2300 continue;
2302 var = permute_vec_elements (prev_res, var,
2303 perm_mask, stmt, gsi);
2304 new_stmt = SSA_NAME_DEF_STMT (var);
2307 if (prev_stmt_info == NULL)
2308 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2309 else
2310 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2311 prev_stmt_info = vinfo_for_stmt (new_stmt);
2314 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2315 from the IL. */
2316 if (STMT_VINFO_RELATED_STMT (stmt_info))
2318 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2319 stmt_info = vinfo_for_stmt (stmt);
2321 tree lhs = gimple_call_lhs (stmt);
2322 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2323 set_vinfo_for_stmt (new_stmt, stmt_info);
2324 set_vinfo_for_stmt (stmt, NULL);
2325 STMT_VINFO_STMT (stmt_info) = new_stmt;
2326 gsi_replace (gsi, new_stmt, true);
2327 return true;
2329 else if (vls_type != VLS_LOAD)
2331 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2332 prev_stmt_info = NULL;
2333 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2334 for (i = 0; i < ncopies; i++)
2336 unsigned align, misalign;
2338 if (i == 0)
2340 tree rhs = gimple_call_arg (stmt, 3);
2341 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2342 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2343 /* We should have caught mismatched types earlier. */
2344 gcc_assert (useless_type_conversion_p (vectype,
2345 TREE_TYPE (vec_rhs)));
2346 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2347 NULL_TREE, &dummy, gsi,
2348 &ptr_incr, false, &inv_p);
2349 gcc_assert (!inv_p);
2351 else
2353 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2354 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2355 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2356 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2357 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2358 TYPE_SIZE_UNIT (vectype));
2361 align = TYPE_ALIGN_UNIT (vectype);
2362 if (aligned_access_p (dr))
2363 misalign = 0;
2364 else if (DR_MISALIGNMENT (dr) == -1)
2366 align = TYPE_ALIGN_UNIT (elem_type);
2367 misalign = 0;
2369 else
2370 misalign = DR_MISALIGNMENT (dr);
2371 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2372 misalign);
2373 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2374 misalign ? least_bit_hwi (misalign) : align);
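/* Editor's note (illustrative): the constant built above becomes the second
   argument of the IFN_MASK_STORE call below and records the alignment that
   can be assumed for DATAREF_PTR.  When the misalignment is known but
   nonzero, only its least set bit can be guaranteed, e.g.
   least_bit_hwi (12) == 4; a fully aligned access passes ALIGN
   (TYPE_ALIGN_UNIT of the vector type) itself.  */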
2375 new_stmt
2376 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2377 ptr, vec_mask, vec_rhs);
2378 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2379 if (i == 0)
2380 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2381 else
2382 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2383 prev_stmt_info = vinfo_for_stmt (new_stmt);
2386 else
2388 tree vec_mask = NULL_TREE;
2389 prev_stmt_info = NULL;
2390 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2391 for (i = 0; i < ncopies; i++)
2393 unsigned align, misalign;
2395 if (i == 0)
2397 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2398 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2399 NULL_TREE, &dummy, gsi,
2400 &ptr_incr, false, &inv_p);
2401 gcc_assert (!inv_p);
2403 else
2405 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2406 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2407 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2408 TYPE_SIZE_UNIT (vectype));
2411 align = TYPE_ALIGN_UNIT (vectype);
2412 if (aligned_access_p (dr))
2413 misalign = 0;
2414 else if (DR_MISALIGNMENT (dr) == -1)
2416 align = TYPE_ALIGN_UNIT (elem_type);
2417 misalign = 0;
2419 else
2420 misalign = DR_MISALIGNMENT (dr);
2421 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2422 misalign);
2423 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2424 misalign ? least_bit_hwi (misalign) : align);
2425 new_stmt
2426 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2427 ptr, vec_mask);
2428 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2429 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2430 if (i == 0)
2431 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2432 else
2433 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2434 prev_stmt_info = vinfo_for_stmt (new_stmt);
2438 if (vls_type == VLS_LOAD)
2440 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2441 from the IL. */
2442 if (STMT_VINFO_RELATED_STMT (stmt_info))
2444 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2445 stmt_info = vinfo_for_stmt (stmt);
2447 tree lhs = gimple_call_lhs (stmt);
2448 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2449 set_vinfo_for_stmt (new_stmt, stmt_info);
2450 set_vinfo_for_stmt (stmt, NULL);
2451 STMT_VINFO_STMT (stmt_info) = new_stmt;
2452 gsi_replace (gsi, new_stmt, true);
2455 return true;
2458 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2460 static bool
2461 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2462 gimple **vec_stmt, slp_tree slp_node,
2463 tree vectype_in, enum vect_def_type *dt)
2465 tree op, vectype;
2466 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2467 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2468 unsigned ncopies, nunits;
2470 op = gimple_call_arg (stmt, 0);
2471 vectype = STMT_VINFO_VECTYPE (stmt_info);
2472 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2474 /* Multiple types in SLP are handled by creating the appropriate number of
2475 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2476 case of SLP. */
2477 if (slp_node)
2478 ncopies = 1;
2479 else
2480 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2482 gcc_assert (ncopies >= 1);
2484 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2485 if (! char_vectype)
2486 return false;
2488 unsigned char *elts
2489 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2490 unsigned char *elt = elts;
2491 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2492 for (unsigned i = 0; i < nunits; ++i)
2493 for (unsigned j = 0; j < word_bytes; ++j)
2494 *elt++ = (i + 1) * word_bytes - j - 1;
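/* Editor's note (illustrative): for __builtin_bswap32 with a V4SI vectype
   and a V16QI CHAR_VECTYPE, WORD_BYTES == 4 and the byte selector built
   above is { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 },
   i.e. the bytes are reversed within each 4-byte word while the words keep
   their positions.  */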
2496 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2497 return false;
2499 if (! vec_stmt)
2501 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2502 if (dump_enabled_p ())
2503 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2504 "\n");
2505 if (! PURE_SLP_STMT (stmt_info))
2507 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2508 1, vector_stmt, stmt_info, 0, vect_prologue);
2509 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2510 ncopies, vec_perm, stmt_info, 0, vect_body);
2512 return true;
2515 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2516 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2517 telts[i] = build_int_cst (char_type_node, elts[i]);
2518 tree bswap_vconst = build_vector (char_vectype, telts);
2520 /* Transform. */
2521 vec<tree> vec_oprnds = vNULL;
2522 gimple *new_stmt = NULL;
2523 stmt_vec_info prev_stmt_info = NULL;
2524 for (unsigned j = 0; j < ncopies; j++)
2526 /* Handle uses. */
2527 if (j == 0)
2528 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2529 else
2530 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2532 /* Arguments are ready. Create the new vector stmt. */
2533 unsigned i;
2534 tree vop;
2535 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2537 tree tem = make_ssa_name (char_vectype);
2538 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2539 char_vectype, vop));
2540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2541 tree tem2 = make_ssa_name (char_vectype);
2542 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2543 tem, tem, bswap_vconst);
2544 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2545 tem = make_ssa_name (vectype);
2546 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2547 vectype, tem2));
2548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2549 if (slp_node)
2550 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2553 if (slp_node)
2554 continue;
2556 if (j == 0)
2557 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2558 else
2559 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2561 prev_stmt_info = vinfo_for_stmt (new_stmt);
2564 vec_oprnds.release ();
2565 return true;
2568 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2569 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2570 in a single step. On success, store the binary pack code in
2571 *CONVERT_CODE. */
2573 static bool
2574 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2575 tree_code *convert_code)
2577 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2578 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2579 return false;
2581 tree_code code;
2582 int multi_step_cvt = 0;
2583 auto_vec <tree, 8> interm_types;
2584 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2585 &code, &multi_step_cvt,
2586 &interm_types)
2587 || multi_step_cvt)
2588 return false;
2590 *convert_code = code;
2591 return true;
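/* Editor's note (illustrative): with VECTYPE_IN == V4SI and
   VECTYPE_OUT == V8HI the narrowing is a single step and *CONVERT_CODE is
   the pack code returned by supportable_narrowing_operation (typically
   VEC_PACK_TRUNC_EXPR on targets that provide it); a two-step narrowing
   such as int to char makes MULTI_STEP_CVT nonzero and is rejected here.  */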
2594 /* Function vectorizable_call.
2596 Check if GS performs a function call that can be vectorized.
2597 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2598 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2599 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2601 static bool
2602 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2603 slp_tree slp_node)
2605 gcall *stmt;
2606 tree vec_dest;
2607 tree scalar_dest;
2608 tree op, type;
2609 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2610 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2611 tree vectype_out, vectype_in;
2612 int nunits_in;
2613 int nunits_out;
2614 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2615 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2616 vec_info *vinfo = stmt_info->vinfo;
2617 tree fndecl, new_temp, rhs_type;
2618 gimple *def_stmt;
2619 enum vect_def_type dt[3]
2620 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2621 int ndts = 3;
2622 gimple *new_stmt = NULL;
2623 int ncopies, j;
2624 vec<tree> vargs = vNULL;
2625 enum { NARROW, NONE, WIDEN } modifier;
2626 size_t i, nargs;
2627 tree lhs;
2629 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2630 return false;
2632 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2633 && ! vec_stmt)
2634 return false;
2636 /* Is GS a vectorizable call? */
2637 stmt = dyn_cast <gcall *> (gs);
2638 if (!stmt)
2639 return false;
2641 if (gimple_call_internal_p (stmt)
2642 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2643 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2644 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2645 slp_node);
2647 if (gimple_call_lhs (stmt) == NULL_TREE
2648 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2649 return false;
2651 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2653 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2655 /* Process function arguments. */
2656 rhs_type = NULL_TREE;
2657 vectype_in = NULL_TREE;
2658 nargs = gimple_call_num_args (stmt);
2660 /* Bail out if the function has more than three arguments; we do not have
2661 interesting builtin functions to vectorize with more than two arguments
2662 except for fma. A call with no arguments is not handled either. */
2663 if (nargs == 0 || nargs > 3)
2664 return false;
2666 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2667 if (gimple_call_internal_p (stmt)
2668 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2670 nargs = 0;
2671 rhs_type = unsigned_type_node;
2674 for (i = 0; i < nargs; i++)
2676 tree opvectype;
2678 op = gimple_call_arg (stmt, i);
2680 /* We can only handle calls with arguments of the same type. */
2681 if (rhs_type
2682 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2684 if (dump_enabled_p ())
2685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2686 "argument types differ.\n");
2687 return false;
2689 if (!rhs_type)
2690 rhs_type = TREE_TYPE (op);
2692 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2694 if (dump_enabled_p ())
2695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2696 "use not simple.\n");
2697 return false;
2700 if (!vectype_in)
2701 vectype_in = opvectype;
2702 else if (opvectype
2703 && opvectype != vectype_in)
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2707 "argument vector types differ.\n");
2708 return false;
2711 /* If all arguments are external or constant defs, use a vector type with
2712 the same size as the output vector type. */
2713 if (!vectype_in)
2714 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2715 if (vec_stmt)
2716 gcc_assert (vectype_in);
2717 if (!vectype_in)
2719 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2722 "no vectype for scalar type ");
2723 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2724 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2727 return false;
2730 /* FORNOW */
2731 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2732 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2733 if (nunits_in == nunits_out / 2)
2734 modifier = NARROW;
2735 else if (nunits_out == nunits_in)
2736 modifier = NONE;
2737 else if (nunits_out == nunits_in / 2)
2738 modifier = WIDEN;
2739 else
2740 return false;
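/* Editor's note (illustrative): e.g. NUNITS_IN == 4 and NUNITS_OUT == 8
   (say V4SI arguments and a V8HI result) gives MODIFIER == NARROW, equal
   element counts give NONE, and NUNITS_OUT == NUNITS_IN / 2 gives WIDEN;
   any other ratio is rejected above.  */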
2742 /* We only handle functions that do not read or clobber memory. */
2743 if (gimple_vuse (stmt))
2745 if (dump_enabled_p ())
2746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2747 "function reads from or writes to memory.\n");
2748 return false;
2751 /* For now, we only vectorize functions if a target-specific builtin
2752 is available. TODO -- in some cases, it might be profitable
2753 insert the calls for pieces of the vector, in order to be able
2754 to vectorize other operations in the loop. */
2755 fndecl = NULL_TREE;
2756 internal_fn ifn = IFN_LAST;
2757 combined_fn cfn = gimple_call_combined_fn (stmt);
2758 tree callee = gimple_call_fndecl (stmt);
2760 /* First try using an internal function. */
2761 tree_code convert_code = ERROR_MARK;
2762 if (cfn != CFN_LAST
2763 && (modifier == NONE
2764 || (modifier == NARROW
2765 && simple_integer_narrowing (vectype_out, vectype_in,
2766 &convert_code))))
2767 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2768 vectype_in);
2770 /* If that fails, try asking for a target-specific built-in function. */
2771 if (ifn == IFN_LAST)
2773 if (cfn != CFN_LAST)
2774 fndecl = targetm.vectorize.builtin_vectorized_function
2775 (cfn, vectype_out, vectype_in);
2776 else
2777 fndecl = targetm.vectorize.builtin_md_vectorized_function
2778 (callee, vectype_out, vectype_in);
2781 if (ifn == IFN_LAST && !fndecl)
2783 if (cfn == CFN_GOMP_SIMD_LANE
2784 && !slp_node
2785 && loop_vinfo
2786 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2787 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2788 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2789 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2791 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2792 { 0, 1, 2, ... vf - 1 } vector. */
2793 gcc_assert (nargs == 0);
2795 else if (modifier == NONE
2796 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2797 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2798 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2799 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2800 vectype_in, dt);
2801 else
2803 if (dump_enabled_p ())
2804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2805 "function is not vectorizable.\n");
2806 return false;
2810 if (slp_node)
2811 ncopies = 1;
2812 else if (modifier == NARROW && ifn == IFN_LAST)
2813 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2814 else
2815 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2817 /* Sanity check: make sure that at least one copy of the vectorized stmt
2818 needs to be generated. */
2819 gcc_assert (ncopies >= 1);
2821 if (!vec_stmt) /* transformation not required. */
2823 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2824 if (dump_enabled_p ())
2825 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2826 "\n");
2827 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2828 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2829 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2830 vec_promote_demote, stmt_info, 0, vect_body);
2832 return true;
2835 /* Transform. */
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2840 /* Handle def. */
2841 scalar_dest = gimple_call_lhs (stmt);
2842 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2844 prev_stmt_info = NULL;
2845 if (modifier == NONE || ifn != IFN_LAST)
2847 tree prev_res = NULL_TREE;
2848 for (j = 0; j < ncopies; ++j)
2850 /* Build argument list for the vectorized call. */
2851 if (j == 0)
2852 vargs.create (nargs);
2853 else
2854 vargs.truncate (0);
2856 if (slp_node)
2858 auto_vec<vec<tree> > vec_defs (nargs);
2859 vec<tree> vec_oprnds0;
2861 for (i = 0; i < nargs; i++)
2862 vargs.quick_push (gimple_call_arg (stmt, i));
2863 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2864 vec_oprnds0 = vec_defs[0];
2866 /* Arguments are ready. Create the new vector stmt. */
2867 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2869 size_t k;
2870 for (k = 0; k < nargs; k++)
2872 vec<tree> vec_oprndsk = vec_defs[k];
2873 vargs[k] = vec_oprndsk[i];
2875 if (modifier == NARROW)
2877 tree half_res = make_ssa_name (vectype_in);
2878 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2879 gimple_call_set_lhs (new_stmt, half_res);
2880 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2881 if ((i & 1) == 0)
2883 prev_res = half_res;
2884 continue;
2886 new_temp = make_ssa_name (vec_dest);
2887 new_stmt = gimple_build_assign (new_temp, convert_code,
2888 prev_res, half_res);
2890 else
2892 if (ifn != IFN_LAST)
2893 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2894 else
2895 new_stmt = gimple_build_call_vec (fndecl, vargs);
2896 new_temp = make_ssa_name (vec_dest, new_stmt);
2897 gimple_call_set_lhs (new_stmt, new_temp);
2899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2900 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2903 for (i = 0; i < nargs; i++)
2905 vec<tree> vec_oprndsi = vec_defs[i];
2906 vec_oprndsi.release ();
2908 continue;
2911 for (i = 0; i < nargs; i++)
2913 op = gimple_call_arg (stmt, i);
2914 if (j == 0)
2915 vec_oprnd0
2916 = vect_get_vec_def_for_operand (op, stmt);
2917 else
2919 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2920 vec_oprnd0
2921 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2924 vargs.quick_push (vec_oprnd0);
2927 if (gimple_call_internal_p (stmt)
2928 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2930 tree *v = XALLOCAVEC (tree, nunits_out);
2931 int k;
2932 for (k = 0; k < nunits_out; ++k)
2933 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2934 tree cst = build_vector (vectype_out, v);
2935 tree new_var
2936 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2937 gimple *init_stmt = gimple_build_assign (new_var, cst);
2938 vect_init_vector_1 (stmt, init_stmt, NULL);
2939 new_temp = make_ssa_name (vec_dest);
2940 new_stmt = gimple_build_assign (new_temp, new_var);
2942 else if (modifier == NARROW)
2944 tree half_res = make_ssa_name (vectype_in);
2945 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2946 gimple_call_set_lhs (new_stmt, half_res);
2947 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2948 if ((j & 1) == 0)
2950 prev_res = half_res;
2951 continue;
2953 new_temp = make_ssa_name (vec_dest);
2954 new_stmt = gimple_build_assign (new_temp, convert_code,
2955 prev_res, half_res);
2957 else
2959 if (ifn != IFN_LAST)
2960 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2961 else
2962 new_stmt = gimple_build_call_vec (fndecl, vargs);
2963 new_temp = make_ssa_name (vec_dest, new_stmt);
2964 gimple_call_set_lhs (new_stmt, new_temp);
2966 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2968 if (j == (modifier == NARROW ? 1 : 0))
2969 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2970 else
2971 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2973 prev_stmt_info = vinfo_for_stmt (new_stmt);
2976 else if (modifier == NARROW)
2978 for (j = 0; j < ncopies; ++j)
2980 /* Build argument list for the vectorized call. */
2981 if (j == 0)
2982 vargs.create (nargs * 2);
2983 else
2984 vargs.truncate (0);
2986 if (slp_node)
2988 auto_vec<vec<tree> > vec_defs (nargs);
2989 vec<tree> vec_oprnds0;
2991 for (i = 0; i < nargs; i++)
2992 vargs.quick_push (gimple_call_arg (stmt, i));
2993 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2994 vec_oprnds0 = vec_defs[0];
2996 /* Arguments are ready. Create the new vector stmt. */
2997 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2999 size_t k;
3000 vargs.truncate (0);
3001 for (k = 0; k < nargs; k++)
3003 vec<tree> vec_oprndsk = vec_defs[k];
3004 vargs.quick_push (vec_oprndsk[i]);
3005 vargs.quick_push (vec_oprndsk[i + 1]);
3007 if (ifn != IFN_LAST)
3008 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
3009 else
3010 new_stmt = gimple_build_call_vec (fndecl, vargs);
3011 new_temp = make_ssa_name (vec_dest, new_stmt);
3012 gimple_call_set_lhs (new_stmt, new_temp);
3013 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3014 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3017 for (i = 0; i < nargs; i++)
3019 vec<tree> vec_oprndsi = vec_defs[i];
3020 vec_oprndsi.release ();
3022 continue;
3025 for (i = 0; i < nargs; i++)
3027 op = gimple_call_arg (stmt, i);
3028 if (j == 0)
3030 vec_oprnd0
3031 = vect_get_vec_def_for_operand (op, stmt);
3032 vec_oprnd1
3033 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3035 else
3037 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3038 vec_oprnd0
3039 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3040 vec_oprnd1
3041 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3044 vargs.quick_push (vec_oprnd0);
3045 vargs.quick_push (vec_oprnd1);
3048 new_stmt = gimple_build_call_vec (fndecl, vargs);
3049 new_temp = make_ssa_name (vec_dest, new_stmt);
3050 gimple_call_set_lhs (new_stmt, new_temp);
3051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3053 if (j == 0)
3054 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3055 else
3056 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3058 prev_stmt_info = vinfo_for_stmt (new_stmt);
3061 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3063 else
3064 /* No current target implements this case. */
3065 return false;
3067 vargs.release ();
3069 /* The call in STMT might prevent it from being removed in dce.
3070 However, we cannot remove it here, due to the way the ssa name
3071 it defines is mapped to the new definition. So just replace the
3072 rhs of the statement with something harmless. */
3074 if (slp_node)
3075 return true;
3077 type = TREE_TYPE (scalar_dest);
3078 if (is_pattern_stmt_p (stmt_info))
3079 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3080 else
3081 lhs = gimple_call_lhs (stmt);
3083 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3084 set_vinfo_for_stmt (new_stmt, stmt_info);
3085 set_vinfo_for_stmt (stmt, NULL);
3086 STMT_VINFO_STMT (stmt_info) = new_stmt;
3087 gsi_replace (gsi, new_stmt, false);
3089 return true;
3093 struct simd_call_arg_info
3095 tree vectype;
3096 tree op;
3097 HOST_WIDE_INT linear_step;
3098 enum vect_def_type dt;
3099 unsigned int align;
3100 bool simd_lane_linear;
3103 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3104 is linear within a simd lane (but not within the whole loop), note it in
3105 *ARGINFO. */
3107 static void
3108 vect_simd_lane_linear (tree op, struct loop *loop,
3109 struct simd_call_arg_info *arginfo)
3111 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3113 if (!is_gimple_assign (def_stmt)
3114 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3115 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3116 return;
3118 tree base = gimple_assign_rhs1 (def_stmt);
3119 HOST_WIDE_INT linear_step = 0;
3120 tree v = gimple_assign_rhs2 (def_stmt);
3121 while (TREE_CODE (v) == SSA_NAME)
3123 tree t;
3124 def_stmt = SSA_NAME_DEF_STMT (v);
3125 if (is_gimple_assign (def_stmt))
3126 switch (gimple_assign_rhs_code (def_stmt))
3128 case PLUS_EXPR:
3129 t = gimple_assign_rhs2 (def_stmt);
3130 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3131 return;
3132 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3133 v = gimple_assign_rhs1 (def_stmt);
3134 continue;
3135 case MULT_EXPR:
3136 t = gimple_assign_rhs2 (def_stmt);
3137 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3138 return;
3139 linear_step = tree_to_shwi (t);
3140 v = gimple_assign_rhs1 (def_stmt);
3141 continue;
3142 CASE_CONVERT:
3143 t = gimple_assign_rhs1 (def_stmt);
3144 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3145 || (TYPE_PRECISION (TREE_TYPE (v))
3146 < TYPE_PRECISION (TREE_TYPE (t))))
3147 return;
3148 if (!linear_step)
3149 linear_step = 1;
3150 v = t;
3151 continue;
3152 default:
3153 return;
3155 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3156 && loop->simduid
3157 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3158 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3159 == loop->simduid))
3161 if (!linear_step)
3162 linear_step = 1;
3163 arginfo->linear_step = linear_step;
3164 arginfo->op = base;
3165 arginfo->simd_lane_linear = true;
3166 return;
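/* Editor's note: a hypothetical gimple sequence that the walk above
   recognizes, assuming LOOP->simduid is simduid.0 and a is a file-scope
   array (so &a is invariant):

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &a + _3;

   Starting from p_4, the POINTER_PLUS_EXPR gives BASE == &a, the MULT_EXPR
   gives LINEAR_STEP == 4, and the IFN_GOMP_SIMD_LANE call terminates the
   walk, so *ARGINFO records op == &a, linear_step == 4 and
   simd_lane_linear == true.  */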
3171 /* Function vectorizable_simd_clone_call.
3173 Check if STMT performs a function call that can be vectorized
3174 by calling a simd clone of the function.
3175 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3176 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3177 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3179 static bool
3180 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3181 gimple **vec_stmt, slp_tree slp_node)
3183 tree vec_dest;
3184 tree scalar_dest;
3185 tree op, type;
3186 tree vec_oprnd0 = NULL_TREE;
3187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3188 tree vectype;
3189 unsigned int nunits;
3190 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3191 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3192 vec_info *vinfo = stmt_info->vinfo;
3193 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3194 tree fndecl, new_temp;
3195 gimple *def_stmt;
3196 gimple *new_stmt = NULL;
3197 int ncopies, j;
3198 auto_vec<simd_call_arg_info> arginfo;
3199 vec<tree> vargs = vNULL;
3200 size_t i, nargs;
3201 tree lhs, rtype, ratype;
3202 vec<constructor_elt, va_gc> *ret_ctor_elts;
3204 /* Is STMT a vectorizable call? */
3205 if (!is_gimple_call (stmt))
3206 return false;
3208 fndecl = gimple_call_fndecl (stmt);
3209 if (fndecl == NULL_TREE)
3210 return false;
3212 struct cgraph_node *node = cgraph_node::get (fndecl);
3213 if (node == NULL || node->simd_clones == NULL)
3214 return false;
3216 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3217 return false;
3219 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3220 && ! vec_stmt)
3221 return false;
3223 if (gimple_call_lhs (stmt)
3224 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3225 return false;
3227 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3229 vectype = STMT_VINFO_VECTYPE (stmt_info);
3231 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3232 return false;
3234 /* FORNOW */
3235 if (slp_node)
3236 return false;
3238 /* Process function arguments. */
3239 nargs = gimple_call_num_args (stmt);
3241 /* Bail out if the function has zero arguments. */
3242 if (nargs == 0)
3243 return false;
3245 arginfo.reserve (nargs, true);
3247 for (i = 0; i < nargs; i++)
3249 simd_call_arg_info thisarginfo;
3250 affine_iv iv;
3252 thisarginfo.linear_step = 0;
3253 thisarginfo.align = 0;
3254 thisarginfo.op = NULL_TREE;
3255 thisarginfo.simd_lane_linear = false;
3257 op = gimple_call_arg (stmt, i);
3258 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3259 &thisarginfo.vectype)
3260 || thisarginfo.dt == vect_uninitialized_def)
3262 if (dump_enabled_p ())
3263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3264 "use not simple.\n");
3265 return false;
3268 if (thisarginfo.dt == vect_constant_def
3269 || thisarginfo.dt == vect_external_def)
3270 gcc_assert (thisarginfo.vectype == NULL_TREE);
3271 else
3272 gcc_assert (thisarginfo.vectype != NULL_TREE);
3274 /* For linear arguments, the analyze phase should have saved
3275 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3276 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3277 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3279 gcc_assert (vec_stmt);
3280 thisarginfo.linear_step
3281 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3282 thisarginfo.op
3283 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3284 thisarginfo.simd_lane_linear
3285 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3286 == boolean_true_node);
3287 /* If loop has been peeled for alignment, we need to adjust it. */
3288 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3289 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3290 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3292 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3293 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3294 tree opt = TREE_TYPE (thisarginfo.op);
3295 bias = fold_convert (TREE_TYPE (step), bias);
3296 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3297 thisarginfo.op
3298 = fold_build2 (POINTER_TYPE_P (opt)
3299 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3300 thisarginfo.op, bias);
3303 else if (!vec_stmt
3304 && thisarginfo.dt != vect_constant_def
3305 && thisarginfo.dt != vect_external_def
3306 && loop_vinfo
3307 && TREE_CODE (op) == SSA_NAME
3308 && simple_iv (loop, loop_containing_stmt (stmt), op,
3309 &iv, false)
3310 && tree_fits_shwi_p (iv.step))
3312 thisarginfo.linear_step = tree_to_shwi (iv.step);
3313 thisarginfo.op = iv.base;
3315 else if ((thisarginfo.dt == vect_constant_def
3316 || thisarginfo.dt == vect_external_def)
3317 && POINTER_TYPE_P (TREE_TYPE (op)))
3318 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3319 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3320 linear too. */
3321 if (POINTER_TYPE_P (TREE_TYPE (op))
3322 && !thisarginfo.linear_step
3323 && !vec_stmt
3324 && thisarginfo.dt != vect_constant_def
3325 && thisarginfo.dt != vect_external_def
3326 && loop_vinfo
3327 && !slp_node
3328 && TREE_CODE (op) == SSA_NAME)
3329 vect_simd_lane_linear (op, loop, &thisarginfo);
3331 arginfo.quick_push (thisarginfo);
3334 unsigned int badness = 0;
3335 struct cgraph_node *bestn = NULL;
3336 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3337 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3338 else
3339 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3340 n = n->simdclone->next_clone)
3342 unsigned int this_badness = 0;
3343 if (n->simdclone->simdlen
3344 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3345 || n->simdclone->nargs != nargs)
3346 continue;
3347 if (n->simdclone->simdlen
3348 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3349 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3350 - exact_log2 (n->simdclone->simdlen)) * 1024;
3351 if (n->simdclone->inbranch)
3352 this_badness += 2048;
3353 int target_badness = targetm.simd_clone.usable (n);
3354 if (target_badness < 0)
3355 continue;
3356 this_badness += target_badness * 512;
3357 /* FORNOW: Have to add code to add the mask argument. */
3358 if (n->simdclone->inbranch)
3359 continue;
3360 for (i = 0; i < nargs; i++)
3362 switch (n->simdclone->args[i].arg_type)
3364 case SIMD_CLONE_ARG_TYPE_VECTOR:
3365 if (!useless_type_conversion_p
3366 (n->simdclone->args[i].orig_type,
3367 TREE_TYPE (gimple_call_arg (stmt, i))))
3368 i = -1;
3369 else if (arginfo[i].dt == vect_constant_def
3370 || arginfo[i].dt == vect_external_def
3371 || arginfo[i].linear_step)
3372 this_badness += 64;
3373 break;
3374 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3375 if (arginfo[i].dt != vect_constant_def
3376 && arginfo[i].dt != vect_external_def)
3377 i = -1;
3378 break;
3379 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3380 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3381 if (arginfo[i].dt == vect_constant_def
3382 || arginfo[i].dt == vect_external_def
3383 || (arginfo[i].linear_step
3384 != n->simdclone->args[i].linear_step))
3385 i = -1;
3386 break;
3387 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3388 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3389 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3390 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3391 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3392 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3393 /* FORNOW */
3394 i = -1;
3395 break;
3396 case SIMD_CLONE_ARG_TYPE_MASK:
3397 gcc_unreachable ();
3399 if (i == (size_t) -1)
3400 break;
3401 if (n->simdclone->args[i].alignment > arginfo[i].align)
3403 i = -1;
3404 break;
3406 if (arginfo[i].align)
3407 this_badness += (exact_log2 (arginfo[i].align)
3408 - exact_log2 (n->simdclone->args[i].alignment));
3410 if (i == (size_t) -1)
3411 continue;
3412 if (bestn == NULL || this_badness < badness)
3414 bestn = n;
3415 badness = this_badness;
3419 if (bestn == NULL)
3420 return false;
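/* Editor's note (illustrative): in the badness scoring above, with a
   vectorization factor of 8 a clone of simdlen 4 gets
   this_badness += (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024, an
   inbranch clone would add 2048 (and is currently skipped by the FORNOW
   check anyway), and each vector argument whose actual operand is constant,
   external or linear adds 64; the clone with the smallest total badness is
   chosen.  */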
3422 for (i = 0; i < nargs; i++)
3423 if ((arginfo[i].dt == vect_constant_def
3424 || arginfo[i].dt == vect_external_def)
3425 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3427 arginfo[i].vectype
3428 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3429 i)));
3430 if (arginfo[i].vectype == NULL
3431 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3432 > bestn->simdclone->simdlen))
3433 return false;
3436 fndecl = bestn->decl;
3437 nunits = bestn->simdclone->simdlen;
3438 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3440 /* If the function isn't const, only allow it in simd loops where the user
3441 has asserted that at least nunits consecutive iterations can be
3442 performed using SIMD instructions. */
3443 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3444 && gimple_vuse (stmt))
3445 return false;
3447 /* Sanity check: make sure that at least one copy of the vectorized stmt
3448 needs to be generated. */
3449 gcc_assert (ncopies >= 1);
3451 if (!vec_stmt) /* transformation not required. */
3453 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3454 for (i = 0; i < nargs; i++)
3455 if ((bestn->simdclone->args[i].arg_type
3456 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3457 || (bestn->simdclone->args[i].arg_type
3458 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3460 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3461 + 1);
3462 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3463 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3464 ? size_type_node : TREE_TYPE (arginfo[i].op);
3465 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3466 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3467 tree sll = arginfo[i].simd_lane_linear
3468 ? boolean_true_node : boolean_false_node;
3469 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3471 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3472 if (dump_enabled_p ())
3473 dump_printf_loc (MSG_NOTE, vect_location,
3474 "=== vectorizable_simd_clone_call ===\n");
3475 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3476 return true;
3479 /* Transform. */
3481 if (dump_enabled_p ())
3482 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3484 /* Handle def. */
3485 scalar_dest = gimple_call_lhs (stmt);
3486 vec_dest = NULL_TREE;
3487 rtype = NULL_TREE;
3488 ratype = NULL_TREE;
3489 if (scalar_dest)
3491 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3492 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3493 if (TREE_CODE (rtype) == ARRAY_TYPE)
3495 ratype = rtype;
3496 rtype = TREE_TYPE (ratype);
3500 prev_stmt_info = NULL;
3501 for (j = 0; j < ncopies; ++j)
3503 /* Build argument list for the vectorized call. */
3504 if (j == 0)
3505 vargs.create (nargs);
3506 else
3507 vargs.truncate (0);
3509 for (i = 0; i < nargs; i++)
3511 unsigned int k, l, m, o;
3512 tree atype;
3513 op = gimple_call_arg (stmt, i);
3514 switch (bestn->simdclone->args[i].arg_type)
3516 case SIMD_CLONE_ARG_TYPE_VECTOR:
3517 atype = bestn->simdclone->args[i].vector_type;
3518 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3519 for (m = j * o; m < (j + 1) * o; m++)
3521 if (TYPE_VECTOR_SUBPARTS (atype)
3522 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3524 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3525 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3526 / TYPE_VECTOR_SUBPARTS (atype));
3527 gcc_assert ((k & (k - 1)) == 0);
3528 if (m == 0)
3529 vec_oprnd0
3530 = vect_get_vec_def_for_operand (op, stmt);
3531 else
3533 vec_oprnd0 = arginfo[i].op;
3534 if ((m & (k - 1)) == 0)
3535 vec_oprnd0
3536 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3537 vec_oprnd0);
3539 arginfo[i].op = vec_oprnd0;
3540 vec_oprnd0
3541 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3542 size_int (prec),
3543 bitsize_int ((m & (k - 1)) * prec));
3544 new_stmt
3545 = gimple_build_assign (make_ssa_name (atype),
3546 vec_oprnd0);
3547 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3548 vargs.safe_push (gimple_assign_lhs (new_stmt));
3550 else
3552 k = (TYPE_VECTOR_SUBPARTS (atype)
3553 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3554 gcc_assert ((k & (k - 1)) == 0);
3555 vec<constructor_elt, va_gc> *ctor_elts;
3556 if (k != 1)
3557 vec_alloc (ctor_elts, k);
3558 else
3559 ctor_elts = NULL;
3560 for (l = 0; l < k; l++)
3562 if (m == 0 && l == 0)
3563 vec_oprnd0
3564 = vect_get_vec_def_for_operand (op, stmt);
3565 else
3566 vec_oprnd0
3567 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3568 arginfo[i].op);
3569 arginfo[i].op = vec_oprnd0;
3570 if (k == 1)
3571 break;
3572 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3573 vec_oprnd0);
3575 if (k == 1)
3576 vargs.safe_push (vec_oprnd0);
3577 else
3579 vec_oprnd0 = build_constructor (atype, ctor_elts);
3580 new_stmt
3581 = gimple_build_assign (make_ssa_name (atype),
3582 vec_oprnd0);
3583 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3584 vargs.safe_push (gimple_assign_lhs (new_stmt));
3588 break;
3589 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3590 vargs.safe_push (op);
3591 break;
3592 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3593 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3594 if (j == 0)
3596 gimple_seq stmts;
3597 arginfo[i].op
3598 = force_gimple_operand (arginfo[i].op, &stmts, true,
3599 NULL_TREE);
3600 if (stmts != NULL)
3602 basic_block new_bb;
3603 edge pe = loop_preheader_edge (loop);
3604 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3605 gcc_assert (!new_bb);
3607 if (arginfo[i].simd_lane_linear)
3609 vargs.safe_push (arginfo[i].op);
3610 break;
3612 tree phi_res = copy_ssa_name (op);
3613 gphi *new_phi = create_phi_node (phi_res, loop->header);
3614 set_vinfo_for_stmt (new_phi,
3615 new_stmt_vec_info (new_phi, loop_vinfo));
3616 add_phi_arg (new_phi, arginfo[i].op,
3617 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3618 enum tree_code code
3619 = POINTER_TYPE_P (TREE_TYPE (op))
3620 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3621 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3622 ? sizetype : TREE_TYPE (op);
3623 widest_int cst
3624 = wi::mul (bestn->simdclone->args[i].linear_step,
3625 ncopies * nunits);
3626 tree tcst = wide_int_to_tree (type, cst);
3627 tree phi_arg = copy_ssa_name (op);
3628 new_stmt
3629 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3630 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3631 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3632 set_vinfo_for_stmt (new_stmt,
3633 new_stmt_vec_info (new_stmt, loop_vinfo));
3634 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3635 UNKNOWN_LOCATION);
3636 arginfo[i].op = phi_res;
3637 vargs.safe_push (phi_res);
3639 else
3641 enum tree_code code
3642 = POINTER_TYPE_P (TREE_TYPE (op))
3643 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3644 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3645 ? sizetype : TREE_TYPE (op);
3646 widest_int cst
3647 = wi::mul (bestn->simdclone->args[i].linear_step,
3648 j * nunits);
3649 tree tcst = wide_int_to_tree (type, cst);
3650 new_temp = make_ssa_name (TREE_TYPE (op));
3651 new_stmt = gimple_build_assign (new_temp, code,
3652 arginfo[i].op, tcst);
3653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3654 vargs.safe_push (new_temp);
3656 break;
3657 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3658 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3659 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3660 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3661 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3662 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3663 default:
3664 gcc_unreachable ();
3668 new_stmt = gimple_build_call_vec (fndecl, vargs);
3669 if (vec_dest)
3671 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3672 if (ratype)
3673 new_temp = create_tmp_var (ratype);
3674 else if (TYPE_VECTOR_SUBPARTS (vectype)
3675 == TYPE_VECTOR_SUBPARTS (rtype))
3676 new_temp = make_ssa_name (vec_dest, new_stmt);
3677 else
3678 new_temp = make_ssa_name (rtype, new_stmt);
3679 gimple_call_set_lhs (new_stmt, new_temp);
3681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3683 if (vec_dest)
3685 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3687 unsigned int k, l;
3688 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3689 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3690 gcc_assert ((k & (k - 1)) == 0);
3691 for (l = 0; l < k; l++)
3693 tree t;
3694 if (ratype)
3696 t = build_fold_addr_expr (new_temp);
3697 t = build2 (MEM_REF, vectype, t,
3698 build_int_cst (TREE_TYPE (t),
3699 l * prec / BITS_PER_UNIT));
3701 else
3702 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3703 size_int (prec), bitsize_int (l * prec));
3704 new_stmt
3705 = gimple_build_assign (make_ssa_name (vectype), t);
3706 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3707 if (j == 0 && l == 0)
3708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3709 else
3710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3712 prev_stmt_info = vinfo_for_stmt (new_stmt);
3715 if (ratype)
3717 tree clobber = build_constructor (ratype, NULL);
3718 TREE_THIS_VOLATILE (clobber) = 1;
3719 new_stmt = gimple_build_assign (new_temp, clobber);
3720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3722 continue;
3724 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3726 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3727 / TYPE_VECTOR_SUBPARTS (rtype));
3728 gcc_assert ((k & (k - 1)) == 0);
3729 if ((j & (k - 1)) == 0)
3730 vec_alloc (ret_ctor_elts, k);
3731 if (ratype)
3733 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3734 for (m = 0; m < o; m++)
3736 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3737 size_int (m), NULL_TREE, NULL_TREE);
3738 new_stmt
3739 = gimple_build_assign (make_ssa_name (rtype), tem);
3740 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3741 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3742 gimple_assign_lhs (new_stmt));
3744 tree clobber = build_constructor (ratype, NULL);
3745 TREE_THIS_VOLATILE (clobber) = 1;
3746 new_stmt = gimple_build_assign (new_temp, clobber);
3747 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3749 else
3750 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3751 if ((j & (k - 1)) != k - 1)
3752 continue;
3753 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3754 new_stmt
3755 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3756 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3758 if ((unsigned) j == k - 1)
3759 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3760 else
3761 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3763 prev_stmt_info = vinfo_for_stmt (new_stmt);
3764 continue;
3766 else if (ratype)
3768 tree t = build_fold_addr_expr (new_temp);
3769 t = build2 (MEM_REF, vectype, t,
3770 build_int_cst (TREE_TYPE (t), 0));
3771 new_stmt
3772 = gimple_build_assign (make_ssa_name (vec_dest), t);
3773 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3774 tree clobber = build_constructor (ratype, NULL);
3775 TREE_THIS_VOLATILE (clobber) = 1;
3776 vect_finish_stmt_generation (stmt,
3777 gimple_build_assign (new_temp,
3778 clobber), gsi);
3782 if (j == 0)
3783 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3784 else
3785 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3787 prev_stmt_info = vinfo_for_stmt (new_stmt);
3790 vargs.release ();
3792 /* The call in STMT might prevent it from being removed in dce.
3793 However, we cannot remove it here, due to the way the ssa name
3794 it defines is mapped to the new definition. So just replace the
3795 rhs of the statement with something harmless. */
3797 if (slp_node)
3798 return true;
3800 if (scalar_dest)
3802 type = TREE_TYPE (scalar_dest);
3803 if (is_pattern_stmt_p (stmt_info))
3804 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3805 else
3806 lhs = gimple_call_lhs (stmt);
3807 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3809 else
3810 new_stmt = gimple_build_nop ();
3811 set_vinfo_for_stmt (new_stmt, stmt_info);
3812 set_vinfo_for_stmt (stmt, NULL);
3813 STMT_VINFO_STMT (stmt_info) = new_stmt;
3814 gsi_replace (gsi, new_stmt, true);
3815 unlink_stmt_vdef (stmt);
3817 return true;
3821 /* Function vect_gen_widened_results_half
3823 Create a vector stmt whose code, number of operands, and result
3824 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3825 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3826 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3827 needs to be created (DECL is a function-decl of a target-builtin).
3828 STMT is the original scalar stmt that we are vectorizing. */
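/* Illustrative note (added for exposition; the concrete codes below are an
   assumption, not taken from a particular caller): for a WIDEN_MULT_EXPR on
   two V8HI operands the caller typically invokes this helper twice, once
   with VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR, each
   call producing one V4SI half of the widened result.  */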
3830 static gimple *
3831 vect_gen_widened_results_half (enum tree_code code,
3832 tree decl,
3833 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3834 tree vec_dest, gimple_stmt_iterator *gsi,
3835 gimple *stmt)
3837 gimple *new_stmt;
3838 tree new_temp;
3840 /* Generate half of the widened result: */
3841 if (code == CALL_EXPR)
3843 /* Target specific support */
3844 if (op_type == binary_op)
3845 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3846 else
3847 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3848 new_temp = make_ssa_name (vec_dest, new_stmt);
3849 gimple_call_set_lhs (new_stmt, new_temp);
3851 else
3853 /* Generic support */
3854 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3855 if (op_type != binary_op)
3856 vec_oprnd1 = NULL;
3857 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3858 new_temp = make_ssa_name (vec_dest, new_stmt);
3859 gimple_assign_set_lhs (new_stmt, new_temp);
3861 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3863 return new_stmt;
3867 /* Get vectorized definitions for loop-based vectorization. For the first
3868 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3869 the scalar operand), and for the rest we get a copy with
3870 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3871 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3872 The vectors are collected into VEC_OPRNDS. */
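/* For example (illustrative only): a call with MULTI_STEP_CVT == 1 pushes
   two defs itself and recurses once, so VEC_OPRNDS ends up holding four
   vector defs - exactly the number of source vectors a two-step demotion
   needs in order to produce one final vector.  */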
3874 static void
3875 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3876 vec<tree> *vec_oprnds, int multi_step_cvt)
3878 tree vec_oprnd;
3880 /* Get first vector operand. */
3881 /* All the vector operands except the very first one (which is the
3882 scalar operand) are stmt copies. */
3883 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3884 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3885 else
3886 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3888 vec_oprnds->quick_push (vec_oprnd);
3890 /* Get second vector operand. */
3891 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3892 vec_oprnds->quick_push (vec_oprnd);
3894 *oprnd = vec_oprnd;
3896 /* For conversion in multiple steps, continue to get operands
3897 recursively. */
3898 if (multi_step_cvt)
3899 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3903 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3904 For multi-step conversions store the resulting vectors and call the function
3905 recursively. */
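/* Worked example (added for exposition, 128-bit vectors assumed): a
   two-step demotion from int to char first packs pairs of V4SI operands
   into V8HI vectors, and the recursive call then packs pairs of those
   V8HI vectors into the final V16QI result using VEC_PACK_TRUNC_EXPR.  */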
3907 static void
3908 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3909 int multi_step_cvt, gimple *stmt,
3910 vec<tree> vec_dsts,
3911 gimple_stmt_iterator *gsi,
3912 slp_tree slp_node, enum tree_code code,
3913 stmt_vec_info *prev_stmt_info)
3915 unsigned int i;
3916 tree vop0, vop1, new_tmp, vec_dest;
3917 gimple *new_stmt;
3918 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3920 vec_dest = vec_dsts.pop ();
3922 for (i = 0; i < vec_oprnds->length (); i += 2)
3924 /* Create demotion operation. */
3925 vop0 = (*vec_oprnds)[i];
3926 vop1 = (*vec_oprnds)[i + 1];
3927 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3928 new_tmp = make_ssa_name (vec_dest, new_stmt);
3929 gimple_assign_set_lhs (new_stmt, new_tmp);
3930 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3932 if (multi_step_cvt)
3933 /* Store the resulting vector for next recursive call. */
3934 (*vec_oprnds)[i/2] = new_tmp;
3935 else
3937 /* This is the last step of the conversion sequence. Store the
3938 vectors in SLP_NODE or in the vector info of the scalar statement
3939 (or in the STMT_VINFO_RELATED_STMT chain). */
3940 if (slp_node)
3941 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3942 else
3944 if (!*prev_stmt_info)
3945 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3946 else
3947 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3949 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3954 /* For multi-step demotion operations we first generate demotion operations
3955 from the source type to the intermediate types, and then combine the
3956 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3957 type. */
3958 if (multi_step_cvt)
3960 /* At each level of recursion we have half of the operands we had at the
3961 previous level. */
3962 vec_oprnds->truncate ((i+1)/2);
3963 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3964 stmt, vec_dsts, gsi, slp_node,
3965 VEC_PACK_TRUNC_EXPR,
3966 prev_stmt_info);
3969 vec_dsts.quick_push (vec_dest);
3973 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3974 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3975 the resulting vectors and call the function recursively. */
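/* Note (added for exposition): each input vector in VEC_OPRNDS0 yields a
   low half and a high half, so VEC_TMP collects twice as many vectors as
   were passed in and then replaces *VEC_OPRNDS0 for the next step.  */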
3977 static void
3978 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3979 vec<tree> *vec_oprnds1,
3980 gimple *stmt, tree vec_dest,
3981 gimple_stmt_iterator *gsi,
3982 enum tree_code code1,
3983 enum tree_code code2, tree decl1,
3984 tree decl2, int op_type)
3986 int i;
3987 tree vop0, vop1, new_tmp1, new_tmp2;
3988 gimple *new_stmt1, *new_stmt2;
3989 vec<tree> vec_tmp = vNULL;
3991 vec_tmp.create (vec_oprnds0->length () * 2);
3992 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3994 if (op_type == binary_op)
3995 vop1 = (*vec_oprnds1)[i];
3996 else
3997 vop1 = NULL_TREE;
3999 /* Generate the two halves of promotion operation. */
4000 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4001 op_type, vec_dest, gsi, stmt);
4002 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4003 op_type, vec_dest, gsi, stmt);
4004 if (is_gimple_call (new_stmt1))
4006 new_tmp1 = gimple_call_lhs (new_stmt1);
4007 new_tmp2 = gimple_call_lhs (new_stmt2);
4009 else
4011 new_tmp1 = gimple_assign_lhs (new_stmt1);
4012 new_tmp2 = gimple_assign_lhs (new_stmt2);
4015 /* Store the results for the next step. */
4016 vec_tmp.quick_push (new_tmp1);
4017 vec_tmp.quick_push (new_tmp2);
4020 vec_oprnds0->release ();
4021 *vec_oprnds0 = vec_tmp;
4025 /* Check if STMT performs a conversion operation that can be vectorized.
4026 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4027 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4028 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
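/* Note (added for exposition): like the other vectorizable_* routines this
   is called twice - during analysis with VEC_STMT == NULL, where it only
   checks target support and records costs, and during transformation with
   VEC_STMT non-NULL, where it emits the vector statements.  */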
4030 static bool
4031 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4032 gimple **vec_stmt, slp_tree slp_node)
4034 tree vec_dest;
4035 tree scalar_dest;
4036 tree op0, op1 = NULL_TREE;
4037 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4039 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4040 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4041 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4042 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4043 tree new_temp;
4044 gimple *def_stmt;
4045 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4046 int ndts = 2;
4047 gimple *new_stmt = NULL;
4048 stmt_vec_info prev_stmt_info;
4049 int nunits_in;
4050 int nunits_out;
4051 tree vectype_out, vectype_in;
4052 int ncopies, i, j;
4053 tree lhs_type, rhs_type;
4054 enum { NARROW, NONE, WIDEN } modifier;
4055 vec<tree> vec_oprnds0 = vNULL;
4056 vec<tree> vec_oprnds1 = vNULL;
4057 tree vop0;
4058 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4059 vec_info *vinfo = stmt_info->vinfo;
4060 int multi_step_cvt = 0;
4061 vec<tree> interm_types = vNULL;
4062 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4063 int op_type;
4064 machine_mode rhs_mode;
4065 unsigned short fltsz;
4067 /* Is STMT a vectorizable conversion? */
4069 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4070 return false;
4072 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4073 && ! vec_stmt)
4074 return false;
4076 if (!is_gimple_assign (stmt))
4077 return false;
4079 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4080 return false;
4082 code = gimple_assign_rhs_code (stmt);
4083 if (!CONVERT_EXPR_CODE_P (code)
4084 && code != FIX_TRUNC_EXPR
4085 && code != FLOAT_EXPR
4086 && code != WIDEN_MULT_EXPR
4087 && code != WIDEN_LSHIFT_EXPR)
4088 return false;
4090 op_type = TREE_CODE_LENGTH (code);
4092 /* Check types of lhs and rhs. */
4093 scalar_dest = gimple_assign_lhs (stmt);
4094 lhs_type = TREE_TYPE (scalar_dest);
4095 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4097 op0 = gimple_assign_rhs1 (stmt);
4098 rhs_type = TREE_TYPE (op0);
4100 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4101 && !((INTEGRAL_TYPE_P (lhs_type)
4102 && INTEGRAL_TYPE_P (rhs_type))
4103 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4104 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4105 return false;
4107 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4108 && ((INTEGRAL_TYPE_P (lhs_type)
4109 && (TYPE_PRECISION (lhs_type)
4110 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4111 || (INTEGRAL_TYPE_P (rhs_type)
4112 && (TYPE_PRECISION (rhs_type)
4113 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4115 if (dump_enabled_p ())
4116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4117 "type conversion to/from bit-precision unsupported."
4118 "\n");
4119 return false;
4122 /* Check the operands of the operation. */
4123 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4125 if (dump_enabled_p ())
4126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4127 "use not simple.\n");
4128 return false;
4130 if (op_type == binary_op)
4132 bool ok;
4134 op1 = gimple_assign_rhs2 (stmt);
4135 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4136 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4137 OP1. */
4138 if (CONSTANT_CLASS_P (op0))
4139 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4140 else
4141 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4143 if (!ok)
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4147 "use not simple.\n");
4148 return false;
4152 /* If op0 is an external or constant def use a vector type of
4153 the same size as the output vector type. */
4154 if (!vectype_in)
4155 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4156 if (vec_stmt)
4157 gcc_assert (vectype_in);
4158 if (!vectype_in)
4160 if (dump_enabled_p ())
4162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4163 "no vectype for scalar type ");
4164 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4165 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4168 return false;
4171 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4172 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4174 if (dump_enabled_p ())
4176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4177 "can't convert between boolean and non "
4178 "boolean vectors");
4179 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4180 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4183 return false;
4186 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4187 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4188 if (nunits_in < nunits_out)
4189 modifier = NARROW;
4190 else if (nunits_out == nunits_in)
4191 modifier = NONE;
4192 else
4193 modifier = WIDEN;
4195 /* Multiple types in SLP are handled by creating the appropriate number of
4196 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4197 case of SLP. */
4198 if (slp_node)
4199 ncopies = 1;
4200 else if (modifier == NARROW)
4201 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4202 else
4203 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4205 /* Sanity check: make sure that at least one copy of the vectorized stmt
4206 needs to be generated. */
4207 gcc_assert (ncopies >= 1);
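/* Example numbers (illustrative, not tied to any particular target): with a
   vectorization factor of 8 and 128-bit vectors, a float->int conversion
   uses V4SF->V4SI (modifier NONE), so ncopies = 8 / 4 = 2; a short->int
   widening uses V8HI->V4SI (WIDEN), so ncopies = 8 / 8 = 1 and that single
   copy produces two V4SI result vectors.  */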
4209 /* Supportable by target? */
4210 switch (modifier)
4212 case NONE:
4213 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4214 return false;
4215 if (supportable_convert_operation (code, vectype_out, vectype_in,
4216 &decl1, &code1))
4217 break;
4218 /* FALLTHRU */
4219 unsupported:
4220 if (dump_enabled_p ())
4221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4222 "conversion not supported by target.\n");
4223 return false;
4225 case WIDEN:
4226 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4227 &code1, &code2, &multi_step_cvt,
4228 &interm_types))
4230 /* A binary widening operation can only be supported directly by the
4231 architecture. */
4232 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4233 break;
4236 if (code != FLOAT_EXPR
4237 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4238 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4239 goto unsupported;
4241 rhs_mode = TYPE_MODE (rhs_type);
4242 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4243 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4244 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4245 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4247 cvt_type
4248 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4249 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4250 if (cvt_type == NULL_TREE)
4251 goto unsupported;
4253 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4255 if (!supportable_convert_operation (code, vectype_out,
4256 cvt_type, &decl1, &codecvt1))
4257 goto unsupported;
4259 else if (!supportable_widening_operation (code, stmt, vectype_out,
4260 cvt_type, &codecvt1,
4261 &codecvt2, &multi_step_cvt,
4262 &interm_types))
4263 continue;
4264 else
4265 gcc_assert (multi_step_cvt == 0);
4267 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4268 vectype_in, &code1, &code2,
4269 &multi_step_cvt, &interm_types))
4270 break;
4273 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4274 goto unsupported;
4276 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4277 codecvt2 = ERROR_MARK;
4278 else
4280 multi_step_cvt++;
4281 interm_types.safe_push (cvt_type);
4282 cvt_type = NULL_TREE;
4284 break;
4286 case NARROW:
4287 gcc_assert (op_type == unary_op);
4288 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4289 &code1, &multi_step_cvt,
4290 &interm_types))
4291 break;
4293 if (code != FIX_TRUNC_EXPR
4294 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4295 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4296 goto unsupported;
4298 rhs_mode = TYPE_MODE (rhs_type);
4299 cvt_type
4300 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4301 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4302 if (cvt_type == NULL_TREE)
4303 goto unsupported;
4304 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4305 &decl1, &codecvt1))
4306 goto unsupported;
4307 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4308 &code1, &multi_step_cvt,
4309 &interm_types))
4310 break;
4311 goto unsupported;
4313 default:
4314 gcc_unreachable ();
4317 if (!vec_stmt) /* transformation not required. */
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_NOTE, vect_location,
4321 "=== vectorizable_conversion ===\n");
4322 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4324 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4325 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4327 else if (modifier == NARROW)
4329 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4330 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4332 else
4334 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4335 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4337 interm_types.release ();
4338 return true;
4341 /* Transform. */
4342 if (dump_enabled_p ())
4343 dump_printf_loc (MSG_NOTE, vect_location,
4344 "transform conversion. ncopies = %d.\n", ncopies);
4346 if (op_type == binary_op)
4348 if (CONSTANT_CLASS_P (op0))
4349 op0 = fold_convert (TREE_TYPE (op1), op0);
4350 else if (CONSTANT_CLASS_P (op1))
4351 op1 = fold_convert (TREE_TYPE (op0), op1);
4354 /* In case of multi-step conversion, we first generate conversion operations
4355 to the intermediate types, and then from those types to the final one.
4356 We create vector destinations for the intermediate type (TYPES) received
4357 from supportable_*_operation, and store them in the correct order
4358 for future use in vect_create_vectorized_*_stmts (). */
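/* Example (added for exposition, 128-bit vectors assumed): for a two-step
   narrowing from int to char with short as the intermediate type, VEC_DSTS
   ends up as { V16QI dest, V8HI dest }, so the first demotion level pops
   the V8HI destination and the final level pops the V16QI one.  */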
4359 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4360 vec_dest = vect_create_destination_var (scalar_dest,
4361 (cvt_type && modifier == WIDEN)
4362 ? cvt_type : vectype_out);
4363 vec_dsts.quick_push (vec_dest);
4365 if (multi_step_cvt)
4367 for (i = interm_types.length () - 1;
4368 interm_types.iterate (i, &intermediate_type); i--)
4370 vec_dest = vect_create_destination_var (scalar_dest,
4371 intermediate_type);
4372 vec_dsts.quick_push (vec_dest);
4376 if (cvt_type)
4377 vec_dest = vect_create_destination_var (scalar_dest,
4378 modifier == WIDEN
4379 ? vectype_out : cvt_type);
4381 if (!slp_node)
4383 if (modifier == WIDEN)
4385 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4386 if (op_type == binary_op)
4387 vec_oprnds1.create (1);
4389 else if (modifier == NARROW)
4390 vec_oprnds0.create (
4391 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4393 else if (code == WIDEN_LSHIFT_EXPR)
4394 vec_oprnds1.create (slp_node->vec_stmts_size);
4396 last_oprnd = op0;
4397 prev_stmt_info = NULL;
4398 switch (modifier)
4400 case NONE:
4401 for (j = 0; j < ncopies; j++)
4403 if (j == 0)
4404 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4405 -1);
4406 else
4407 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4409 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4411 /* Arguments are ready. Create the new vector stmt. */
4412 if (code1 == CALL_EXPR)
4414 new_stmt = gimple_build_call (decl1, 1, vop0);
4415 new_temp = make_ssa_name (vec_dest, new_stmt);
4416 gimple_call_set_lhs (new_stmt, new_temp);
4418 else
4420 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4421 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4422 new_temp = make_ssa_name (vec_dest, new_stmt);
4423 gimple_assign_set_lhs (new_stmt, new_temp);
4426 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4427 if (slp_node)
4428 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4429 else
4431 if (!prev_stmt_info)
4432 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4433 else
4434 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4435 prev_stmt_info = vinfo_for_stmt (new_stmt);
4439 break;
4441 case WIDEN:
4442 /* In case the vectorization factor (VF) is bigger than the number
4443 of elements that we can fit in a vectype (nunits), we have to
4444 generate more than one vector stmt - i.e., we need to "unroll"
4445 the vector stmt by a factor VF/nunits. */
4446 for (j = 0; j < ncopies; j++)
4448 /* Handle uses. */
4449 if (j == 0)
4451 if (slp_node)
4453 if (code == WIDEN_LSHIFT_EXPR)
4455 unsigned int k;
4457 vec_oprnd1 = op1;
4458 /* Store vec_oprnd1 for every vector stmt to be created
4459 for SLP_NODE. We check during the analysis that all
4460 the shift arguments are the same. */
4461 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4462 vec_oprnds1.quick_push (vec_oprnd1);
4464 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4465 slp_node, -1);
4467 else
4468 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4469 &vec_oprnds1, slp_node, -1);
4471 else
4473 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4474 vec_oprnds0.quick_push (vec_oprnd0);
4475 if (op_type == binary_op)
4477 if (code == WIDEN_LSHIFT_EXPR)
4478 vec_oprnd1 = op1;
4479 else
4480 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4481 vec_oprnds1.quick_push (vec_oprnd1);
4485 else
4487 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4488 vec_oprnds0.truncate (0);
4489 vec_oprnds0.quick_push (vec_oprnd0);
4490 if (op_type == binary_op)
4492 if (code == WIDEN_LSHIFT_EXPR)
4493 vec_oprnd1 = op1;
4494 else
4495 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4496 vec_oprnd1);
4497 vec_oprnds1.truncate (0);
4498 vec_oprnds1.quick_push (vec_oprnd1);
4502 /* Arguments are ready. Create the new vector stmts. */
4503 for (i = multi_step_cvt; i >= 0; i--)
4505 tree this_dest = vec_dsts[i];
4506 enum tree_code c1 = code1, c2 = code2;
4507 if (i == 0 && codecvt2 != ERROR_MARK)
4509 c1 = codecvt1;
4510 c2 = codecvt2;
4512 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4513 &vec_oprnds1,
4514 stmt, this_dest, gsi,
4515 c1, c2, decl1, decl2,
4516 op_type);
4519 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4521 if (cvt_type)
4523 if (codecvt1 == CALL_EXPR)
4525 new_stmt = gimple_build_call (decl1, 1, vop0);
4526 new_temp = make_ssa_name (vec_dest, new_stmt);
4527 gimple_call_set_lhs (new_stmt, new_temp);
4529 else
4531 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4532 new_temp = make_ssa_name (vec_dest);
4533 new_stmt = gimple_build_assign (new_temp, codecvt1,
4534 vop0);
4537 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4539 else
4540 new_stmt = SSA_NAME_DEF_STMT (vop0);
4542 if (slp_node)
4543 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4544 else
4546 if (!prev_stmt_info)
4547 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4548 else
4549 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4550 prev_stmt_info = vinfo_for_stmt (new_stmt);
4555 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4556 break;
4558 case NARROW:
4559 /* In case the vectorization factor (VF) is bigger than the number
4560 of elements that we can fit in a vectype (nunits), we have to
4561 generate more than one vector stmt - i.e., we need to "unroll"
4562 the vector stmt by a factor VF/nunits. */
4563 for (j = 0; j < ncopies; j++)
4565 /* Handle uses. */
4566 if (slp_node)
4567 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4568 slp_node, -1);
4569 else
4571 vec_oprnds0.truncate (0);
4572 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4573 vect_pow2 (multi_step_cvt) - 1);
4576 /* Arguments are ready. Create the new vector stmts. */
4577 if (cvt_type)
4578 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4580 if (codecvt1 == CALL_EXPR)
4582 new_stmt = gimple_build_call (decl1, 1, vop0);
4583 new_temp = make_ssa_name (vec_dest, new_stmt);
4584 gimple_call_set_lhs (new_stmt, new_temp);
4586 else
4588 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4589 new_temp = make_ssa_name (vec_dest);
4590 new_stmt = gimple_build_assign (new_temp, codecvt1,
4591 vop0);
4594 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4595 vec_oprnds0[i] = new_temp;
4598 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4599 stmt, vec_dsts, gsi,
4600 slp_node, code1,
4601 &prev_stmt_info);
4604 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4605 break;
4608 vec_oprnds0.release ();
4609 vec_oprnds1.release ();
4610 interm_types.release ();
4612 return true;
4616 /* Function vectorizable_assignment.
4618 Check if STMT performs an assignment (copy) that can be vectorized.
4619 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4620 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4621 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4623 static bool
4624 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4625 gimple **vec_stmt, slp_tree slp_node)
4627 tree vec_dest;
4628 tree scalar_dest;
4629 tree op;
4630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4631 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4632 tree new_temp;
4633 gimple *def_stmt;
4634 enum vect_def_type dt[1] = {vect_unknown_def_type};
4635 int ndts = 1;
4636 int ncopies;
4637 int i, j;
4638 vec<tree> vec_oprnds = vNULL;
4639 tree vop;
4640 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4641 vec_info *vinfo = stmt_info->vinfo;
4642 gimple *new_stmt = NULL;
4643 stmt_vec_info prev_stmt_info = NULL;
4644 enum tree_code code;
4645 tree vectype_in;
4647 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4648 return false;
4650 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4651 && ! vec_stmt)
4652 return false;
4654 /* Is vectorizable assignment? */
4655 if (!is_gimple_assign (stmt))
4656 return false;
4658 scalar_dest = gimple_assign_lhs (stmt);
4659 if (TREE_CODE (scalar_dest) != SSA_NAME)
4660 return false;
4662 code = gimple_assign_rhs_code (stmt);
4663 if (gimple_assign_single_p (stmt)
4664 || code == PAREN_EXPR
4665 || CONVERT_EXPR_CODE_P (code))
4666 op = gimple_assign_rhs1 (stmt);
4667 else
4668 return false;
4670 if (code == VIEW_CONVERT_EXPR)
4671 op = TREE_OPERAND (op, 0);
4673 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4674 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4676 /* Multiple types in SLP are handled by creating the appropriate number of
4677 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4678 case of SLP. */
4679 if (slp_node)
4680 ncopies = 1;
4681 else
4682 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4684 gcc_assert (ncopies >= 1);
4686 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4690 "use not simple.\n");
4691 return false;
4694 /* We can handle NOP_EXPR conversions that do not change the number
4695 of elements or the vector size. */
4696 if ((CONVERT_EXPR_CODE_P (code)
4697 || code == VIEW_CONVERT_EXPR)
4698 && (!vectype_in
4699 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4700 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4701 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4702 return false;
4704 /* We do not handle bit-precision changes. */
4705 if ((CONVERT_EXPR_CODE_P (code)
4706 || code == VIEW_CONVERT_EXPR)
4707 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4708 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4709 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4710 || ((TYPE_PRECISION (TREE_TYPE (op))
4711 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4712 /* But a conversion that does not change the bit-pattern is ok. */
4713 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4714 > TYPE_PRECISION (TREE_TYPE (op)))
4715 && TYPE_UNSIGNED (TREE_TYPE (op)))
4716 /* Conversion between boolean types of different sizes is
4717 a simple assignment in case their vectypes are the same
4718 boolean vectors. */
4719 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4720 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4724 "type conversion to/from bit-precision "
4725 "unsupported.\n");
4726 return false;
4729 if (!vec_stmt) /* transformation not required. */
4731 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4732 if (dump_enabled_p ())
4733 dump_printf_loc (MSG_NOTE, vect_location,
4734 "=== vectorizable_assignment ===\n");
4735 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4736 return true;
4739 /* Transform. */
4740 if (dump_enabled_p ())
4741 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4743 /* Handle def. */
4744 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4746 /* Handle use. */
4747 for (j = 0; j < ncopies; j++)
4749 /* Handle uses. */
4750 if (j == 0)
4751 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4752 else
4753 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4755 /* Arguments are ready. Create the new vector stmt. */
4756 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4758 if (CONVERT_EXPR_CODE_P (code)
4759 || code == VIEW_CONVERT_EXPR)
4760 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4761 new_stmt = gimple_build_assign (vec_dest, vop);
4762 new_temp = make_ssa_name (vec_dest, new_stmt);
4763 gimple_assign_set_lhs (new_stmt, new_temp);
4764 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4765 if (slp_node)
4766 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4769 if (slp_node)
4770 continue;
4772 if (j == 0)
4773 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4774 else
4775 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4777 prev_stmt_info = vinfo_for_stmt (new_stmt);
4780 vec_oprnds.release ();
4781 return true;
4785 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4786 either as shift by a scalar or by a vector. */
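/* Usage note (added for exposition): e.g. vect_supportable_shift
   (RSHIFT_EXPR, short_integer_type_node) answers whether the target has
   either a vector-by-scalar or a vector-by-vector right shift for the
   vectype chosen for 'short'; this is used, for instance, by pattern
   recognition when deciding whether a division by a power of two can be
   turned into a shift.  */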
4788 bool
4789 vect_supportable_shift (enum tree_code code, tree scalar_type)
4792 machine_mode vec_mode;
4793 optab optab;
4794 int icode;
4795 tree vectype;
4797 vectype = get_vectype_for_scalar_type (scalar_type);
4798 if (!vectype)
4799 return false;
4801 optab = optab_for_tree_code (code, vectype, optab_scalar);
4802 if (!optab
4803 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4805 optab = optab_for_tree_code (code, vectype, optab_vector);
4806 if (!optab
4807 || (optab_handler (optab, TYPE_MODE (vectype))
4808 == CODE_FOR_nothing))
4809 return false;
4812 vec_mode = TYPE_MODE (vectype);
4813 icode = (int) optab_handler (optab, vec_mode);
4814 if (icode == CODE_FOR_nothing)
4815 return false;
4817 return true;
4821 /* Function vectorizable_shift.
4823 Check if STMT performs a shift operation that can be vectorized.
4824 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4825 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4826 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4828 static bool
4829 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4830 gimple **vec_stmt, slp_tree slp_node)
4832 tree vec_dest;
4833 tree scalar_dest;
4834 tree op0, op1 = NULL;
4835 tree vec_oprnd1 = NULL_TREE;
4836 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4837 tree vectype;
4838 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4839 enum tree_code code;
4840 machine_mode vec_mode;
4841 tree new_temp;
4842 optab optab;
4843 int icode;
4844 machine_mode optab_op2_mode;
4845 gimple *def_stmt;
4846 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4847 int ndts = 2;
4848 gimple *new_stmt = NULL;
4849 stmt_vec_info prev_stmt_info;
4850 int nunits_in;
4851 int nunits_out;
4852 tree vectype_out;
4853 tree op1_vectype;
4854 int ncopies;
4855 int j, i;
4856 vec<tree> vec_oprnds0 = vNULL;
4857 vec<tree> vec_oprnds1 = vNULL;
4858 tree vop0, vop1;
4859 unsigned int k;
4860 bool scalar_shift_arg = true;
4861 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4862 vec_info *vinfo = stmt_info->vinfo;
4863 int vf;
4865 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4866 return false;
4868 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4869 && ! vec_stmt)
4870 return false;
4872 /* Is STMT a vectorizable binary/unary operation? */
4873 if (!is_gimple_assign (stmt))
4874 return false;
4876 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4877 return false;
4879 code = gimple_assign_rhs_code (stmt);
4881 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4882 || code == RROTATE_EXPR))
4883 return false;
4885 scalar_dest = gimple_assign_lhs (stmt);
4886 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4887 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4888 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4890 if (dump_enabled_p ())
4891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4892 "bit-precision shifts not supported.\n");
4893 return false;
4896 op0 = gimple_assign_rhs1 (stmt);
4897 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "use not simple.\n");
4902 return false;
4904 /* If op0 is an external or constant def use a vector type with
4905 the same size as the output vector type. */
4906 if (!vectype)
4907 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4908 if (vec_stmt)
4909 gcc_assert (vectype);
4910 if (!vectype)
4912 if (dump_enabled_p ())
4913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4914 "no vectype for scalar type\n");
4915 return false;
4918 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4919 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4920 if (nunits_out != nunits_in)
4921 return false;
4923 op1 = gimple_assign_rhs2 (stmt);
4924 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4926 if (dump_enabled_p ())
4927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4928 "use not simple.\n");
4929 return false;
4932 if (loop_vinfo)
4933 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4934 else
4935 vf = 1;
4937 /* Multiple types in SLP are handled by creating the appropriate number of
4938 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4939 case of SLP. */
4940 if (slp_node)
4941 ncopies = 1;
4942 else
4943 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4945 gcc_assert (ncopies >= 1);
4947 /* Determine whether the shift amount is a vector or a scalar. If the
4948 shift/rotate amount is a vector, use the vector/vector shift optabs. */
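/* Examples (added for exposition): 'x[i] << 3' or 'x[i] << n' with a
   loop-invariant n can use the vector/scalar shift optab, whereas
   'x[i] << y[i]' needs the vector/vector optab.  */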
4950 if ((dt[1] == vect_internal_def
4951 || dt[1] == vect_induction_def)
4952 && !slp_node)
4953 scalar_shift_arg = false;
4954 else if (dt[1] == vect_constant_def
4955 || dt[1] == vect_external_def
4956 || dt[1] == vect_internal_def)
4958 /* In SLP, we need to check whether the shift count is the same;
4959 in loops, if it is a constant or invariant, it is always
4960 a scalar shift. */
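/* E.g. (added for exposition) the SLP group { a0 << n, a1 << n } keeps the
   scalar shift argument, while { a0 << 1, a1 << 2 } falls back to a
   vector/vector shift.  */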
4961 if (slp_node)
4963 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4964 gimple *slpstmt;
4966 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4967 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4968 scalar_shift_arg = false;
4971 /* If the shift amount is computed by a pattern stmt we cannot
4972 use the scalar amount directly thus give up and use a vector
4973 shift. */
4974 if (dt[1] == vect_internal_def)
4976 gimple *def = SSA_NAME_DEF_STMT (op1);
4977 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4978 scalar_shift_arg = false;
4981 else
4983 if (dump_enabled_p ())
4984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4985 "operand mode requires invariant argument.\n");
4986 return false;
4989 /* Vector shifted by vector. */
4990 if (!scalar_shift_arg)
4992 optab = optab_for_tree_code (code, vectype, optab_vector);
4993 if (dump_enabled_p ())
4994 dump_printf_loc (MSG_NOTE, vect_location,
4995 "vector/vector shift/rotate found.\n");
4997 if (!op1_vectype)
4998 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4999 if (op1_vectype == NULL_TREE
5000 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5002 if (dump_enabled_p ())
5003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5004 "unusable type for last operand in"
5005 " vector/vector shift/rotate.\n");
5006 return false;
5009 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5010 then see if it has a vector-shifted-by-vector insn. */
5011 else
5013 optab = optab_for_tree_code (code, vectype, optab_scalar);
5014 if (optab
5015 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_NOTE, vect_location,
5019 "vector/scalar shift/rotate found.\n");
5021 else
5023 optab = optab_for_tree_code (code, vectype, optab_vector);
5024 if (optab
5025 && (optab_handler (optab, TYPE_MODE (vectype))
5026 != CODE_FOR_nothing))
5028 scalar_shift_arg = false;
5030 if (dump_enabled_p ())
5031 dump_printf_loc (MSG_NOTE, vect_location,
5032 "vector/vector shift/rotate found.\n");
5034 /* Unlike the other binary operators, shifts/rotates have
5035 the rhs being int, instead of the same type as the lhs,
5036 so make sure the scalar is the right type if we are
5037 dealing with vectors of long long/long/short/char. */
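/* E.g. (illustrative): shifting a vector of long long by a scalar int
   count first converts the count to long long; for a non-constant count
   outside SLP it is then broadcast into a vector with vect_init_vector so
   that both operands of the vector/vector shift have matching element
   types.  */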
5038 if (dt[1] == vect_constant_def)
5039 op1 = fold_convert (TREE_TYPE (vectype), op1);
5040 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5041 TREE_TYPE (op1)))
5043 if (slp_node
5044 && TYPE_MODE (TREE_TYPE (vectype))
5045 != TYPE_MODE (TREE_TYPE (op1)))
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5049 "unusable type for last operand in"
5050 " vector/vector shift/rotate.\n");
5051 return false;
5053 if (vec_stmt && !slp_node)
5055 op1 = fold_convert (TREE_TYPE (vectype), op1);
5056 op1 = vect_init_vector (stmt, op1,
5057 TREE_TYPE (vectype), NULL);
5064 /* Supportable by target? */
5065 if (!optab)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "no optab.\n");
5070 return false;
5072 vec_mode = TYPE_MODE (vectype);
5073 icode = (int) optab_handler (optab, vec_mode);
5074 if (icode == CODE_FOR_nothing)
5076 if (dump_enabled_p ())
5077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5078 "op not supported by target.\n");
5079 /* Check only during analysis. */
5080 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5081 || (vf < vect_min_worthwhile_factor (code)
5082 && !vec_stmt))
5083 return false;
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_NOTE, vect_location,
5086 "proceeding using word mode.\n");
5089 /* Worthwhile without SIMD support? Check only during analysis. */
5090 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5091 && vf < vect_min_worthwhile_factor (code)
5092 && !vec_stmt)
5094 if (dump_enabled_p ())
5095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5096 "not worthwhile without SIMD support.\n");
5097 return false;
5100 if (!vec_stmt) /* transformation not required. */
5102 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_NOTE, vect_location,
5105 "=== vectorizable_shift ===\n");
5106 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5107 return true;
5110 /* Transform. */
5112 if (dump_enabled_p ())
5113 dump_printf_loc (MSG_NOTE, vect_location,
5114 "transform binary/unary operation.\n");
5116 /* Handle def. */
5117 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5119 prev_stmt_info = NULL;
5120 for (j = 0; j < ncopies; j++)
5122 /* Handle uses. */
5123 if (j == 0)
5125 if (scalar_shift_arg)
5127 /* Vector shl and shr insn patterns can be defined with scalar
5128 operand 2 (shift operand). In this case, use constant or loop
5129 invariant op1 directly, without extending it to vector mode
5130 first. */
5131 optab_op2_mode = insn_data[icode].operand[2].mode;
5132 if (!VECTOR_MODE_P (optab_op2_mode))
5134 if (dump_enabled_p ())
5135 dump_printf_loc (MSG_NOTE, vect_location,
5136 "operand 1 using scalar mode.\n");
5137 vec_oprnd1 = op1;
5138 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5139 vec_oprnds1.quick_push (vec_oprnd1);
5140 if (slp_node)
5142 /* Store vec_oprnd1 for every vector stmt to be created
5143 for SLP_NODE. We check during the analysis that all
5144 the shift arguments are the same.
5145 TODO: Allow different constants for different vector
5146 stmts generated for an SLP instance. */
5147 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5148 vec_oprnds1.quick_push (vec_oprnd1);
5153 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5154 (a special case for certain kinds of vector shifts); otherwise,
5155 operand 1 should be of a vector type (the usual case). */
5156 if (vec_oprnd1)
5157 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5158 slp_node, -1);
5159 else
5160 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5161 slp_node, -1);
5163 else
5164 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5166 /* Arguments are ready. Create the new vector stmt. */
5167 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5169 vop1 = vec_oprnds1[i];
5170 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5171 new_temp = make_ssa_name (vec_dest, new_stmt);
5172 gimple_assign_set_lhs (new_stmt, new_temp);
5173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5174 if (slp_node)
5175 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5178 if (slp_node)
5179 continue;
5181 if (j == 0)
5182 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5183 else
5184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5185 prev_stmt_info = vinfo_for_stmt (new_stmt);
5188 vec_oprnds0.release ();
5189 vec_oprnds1.release ();
5191 return true;
5195 /* Function vectorizable_operation.
5197 Check if STMT performs a binary, unary or ternary operation that can
5198 be vectorized.
5199 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5200 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5201 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5203 static bool
5204 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5205 gimple **vec_stmt, slp_tree slp_node)
5207 tree vec_dest;
5208 tree scalar_dest;
5209 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5211 tree vectype;
5212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5213 enum tree_code code;
5214 machine_mode vec_mode;
5215 tree new_temp;
5216 int op_type;
5217 optab optab;
5218 bool target_support_p;
5219 gimple *def_stmt;
5220 enum vect_def_type dt[3]
5221 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5222 int ndts = 3;
5223 gimple *new_stmt = NULL;
5224 stmt_vec_info prev_stmt_info;
5225 int nunits_in;
5226 int nunits_out;
5227 tree vectype_out;
5228 int ncopies;
5229 int j, i;
5230 vec<tree> vec_oprnds0 = vNULL;
5231 vec<tree> vec_oprnds1 = vNULL;
5232 vec<tree> vec_oprnds2 = vNULL;
5233 tree vop0, vop1, vop2;
5234 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5235 vec_info *vinfo = stmt_info->vinfo;
5236 int vf;
5238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5239 return false;
5241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5242 && ! vec_stmt)
5243 return false;
5245 /* Is STMT a vectorizable binary/unary operation? */
5246 if (!is_gimple_assign (stmt))
5247 return false;
5249 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5250 return false;
5252 code = gimple_assign_rhs_code (stmt);
5254 /* For pointer addition, we should use the normal plus for
5255 the vector addition. */
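/* Note (added for exposition): there is no vector form of
   POINTER_PLUS_EXPR, so e.g. a pointer increment 'p + 4' is vectorized as
   an ordinary PLUS_EXPR on the vector operands.  */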
5256 if (code == POINTER_PLUS_EXPR)
5257 code = PLUS_EXPR;
5259 /* Support only unary, binary and ternary operations. */
5260 op_type = TREE_CODE_LENGTH (code);
5261 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5263 if (dump_enabled_p ())
5264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5265 "num. args = %d (not unary/binary/ternary op).\n",
5266 op_type);
5267 return false;
5270 scalar_dest = gimple_assign_lhs (stmt);
5271 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5273 /* Most operations cannot handle bit-precision types without extra
5274 truncations. */
5275 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5276 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5277 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5278 /* Exception are bitwise binary operations. */
5279 && code != BIT_IOR_EXPR
5280 && code != BIT_XOR_EXPR
5281 && code != BIT_AND_EXPR)
5283 if (dump_enabled_p ())
5284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5285 "bit-precision arithmetic not supported.\n");
5286 return false;
5289 op0 = gimple_assign_rhs1 (stmt);
5290 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5292 if (dump_enabled_p ())
5293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5294 "use not simple.\n");
5295 return false;
5297 /* If op0 is an external or constant def use a vector type with
5298 the same size as the output vector type. */
5299 if (!vectype)
5301 /* For a boolean type we cannot determine the vectype from an
5302 invariant value (we don't know whether it is a vector
5303 of booleans or a vector of integers). We use the output
5304 vectype because operations on booleans don't change the
5305 type. */
5306 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5308 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5310 if (dump_enabled_p ())
5311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5312 "not supported operation on bool value.\n");
5313 return false;
5315 vectype = vectype_out;
5317 else
5318 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5320 if (vec_stmt)
5321 gcc_assert (vectype);
5322 if (!vectype)
5324 if (dump_enabled_p ())
5326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5327 "no vectype for scalar type ");
5328 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5329 TREE_TYPE (op0));
5330 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5333 return false;
5336 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5337 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5338 if (nunits_out != nunits_in)
5339 return false;
5341 if (op_type == binary_op || op_type == ternary_op)
5343 op1 = gimple_assign_rhs2 (stmt);
5344 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5346 if (dump_enabled_p ())
5347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5348 "use not simple.\n");
5349 return false;
5352 if (op_type == ternary_op)
5354 op2 = gimple_assign_rhs3 (stmt);
5355 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5357 if (dump_enabled_p ())
5358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5359 "use not simple.\n");
5360 return false;
5364 if (loop_vinfo)
5365 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5366 else
5367 vf = 1;
5369 /* Multiple types in SLP are handled by creating the appropriate number of
5370 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5371 case of SLP. */
5372 if (slp_node)
5373 ncopies = 1;
5374 else
5375 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5377 gcc_assert (ncopies >= 1);
5379 /* Shifts are handled in vectorizable_shift (). */
5380 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5381 || code == RROTATE_EXPR)
5382 return false;
5384 /* Supportable by target? */
5386 vec_mode = TYPE_MODE (vectype);
5387 if (code == MULT_HIGHPART_EXPR)
5388 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5389 else
5391 optab = optab_for_tree_code (code, vectype, optab_default);
5392 if (!optab)
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5396 "no optab.\n");
5397 return false;
5399 target_support_p = (optab_handler (optab, vec_mode)
5400 != CODE_FOR_nothing);
5403 if (!target_support_p)
5405 if (dump_enabled_p ())
5406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5407 "op not supported by target.\n");
5408 /* Check only during analysis. */
5409 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5410 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5411 return false;
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_NOTE, vect_location,
5414 "proceeding using word mode.\n");
5417 /* Worthwhile without SIMD support? Check only during analysis. */
5418 if (!VECTOR_MODE_P (vec_mode)
5419 && !vec_stmt
5420 && vf < vect_min_worthwhile_factor (code))
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5424 "not worthwhile without SIMD support.\n");
5425 return false;
5428 if (!vec_stmt) /* transformation not required. */
5430 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5431 if (dump_enabled_p ())
5432 dump_printf_loc (MSG_NOTE, vect_location,
5433 "=== vectorizable_operation ===\n");
5434 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5435 return true;
5438 /* Transform. */
5440 if (dump_enabled_p ())
5441 dump_printf_loc (MSG_NOTE, vect_location,
5442 "transform binary/unary operation.\n");
5444 /* Handle def. */
5445 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5447 /* In case the vectorization factor (VF) is bigger than the number
5448 of elements that we can fit in a vectype (nunits), we have to generate
5449 more than one vector stmt - i.e., we need to "unroll" the
5450 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5451 from one copy of the vector stmt to the next, in the field
5452 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5453 stages to find the correct vector defs to be used when vectorizing
5454 stmts that use the defs of the current stmt. The example below
5455 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5456 we need to create 4 vectorized stmts):
5458 before vectorization:
5459 RELATED_STMT VEC_STMT
5460 S1: x = memref - -
5461 S2: z = x + 1 - -
5463 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5464 there):
5465 RELATED_STMT VEC_STMT
5466 VS1_0: vx0 = memref0 VS1_1 -
5467 VS1_1: vx1 = memref1 VS1_2 -
5468 VS1_2: vx2 = memref2 VS1_3 -
5469 VS1_3: vx3 = memref3 - -
5470 S1: x = load - VS1_0
5471 S2: z = x + 1 - -
5473 step2: vectorize stmt S2 (done here):
5474 To vectorize stmt S2 we first need to find the relevant vector
5475 def for the first operand 'x'. This is, as usual, obtained from
5476 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5477 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5478 relevant vector def 'vx0'. Having found 'vx0' we can generate
5479 the vector stmt VS2_0, and as usual, record it in the
5480 STMT_VINFO_VEC_STMT of stmt S2.
5481 When creating the second copy (VS2_1), we obtain the relevant vector
5482 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5483 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5484 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5485 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5486 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5487 chain of stmts and pointers:
5488 RELATED_STMT VEC_STMT
5489 VS1_0: vx0 = memref0 VS1_1 -
5490 VS1_1: vx1 = memref1 VS1_2 -
5491 VS1_2: vx2 = memref2 VS1_3 -
5492 VS1_3: vx3 = memref3 - -
5493 S1: x = load - VS1_0
5494 VS2_0: vz0 = vx0 + v1 VS2_1 -
5495 VS2_1: vz1 = vx1 + v1 VS2_2 -
5496 VS2_2: vz2 = vx2 + v1 VS2_3 -
5497 VS2_3: vz3 = vx3 + v1 - -
5498 S2: z = x + 1 - VS2_0 */
5500 prev_stmt_info = NULL;
5501 for (j = 0; j < ncopies; j++)
5503 /* Handle uses. */
5504 if (j == 0)
5506 if (op_type == binary_op || op_type == ternary_op)
5507 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5508 slp_node, -1);
5509 else
5510 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5511 slp_node, -1);
5512 if (op_type == ternary_op)
5513 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5514 slp_node, -1);
5516 else
5518 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5519 if (op_type == ternary_op)
5521 tree vec_oprnd = vec_oprnds2.pop ();
5522 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5523 vec_oprnd));
5527 /* Arguments are ready. Create the new vector stmt. */
5528 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5530 vop1 = ((op_type == binary_op || op_type == ternary_op)
5531 ? vec_oprnds1[i] : NULL_TREE);
5532 vop2 = ((op_type == ternary_op)
5533 ? vec_oprnds2[i] : NULL_TREE);
5534 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5535 new_temp = make_ssa_name (vec_dest, new_stmt);
5536 gimple_assign_set_lhs (new_stmt, new_temp);
5537 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5538 if (slp_node)
5539 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5542 if (slp_node)
5543 continue;
5545 if (j == 0)
5546 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5547 else
5548 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5549 prev_stmt_info = vinfo_for_stmt (new_stmt);
5552 vec_oprnds0.release ();
5553 vec_oprnds1.release ();
5554 vec_oprnds2.release ();
5556 return true;
5559 /* A helper function to ensure data reference DR's base alignment
5560 for STMT_INFO. */
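/* Roughly speaking: if the analysis phase marked DR's base object as
   base_misaligned, the declared alignment of the base decl is raised to
   TYPE_ALIGN of the vector type (via the symbol table for decls visible
   there, or directly through SET_DECL_ALIGN otherwise), so that the
   vector accesses generated for STMT_INFO become aligned.  */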
5562 static void
5563 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5565 if (!dr->aux)
5566 return;
5568 if (DR_VECT_AUX (dr)->base_misaligned)
5570 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5571 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5573 if (decl_in_symtab_p (base_decl))
5574 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5575 else
5577 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5578 DECL_USER_ALIGN (base_decl) = 1;
5580 DR_VECT_AUX (dr)->base_misaligned = false;
5585 /* Function get_group_alias_ptr_type.
5587 Return the alias pointer type for the group starting at FIRST_STMT. */
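/* For example, if all data-refs in the group access the same array, the
   alias pointer type of the first reference is used; if the group mixes
   references with conflicting alias sets, ptr_type_node is returned so
   the generated accesses are treated conservatively for aliasing.  */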
5589 static tree
5590 get_group_alias_ptr_type (gimple *first_stmt)
5592 struct data_reference *first_dr, *next_dr;
5593 gimple *next_stmt;
5595 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5596 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5597 while (next_stmt)
5599 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5600 if (get_alias_set (DR_REF (first_dr))
5601 != get_alias_set (DR_REF (next_dr)))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_NOTE, vect_location,
5605 "conflicting alias set types.\n");
5606 return ptr_type_node;
5608 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5610 return reference_alias_ptr_type (DR_REF (first_dr));
5614 /* Function vectorizable_store.
5616 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5617 can be vectorized.
5618 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5619 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5620 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5622 static bool
5623 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5624 slp_tree slp_node)
5626 tree scalar_dest;
5627 tree data_ref;
5628 tree op;
5629 tree vec_oprnd = NULL_TREE;
5630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5631 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5632 tree elem_type;
5633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5634 struct loop *loop = NULL;
5635 machine_mode vec_mode;
5636 tree dummy;
5637 enum dr_alignment_support alignment_support_scheme;
5638 gimple *def_stmt;
5639 enum vect_def_type dt;
5640 stmt_vec_info prev_stmt_info = NULL;
5641 tree dataref_ptr = NULL_TREE;
5642 tree dataref_offset = NULL_TREE;
5643 gimple *ptr_incr = NULL;
5644 int ncopies;
5645 int j;
5646 gimple *next_stmt, *first_stmt;
5647 bool grouped_store;
5648 unsigned int group_size, i;
5649 vec<tree> oprnds = vNULL;
5650 vec<tree> result_chain = vNULL;
5651 bool inv_p;
5652 tree offset = NULL_TREE;
5653 vec<tree> vec_oprnds = vNULL;
5654 bool slp = (slp_node != NULL);
5655 unsigned int vec_num;
5656 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5657 vec_info *vinfo = stmt_info->vinfo;
5658 tree aggr_type;
5659 gather_scatter_info gs_info;
5660 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5661 gimple *new_stmt;
5662 int vf;
5663 vec_load_store_type vls_type;
5664 tree ref_type;
5666 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5667 return false;
5669 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5670 && ! vec_stmt)
5671 return false;
5673 /* Is vectorizable store? */
5675 if (!is_gimple_assign (stmt))
5676 return false;
5678 scalar_dest = gimple_assign_lhs (stmt);
5679 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5680 && is_pattern_stmt_p (stmt_info))
5681 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5682 if (TREE_CODE (scalar_dest) != ARRAY_REF
5683 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5684 && TREE_CODE (scalar_dest) != INDIRECT_REF
5685 && TREE_CODE (scalar_dest) != COMPONENT_REF
5686 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5687 && TREE_CODE (scalar_dest) != REALPART_EXPR
5688 && TREE_CODE (scalar_dest) != MEM_REF)
5689 return false;
5691 /* Cannot have hybrid store SLP -- that would mean storing to the
5692 same location twice. */
5693 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5695 gcc_assert (gimple_assign_single_p (stmt));
5697 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5698 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5700 if (loop_vinfo)
5702 loop = LOOP_VINFO_LOOP (loop_vinfo);
5703 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5705 else
5706 vf = 1;
5708 /* Multiple types in SLP are handled by creating the appropriate number of
5709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5710 case of SLP. */
5711 if (slp)
5712 ncopies = 1;
5713 else
5714 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5716 gcc_assert (ncopies >= 1);
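/* For example, with a vectorization factor of 8 and a 4-element vectype
   (nunits == 4), ncopies == 2: two vector stores are generated per
   unrolled iteration for this scalar store.  */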
5718 /* FORNOW. This restriction should be relaxed. */
5719 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5721 if (dump_enabled_p ())
5722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5723 "multiple types in nested loop.\n");
5724 return false;
5727 op = gimple_assign_rhs1 (stmt);
5729 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733 "use not simple.\n");
5734 return false;
5737 if (dt == vect_constant_def || dt == vect_external_def)
5738 vls_type = VLS_STORE_INVARIANT;
5739 else
5740 vls_type = VLS_STORE;
5742 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5743 return false;
5745 elem_type = TREE_TYPE (vectype);
5746 vec_mode = TYPE_MODE (vectype);
5748 /* FORNOW. In some cases we can vectorize even if the data type is not
5749 supported (e.g., array initialization with 0). */
5750 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5751 return false;
5753 if (!STMT_VINFO_DATA_REF (stmt_info))
5754 return false;
5756 vect_memory_access_type memory_access_type;
5757 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5758 &memory_access_type, &gs_info))
5759 return false;
5761 if (!vec_stmt) /* transformation not required. */
5763 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5764 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5765 /* The SLP costs are calculated during SLP analysis. */
5766 if (!PURE_SLP_STMT (stmt_info))
5767 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5768 NULL, NULL, NULL);
5769 return true;
5771 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5773 /* Transform. */
5775 ensure_base_align (stmt_info, dr);
5777 if (memory_access_type == VMAT_GATHER_SCATTER)
5779 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5780 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5781 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5782 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5783 edge pe = loop_preheader_edge (loop);
5784 gimple_seq seq;
5785 basic_block new_bb;
5786 enum { NARROW, NONE, WIDEN } modifier;
5787 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5789 if (nunits == (unsigned int) scatter_off_nunits)
5790 modifier = NONE;
5791 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5793 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5794 modifier = WIDEN;
5796 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5797 sel[i] = i | nunits;
5799 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5800 gcc_assert (perm_mask != NULL_TREE);
5802 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5804 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5805 modifier = NARROW;
5807 for (i = 0; i < (unsigned int) nunits; ++i)
5808 sel[i] = i | scatter_off_nunits;
5810 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5811 gcc_assert (perm_mask != NULL_TREE);
5812 ncopies *= 2;
5814 else
5815 gcc_unreachable ();
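/* In other words: for WIDEN the offset vector has twice as many elements
   as the data vector, so one offset vector serves two data copies and the
   permutation extracts its high half for the second scatter; for NARROW
   the data vector has twice as many elements as the offset vector, so
   each data vector is scattered in two halves (the permutation extracting
   its high half) and NCOPIES is doubled.  */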
5817 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5818 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5819 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5820 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5821 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5822 scaletype = TREE_VALUE (arglist);
5824 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5825 && TREE_CODE (rettype) == VOID_TYPE);
5827 ptr = fold_convert (ptrtype, gs_info.base);
5828 if (!is_gimple_min_invariant (ptr))
5830 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5831 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5832 gcc_assert (!new_bb);
5835 /* Currently we support only unconditional scatter stores,
5836 so mask should be all ones. */
5837 mask = build_int_cst (masktype, -1);
5838 mask = vect_init_vector (stmt, mask, masktype, NULL);
5840 scale = build_int_cst (scaletype, gs_info.scale);
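/* Illustrative shape of each scatter call emitted below, assuming the
   usual gather/scatter builtin convention of addressing base + index * scale:

     BUILTIN (ptr, mask, index_vector, data_vector, scale)

   With the all-ones MASK every lane is stored unconditionally.  */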
5842 prev_stmt_info = NULL;
5843 for (j = 0; j < ncopies; ++j)
5845 if (j == 0)
5847 src = vec_oprnd1
5848 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5849 op = vec_oprnd0
5850 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5852 else if (modifier != NONE && (j & 1))
5854 if (modifier == WIDEN)
5856 src = vec_oprnd1
5857 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5858 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5859 stmt, gsi);
5861 else if (modifier == NARROW)
5863 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5864 stmt, gsi);
5865 op = vec_oprnd0
5866 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5867 vec_oprnd0);
5869 else
5870 gcc_unreachable ();
5872 else
5874 src = vec_oprnd1
5875 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5876 op = vec_oprnd0
5877 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5878 vec_oprnd0);
5881 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5883 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5884 == TYPE_VECTOR_SUBPARTS (srctype));
5885 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5886 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5887 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5888 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5889 src = var;
5892 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5894 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5895 == TYPE_VECTOR_SUBPARTS (idxtype));
5896 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5897 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5898 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5900 op = var;
5903 new_stmt
5904 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5906 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5908 if (prev_stmt_info == NULL)
5909 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5910 else
5911 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5912 prev_stmt_info = vinfo_for_stmt (new_stmt);
5914 return true;
5917 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5918 if (grouped_store)
5920 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5921 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5922 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5924 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5926 /* FORNOW */
5927 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5929 /* We vectorize all the stmts of the interleaving group when we
5930 reach the last stmt in the group. */
5931 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5932 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5933 && !slp)
5935 *vec_stmt = NULL;
5936 return true;
5939 if (slp)
5941 grouped_store = false;
5942 /* VEC_NUM is the number of vect stmts to be created for this
5943 group. */
5944 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5945 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5946 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5947 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5948 op = gimple_assign_rhs1 (first_stmt);
5950 else
5951 /* VEC_NUM is the number of vect stmts to be created for this
5952 group. */
5953 vec_num = group_size;
5955 ref_type = get_group_alias_ptr_type (first_stmt);
5957 else
5959 first_stmt = stmt;
5960 first_dr = dr;
5961 group_size = vec_num = 1;
5962 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5965 if (dump_enabled_p ())
5966 dump_printf_loc (MSG_NOTE, vect_location,
5967 "transform store. ncopies = %d\n", ncopies);
5969 if (memory_access_type == VMAT_ELEMENTWISE
5970 || memory_access_type == VMAT_STRIDED_SLP)
5972 gimple_stmt_iterator incr_gsi;
5973 bool insert_after;
5974 gimple *incr;
5975 tree offvar;
5976 tree ivstep;
5977 tree running_off;
5978 gimple_seq stmts = NULL;
5979 tree stride_base, stride_step, alias_off;
5980 tree vec_oprnd;
5981 unsigned int g;
5983 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5985 stride_base
5986 = fold_build_pointer_plus
5987 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5988 size_binop (PLUS_EXPR,
5989 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5990 convert_to_ptrofftype (DR_INIT (first_dr))));
5991 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5993 /* For a store with loop-invariant (but other than power-of-2)
5994 stride (i.e. not a grouped access) like so:
5996 for (i = 0; i < n; i += stride)
5997 array[i] = ...;
5999 we generate a new induction variable and new stores from
6000 the components of the (vectorized) rhs:
6002 for (j = 0; ; j += VF*stride)
6003 vectemp = ...;
6004 tmp1 = vectemp[0];
6005 array[j] = tmp1;
6006 tmp2 = vectemp[1];
6007 array[j + stride] = tmp2;
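/* For instance, with VF == 4 and stride == 3 the generated loop becomes

     for (j = 0; ; j += 12)
       array[j]     = vectemp[0];
       array[j + 3] = vectemp[1];
       array[j + 6] = vectemp[2];
       array[j + 9] = vectemp[3];  */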
6011 unsigned nstores = nunits;
6012 unsigned lnel = 1;
6013 tree ltype = elem_type;
6014 if (slp)
6016 if (group_size < nunits
6017 && nunits % group_size == 0)
6019 nstores = nunits / group_size;
6020 lnel = group_size;
6021 ltype = build_vector_type (elem_type, group_size);
6023 else if (group_size >= nunits
6024 && group_size % nunits == 0)
6026 nstores = 1;
6027 lnel = nunits;
6028 ltype = vectype;
6030 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6031 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
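/* For example (illustrative): storing an SLP group of two ints with a
   4-element vectype gives nstores == 2 and a 2-element ltype, so each
   vector produced for the group is written back as two 2-element chunks,
   one chunk per scalar iteration of the group.  */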
6034 ivstep = stride_step;
6035 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6036 build_int_cst (TREE_TYPE (ivstep), vf));
6038 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6040 create_iv (stride_base, ivstep, NULL,
6041 loop, &incr_gsi, insert_after,
6042 &offvar, NULL);
6043 incr = gsi_stmt (incr_gsi);
6044 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6046 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6047 if (stmts)
6048 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6050 prev_stmt_info = NULL;
6051 alias_off = build_int_cst (ref_type, 0);
6052 next_stmt = first_stmt;
6053 for (g = 0; g < group_size; g++)
6055 running_off = offvar;
6056 if (g)
6058 tree size = TYPE_SIZE_UNIT (ltype);
6059 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6060 size);
6061 tree newoff = copy_ssa_name (running_off, NULL);
6062 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6063 running_off, pos);
6064 vect_finish_stmt_generation (stmt, incr, gsi);
6065 running_off = newoff;
6067 unsigned int group_el = 0;
6068 unsigned HOST_WIDE_INT
6069 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6070 for (j = 0; j < ncopies; j++)
6072 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6073 and first_stmt == stmt. */
6074 if (j == 0)
6076 if (slp)
6078 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6079 slp_node, -1);
6080 vec_oprnd = vec_oprnds[0];
6082 else
6084 gcc_assert (gimple_assign_single_p (next_stmt));
6085 op = gimple_assign_rhs1 (next_stmt);
6086 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6089 else
6091 if (slp)
6092 vec_oprnd = vec_oprnds[j];
6093 else
6095 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6096 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6100 for (i = 0; i < nstores; i++)
6102 tree newref, newoff;
6103 gimple *incr, *assign;
6104 tree size = TYPE_SIZE (ltype);
6105 /* Extract the i'th component. */
6106 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6107 bitsize_int (i), size);
6108 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6109 size, pos);
6111 elem = force_gimple_operand_gsi (gsi, elem, true,
6112 NULL_TREE, true,
6113 GSI_SAME_STMT);
6115 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6116 group_el * elsz);
6117 newref = build2 (MEM_REF, ltype,
6118 running_off, this_off);
6120 /* And store it to *running_off. */
6121 assign = gimple_build_assign (newref, elem);
6122 vect_finish_stmt_generation (stmt, assign, gsi);
6124 group_el += lnel;
6125 if (! slp
6126 || group_el == group_size)
6128 newoff = copy_ssa_name (running_off, NULL);
6129 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6130 running_off, stride_step);
6131 vect_finish_stmt_generation (stmt, incr, gsi);
6133 running_off = newoff;
6134 group_el = 0;
6136 if (g == group_size - 1
6137 && !slp)
6139 if (j == 0 && i == 0)
6140 STMT_VINFO_VEC_STMT (stmt_info)
6141 = *vec_stmt = assign;
6142 else
6143 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6144 prev_stmt_info = vinfo_for_stmt (assign);
6148 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6149 if (slp)
6150 break;
6153 vec_oprnds.release ();
6154 return true;
6157 auto_vec<tree> dr_chain (group_size);
6158 oprnds.create (group_size);
6160 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6161 gcc_assert (alignment_support_scheme);
6162 /* Targets with store-lane instructions must not require explicit
6163 realignment. */
6164 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6165 || alignment_support_scheme == dr_aligned
6166 || alignment_support_scheme == dr_unaligned_supported);
6168 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6169 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6170 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
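/* For a contiguous access with a negative step the pointer is biased by
   -(nunits - 1) elements so that the vector access covers the right
   slots; for VMAT_CONTIGUOUS_REVERSE the element order is additionally
   fixed up by the reverse permutation further down.  */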
6172 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6173 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6174 else
6175 aggr_type = vectype;
6177 /* In case the vectorization factor (VF) is bigger than the number
6178 of elements that we can fit in a vectype (nunits), we have to generate
6179 more than one vector stmt - i.e., we need to "unroll" the
6180 vector stmt by a factor VF/nunits. For more details see documentation in
6181 vect_get_vec_def_for_stmt_copy. */
6183 /* In case of interleaving (non-unit grouped access):
6185 S1: &base + 2 = x2
6186 S2: &base = x0
6187 S3: &base + 1 = x1
6188 S4: &base + 3 = x3
6190 We create vectorized stores starting from the base address (the access of
6191 the first stmt in the chain, S2 in the above example) when the last store stmt
6192 of the chain (S4) is reached:
6194 VS1: &base = vx2
6195 VS2: &base + vec_size*1 = vx0
6196 VS3: &base + vec_size*2 = vx1
6197 VS4: &base + vec_size*3 = vx3
6199 Then permutation statements are generated:
6201 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6202 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6205 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6206 (the order of the data-refs in the output of vect_permute_store_chain
6207 corresponds to the order of scalar stmts in the interleaving chain - see
6208 the documentation of vect_permute_store_chain()).
6210 In case of both multiple types and interleaving, above vector stores and
6211 permutation stmts are created for every copy. The result vector stmts are
6212 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6213 STMT_VINFO_RELATED_STMT for the next copies.
6216 prev_stmt_info = NULL;
6217 for (j = 0; j < ncopies; j++)
6220 if (j == 0)
6222 if (slp)
6224 /* Get vectorized arguments for SLP_NODE. */
6225 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6226 NULL, slp_node, -1);
6228 vec_oprnd = vec_oprnds[0];
6230 else
6232 /* For interleaved stores we collect vectorized defs for all the
6233 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6234 used as an input to vect_permute_store_chain(), and OPRNDS as
6235 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6237 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6238 OPRNDS are of size 1. */
6239 next_stmt = first_stmt;
6240 for (i = 0; i < group_size; i++)
6242 /* Since gaps are not supported for interleaved stores,
6243 GROUP_SIZE is the exact number of stmts in the chain.
6244 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6245 there is no interleaving, GROUP_SIZE is 1, and only one
6246 iteration of the loop will be executed. */
6247 gcc_assert (next_stmt
6248 && gimple_assign_single_p (next_stmt));
6249 op = gimple_assign_rhs1 (next_stmt);
6251 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6252 dr_chain.quick_push (vec_oprnd);
6253 oprnds.quick_push (vec_oprnd);
6254 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6258 /* We should have caught mismatched types earlier. */
6259 gcc_assert (useless_type_conversion_p (vectype,
6260 TREE_TYPE (vec_oprnd)));
6261 bool simd_lane_access_p
6262 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6263 if (simd_lane_access_p
6264 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6265 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6266 && integer_zerop (DR_OFFSET (first_dr))
6267 && integer_zerop (DR_INIT (first_dr))
6268 && alias_sets_conflict_p (get_alias_set (aggr_type),
6269 get_alias_set (TREE_TYPE (ref_type))))
6271 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6272 dataref_offset = build_int_cst (ref_type, 0);
6273 inv_p = false;
6275 else
6276 dataref_ptr
6277 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6278 simd_lane_access_p ? loop : NULL,
6279 offset, &dummy, gsi, &ptr_incr,
6280 simd_lane_access_p, &inv_p);
6281 gcc_assert (bb_vinfo || !inv_p);
6283 else
6285 /* For interleaved stores we created vectorized defs for all the
6286 defs stored in OPRNDS in the previous iteration (previous copy).
6287 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6288 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6289 next copy.
6290 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6291 OPRNDS are of size 1. */
6292 for (i = 0; i < group_size; i++)
6294 op = oprnds[i];
6295 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6296 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6297 dr_chain[i] = vec_oprnd;
6298 oprnds[i] = vec_oprnd;
6300 if (dataref_offset)
6301 dataref_offset
6302 = int_const_binop (PLUS_EXPR, dataref_offset,
6303 TYPE_SIZE_UNIT (aggr_type));
6304 else
6305 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6306 TYPE_SIZE_UNIT (aggr_type));
6309 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6311 tree vec_array;
6313 /* Combine all the vectors into an array. */
6314 vec_array = create_vector_array (vectype, vec_num);
6315 for (i = 0; i < vec_num; i++)
6317 vec_oprnd = dr_chain[i];
6318 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6321 /* Emit:
6322 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6323 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6324 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6325 gimple_call_set_lhs (new_stmt, data_ref);
6326 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6328 else
6330 new_stmt = NULL;
6331 if (grouped_store)
6333 if (j == 0)
6334 result_chain.create (group_size);
6335 /* Permute. */
6336 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6337 &result_chain);
6340 next_stmt = first_stmt;
6341 for (i = 0; i < vec_num; i++)
6343 unsigned align, misalign;
6345 if (i > 0)
6346 /* Bump the vector pointer. */
6347 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6348 stmt, NULL_TREE);
6350 if (slp)
6351 vec_oprnd = vec_oprnds[i];
6352 else if (grouped_store)
6353 /* For grouped stores vectorized defs are interleaved in
6354 vect_permute_store_chain(). */
6355 vec_oprnd = result_chain[i];
6357 data_ref = fold_build2 (MEM_REF, vectype,
6358 dataref_ptr,
6359 dataref_offset
6360 ? dataref_offset
6361 : build_int_cst (ref_type, 0));
6362 align = TYPE_ALIGN_UNIT (vectype);
6363 if (aligned_access_p (first_dr))
6364 misalign = 0;
6365 else if (DR_MISALIGNMENT (first_dr) == -1)
6367 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6368 align = TYPE_ALIGN_UNIT (elem_type);
6369 else
6370 align = get_object_alignment (DR_REF (first_dr))
6371 / BITS_PER_UNIT;
6372 misalign = 0;
6373 TREE_TYPE (data_ref)
6374 = build_aligned_type (TREE_TYPE (data_ref),
6375 align * BITS_PER_UNIT);
6377 else
6379 TREE_TYPE (data_ref)
6380 = build_aligned_type (TREE_TYPE (data_ref),
6381 TYPE_ALIGN (elem_type));
6382 misalign = DR_MISALIGNMENT (first_dr);
6384 if (dataref_offset == NULL_TREE
6385 && TREE_CODE (dataref_ptr) == SSA_NAME)
6386 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6387 misalign);
6389 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6391 tree perm_mask = perm_mask_for_reverse (vectype);
6392 tree perm_dest
6393 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6394 vectype);
6395 tree new_temp = make_ssa_name (perm_dest);
6397 /* Generate the permute statement. */
6398 gimple *perm_stmt
6399 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6400 vec_oprnd, perm_mask);
6401 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6403 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6404 vec_oprnd = new_temp;
6407 /* Arguments are ready. Create the new vector stmt. */
6408 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6409 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6411 if (slp)
6412 continue;
6414 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6415 if (!next_stmt)
6416 break;
6419 if (!slp)
6421 if (j == 0)
6422 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6423 else
6424 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6425 prev_stmt_info = vinfo_for_stmt (new_stmt);
6429 oprnds.release ();
6430 result_chain.release ();
6431 vec_oprnds.release ();
6433 return true;
6436 /* Given a vector type VECTYPE, turn the permutation SEL into the equivalent
6437 VECTOR_CST mask. No checks are made that the target platform supports the
6438 mask, so callers may wish to test can_vec_perm_p separately, or use
6439 vect_gen_perm_mask_checked. */
6441 tree
6442 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6444 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6445 int i, nunits;
6447 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6449 mask_elt_type = lang_hooks.types.type_for_mode
6450 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6451 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6453 mask_elts = XALLOCAVEC (tree, nunits);
6454 for (i = nunits - 1; i >= 0; i--)
6455 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6456 mask_vec = build_vector (mask_type, mask_elts);
6458 return mask_vec;
6461 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6462 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6464 tree
6465 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6467 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6468 return vect_gen_perm_mask_any (vectype, sel);
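/* Usage sketch (illustrative): a reversal mask for a 4-element vector can
   be built as

     unsigned char sel[4] = { 3, 2, 1, 0 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   Each SEL entry indexes into the concatenation of the two input vectors
   of the eventual VEC_PERM_EXPR, so values 0 .. 2*nunits-1 are valid.  */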
6471 /* Given vector variables X and Y that were generated for the scalar
6472 STMT, generate instructions to permute the vector elements of X and Y
6473 using the permutation mask MASK_VEC, insert them at *GSI, and return
6474 the permuted vector variable. */
6476 static tree
6477 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6478 gimple_stmt_iterator *gsi)
6480 tree vectype = TREE_TYPE (x);
6481 tree perm_dest, data_ref;
6482 gimple *perm_stmt;
6484 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6485 data_ref = make_ssa_name (perm_dest);
6487 /* Generate the permute statement. */
6488 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6489 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6491 return data_ref;
6494 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6495 inserting them on the loop's preheader edge. Returns true if we
6496 were successful in doing so (and thus STMT can then be moved),
6497 otherwise returns false. */
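/* For example (illustrative): for an invariant load  x = *p_2  where  p_2
   is defined inside the loop as  p_2 = base_1 + 16  and  base_1  is
   defined outside the loop, the definition of  p_2  is moved to the
   preheader, after which the load itself can be hoisted.  */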
6499 static bool
6500 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6502 ssa_op_iter i;
6503 tree op;
6504 bool any = false;
6506 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6508 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6509 if (!gimple_nop_p (def_stmt)
6510 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6512 /* Make sure we don't need to recurse. While we could do
6513 so in simple cases, when there are more complex use webs
6514 we don't have an easy way to preserve stmt order to fulfil
6515 dependencies within them. */
6516 tree op2;
6517 ssa_op_iter i2;
6518 if (gimple_code (def_stmt) == GIMPLE_PHI)
6519 return false;
6520 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6522 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6523 if (!gimple_nop_p (def_stmt2)
6524 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6525 return false;
6527 any = true;
6531 if (!any)
6532 return true;
6534 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6536 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6537 if (!gimple_nop_p (def_stmt)
6538 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6540 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6541 gsi_remove (&gsi, false);
6542 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6546 return true;
6549 /* vectorizable_load.
6551 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6552 can be vectorized.
6553 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6554 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6555 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6557 static bool
6558 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6559 slp_tree slp_node, slp_instance slp_node_instance)
6561 tree scalar_dest;
6562 tree vec_dest = NULL;
6563 tree data_ref = NULL;
6564 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6565 stmt_vec_info prev_stmt_info;
6566 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6567 struct loop *loop = NULL;
6568 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6569 bool nested_in_vect_loop = false;
6570 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6571 tree elem_type;
6572 tree new_temp;
6573 machine_mode mode;
6574 gimple *new_stmt = NULL;
6575 tree dummy;
6576 enum dr_alignment_support alignment_support_scheme;
6577 tree dataref_ptr = NULL_TREE;
6578 tree dataref_offset = NULL_TREE;
6579 gimple *ptr_incr = NULL;
6580 int ncopies;
6581 int i, j, group_size, group_gap_adj;
6582 tree msq = NULL_TREE, lsq;
6583 tree offset = NULL_TREE;
6584 tree byte_offset = NULL_TREE;
6585 tree realignment_token = NULL_TREE;
6586 gphi *phi = NULL;
6587 vec<tree> dr_chain = vNULL;
6588 bool grouped_load = false;
6589 gimple *first_stmt;
6590 gimple *first_stmt_for_drptr = NULL;
6591 bool inv_p;
6592 bool compute_in_loop = false;
6593 struct loop *at_loop;
6594 int vec_num;
6595 bool slp = (slp_node != NULL);
6596 bool slp_perm = false;
6597 enum tree_code code;
6598 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6599 int vf;
6600 tree aggr_type;
6601 gather_scatter_info gs_info;
6602 vec_info *vinfo = stmt_info->vinfo;
6603 tree ref_type;
6605 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6606 return false;
6608 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6609 && ! vec_stmt)
6610 return false;
6612 /* Is vectorizable load? */
6613 if (!is_gimple_assign (stmt))
6614 return false;
6616 scalar_dest = gimple_assign_lhs (stmt);
6617 if (TREE_CODE (scalar_dest) != SSA_NAME)
6618 return false;
6620 code = gimple_assign_rhs_code (stmt);
6621 if (code != ARRAY_REF
6622 && code != BIT_FIELD_REF
6623 && code != INDIRECT_REF
6624 && code != COMPONENT_REF
6625 && code != IMAGPART_EXPR
6626 && code != REALPART_EXPR
6627 && code != MEM_REF
6628 && TREE_CODE_CLASS (code) != tcc_declaration)
6629 return false;
6631 if (!STMT_VINFO_DATA_REF (stmt_info))
6632 return false;
6634 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6635 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6637 if (loop_vinfo)
6639 loop = LOOP_VINFO_LOOP (loop_vinfo);
6640 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6641 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6643 else
6644 vf = 1;
6646 /* Multiple types in SLP are handled by creating the appropriate number of
6647 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6648 case of SLP. */
6649 if (slp)
6650 ncopies = 1;
6651 else
6652 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6654 gcc_assert (ncopies >= 1);
6656 /* FORNOW. This restriction should be relaxed. */
6657 if (nested_in_vect_loop && ncopies > 1)
6659 if (dump_enabled_p ())
6660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6661 "multiple types in nested loop.\n");
6662 return false;
6665 /* Invalidate assumptions made by dependence analysis when vectorization
6666 on the unrolled body effectively re-orders stmts. */
6667 if (ncopies > 1
6668 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6669 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6670 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6672 if (dump_enabled_p ())
6673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6674 "cannot perform implicit CSE when unrolling "
6675 "with negative dependence distance\n");
6676 return false;
6679 elem_type = TREE_TYPE (vectype);
6680 mode = TYPE_MODE (vectype);
6682 /* FORNOW. In some cases we can vectorize even if the data type is not
6683 supported (e.g., data copies). */
6684 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6686 if (dump_enabled_p ())
6687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6688 "Aligned load, but unsupported type.\n");
6689 return false;
6692 /* Check if the load is a part of an interleaving chain. */
6693 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6695 grouped_load = true;
6696 /* FORNOW */
6697 gcc_assert (!nested_in_vect_loop);
6698 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6700 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6701 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6703 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6704 slp_perm = true;
6706 /* Invalidate assumptions made by dependence analysis when vectorization
6707 on the unrolled body effectively re-orders stmts. */
6708 if (!PURE_SLP_STMT (stmt_info)
6709 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6710 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6711 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6713 if (dump_enabled_p ())
6714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6715 "cannot perform implicit CSE when performing "
6716 "group loads with negative dependence distance\n");
6717 return false;
6720 /* Similarly, when the stmt is a load that is both part of an SLP
6721 instance and a loop vectorized stmt via the same-dr mechanism,
6722 we have to give up. */
6723 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6724 && (STMT_SLP_TYPE (stmt_info)
6725 != STMT_SLP_TYPE (vinfo_for_stmt
6726 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6728 if (dump_enabled_p ())
6729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6730 "conflicting SLP types for CSEd load\n");
6731 return false;
6735 vect_memory_access_type memory_access_type;
6736 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6737 &memory_access_type, &gs_info))
6738 return false;
6740 if (!vec_stmt) /* transformation not required. */
6742 if (!slp)
6743 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6744 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6745 /* The SLP costs are calculated during SLP analysis. */
6746 if (!PURE_SLP_STMT (stmt_info))
6747 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6748 NULL, NULL, NULL);
6749 return true;
6752 if (!slp)
6753 gcc_assert (memory_access_type
6754 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6756 if (dump_enabled_p ())
6757 dump_printf_loc (MSG_NOTE, vect_location,
6758 "transform load. ncopies = %d\n", ncopies);
6760 /* Transform. */
6762 ensure_base_align (stmt_info, dr);
6764 if (memory_access_type == VMAT_GATHER_SCATTER)
6766 tree vec_oprnd0 = NULL_TREE, op;
6767 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6768 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6769 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6770 edge pe = loop_preheader_edge (loop);
6771 gimple_seq seq;
6772 basic_block new_bb;
6773 enum { NARROW, NONE, WIDEN } modifier;
6774 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6776 if (nunits == gather_off_nunits)
6777 modifier = NONE;
6778 else if (nunits == gather_off_nunits / 2)
6780 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6781 modifier = WIDEN;
6783 for (i = 0; i < gather_off_nunits; ++i)
6784 sel[i] = i | nunits;
6786 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6788 else if (nunits == gather_off_nunits * 2)
6790 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6791 modifier = NARROW;
6793 for (i = 0; i < nunits; ++i)
6794 sel[i] = i < gather_off_nunits
6795 ? i : i + nunits - gather_off_nunits;
6797 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6798 ncopies *= 2;
6800 else
6801 gcc_unreachable ();
6803 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6804 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6805 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6806 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6807 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6808 scaletype = TREE_VALUE (arglist);
6809 gcc_checking_assert (types_compatible_p (srctype, rettype));
6811 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6813 ptr = fold_convert (ptrtype, gs_info.base);
6814 if (!is_gimple_min_invariant (ptr))
6816 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6817 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6818 gcc_assert (!new_bb);
6821 /* Currently we support only unconditional gather loads,
6822 so mask should be all ones. */
6823 if (TREE_CODE (masktype) == INTEGER_TYPE)
6824 mask = build_int_cst (masktype, -1);
6825 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6827 mask = build_int_cst (TREE_TYPE (masktype), -1);
6828 mask = build_vector_from_val (masktype, mask);
6829 mask = vect_init_vector (stmt, mask, masktype, NULL);
6831 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6833 REAL_VALUE_TYPE r;
6834 long tmp[6];
6835 for (j = 0; j < 6; ++j)
6836 tmp[j] = -1;
6837 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6838 mask = build_real (TREE_TYPE (masktype), r);
6839 mask = build_vector_from_val (masktype, mask);
6840 mask = vect_init_vector (stmt, mask, masktype, NULL);
6842 else
6843 gcc_unreachable ();
6845 scale = build_int_cst (scaletype, gs_info.scale);
6847 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6848 merge = build_int_cst (TREE_TYPE (rettype), 0);
6849 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6851 REAL_VALUE_TYPE r;
6852 long tmp[6];
6853 for (j = 0; j < 6; ++j)
6854 tmp[j] = 0;
6855 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6856 merge = build_real (TREE_TYPE (rettype), r);
6858 else
6859 gcc_unreachable ();
6860 merge = build_vector_from_val (rettype, merge);
6861 merge = vect_init_vector (stmt, merge, rettype, NULL);
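/* MERGE provides the values for masked-off lanes of the gather.  With the
   unconditional all-ones mask built above it has no visible effect, but
   the builtin still takes the operand, so an all-zero vector is used.  */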
6863 prev_stmt_info = NULL;
6864 for (j = 0; j < ncopies; ++j)
6866 if (modifier == WIDEN && (j & 1))
6867 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6868 perm_mask, stmt, gsi);
6869 else if (j == 0)
6870 op = vec_oprnd0
6871 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6872 else
6873 op = vec_oprnd0
6874 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6876 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6878 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6879 == TYPE_VECTOR_SUBPARTS (idxtype));
6880 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6881 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6882 new_stmt
6883 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6885 op = var;
6888 new_stmt
6889 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6891 if (!useless_type_conversion_p (vectype, rettype))
6893 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6894 == TYPE_VECTOR_SUBPARTS (rettype));
6895 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6896 gimple_call_set_lhs (new_stmt, op);
6897 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6898 var = make_ssa_name (vec_dest);
6899 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6900 new_stmt
6901 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6903 else
6905 var = make_ssa_name (vec_dest, new_stmt);
6906 gimple_call_set_lhs (new_stmt, var);
6909 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6911 if (modifier == NARROW)
6913 if ((j & 1) == 0)
6915 prev_res = var;
6916 continue;
6918 var = permute_vec_elements (prev_res, var,
6919 perm_mask, stmt, gsi);
6920 new_stmt = SSA_NAME_DEF_STMT (var);
6923 if (prev_stmt_info == NULL)
6924 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6925 else
6926 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6927 prev_stmt_info = vinfo_for_stmt (new_stmt);
6929 return true;
6932 if (memory_access_type == VMAT_ELEMENTWISE
6933 || memory_access_type == VMAT_STRIDED_SLP)
6935 gimple_stmt_iterator incr_gsi;
6936 bool insert_after;
6937 gimple *incr;
6938 tree offvar;
6939 tree ivstep;
6940 tree running_off;
6941 vec<constructor_elt, va_gc> *v = NULL;
6942 gimple_seq stmts = NULL;
6943 tree stride_base, stride_step, alias_off;
6945 gcc_assert (!nested_in_vect_loop);
6947 if (slp && grouped_load)
6949 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6950 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6951 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6952 ref_type = get_group_alias_ptr_type (first_stmt);
6954 else
6956 first_stmt = stmt;
6957 first_dr = dr;
6958 group_size = 1;
6959 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6962 stride_base
6963 = fold_build_pointer_plus
6964 (DR_BASE_ADDRESS (first_dr),
6965 size_binop (PLUS_EXPR,
6966 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6967 convert_to_ptrofftype (DR_INIT (first_dr))));
6968 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6970 /* For a load with loop-invariant (but other than power-of-2)
6971 stride (i.e. not a grouped access) like so:
6973 for (i = 0; i < n; i += stride)
6974 ... = array[i];
6976 we generate a new induction variable and new accesses to
6977 form a new vector (or vectors, depending on ncopies):
6979 for (j = 0; ; j += VF*stride)
6980 tmp1 = array[j];
6981 tmp2 = array[j + stride];
6983 vectemp = {tmp1, tmp2, ...}
6986 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6987 build_int_cst (TREE_TYPE (stride_step), vf));
6989 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6991 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6992 loop, &incr_gsi, insert_after,
6993 &offvar, NULL);
6994 incr = gsi_stmt (incr_gsi);
6995 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6997 stride_step = force_gimple_operand (unshare_expr (stride_step),
6998 &stmts, true, NULL_TREE);
6999 if (stmts)
7000 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7002 prev_stmt_info = NULL;
7003 running_off = offvar;
7004 alias_off = build_int_cst (ref_type, 0);
7005 int nloads = nunits;
7006 int lnel = 1;
7007 tree ltype = TREE_TYPE (vectype);
7008 tree lvectype = vectype;
7009 auto_vec<tree> dr_chain;
7010 if (memory_access_type == VMAT_STRIDED_SLP)
7012 if (group_size < nunits)
7014 /* Avoid emitting a constructor of vector elements by performing
7015 the loads using an integer type of the same size,
7016 constructing a vector of those and then re-interpreting it
7017 as the original vector type. This works around the fact
7018 that the vec_init optab was only designed for scalar
7019 element modes and thus expansion goes through memory.
7020 This avoids a huge runtime penalty due to the general
7021 inability to perform store forwarding from smaller stores
7022 to a larger load. */
7023 unsigned lsize
7024 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7025 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7026 enum machine_mode vmode = mode_for_vector (elmode,
7027 nunits / group_size);
7028 /* If we can't construct such a vector, fall back to
7029 element loads of the original vector type. */
7030 if (VECTOR_MODE_P (vmode)
7031 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7033 nloads = nunits / group_size;
7034 lnel = group_size;
7035 ltype = build_nonstandard_integer_type (lsize, 1);
7036 lvectype = build_vector_type (ltype, nloads);
7039 else
7041 nloads = 1;
7042 lnel = nunits;
7043 ltype = vectype;
7045 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
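/* For example (illustrative, exact modes depend on the target): a group
   of two floats loaded into a 4-element float vector gives lsize == 64,
   so the group is loaded as two 64-bit integers, a 2-element integer
   vector is built from them, and the result is view-converted back to
   the original float vector type.  */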
7047 if (slp)
7049 /* For SLP permutation support we need to load the whole group,
7050 not only the number of vector stmts the permutation result
7051 fits in. */
7052 if (slp_perm)
7054 ncopies = (group_size * vf + nunits - 1) / nunits;
7055 dr_chain.create (ncopies);
7057 else
7058 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7060 int group_el = 0;
7061 unsigned HOST_WIDE_INT
7062 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7063 for (j = 0; j < ncopies; j++)
7065 if (nloads > 1)
7066 vec_alloc (v, nloads);
7067 for (i = 0; i < nloads; i++)
7069 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7070 group_el * elsz);
7071 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7072 build2 (MEM_REF, ltype,
7073 running_off, this_off));
7074 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7075 if (nloads > 1)
7076 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7077 gimple_assign_lhs (new_stmt));
7079 group_el += lnel;
7080 if (! slp
7081 || group_el == group_size)
7083 tree newoff = copy_ssa_name (running_off);
7084 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7085 running_off, stride_step);
7086 vect_finish_stmt_generation (stmt, incr, gsi);
7088 running_off = newoff;
7089 group_el = 0;
7092 if (nloads > 1)
7094 tree vec_inv = build_constructor (lvectype, v);
7095 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7096 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7097 if (lvectype != vectype)
7099 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7100 VIEW_CONVERT_EXPR,
7101 build1 (VIEW_CONVERT_EXPR,
7102 vectype, new_temp));
7103 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7107 if (slp)
7109 if (slp_perm)
7110 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7111 else
7112 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7114 else
7116 if (j == 0)
7117 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7118 else
7119 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7120 prev_stmt_info = vinfo_for_stmt (new_stmt);
7123 if (slp_perm)
7125 unsigned n_perms;
7126 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7127 slp_node_instance, false, &n_perms);
7129 return true;
7132 if (grouped_load)
7134 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7135 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7136 /* For SLP vectorization we directly vectorize a subchain
7137 without permutation. */
7138 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7139 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7140 /* For BB vectorization always use the first stmt to base
7141 the data ref pointer on. */
7142 if (bb_vinfo)
7143 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7145 /* Check if the chain of loads is already vectorized. */
7146 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7147 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7148 ??? But we can only do so if there is exactly one
7149 as we have no way to get at the rest. Leave the CSE
7150 opportunity alone.
7151 ??? With the group load eventually participating
7152 in multiple different permutations (having multiple
7153 slp nodes which refer to the same group) the CSE
7154 is even wrong code. See PR56270. */
7155 && !slp)
7157 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7158 return true;
7160 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7161 group_gap_adj = 0;
7163 /* VEC_NUM is the number of vect stmts to be created for this group. */
7164 if (slp)
7166 grouped_load = false;
7167 /* For SLP permutation support we need to load the whole group,
7168 not only the number of vector stmts the permutation result
7169 fits in. */
7170 if (slp_perm)
7171 vec_num = (group_size * vf + nunits - 1) / nunits;
7172 else
7173 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7174 group_gap_adj = vf * group_size - nunits * vec_num;
7176 else
7177 vec_num = group_size;
7179 ref_type = get_group_alias_ptr_type (first_stmt);
7181 else
7183 first_stmt = stmt;
7184 first_dr = dr;
7185 group_size = vec_num = 1;
7186 group_gap_adj = 0;
7187 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7190 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7191 gcc_assert (alignment_support_scheme);
7192 /* Targets with load-lane instructions must not require explicit
7193 realignment. */
7194 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7195 || alignment_support_scheme == dr_aligned
7196 || alignment_support_scheme == dr_unaligned_supported);
7198 /* In case the vectorization factor (VF) is bigger than the number
7199 of elements that we can fit in a vectype (nunits), we have to generate
7200 more than one vector stmt - i.e., we need to "unroll" the
7201 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7202 from one copy of the vector stmt to the next, in the field
7203 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7204 stages to find the correct vector defs to be used when vectorizing
7205 stmts that use the defs of the current stmt. The example below
7206 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7207 need to create 4 vectorized stmts):
7209 before vectorization:
7210 RELATED_STMT VEC_STMT
7211 S1: x = memref - -
7212 S2: z = x + 1 - -
7214 step 1: vectorize stmt S1:
7215 We first create the vector stmt VS1_0, and, as usual, record a
7216 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7217 Next, we create the vector stmt VS1_1, and record a pointer to
7218 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7219 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7220 stmts and pointers:
7221 RELATED_STMT VEC_STMT
7222 VS1_0: vx0 = memref0 VS1_1 -
7223 VS1_1: vx1 = memref1 VS1_2 -
7224 VS1_2: vx2 = memref2 VS1_3 -
7225 VS1_3: vx3 = memref3 - -
7226 S1: x = load - VS1_0
7227 S2: z = x + 1 - -
7229 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7230 information we recorded in the RELATED_STMT field is used to vectorize
7231 stmt S2. */
7233 /* In case of interleaving (non-unit grouped access):
7235 S1: x2 = &base + 2
7236 S2: x0 = &base
7237 S3: x1 = &base + 1
7238 S4: x3 = &base + 3
7240 Vectorized loads are created in the order of memory accesses
7241 starting from the access of the first stmt of the chain:
7243 VS1: vx0 = &base
7244 VS2: vx1 = &base + vec_size*1
7245 VS3: vx3 = &base + vec_size*2
7246 VS4: vx4 = &base + vec_size*3
7248 Then permutation statements are generated:
7250 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7251 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7254 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7255 (the order of the data-refs in the output of vect_permute_load_chain
7256 corresponds to the order of scalar stmts in the interleaving chain - see
7257 the documentation of vect_permute_load_chain()).
7258 The generation of permutation stmts and recording them in
7259 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7261 In case of both multiple types and interleaving, the vector loads and
7262 permutation stmts above are created for every copy. The result vector
7263 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7264 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7266 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7267 on a target that supports unaligned accesses (dr_unaligned_supported)
7268 we generate the following code:
7269 p = initial_addr;
7270 indx = 0;
7271 loop {
7272 p = p + indx * vectype_size;
7273 vec_dest = *(p);
7274 indx = indx + 1;
7277 Otherwise, the data reference is potentially unaligned on a target that
7278 does not support unaligned accesses (dr_explicit_realign_optimized) -
7279 then generate the following code, in which the data in each iteration is
7280 obtained by two vector loads, one from the previous iteration, and one
7281 from the current iteration:
7282 p1 = initial_addr;
7283 msq_init = *(floor(p1))
7284 p2 = initial_addr + VS - 1;
7285 realignment_token = call target_builtin;
7286 indx = 0;
7287 loop {
7288 p2 = p2 + indx * vectype_size
7289 lsq = *(floor(p2))
7290 vec_dest = realign_load (msq, lsq, realignment_token)
7291 indx = indx + 1;
7292 msq = lsq;
7293 } */
7295 /* If the misalignment remains the same throughout the execution of the
7296 loop, we can create the init_addr and permutation mask at the loop
7297 preheader. Otherwise, it needs to be created inside the loop.
7298 This can only occur when vectorizing memory accesses in the inner-loop
7299 nested within an outer-loop that is being vectorized. */
7301 if (nested_in_vect_loop
7302 && (TREE_INT_CST_LOW (DR_STEP (dr))
7303 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7305 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7306 compute_in_loop = true;
7309 if ((alignment_support_scheme == dr_explicit_realign_optimized
7310 || alignment_support_scheme == dr_explicit_realign)
7311 && !compute_in_loop)
7313 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7314 alignment_support_scheme, NULL_TREE,
7315 &at_loop);
7316 if (alignment_support_scheme == dr_explicit_realign_optimized)
7318 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7319 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7320 size_one_node);
7323 else
7324 at_loop = loop;
7326 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7327 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7329 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7330 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7331 else
7332 aggr_type = vectype;
7334 prev_stmt_info = NULL;
7335 for (j = 0; j < ncopies; j++)
7337 /* 1. Create the vector or array pointer update chain. */
7338 if (j == 0)
7340 bool simd_lane_access_p
7341 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7342 if (simd_lane_access_p
7343 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7344 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7345 && integer_zerop (DR_OFFSET (first_dr))
7346 && integer_zerop (DR_INIT (first_dr))
7347 && alias_sets_conflict_p (get_alias_set (aggr_type),
7348 get_alias_set (TREE_TYPE (ref_type)))
7349 && (alignment_support_scheme == dr_aligned
7350 || alignment_support_scheme == dr_unaligned_supported))
7352 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7353 dataref_offset = build_int_cst (ref_type, 0);
7354 inv_p = false;
7356 else if (first_stmt_for_drptr
7357 && first_stmt != first_stmt_for_drptr)
7359 dataref_ptr
7360 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7361 at_loop, offset, &dummy, gsi,
7362 &ptr_incr, simd_lane_access_p,
7363 &inv_p, byte_offset);
7364 /* Adjust the pointer by the difference to first_stmt. */
7365 data_reference_p ptrdr
7366 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7367 tree diff = fold_convert (sizetype,
7368 size_binop (MINUS_EXPR,
7369 DR_INIT (first_dr),
7370 DR_INIT (ptrdr)));
7371 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7372 stmt, diff);
7374 else
7375 dataref_ptr
7376 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7377 offset, &dummy, gsi, &ptr_incr,
7378 simd_lane_access_p, &inv_p,
7379 byte_offset);
7381 else if (dataref_offset)
7382 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7383 TYPE_SIZE_UNIT (aggr_type));
7384 else
7385 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7386 TYPE_SIZE_UNIT (aggr_type));
7388 if (grouped_load || slp_perm)
7389 dr_chain.create (vec_num);
7391 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7393 tree vec_array;
7395 vec_array = create_vector_array (vectype, vec_num);
7397 /* Emit:
7398 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7399 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7400 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7401 gimple_call_set_lhs (new_stmt, vec_array);
7402 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7404 /* Extract each vector into an SSA_NAME. */
7405 for (i = 0; i < vec_num; i++)
7407 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7408 vec_array, i);
7409 dr_chain.quick_push (new_temp);
7412 /* Record the mapping between SSA_NAMEs and statements. */
7413 vect_record_grouped_load_vectors (stmt, dr_chain);
7415 else
7417 for (i = 0; i < vec_num; i++)
7419 if (i > 0)
7420 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7421 stmt, NULL_TREE);
7423 /* 2. Create the vector-load in the loop. */
7424 switch (alignment_support_scheme)
7426 case dr_aligned:
7427 case dr_unaligned_supported:
7429 unsigned int align, misalign;
7431 data_ref
7432 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7433 dataref_offset
7434 ? dataref_offset
7435 : build_int_cst (ref_type, 0));
7436 align = TYPE_ALIGN_UNIT (vectype);
7437 if (alignment_support_scheme == dr_aligned)
7439 gcc_assert (aligned_access_p (first_dr));
7440 misalign = 0;
7442 else if (DR_MISALIGNMENT (first_dr) == -1)
7444 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7445 align = TYPE_ALIGN_UNIT (elem_type);
7446 else
7447 align = (get_object_alignment (DR_REF (first_dr))
7448 / BITS_PER_UNIT);
7449 misalign = 0;
7450 TREE_TYPE (data_ref)
7451 = build_aligned_type (TREE_TYPE (data_ref),
7452 align * BITS_PER_UNIT);
7454 else
7456 TREE_TYPE (data_ref)
7457 = build_aligned_type (TREE_TYPE (data_ref),
7458 TYPE_ALIGN (elem_type));
7459 misalign = DR_MISALIGNMENT (first_dr);
7461 if (dataref_offset == NULL_TREE
7462 && TREE_CODE (dataref_ptr) == SSA_NAME)
7463 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7464 align, misalign);
7465 break;
7467 case dr_explicit_realign:
7469 tree ptr, bump;
7471 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7473 if (compute_in_loop)
7474 msq = vect_setup_realignment (first_stmt, gsi,
7475 &realignment_token,
7476 dr_explicit_realign,
7477 dataref_ptr, NULL);
7479 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7480 ptr = copy_ssa_name (dataref_ptr);
7481 else
7482 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7483 new_stmt = gimple_build_assign
7484 (ptr, BIT_AND_EXPR, dataref_ptr,
7485 build_int_cst
7486 (TREE_TYPE (dataref_ptr),
7487 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7489 data_ref
7490 = build2 (MEM_REF, vectype, ptr,
7491 build_int_cst (ref_type, 0));
7492 vec_dest = vect_create_destination_var (scalar_dest,
7493 vectype);
7494 new_stmt = gimple_build_assign (vec_dest, data_ref);
7495 new_temp = make_ssa_name (vec_dest, new_stmt);
7496 gimple_assign_set_lhs (new_stmt, new_temp);
7497 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7498 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7500 msq = new_temp;
7502 bump = size_binop (MULT_EXPR, vs,
7503 TYPE_SIZE_UNIT (elem_type));
7504 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7505 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7506 new_stmt = gimple_build_assign
7507 (NULL_TREE, BIT_AND_EXPR, ptr,
7508 build_int_cst
7509 (TREE_TYPE (ptr),
7510 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7511 ptr = copy_ssa_name (ptr, new_stmt);
7512 gimple_assign_set_lhs (new_stmt, ptr);
7513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7514 data_ref
7515 = build2 (MEM_REF, vectype, ptr,
7516 build_int_cst (ref_type, 0));
7517 break;
7519 case dr_explicit_realign_optimized:
7520 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7521 new_temp = copy_ssa_name (dataref_ptr);
7522 else
7523 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7524 new_stmt = gimple_build_assign
7525 (new_temp, BIT_AND_EXPR, dataref_ptr,
7526 build_int_cst
7527 (TREE_TYPE (dataref_ptr),
7528 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7530 data_ref
7531 = build2 (MEM_REF, vectype, new_temp,
7532 build_int_cst (ref_type, 0));
7533 break;
7534 default:
7535 gcc_unreachable ();
7537 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7538 new_stmt = gimple_build_assign (vec_dest, data_ref);
7539 new_temp = make_ssa_name (vec_dest, new_stmt);
7540 gimple_assign_set_lhs (new_stmt, new_temp);
7541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7543 /* 3. Handle explicit realignment if necessary/supported.
7544 Create in loop:
7545 vec_dest = realign_load (msq, lsq, realignment_token) */
7546 if (alignment_support_scheme == dr_explicit_realign_optimized
7547 || alignment_support_scheme == dr_explicit_realign)
7549 lsq = gimple_assign_lhs (new_stmt);
7550 if (!realignment_token)
7551 realignment_token = dataref_ptr;
7552 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7553 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7554 msq, lsq, realignment_token);
7555 new_temp = make_ssa_name (vec_dest, new_stmt);
7556 gimple_assign_set_lhs (new_stmt, new_temp);
7557 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7559 if (alignment_support_scheme == dr_explicit_realign_optimized)
7561 gcc_assert (phi);
7562 if (i == vec_num - 1 && j == ncopies - 1)
7563 add_phi_arg (phi, lsq,
7564 loop_latch_edge (containing_loop),
7565 UNKNOWN_LOCATION);
7566 msq = lsq;
7570 /* 4. Handle invariant-load. */
7571 if (inv_p && !bb_vinfo)
7573 gcc_assert (!grouped_load);
7574 /* If we have versioned for aliasing or the loop doesn't
7575 have any data dependencies that would preclude this,
7576 then we are sure this is a loop invariant load and
7577 thus we can insert it on the preheader edge. */
7578 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7579 && !nested_in_vect_loop
7580 && hoist_defs_of_uses (stmt, loop))
7582 if (dump_enabled_p ())
7584 dump_printf_loc (MSG_NOTE, vect_location,
7585 "hoisting out of the vectorized "
7586 "loop: ");
7587 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7589 tree tem = copy_ssa_name (scalar_dest);
7590 gsi_insert_on_edge_immediate
7591 (loop_preheader_edge (loop),
7592 gimple_build_assign (tem,
7593 unshare_expr
7594 (gimple_assign_rhs1 (stmt))));
7595 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7596 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7597 set_vinfo_for_stmt (new_stmt,
7598 new_stmt_vec_info (new_stmt, vinfo));
7600 else
7602 gimple_stmt_iterator gsi2 = *gsi;
7603 gsi_next (&gsi2);
7604 new_temp = vect_init_vector (stmt, scalar_dest,
7605 vectype, &gsi2);
7606 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7610 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7612 tree perm_mask = perm_mask_for_reverse (vectype);
7613 new_temp = permute_vec_elements (new_temp, new_temp,
7614 perm_mask, stmt, gsi);
7615 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7618 /* Collect vector loads and later create their permutation in
7619 vect_transform_grouped_load (). */
7620 if (grouped_load || slp_perm)
7621 dr_chain.quick_push (new_temp);
7623 /* Store vector loads in the corresponding SLP_NODE. */
7624 if (slp && !slp_perm)
7625 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7627 /* Bump the vector pointer to account for a gap or for excess
7628 elements loaded for a permuted SLP load. */
7629 if (group_gap_adj != 0)
7631 bool ovf;
7632 tree bump
7633 = wide_int_to_tree (sizetype,
7634 wi::smul (TYPE_SIZE_UNIT (elem_type),
7635 group_gap_adj, &ovf));
7636 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7637 stmt, bump);
7641 if (slp && !slp_perm)
7642 continue;
7644 if (slp_perm)
7646 unsigned n_perms;
7647 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7648 slp_node_instance, false,
7649 &n_perms))
7651 dr_chain.release ();
7652 return false;
7655 else
7657 if (grouped_load)
7659 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7660 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7661 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7663 else
7665 if (j == 0)
7666 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7667 else
7668 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7669 prev_stmt_info = vinfo_for_stmt (new_stmt);
7672 dr_chain.release ();
7675 return true;
7678 /* Function vect_is_simple_cond.
7680 Input:
7681 VINFO - the vect info of the loop or basic block that is being vectorized.
7682 COND - Condition that is checked for simple use.
7684 Output:
7685 *COMP_VECTYPE - the vector type for the comparison.
7686 *DTS - The def types for the arguments of the comparison.
7688 Returns whether COND can be vectorized. Checks whether the
7689 condition operands are supportable using vect_is_simple_use. */
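/* Illustrative examples (not from the original sources): COND may be a
   comparison such as a_1 < b_2, whose SSA operands are checked with
   vect_is_simple_use, or a scalar boolean SSA name such as mask_3 used
   directly as the condition, in which case its vector type must be a
   vector mask (see the mask handling below).  */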
7691 static bool
7692 vect_is_simple_cond (tree cond, vec_info *vinfo,
7693 tree *comp_vectype, enum vect_def_type *dts)
7695 tree lhs, rhs;
7696 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7698 /* Mask case. */
7699 if (TREE_CODE (cond) == SSA_NAME
7700 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7702 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7703 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7704 &dts[0], comp_vectype)
7705 || !*comp_vectype
7706 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7707 return false;
7708 return true;
7711 if (!COMPARISON_CLASS_P (cond))
7712 return false;
7714 lhs = TREE_OPERAND (cond, 0);
7715 rhs = TREE_OPERAND (cond, 1);
7717 if (TREE_CODE (lhs) == SSA_NAME)
7719 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7720 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7721 return false;
7723 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7724 || TREE_CODE (lhs) == FIXED_CST)
7725 dts[0] = vect_constant_def;
7726 else
7727 return false;
7729 if (TREE_CODE (rhs) == SSA_NAME)
7731 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7732 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7733 return false;
7735 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7736 || TREE_CODE (rhs) == FIXED_CST)
7737 dts[1] = vect_constant_def;
7738 else
7739 return false;
7741 if (vectype1 && vectype2
7742 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7743 return false;
7745 *comp_vectype = vectype1 ? vectype1 : vectype2;
7746 return true;
7749 /* vectorizable_condition.
7751 Check if STMT is a conditional modify expression that can be vectorized.
7752 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7753 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7754 at GSI.
7756 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7757 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7758 the else clause if it is 2).
7760 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
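/* Illustrative example (not from the original sources): a scalar statement
   such as
     x_5 = a_1 < b_2 ? c_3 : d_4;
   is vectorized into a VEC_COND_EXPR whose first operand is the vectorized
   comparison (or mask) and whose remaining operands are the vectorized
   then/else values.  */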
7762 bool
7763 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7764 gimple **vec_stmt, tree reduc_def, int reduc_index,
7765 slp_tree slp_node)
7767 tree scalar_dest = NULL_TREE;
7768 tree vec_dest = NULL_TREE;
7769 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7770 tree then_clause, else_clause;
7771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7772 tree comp_vectype = NULL_TREE;
7773 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7774 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7775 tree vec_compare;
7776 tree new_temp;
7777 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7778 enum vect_def_type dts[4]
7779 = {vect_unknown_def_type, vect_unknown_def_type,
7780 vect_unknown_def_type, vect_unknown_def_type};
7781 int ndts = 4;
7782 int ncopies;
7783 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7784 stmt_vec_info prev_stmt_info = NULL;
7785 int i, j;
7786 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7787 vec<tree> vec_oprnds0 = vNULL;
7788 vec<tree> vec_oprnds1 = vNULL;
7789 vec<tree> vec_oprnds2 = vNULL;
7790 vec<tree> vec_oprnds3 = vNULL;
7791 tree vec_cmp_type;
7792 bool masked = false;
7794 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7795 return false;
7797 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7799 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7800 return false;
7802 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7803 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7804 && reduc_def))
7805 return false;
7807 /* FORNOW: not yet supported. */
7808 if (STMT_VINFO_LIVE_P (stmt_info))
7810 if (dump_enabled_p ())
7811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7812 "value used after loop.\n");
7813 return false;
7817 /* Is this a vectorizable conditional operation? */
7818 if (!is_gimple_assign (stmt))
7819 return false;
7821 code = gimple_assign_rhs_code (stmt);
7823 if (code != COND_EXPR)
7824 return false;
7826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7827 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7828 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7830 if (slp_node)
7831 ncopies = 1;
7832 else
7833 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7835 gcc_assert (ncopies >= 1);
7836 if (reduc_index && ncopies > 1)
7837 return false; /* FORNOW */
7839 cond_expr = gimple_assign_rhs1 (stmt);
7840 then_clause = gimple_assign_rhs2 (stmt);
7841 else_clause = gimple_assign_rhs3 (stmt);
7843 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7844 &comp_vectype, &dts[0])
7845 || !comp_vectype)
7846 return false;
7848 gimple *def_stmt;
7849 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7850 &vectype1))
7851 return false;
7852 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7853 &vectype2))
7854 return false;
7856 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7857 return false;
7859 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7860 return false;
7862 masked = !COMPARISON_CLASS_P (cond_expr);
7863 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7865 if (vec_cmp_type == NULL_TREE)
7866 return false;
7868 cond_code = TREE_CODE (cond_expr);
7869 if (!masked)
7871 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7872 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7875 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7877 /* Boolean values may have another representation in vectors
7878 and therefore we prefer bit operations over comparison for
7879 them (which also works for scalar masks). We store opcodes
7880 to use in bitop1 and bitop2. The statement is vectorized as
7881 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2),
7882 depending on the arity of bitop1 and bitop2. */
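/* For example (illustrative, derived from the mapping below): with boolean
   operands a and b, a > b becomes a & ~b, a >= b becomes a | ~b,
   a != b becomes a ^ b and a == b becomes ~(a ^ b); LT and LE reuse the
   GT and GE forms with the operands swapped.  */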
7883 switch (cond_code)
7885 case GT_EXPR:
7886 bitop1 = BIT_NOT_EXPR;
7887 bitop2 = BIT_AND_EXPR;
7888 break;
7889 case GE_EXPR:
7890 bitop1 = BIT_NOT_EXPR;
7891 bitop2 = BIT_IOR_EXPR;
7892 break;
7893 case LT_EXPR:
7894 bitop1 = BIT_NOT_EXPR;
7895 bitop2 = BIT_AND_EXPR;
7896 std::swap (cond_expr0, cond_expr1);
7897 break;
7898 case LE_EXPR:
7899 bitop1 = BIT_NOT_EXPR;
7900 bitop2 = BIT_IOR_EXPR;
7901 std::swap (cond_expr0, cond_expr1);
7902 break;
7903 case NE_EXPR:
7904 bitop1 = BIT_XOR_EXPR;
7905 break;
7906 case EQ_EXPR:
7907 bitop1 = BIT_XOR_EXPR;
7908 bitop2 = BIT_NOT_EXPR;
7909 break;
7910 default:
7911 return false;
7913 cond_code = SSA_NAME;
7916 if (!vec_stmt)
7918 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7919 if (bitop1 != NOP_EXPR)
7921 machine_mode mode = TYPE_MODE (comp_vectype);
7922 optab optab;
7924 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7925 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7926 return false;
7928 if (bitop2 != NOP_EXPR)
7930 optab = optab_for_tree_code (bitop2, comp_vectype,
7931 optab_default);
7932 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7933 return false;
7936 if (expand_vec_cond_expr_p (vectype, comp_vectype,
7937 cond_code))
7939 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
7940 return true;
7942 return false;
7945 /* Transform. */
7947 if (!slp_node)
7949 vec_oprnds0.create (1);
7950 vec_oprnds1.create (1);
7951 vec_oprnds2.create (1);
7952 vec_oprnds3.create (1);
7955 /* Handle def. */
7956 scalar_dest = gimple_assign_lhs (stmt);
7957 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7959 /* Handle cond expr. */
7960 for (j = 0; j < ncopies; j++)
7962 gassign *new_stmt = NULL;
7963 if (j == 0)
7965 if (slp_node)
7967 auto_vec<tree, 4> ops;
7968 auto_vec<vec<tree>, 4> vec_defs;
7970 if (masked)
7971 ops.safe_push (cond_expr);
7972 else
7974 ops.safe_push (cond_expr0);
7975 ops.safe_push (cond_expr1);
7977 ops.safe_push (then_clause);
7978 ops.safe_push (else_clause);
7979 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7980 vec_oprnds3 = vec_defs.pop ();
7981 vec_oprnds2 = vec_defs.pop ();
7982 if (!masked)
7983 vec_oprnds1 = vec_defs.pop ();
7984 vec_oprnds0 = vec_defs.pop ();
7986 else
7988 gimple *gtemp;
7989 if (masked)
7991 vec_cond_lhs
7992 = vect_get_vec_def_for_operand (cond_expr, stmt,
7993 comp_vectype);
7994 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7995 &gtemp, &dts[0]);
7997 else
7999 vec_cond_lhs
8000 = vect_get_vec_def_for_operand (cond_expr0,
8001 stmt, comp_vectype);
8002 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8004 vec_cond_rhs
8005 = vect_get_vec_def_for_operand (cond_expr1,
8006 stmt, comp_vectype);
8007 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8009 if (reduc_index == 1)
8010 vec_then_clause = reduc_def;
8011 else
8013 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8014 stmt);
8015 vect_is_simple_use (then_clause, loop_vinfo,
8016 &gtemp, &dts[2]);
8018 if (reduc_index == 2)
8019 vec_else_clause = reduc_def;
8020 else
8022 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8023 stmt);
8024 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8028 else
8030 vec_cond_lhs
8031 = vect_get_vec_def_for_stmt_copy (dts[0],
8032 vec_oprnds0.pop ());
8033 if (!masked)
8034 vec_cond_rhs
8035 = vect_get_vec_def_for_stmt_copy (dts[1],
8036 vec_oprnds1.pop ());
8038 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8039 vec_oprnds2.pop ());
8040 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8041 vec_oprnds3.pop ());
8044 if (!slp_node)
8046 vec_oprnds0.quick_push (vec_cond_lhs);
8047 if (!masked)
8048 vec_oprnds1.quick_push (vec_cond_rhs);
8049 vec_oprnds2.quick_push (vec_then_clause);
8050 vec_oprnds3.quick_push (vec_else_clause);
8053 /* Arguments are ready. Create the new vector stmt. */
8054 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8056 vec_then_clause = vec_oprnds2[i];
8057 vec_else_clause = vec_oprnds3[i];
8059 if (masked)
8060 vec_compare = vec_cond_lhs;
8061 else
8063 vec_cond_rhs = vec_oprnds1[i];
8064 if (bitop1 == NOP_EXPR)
8065 vec_compare = build2 (cond_code, vec_cmp_type,
8066 vec_cond_lhs, vec_cond_rhs);
8067 else
8069 new_temp = make_ssa_name (vec_cmp_type);
8070 if (bitop1 == BIT_NOT_EXPR)
8071 new_stmt = gimple_build_assign (new_temp, bitop1,
8072 vec_cond_rhs);
8073 else
8074 new_stmt
8075 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8076 vec_cond_rhs);
8077 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8078 if (bitop2 == NOP_EXPR)
8079 vec_compare = new_temp;
8080 else if (bitop2 == BIT_NOT_EXPR)
8082 /* Instead of doing ~x ? y : z do x ? z : y. */
8083 vec_compare = new_temp;
8084 std::swap (vec_then_clause, vec_else_clause);
8086 else
8088 vec_compare = make_ssa_name (vec_cmp_type);
8089 new_stmt
8090 = gimple_build_assign (vec_compare, bitop2,
8091 vec_cond_lhs, new_temp);
8092 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8096 new_temp = make_ssa_name (vec_dest);
8097 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8098 vec_compare, vec_then_clause,
8099 vec_else_clause);
8100 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8101 if (slp_node)
8102 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8105 if (slp_node)
8106 continue;
8108 if (j == 0)
8109 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8110 else
8111 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8113 prev_stmt_info = vinfo_for_stmt (new_stmt);
8116 vec_oprnds0.release ();
8117 vec_oprnds1.release ();
8118 vec_oprnds2.release ();
8119 vec_oprnds3.release ();
8121 return true;
8124 /* vectorizable_comparison.
8126 Check if STMT is a comparison expression that can be vectorized.
8127 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8128 comparison, put it in VEC_STMT, and insert it at GSI.
8130 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
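/* Illustrative example (not from the original sources): a scalar statement
   such as
     mask_3 = a_1 < b_2;
   where mask_3 has a scalar boolean type, is vectorized into a statement
   producing a vector mask (a VECTOR_BOOLEAN_TYPE_P vector).  */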
8132 static bool
8133 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8134 gimple **vec_stmt, tree reduc_def,
8135 slp_tree slp_node)
8137 tree lhs, rhs1, rhs2;
8138 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8139 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8140 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8141 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8142 tree new_temp;
8143 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8144 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8145 int ndts = 2;
8146 unsigned nunits;
8147 int ncopies;
8148 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8149 stmt_vec_info prev_stmt_info = NULL;
8150 int i, j;
8151 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8152 vec<tree> vec_oprnds0 = vNULL;
8153 vec<tree> vec_oprnds1 = vNULL;
8154 gimple *def_stmt;
8155 tree mask_type;
8156 tree mask;
8158 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8159 return false;
8161 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8162 return false;
8164 mask_type = vectype;
8165 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8167 if (slp_node)
8168 ncopies = 1;
8169 else
8170 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8172 gcc_assert (ncopies >= 1);
8173 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8174 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8175 && reduc_def))
8176 return false;
8178 if (STMT_VINFO_LIVE_P (stmt_info))
8180 if (dump_enabled_p ())
8181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8182 "value used after loop.\n");
8183 return false;
8186 if (!is_gimple_assign (stmt))
8187 return false;
8189 code = gimple_assign_rhs_code (stmt);
8191 if (TREE_CODE_CLASS (code) != tcc_comparison)
8192 return false;
8194 rhs1 = gimple_assign_rhs1 (stmt);
8195 rhs2 = gimple_assign_rhs2 (stmt);
8197 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8198 &dts[0], &vectype1))
8199 return false;
8201 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8202 &dts[1], &vectype2))
8203 return false;
8205 if (vectype1 && vectype2
8206 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8207 return false;
8209 vectype = vectype1 ? vectype1 : vectype2;
8211 /* Invariant comparison. */
8212 if (!vectype)
8214 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8215 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8216 return false;
8218 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8219 return false;
8221 /* Can't compare mask and non-mask types. */
8222 if (vectype1 && vectype2
8223 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8224 return false;
8226 /* Boolean values may have another representation in vectors
8227 and therefore we prefer bit operations over comparison for
8228 them (which also works for scalar masks). We store opcodes
8229 to use in bitop1 and bitop2. The statement is vectorized as
8230 BITOP2 (rhs1 BITOP1 rhs2) or
8231 rhs1 BITOP2 (BITOP1 rhs2),
8232 depending on the arity of bitop1 and bitop2. */
8233 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8235 if (code == GT_EXPR)
8237 bitop1 = BIT_NOT_EXPR;
8238 bitop2 = BIT_AND_EXPR;
8240 else if (code == GE_EXPR)
8242 bitop1 = BIT_NOT_EXPR;
8243 bitop2 = BIT_IOR_EXPR;
8245 else if (code == LT_EXPR)
8247 bitop1 = BIT_NOT_EXPR;
8248 bitop2 = BIT_AND_EXPR;
8249 std::swap (rhs1, rhs2);
8250 std::swap (dts[0], dts[1]);
8252 else if (code == LE_EXPR)
8254 bitop1 = BIT_NOT_EXPR;
8255 bitop2 = BIT_IOR_EXPR;
8256 std::swap (rhs1, rhs2);
8257 std::swap (dts[0], dts[1]);
8259 else
8261 bitop1 = BIT_XOR_EXPR;
8262 if (code == EQ_EXPR)
8263 bitop2 = BIT_NOT_EXPR;
8267 if (!vec_stmt)
8269 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8270 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8271 dts, ndts, NULL, NULL);
8272 if (bitop1 == NOP_EXPR)
8273 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8274 else
8276 machine_mode mode = TYPE_MODE (vectype);
8277 optab optab;
8279 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8280 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8281 return false;
8283 if (bitop2 != NOP_EXPR)
8285 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8286 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8287 return false;
8289 return true;
8293 /* Transform. */
8294 if (!slp_node)
8296 vec_oprnds0.create (1);
8297 vec_oprnds1.create (1);
8300 /* Handle def. */
8301 lhs = gimple_assign_lhs (stmt);
8302 mask = vect_create_destination_var (lhs, mask_type);
8304 /* Handle cmp expr. */
8305 for (j = 0; j < ncopies; j++)
8307 gassign *new_stmt = NULL;
8308 if (j == 0)
8310 if (slp_node)
8312 auto_vec<tree, 2> ops;
8313 auto_vec<vec<tree>, 2> vec_defs;
8315 ops.safe_push (rhs1);
8316 ops.safe_push (rhs2);
8317 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8318 vec_oprnds1 = vec_defs.pop ();
8319 vec_oprnds0 = vec_defs.pop ();
8321 else
8323 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8324 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8327 else
8329 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8330 vec_oprnds0.pop ());
8331 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8332 vec_oprnds1.pop ());
8335 if (!slp_node)
8337 vec_oprnds0.quick_push (vec_rhs1);
8338 vec_oprnds1.quick_push (vec_rhs2);
8341 /* Arguments are ready. Create the new vector stmt. */
8342 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8344 vec_rhs2 = vec_oprnds1[i];
8346 new_temp = make_ssa_name (mask);
8347 if (bitop1 == NOP_EXPR)
8349 new_stmt = gimple_build_assign (new_temp, code,
8350 vec_rhs1, vec_rhs2);
8351 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8353 else
8355 if (bitop1 == BIT_NOT_EXPR)
8356 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8357 else
8358 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8359 vec_rhs2);
8360 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8361 if (bitop2 != NOP_EXPR)
8363 tree res = make_ssa_name (mask);
8364 if (bitop2 == BIT_NOT_EXPR)
8365 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8366 else
8367 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8368 new_temp);
8369 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8372 if (slp_node)
8373 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8376 if (slp_node)
8377 continue;
8379 if (j == 0)
8380 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8381 else
8382 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8384 prev_stmt_info = vinfo_for_stmt (new_stmt);
8387 vec_oprnds0.release ();
8388 vec_oprnds1.release ();
8390 return true;
8393 /* Make sure the statement is vectorizable. */
8395 bool
8396 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8398 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8399 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8400 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8401 bool ok;
8402 tree scalar_type, vectype;
8403 gimple *pattern_stmt;
8404 gimple_seq pattern_def_seq;
8406 if (dump_enabled_p ())
8408 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8409 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8412 if (gimple_has_volatile_ops (stmt))
8414 if (dump_enabled_p ())
8415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8416 "not vectorized: stmt has volatile operands\n");
8418 return false;
8421 /* Skip stmts that do not need to be vectorized. In loops this is expected
8422 to include:
8423 - the COND_EXPR which is the loop exit condition
8424 - any LABEL_EXPRs in the loop
8425 - computations that are used only for array indexing or loop control.
8426 In basic blocks we only analyze statements that are a part of some SLP
8427 instance, therefore, all the statements are relevant.
8429 A pattern statement needs to be analyzed instead of the original statement
8430 if the original statement is not relevant. Otherwise, we analyze both
8431 statements. In basic blocks we are called from some SLP instance
8432 traversal; don't analyze pattern stmts instead, since the pattern stmts
8433 will already be part of an SLP instance. */
8435 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8436 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8437 && !STMT_VINFO_LIVE_P (stmt_info))
8439 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8440 && pattern_stmt
8441 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8442 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8444 /* Analyze PATTERN_STMT instead of the original stmt. */
8445 stmt = pattern_stmt;
8446 stmt_info = vinfo_for_stmt (pattern_stmt);
8447 if (dump_enabled_p ())
8449 dump_printf_loc (MSG_NOTE, vect_location,
8450 "==> examining pattern statement: ");
8451 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8454 else
8456 if (dump_enabled_p ())
8457 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8459 return true;
8462 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8463 && node == NULL
8464 && pattern_stmt
8465 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8466 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8468 /* Analyze PATTERN_STMT too. */
8469 if (dump_enabled_p ())
8471 dump_printf_loc (MSG_NOTE, vect_location,
8472 "==> examining pattern statement: ");
8473 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8476 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8477 return false;
8480 if (is_pattern_stmt_p (stmt_info)
8481 && node == NULL
8482 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8484 gimple_stmt_iterator si;
8486 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8488 gimple *pattern_def_stmt = gsi_stmt (si);
8489 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8490 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8492 /* Analyze def stmt of STMT if it's a pattern stmt. */
8493 if (dump_enabled_p ())
8495 dump_printf_loc (MSG_NOTE, vect_location,
8496 "==> examining pattern def statement: ");
8497 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8500 if (!vect_analyze_stmt (pattern_def_stmt,
8501 need_to_vectorize, node))
8502 return false;
8507 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8509 case vect_internal_def:
8510 break;
8512 case vect_reduction_def:
8513 case vect_nested_cycle:
8514 gcc_assert (!bb_vinfo
8515 && (relevance == vect_used_in_outer
8516 || relevance == vect_used_in_outer_by_reduction
8517 || relevance == vect_used_by_reduction
8518 || relevance == vect_unused_in_scope
8519 || relevance == vect_used_only_live));
8520 break;
8522 case vect_induction_def:
8523 gcc_assert (!bb_vinfo);
8524 break;
8526 case vect_constant_def:
8527 case vect_external_def:
8528 case vect_unknown_def_type:
8529 default:
8530 gcc_unreachable ();
8533 if (bb_vinfo)
8535 gcc_assert (PURE_SLP_STMT (stmt_info));
8537 /* Memory accesses already got their vector type assigned
8538 in vect_analyze_data_refs. */
8539 if (! STMT_VINFO_DATA_REF (stmt_info))
8541 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8542 if (dump_enabled_p ())
8544 dump_printf_loc (MSG_NOTE, vect_location,
8545 "get vectype for scalar type: ");
8546 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8547 dump_printf (MSG_NOTE, "\n");
8550 vectype = get_vectype_for_scalar_type (scalar_type);
8551 if (!vectype)
8553 if (dump_enabled_p ())
8555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8556 "not SLPed: unsupported data-type ");
8557 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8558 scalar_type);
8559 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8561 return false;
8564 if (dump_enabled_p ())
8566 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8567 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8568 dump_printf (MSG_NOTE, "\n");
8571 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8575 if (STMT_VINFO_RELEVANT_P (stmt_info))
8577 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8578 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8579 || (is_gimple_call (stmt)
8580 && gimple_call_lhs (stmt) == NULL_TREE));
8581 *need_to_vectorize = true;
8584 if (PURE_SLP_STMT (stmt_info) && !node)
8586 dump_printf_loc (MSG_NOTE, vect_location,
8587 "handled only by SLP analysis\n");
8588 return true;
8591 ok = true;
8592 if (!bb_vinfo
8593 && (STMT_VINFO_RELEVANT_P (stmt_info)
8594 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8595 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8596 || vectorizable_conversion (stmt, NULL, NULL, node)
8597 || vectorizable_shift (stmt, NULL, NULL, node)
8598 || vectorizable_operation (stmt, NULL, NULL, node)
8599 || vectorizable_assignment (stmt, NULL, NULL, node)
8600 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8601 || vectorizable_call (stmt, NULL, NULL, node)
8602 || vectorizable_store (stmt, NULL, NULL, node)
8603 || vectorizable_reduction (stmt, NULL, NULL, node)
8604 || vectorizable_induction (stmt, NULL, NULL, node)
8605 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8606 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8607 else
8609 if (bb_vinfo)
8610 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8611 || vectorizable_conversion (stmt, NULL, NULL, node)
8612 || vectorizable_shift (stmt, NULL, NULL, node)
8613 || vectorizable_operation (stmt, NULL, NULL, node)
8614 || vectorizable_assignment (stmt, NULL, NULL, node)
8615 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8616 || vectorizable_call (stmt, NULL, NULL, node)
8617 || vectorizable_store (stmt, NULL, NULL, node)
8618 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8619 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8622 if (!ok)
8624 if (dump_enabled_p ())
8626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8627 "not vectorized: relevant stmt not ");
8628 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8629 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8632 return false;
8635 if (bb_vinfo)
8636 return true;
8638 /* Stmts that are (also) "live" (i.e., used outside the loop)
8639 need extra handling, except for vectorizable reductions. */
8640 if (STMT_VINFO_LIVE_P (stmt_info)
8641 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8642 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8644 if (!ok)
8646 if (dump_enabled_p ())
8648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8649 "not vectorized: live stmt not ");
8650 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8651 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8654 return false;
8657 return true;
8661 /* Function vect_transform_stmt.
8663 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8665 bool
8666 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8667 bool *grouped_store, slp_tree slp_node,
8668 slp_instance slp_node_instance)
8670 bool is_store = false;
8671 gimple *vec_stmt = NULL;
8672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8673 bool done;
8675 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8676 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8678 switch (STMT_VINFO_TYPE (stmt_info))
8680 case type_demotion_vec_info_type:
8681 case type_promotion_vec_info_type:
8682 case type_conversion_vec_info_type:
8683 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8684 gcc_assert (done);
8685 break;
8687 case induc_vec_info_type:
8688 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8689 gcc_assert (done);
8690 break;
8692 case shift_vec_info_type:
8693 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8694 gcc_assert (done);
8695 break;
8697 case op_vec_info_type:
8698 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8699 gcc_assert (done);
8700 break;
8702 case assignment_vec_info_type:
8703 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8704 gcc_assert (done);
8705 break;
8707 case load_vec_info_type:
8708 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8709 slp_node_instance);
8710 gcc_assert (done);
8711 break;
8713 case store_vec_info_type:
8714 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8715 gcc_assert (done);
8716 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8718 /* In case of interleaving, the whole chain is vectorized when the
8719 last store in the chain is reached. Store stmts before the last
8720 one are skipped, and their vec_stmt_info shouldn't be freed
8721 meanwhile. */
8722 *grouped_store = true;
8723 if (STMT_VINFO_VEC_STMT (stmt_info))
8724 is_store = true;
8726 else
8727 is_store = true;
8728 break;
8730 case condition_vec_info_type:
8731 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8732 gcc_assert (done);
8733 break;
8735 case comparison_vec_info_type:
8736 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8737 gcc_assert (done);
8738 break;
8740 case call_vec_info_type:
8741 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8742 stmt = gsi_stmt (*gsi);
8743 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8744 is_store = true;
8745 break;
8747 case call_simd_clone_vec_info_type:
8748 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8749 stmt = gsi_stmt (*gsi);
8750 break;
8752 case reduc_vec_info_type:
8753 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8754 gcc_assert (done);
8755 break;
8757 default:
8758 if (!STMT_VINFO_LIVE_P (stmt_info))
8760 if (dump_enabled_p ())
8761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8762 "stmt not supported.\n");
8763 gcc_unreachable ();
8767 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8768 This would break hybrid SLP vectorization. */
8769 if (slp_node)
8770 gcc_assert (!vec_stmt
8771 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8773 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8774 is being vectorized, but outside the immediately enclosing loop. */
8775 if (vec_stmt
8776 && STMT_VINFO_LOOP_VINFO (stmt_info)
8777 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8778 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8779 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8780 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8781 || STMT_VINFO_RELEVANT (stmt_info) ==
8782 vect_used_in_outer_by_reduction))
8784 struct loop *innerloop = LOOP_VINFO_LOOP (
8785 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8786 imm_use_iterator imm_iter;
8787 use_operand_p use_p;
8788 tree scalar_dest;
8789 gimple *exit_phi;
8791 if (dump_enabled_p ())
8792 dump_printf_loc (MSG_NOTE, vect_location,
8793 "Record the vdef for outer-loop vectorization.\n");
8795 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8796 (to be used when vectorizing outer-loop stmts that use the DEF of
8797 STMT). */
8798 if (gimple_code (stmt) == GIMPLE_PHI)
8799 scalar_dest = PHI_RESULT (stmt);
8800 else
8801 scalar_dest = gimple_assign_lhs (stmt);
8803 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8805 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8807 exit_phi = USE_STMT (use_p);
8808 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8813 /* Handle stmts whose DEF is used outside the loop-nest that is
8814 being vectorized. */
8815 if (slp_node)
8817 gimple *slp_stmt;
8818 int i;
8819 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8821 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8822 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8823 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8825 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8826 &vec_stmt);
8827 gcc_assert (done);
8831 else if (STMT_VINFO_LIVE_P (stmt_info)
8832 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8834 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8835 gcc_assert (done);
8838 if (vec_stmt)
8839 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8841 return is_store;
8845 /* Remove a group of stores (for SLP or interleaving) and free their
8846 stmt_vec_info. */
8848 void
8849 vect_remove_stores (gimple *first_stmt)
8851 gimple *next = first_stmt;
8852 gimple *tmp;
8853 gimple_stmt_iterator next_si;
8855 while (next)
8857 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8859 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8860 if (is_pattern_stmt_p (stmt_info))
8861 next = STMT_VINFO_RELATED_STMT (stmt_info);
8862 /* Free the attached stmt_vec_info and remove the stmt. */
8863 next_si = gsi_for_stmt (next);
8864 unlink_stmt_vdef (next);
8865 gsi_remove (&next_si, true);
8866 release_defs (next);
8867 free_stmt_vec_info (next);
8868 next = tmp;
8873 /* Function new_stmt_vec_info.
8875 Create and initialize a new stmt_vec_info struct for STMT. */
8877 stmt_vec_info
8878 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8880 stmt_vec_info res;
8881 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8883 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8884 STMT_VINFO_STMT (res) = stmt;
8885 res->vinfo = vinfo;
8886 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8887 STMT_VINFO_LIVE_P (res) = false;
8888 STMT_VINFO_VECTYPE (res) = NULL;
8889 STMT_VINFO_VEC_STMT (res) = NULL;
8890 STMT_VINFO_VECTORIZABLE (res) = true;
8891 STMT_VINFO_IN_PATTERN_P (res) = false;
8892 STMT_VINFO_RELATED_STMT (res) = NULL;
8893 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8894 STMT_VINFO_DATA_REF (res) = NULL;
8895 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8896 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8898 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8899 STMT_VINFO_DR_OFFSET (res) = NULL;
8900 STMT_VINFO_DR_INIT (res) = NULL;
8901 STMT_VINFO_DR_STEP (res) = NULL;
8902 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8904 if (gimple_code (stmt) == GIMPLE_PHI
8905 && is_loop_header_bb_p (gimple_bb (stmt)))
8906 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8907 else
8908 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8910 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8911 STMT_SLP_TYPE (res) = loop_vect;
8912 STMT_VINFO_NUM_SLP_USES (res) = 0;
8914 GROUP_FIRST_ELEMENT (res) = NULL;
8915 GROUP_NEXT_ELEMENT (res) = NULL;
8916 GROUP_SIZE (res) = 0;
8917 GROUP_STORE_COUNT (res) = 0;
8918 GROUP_GAP (res) = 0;
8919 GROUP_SAME_DR_STMT (res) = NULL;
8921 return res;
8925 /* Create the vector that holds the stmt_vec_info structs. */
8927 void
8928 init_stmt_vec_info_vec (void)
8930 gcc_assert (!stmt_vec_info_vec.exists ());
8931 stmt_vec_info_vec.create (50);
8935 /* Free the vector of stmt_vec_info structs. */
8937 void
8938 free_stmt_vec_info_vec (void)
8940 unsigned int i;
8941 stmt_vec_info info;
8942 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8943 if (info != NULL)
8944 free_stmt_vec_info (STMT_VINFO_STMT (info));
8945 gcc_assert (stmt_vec_info_vec.exists ());
8946 stmt_vec_info_vec.release ();
8950 /* Free stmt vectorization related info. */
8952 void
8953 free_stmt_vec_info (gimple *stmt)
8955 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8957 if (!stmt_info)
8958 return;
8960 /* Check if this statement has a related "pattern stmt"
8961 (introduced by the vectorizer during the pattern recognition
8962 pass). Free the pattern's stmt_vec_info and the def stmts'
8963 stmt_vec_infos too. */
8964 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8966 stmt_vec_info patt_info
8967 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8968 if (patt_info)
8970 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8971 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8972 gimple_set_bb (patt_stmt, NULL);
8973 tree lhs = gimple_get_lhs (patt_stmt);
8974 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8975 release_ssa_name (lhs);
8976 if (seq)
8978 gimple_stmt_iterator si;
8979 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8981 gimple *seq_stmt = gsi_stmt (si);
8982 gimple_set_bb (seq_stmt, NULL);
8983 lhs = gimple_get_lhs (seq_stmt);
8984 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8985 release_ssa_name (lhs);
8986 free_stmt_vec_info (seq_stmt);
8989 free_stmt_vec_info (patt_stmt);
8993 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8994 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8995 set_vinfo_for_stmt (stmt, NULL);
8996 free (stmt_info);
9000 /* Function get_vectype_for_scalar_type_and_size.
9002 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9003 by the target. */
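/* Illustrative example (assuming a target with 16-byte vectors): for a
   32-bit int SCALAR_TYPE and SIZE == 16 this yields a 4-element integer
   vector type, while SIZE == 0 makes it use the target's preferred SIMD
   mode instead.  */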
9005 static tree
9006 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9008 tree orig_scalar_type = scalar_type;
9009 machine_mode inner_mode = TYPE_MODE (scalar_type);
9010 machine_mode simd_mode;
9011 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9012 int nunits;
9013 tree vectype;
9015 if (nbytes == 0)
9016 return NULL_TREE;
9018 if (GET_MODE_CLASS (inner_mode) != MODE_INT
9019 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
9020 return NULL_TREE;
9022 /* For vector types of elements whose mode precision doesn't
9023 match their type's precision we use an element type of mode
9024 precision. The vectorization routines will have to make sure
9025 they support the proper result truncation/extension.
9026 We also make sure to build vector types with INTEGER_TYPE
9027 component type only. */
9028 if (INTEGRAL_TYPE_P (scalar_type)
9029 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9030 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9031 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9032 TYPE_UNSIGNED (scalar_type));
9034 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9035 When the component mode passes the above test simply use a type
9036 corresponding to that mode. The theory is that any use that
9037 would cause problems with this will disable vectorization anyway. */
9038 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9039 && !INTEGRAL_TYPE_P (scalar_type))
9040 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9042 /* We can't build a vector type of elements with alignment bigger than
9043 their size. */
9044 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9045 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9046 TYPE_UNSIGNED (scalar_type));
9048 /* If we fell back to using the mode, fail if there was
9049 no scalar type for it. */
9050 if (scalar_type == NULL_TREE)
9051 return NULL_TREE;
9053 /* If no size was supplied, use the mode the target prefers. Otherwise
9054 look up a vector mode of the specified size. */
9055 if (size == 0)
9056 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9057 else
9058 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9059 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9060 if (nunits <= 1)
9061 return NULL_TREE;
9063 vectype = build_vector_type (scalar_type, nunits);
9065 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9066 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9067 return NULL_TREE;
9069 /* Re-attach the address-space qualifier if we canonicalized the scalar
9070 type. */
9071 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9072 return build_qualified_type
9073 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9075 return vectype;
9078 unsigned int current_vector_size;
9080 /* Function get_vectype_for_scalar_type.
9082 Returns the vector type corresponding to SCALAR_TYPE as supported
9083 by the target. */
9085 tree
9086 get_vectype_for_scalar_type (tree scalar_type)
9088 tree vectype;
9089 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9090 current_vector_size);
9091 if (vectype
9092 && current_vector_size == 0)
9093 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9094 return vectype;
9097 /* Function get_mask_type_for_scalar_type.
9099 Returns the mask type corresponding to the result of a comparison
9100 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9102 tree
9103 get_mask_type_for_scalar_type (tree scalar_type)
9105 tree vectype = get_vectype_for_scalar_type (scalar_type);
9107 if (!vectype)
9108 return NULL;
9110 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9111 current_vector_size);
9114 /* Function get_same_sized_vectype
9116 Returns a vector type corresponding to SCALAR_TYPE with the same
9117 size as VECTOR_TYPE, if supported by the target. */
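/* Illustrative example (assuming 16-byte vectors): given a 16-bit short
   SCALAR_TYPE and a 4 x 32-bit VECTOR_TYPE, this returns an 8 x 16-bit
   vector type, i.e. the same total size but with SCALAR_TYPE elements.  */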
9119 tree
9120 get_same_sized_vectype (tree scalar_type, tree vector_type)
9122 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9123 return build_same_sized_truth_vector_type (vector_type);
9125 return get_vectype_for_scalar_type_and_size
9126 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9129 /* Function vect_is_simple_use.
9131 Input:
9132 VINFO - the vect info of the loop or basic block that is being vectorized.
9133 OPERAND - operand in the loop or bb.
9134 Output:
9135 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9136 DT - the type of definition
9138 Returns whether a stmt with OPERAND can be vectorized.
9139 For loops, supportable operands are constants, loop invariants, and operands
9140 that are defined by the current iteration of the loop. Unsupportable
9141 operands are those that are defined by a previous iteration of the loop (as
9142 is the case in reduction/induction computations).
9143 For basic blocks, supportable operands are constants and bb invariants.
9144 For now, operands defined outside the basic block are not supported. */
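/* Illustrative example (not from the original sources): for a loop
   statement x_3 = y_2 * 4, the constant 4 is classified as
   vect_constant_def, an operand defined before the loop as
   vect_external_def, and an operand defined by another statement inside
   the loop as vect_internal_def.  */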
9146 bool
9147 vect_is_simple_use (tree operand, vec_info *vinfo,
9148 gimple **def_stmt, enum vect_def_type *dt)
9150 *def_stmt = NULL;
9151 *dt = vect_unknown_def_type;
9153 if (dump_enabled_p ())
9155 dump_printf_loc (MSG_NOTE, vect_location,
9156 "vect_is_simple_use: operand ");
9157 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9158 dump_printf (MSG_NOTE, "\n");
9161 if (CONSTANT_CLASS_P (operand))
9163 *dt = vect_constant_def;
9164 return true;
9167 if (is_gimple_min_invariant (operand))
9169 *dt = vect_external_def;
9170 return true;
9173 if (TREE_CODE (operand) != SSA_NAME)
9175 if (dump_enabled_p ())
9176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9177 "not ssa-name.\n");
9178 return false;
9181 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9183 *dt = vect_external_def;
9184 return true;
9187 *def_stmt = SSA_NAME_DEF_STMT (operand);
9188 if (dump_enabled_p ())
9190 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9191 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9194 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9195 *dt = vect_external_def;
9196 else
9198 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9199 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9202 if (dump_enabled_p ())
9204 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9205 switch (*dt)
9207 case vect_uninitialized_def:
9208 dump_printf (MSG_NOTE, "uninitialized\n");
9209 break;
9210 case vect_constant_def:
9211 dump_printf (MSG_NOTE, "constant\n");
9212 break;
9213 case vect_external_def:
9214 dump_printf (MSG_NOTE, "external\n");
9215 break;
9216 case vect_internal_def:
9217 dump_printf (MSG_NOTE, "internal\n");
9218 break;
9219 case vect_induction_def:
9220 dump_printf (MSG_NOTE, "induction\n");
9221 break;
9222 case vect_reduction_def:
9223 dump_printf (MSG_NOTE, "reduction\n");
9224 break;
9225 case vect_double_reduction_def:
9226 dump_printf (MSG_NOTE, "double reduction\n");
9227 break;
9228 case vect_nested_cycle:
9229 dump_printf (MSG_NOTE, "nested cycle\n");
9230 break;
9231 case vect_unknown_def_type:
9232 dump_printf (MSG_NOTE, "unknown\n");
9233 break;
9237 if (*dt == vect_unknown_def_type)
9239 if (dump_enabled_p ())
9240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9241 "Unsupported pattern.\n");
9242 return false;
9245 switch (gimple_code (*def_stmt))
9247 case GIMPLE_PHI:
9248 case GIMPLE_ASSIGN:
9249 case GIMPLE_CALL:
9250 break;
9251 default:
9252 if (dump_enabled_p ())
9253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9254 "unsupported defining stmt:\n");
9255 return false;
9258 return true;
9261 /* Function vect_is_simple_use.
9263 Same as vect_is_simple_use but also determines the vector operand
9264 type of OPERAND and stores it to *VECTYPE. If the definition of
9265 OPERAND is vect_uninitialized_def, vect_constant_def or
9266 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9267 is responsible for computing the best-suited vector type for the
9268 scalar operand. */
9270 bool
9271 vect_is_simple_use (tree operand, vec_info *vinfo,
9272 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9274 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9275 return false;
9277 /* Now get a vector type if the def is internal, otherwise supply
9278 NULL_TREE and leave it up to the caller to figure out a proper
9279 type for the use stmt. */
9280 if (*dt == vect_internal_def
9281 || *dt == vect_induction_def
9282 || *dt == vect_reduction_def
9283 || *dt == vect_double_reduction_def
9284 || *dt == vect_nested_cycle)
9286 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9288 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9289 && !STMT_VINFO_RELEVANT (stmt_info)
9290 && !STMT_VINFO_LIVE_P (stmt_info))
9291 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9293 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9294 gcc_assert (*vectype != NULL_TREE);
9296 else if (*dt == vect_uninitialized_def
9297 || *dt == vect_constant_def
9298 || *dt == vect_external_def)
9299 *vectype = NULL_TREE;
9300 else
9301 gcc_unreachable ();
9303 return true;
9307 /* Function supportable_widening_operation
9309 Check whether an operation represented by the code CODE is a
9310 widening operation that is supported by the target platform in
9311 vector form (i.e., when operating on arguments of type VECTYPE_IN
9312 producing a result of type VECTYPE_OUT).
9314 Widening operations we currently support are NOP (CONVERT), FLOAT
9315 and WIDEN_MULT. This function checks if these operations are supported
9316 by the target platform either directly (via vector tree-codes), or via
9317 target builtins.
9319 Output:
9320 - CODE1 and CODE2 are codes of vector operations to be used when
9321 vectorizing the operation, if available.
9322 - MULTI_STEP_CVT determines the number of required intermediate steps in
9323 case of multi-step conversion (like char->short->int - in that case
9324 MULTI_STEP_CVT will be 1).
9325 - INTERM_TYPES contains the intermediate type required to perform the
9326 widening operation (short in the above example). */
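/* Illustrative example (not from the original sources): for a
   WIDEN_MULT_EXPR from vectors of shorts to vectors of ints, CODE1/CODE2
   are typically VEC_WIDEN_MULT_LO_EXPR/VEC_WIDEN_MULT_HI_EXPR, each
   producing one half of the widened results; reduction-only uses may
   instead be handled via VEC_WIDEN_MULT_EVEN/ODD_EXPR as described in the
   code below.  */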
bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with the vect_used_by_reduction property
             cannot be reordered if the use chain with this property does not
             have the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;
    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) / 2
                == TYPE_VECTOR_SUBPARTS (wide_vectype)));
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
                    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
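
/* An illustrative caller sketch (hypothetical local names, not code from
   this file): a vectorizable_* routine typically queries support first and
   only then generates the widening statements:

     enum tree_code code1, code2;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
       return false;
     ...
     interm_types.release ();  */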
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type(s) required to perform the
   narrowing operation (short in the above example).  */
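
/* For instance (an illustrative sketch assuming 128-bit vectors): narrowing
   ints to chars, with VECTYPE_IN = V4SI and VECTYPE_OUT = V16QI, goes through
   the intermediate type V8HI (V4SI -> V8HI -> V16QI), so MULTI_STEP_CVT is 1
   and INTERM_TYPES holds V8HI.  */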
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) * 2
                == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the demotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
                    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
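
/* An illustrative caller sketch (hypothetical local names, not code from
   this file), mirroring the widening case above but needing only one tree
   code:

     enum tree_code code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
       return false;
     ...
     interm_types.release ();  */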