gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
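/* Illustrative example (the exact weights are target-dependent): a call
   with COUNT == 2, KIND == vector_stmt and a non-null BODY_COST_VEC pushes
   the entry { 2, vector_stmt, <stmt>, 0 } for later processing and returns
   2 * builtin_vectorization_cost (vector_stmt, vectype, 0) as the
   preliminary estimate; with a null BODY_COST_VEC the cost instead goes
   straight to add_stmt_cost on the target cost data.  */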
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
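/* For example, with N == 2 and a scalar destination named "x" this emits
   roughly
     vect_x_1 = ARRAY[2];
   where vect_x_1 is a fresh SSA name created via
   vect_create_destination_var; the names here are illustrative only.  */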
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT, in the loop represented by LOOP_VINFO, is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop-closed SSA form).  */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that it is one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
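/* For example, given the store a[i_7] = x_5, USE == x_5 yields true
   (the stored value is a real operand), while USE == i_7 yields false,
   since i_7 only indexes the array.  For the load x_5 = a[i_7] the LHS
   is an SSA_NAME, so every USE yields false.  */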
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 /* Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
582 /* We are also not interested in uses on loop PHI backedges that are
583 inductions. Otherwise we'll needlessly vectorize the IV increment
584 and cause hybrid SLP for SLP inductions. */
585 else if (gimple_code (stmt) == GIMPLE_PHI
586 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
587 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
588 == use))
590 if (dump_enabled_p ())
591 dump_printf_loc (MSG_NOTE, vect_location,
592 "induction value on backedge.\n");
593 return true;
597 vect_mark_relevant (worklist, def_stmt, relevant, false);
598 return true;
602 /* Function vect_mark_stmts_to_be_vectorized.
604 Not all stmts in the loop need to be vectorized. For example:
606 for i...
607 for j...
608 1. T0 = i + j
609 2. T1 = a[T0]
611 3. j = j + 1
613 Stmts 1 and 3 do not need to be vectorized, because loop control and
614 addressing of vectorized data-refs are handled differently.
616 This pass detects such stmts. */
618 bool
619 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
621 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
622 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
623 unsigned int nbbs = loop->num_nodes;
624 gimple_stmt_iterator si;
625 gimple *stmt;
626 unsigned int i;
627 stmt_vec_info stmt_vinfo;
628 basic_block bb;
629 gimple *phi;
630 bool live_p;
631 enum vect_relevant relevant;
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location,
635 "=== vect_mark_stmts_to_be_vectorized ===\n");
637 auto_vec<gimple *, 64> worklist;
639 /* 1. Init worklist. */
640 for (i = 0; i < nbbs; i++)
642 bb = bbs[i];
643 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
645 phi = gsi_stmt (si);
646 if (dump_enabled_p ())
648 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
649 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
652 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
653 vect_mark_relevant (&worklist, phi, relevant, live_p);
655 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
657 stmt = gsi_stmt (si);
658 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
664 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
665 vect_mark_relevant (&worklist, stmt, relevant, live_p);
669 /* 2. Process_worklist */
670 while (worklist.length () > 0)
672 use_operand_p use_p;
673 ssa_op_iter iter;
675 stmt = worklist.pop ();
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
682 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
683 (DEF_STMT) as relevant/irrelevant according to the relevance property
684 of STMT. */
685 stmt_vinfo = vinfo_for_stmt (stmt);
686 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
688 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
689 propagated as is to the DEF_STMTs of its USEs.
691 One exception is when STMT has been identified as defining a reduction
692 variable; in this case we set the relevance to vect_used_by_reduction.
693 This is because we distinguish between two kinds of relevant stmts -
694 those that are used by a reduction computation, and those that are
695 (also) used by a regular computation. This allows us later on to
696 identify stmts that are used solely by a reduction, and therefore the
697 order of the results that they produce does not have to be kept. */
699 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
701 case vect_reduction_def:
702 gcc_assert (relevant != vect_unused_in_scope);
703 if (relevant != vect_unused_in_scope
704 && relevant != vect_used_in_scope
705 && relevant != vect_used_by_reduction
706 && relevant != vect_used_only_live)
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
710 "unsupported use of reduction.\n");
711 return false;
713 break;
715 case vect_nested_cycle:
716 if (relevant != vect_unused_in_scope
717 && relevant != vect_used_in_outer_by_reduction
718 && relevant != vect_used_in_outer)
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "unsupported use of nested cycle.\n");
724 return false;
726 break;
728 case vect_double_reduction_def:
729 if (relevant != vect_unused_in_scope
730 && relevant != vect_used_by_reduction
731 && relevant != vect_used_only_live)
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of double reduction.\n");
737 return false;
739 break;
741 default:
742 break;
745 if (is_pattern_stmt_p (stmt_vinfo))
747 /* Pattern statements are not inserted into the code, so
748 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
749 have to scan the RHS or function arguments instead. */
750 if (is_gimple_assign (stmt))
752 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
753 tree op = gimple_assign_rhs1 (stmt);
755 i = 1;
756 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
758 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
759 relevant, &worklist, false)
760 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
761 relevant, &worklist, false))
762 return false;
763 i = 2;
765 for (; i < gimple_num_ops (stmt); i++)
767 op = gimple_op (stmt, i);
768 if (TREE_CODE (op) == SSA_NAME
769 && !process_use (stmt, op, loop_vinfo, relevant,
770 &worklist, false))
771 return false;
774 else if (is_gimple_call (stmt))
776 for (i = 0; i < gimple_call_num_args (stmt); i++)
778 tree arg = gimple_call_arg (stmt, i);
779 if (!process_use (stmt, arg, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
785 else
786 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
788 tree op = USE_FROM_PTR (use_p);
789 if (!process_use (stmt, op, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
794 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
796 gather_scatter_info gs_info;
797 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
798 gcc_unreachable ();
799 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
800 &worklist, true))
801 return false;
803 } /* while worklist */
805 return true;
809 /* Function vect_model_simple_cost.
811 Models cost for simple operations, i.e. those that only emit ncopies of a
812 single op. Right now, this does not account for multiple insns that could
813 be generated for the single vector op. We will handle that shortly. */
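/* As a rough illustration: with NCOPIES == 2 and two operands that are
   both constant or external defs, the prologue is charged two
   scalar_to_vec broadcasts and the loop body two vector_stmt copies;
   the actual weights come from the target cost model.  */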
815 void
816 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
817 enum vect_def_type *dt,
818 int ndts,
819 stmt_vector_for_cost *prologue_cost_vec,
820 stmt_vector_for_cost *body_cost_vec)
822 int i;
823 int inside_cost = 0, prologue_cost = 0;
825 /* The SLP costs were already calculated during SLP tree build. */
826 if (PURE_SLP_STMT (stmt_info))
827 return;
829 /* Cost the "broadcast" of a scalar operand into a vector operand.
830 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
831 cost model. */
832 for (i = 0; i < ndts; i++)
833 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
834 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
835 stmt_info, 0, vect_prologue);
837 /* Pass the inside-of-loop statements to the target-specific cost model. */
838 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
839 stmt_info, 0, vect_body);
841 if (dump_enabled_p ())
842 dump_printf_loc (MSG_NOTE, vect_location,
843 "vect_model_simple_cost: inside_cost = %d, "
844 "prologue_cost = %d .\n", inside_cost, prologue_cost);
848 /* Model cost for type demotion and promotion operations. PWR is normally
849 zero for single-step promotions and demotions. It will be one if
850 two-step promotion/demotion is required, and so on. Each additional
851 step doubles the number of instructions required. */
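/* Worked example: a two-step promotion (PWR == 1) is charged
   vect_pow2 (1) + vect_pow2 (2) == 2 + 4 vec_promote_demote operations
   by the loop below, while a two-step demotion is charged
   vect_pow2 (0) + vect_pow2 (1) == 1 + 2.  */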
853 static void
854 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
855 enum vect_def_type *dt, int pwr)
857 int i, tmp;
858 int inside_cost = 0, prologue_cost = 0;
859 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
860 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
861 void *target_cost_data;
863 /* The SLP costs were already calculated during SLP tree build. */
864 if (PURE_SLP_STMT (stmt_info))
865 return;
867 if (loop_vinfo)
868 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
869 else
870 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 for (i = 0; i < pwr + 1; i++)
874 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
875 (i + 1) : i;
876 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
877 vec_promote_demote, stmt_info, 0,
878 vect_body);
881 /* FORNOW: Assuming maximum 2 args per stmt. */
882 for (i = 0; i < 2; i++)
883 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
884 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
885 stmt_info, 0, vect_prologue);
887 if (dump_enabled_p ())
888 dump_printf_loc (MSG_NOTE, vect_location,
889 "vect_model_promotion_demotion_cost: inside_cost = %d, "
890 "prologue_cost = %d .\n", inside_cost, prologue_cost);
893 /* Function vect_model_store_cost
895 Models cost for stores. In the case of grouped accesses, one access
896 has the overhead of the grouped access attributed to it. */
898 void
899 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
900 vect_memory_access_type memory_access_type,
901 enum vect_def_type dt, slp_tree slp_node,
902 stmt_vector_for_cost *prologue_cost_vec,
903 stmt_vector_for_cost *body_cost_vec)
905 unsigned int inside_cost = 0, prologue_cost = 0;
906 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
907 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
908 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910 if (dt == vect_constant_def || dt == vect_external_def)
911 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
912 stmt_info, 0, vect_prologue);
914 /* Grouped stores update all elements in the group at once,
915 so we want the DR for the first statement. */
916 if (!slp_node && grouped_access_p)
918 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
919 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
922 /* True if we should include any once-per-group costs as well as
923 the cost of the statement itself. For SLP we only get called
924 once per group anyhow. */
925 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927 /* We assume that the cost of a single store-lanes instruction is
928 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
929 access is instead being provided by a permute-and-store operation,
930 include the cost of the permutes. */
931 if (first_stmt_p
932 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
934 /* Uses high and low interleave or shuffle operations for each
935 needed permute. */
936 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
937 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
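/* For example, a group of 4 stores with NCOPIES == 1 is charged
   1 * ceil_log2 (4) * 4 == 8 vec_perm operations in the loop body.  */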
938 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
939 stmt_info, 0, vect_body);
941 if (dump_enabled_p ())
942 dump_printf_loc (MSG_NOTE, vect_location,
943 "vect_model_store_cost: strided group_size = %d .\n",
944 group_size);
947 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
948 /* Costs of the stores. */
949 if (memory_access_type == VMAT_ELEMENTWISE
950 || memory_access_type == VMAT_GATHER_SCATTER)
951 /* N scalar stores plus extracting the elements. */
952 inside_cost += record_stmt_cost (body_cost_vec,
953 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
954 scalar_store, stmt_info, 0, vect_body);
955 else
956 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_STRIDED_SLP)
960 inside_cost += record_stmt_cost (body_cost_vec,
961 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
962 vec_to_scalar, stmt_info, 0, vect_body);
964 if (dump_enabled_p ())
965 dump_printf_loc (MSG_NOTE, vect_location,
966 "vect_model_store_cost: inside_cost = %d, "
967 "prologue_cost = %d .\n", inside_cost, prologue_cost);
971 /* Calculate cost of DR's memory access. */
972 void
973 vect_get_store_cost (struct data_reference *dr, int ncopies,
974 unsigned int *inside_cost,
975 stmt_vector_for_cost *body_cost_vec)
977 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
978 gimple *stmt = DR_STMT (dr);
979 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
981 switch (alignment_support_scheme)
983 case dr_aligned:
985 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
986 vector_store, stmt_info, 0,
987 vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: aligned.\n");
992 break;
995 case dr_unaligned_supported:
997 /* Here, we assign an additional cost for the unaligned store. */
998 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
999 unaligned_store, stmt_info,
1000 DR_MISALIGNMENT (dr), vect_body);
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: unaligned supported by "
1004 "hardware.\n");
1005 break;
1008 case dr_unaligned_unsupported:
1010 *inside_cost = VECT_MAX_COST;
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1014 "vect_model_store_cost: unsupported access.\n");
1015 break;
1018 default:
1019 gcc_unreachable ();
1024 /* Function vect_model_load_cost
1026 Models cost for loads. In the case of grouped accesses, one access has
1027 the overhead of the grouped access attributed to it. Since unaligned
1028 accesses are supported for loads, we also account for the costs of the
1029 access scheme chosen. */
1031 void
1032 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1033 vect_memory_access_type memory_access_type,
1034 slp_tree slp_node,
1035 stmt_vector_for_cost *prologue_cost_vec,
1036 stmt_vector_for_cost *body_cost_vec)
1038 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1040 unsigned int inside_cost = 0, prologue_cost = 0;
1041 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1043 /* Grouped loads read all elements in the group at once,
1044 so we want the DR for the first statement. */
1045 if (!slp_node && grouped_access_p)
1047 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1048 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1051 /* True if we should include any once-per-group costs as well as
1052 the cost of the statement itself. For SLP we only get called
1053 once per group anyhow. */
1054 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1056 /* We assume that the cost of a single load-lanes instruction is
1057 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1058 access is instead being provided by a load-and-permute operation,
1059 include the cost of the permutes. */
1060 if (first_stmt_p
1061 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1063 /* Uses even and odd extract operations or shuffle operations
1064 for each needed permute. */
1065 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1066 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1067 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1068 stmt_info, 0, vect_body);
1070 if (dump_enabled_p ())
1071 dump_printf_loc (MSG_NOTE, vect_location,
1072 "vect_model_load_cost: strided group_size = %d .\n",
1073 group_size);
1076 /* The loads themselves. */
1077 if (memory_access_type == VMAT_ELEMENTWISE
1078 || memory_access_type == VMAT_GATHER_SCATTER)
1080 /* N scalar loads plus gathering them into a vector. */
1081 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1082 inside_cost += record_stmt_cost (body_cost_vec,
1083 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1084 scalar_load, stmt_info, 0, vect_body);
1086 else
1087 vect_get_load_cost (dr, ncopies, first_stmt_p,
1088 &inside_cost, &prologue_cost,
1089 prologue_cost_vec, body_cost_vec, true);
1090 if (memory_access_type == VMAT_ELEMENTWISE
1091 || memory_access_type == VMAT_STRIDED_SLP)
1092 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1093 stmt_info, 0, vect_body);
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_NOTE, vect_location,
1097 "vect_model_load_cost: inside_cost = %d, "
1098 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1102 /* Calculate cost of DR's memory access. */
1103 void
1104 vect_get_load_cost (struct data_reference *dr, int ncopies,
1105 bool add_realign_cost, unsigned int *inside_cost,
1106 unsigned int *prologue_cost,
1107 stmt_vector_for_cost *prologue_cost_vec,
1108 stmt_vector_for_cost *body_cost_vec,
1109 bool record_prologue_costs)
1111 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1112 gimple *stmt = DR_STMT (dr);
1113 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1115 switch (alignment_support_scheme)
1117 case dr_aligned:
1119 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1120 stmt_info, 0, vect_body);
1122 if (dump_enabled_p ())
1123 dump_printf_loc (MSG_NOTE, vect_location,
1124 "vect_model_load_cost: aligned.\n");
1126 break;
1128 case dr_unaligned_supported:
1130 /* Here, we assign an additional cost for the unaligned load. */
1131 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1132 unaligned_load, stmt_info,
1133 DR_MISALIGNMENT (dr), vect_body);
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_NOTE, vect_location,
1137 "vect_model_load_cost: unaligned supported by "
1138 "hardware.\n");
1140 break;
1142 case dr_explicit_realign:
1144 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1145 vector_load, stmt_info, 0, vect_body);
1146 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1147 vec_perm, stmt_info, 0, vect_body);
1149 /* FIXME: If the misalignment remains fixed across the iterations of
1150 the containing loop, the following cost should be added to the
1151 prologue costs. */
1152 if (targetm.vectorize.builtin_mask_for_load)
1153 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1154 stmt_info, 0, vect_body);
1156 if (dump_enabled_p ())
1157 dump_printf_loc (MSG_NOTE, vect_location,
1158 "vect_model_load_cost: explicit realign\n");
1160 break;
1162 case dr_explicit_realign_optimized:
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE, vect_location,
1166 "vect_model_load_cost: unaligned software "
1167 "pipelined.\n");
1169 /* Unaligned software pipeline has a load of an address, an initial
1170 load, and possibly a mask operation to "prime" the loop. However,
1171 if this is an access in a group of loads, which provide grouped
1172 access, then the above cost should only be considered for one
1173 access in the group. Inside the loop, there is a load op
1174 and a realignment op. */
1176 if (add_realign_cost && record_prologue_costs)
1178 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1179 vector_stmt, stmt_info,
1180 0, vect_prologue);
1181 if (targetm.vectorize.builtin_mask_for_load)
1182 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1183 vector_stmt, stmt_info,
1184 0, vect_prologue);
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1189 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1190 stmt_info, 0, vect_body);
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: explicit realign optimized"
1195 "\n");
1197 break;
1200 case dr_unaligned_unsupported:
1202 *inside_cost = VECT_MAX_COST;
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1206 "vect_model_load_cost: unsupported access.\n");
1207 break;
1210 default:
1211 gcc_unreachable ();
1215 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1216 the loop preheader for the vectorized stmt STMT. */
1218 static void
1219 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1221 if (gsi)
1222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1223 else
1225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1226 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228 if (loop_vinfo)
1230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1231 basic_block new_bb;
1232 edge pe;
1234 if (nested_in_vect_loop_p (loop, stmt))
1235 loop = loop->inner;
1237 pe = loop_preheader_edge (loop);
1238 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1239 gcc_assert (!new_bb);
1241 else
1243 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1244 basic_block bb;
1245 gimple_stmt_iterator gsi_bb_start;
1247 gcc_assert (bb_vinfo);
1248 bb = BB_VINFO_BB (bb_vinfo);
1249 gsi_bb_start = gsi_after_labels (bb);
1250 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1254 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "created new init_stmt: ");
1258 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1262 /* Function vect_init_vector.
1264 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1265 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1266 a vector type, a vector with all elements equal to VAL is created first.
1267 Place the initialization at BSI if it is not NULL. Otherwise, place the
1268 initialization at the loop preheader.
1269 Return the DEF of INIT_STMT.
1270 It will be used in the vectorization of STMT. */
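/* For example, initializing a 4-element integer vector type from the
   scalar constant 5 emits an INIT_STMT of roughly the form
     cst_1 = { 5, 5, 5, 5 };
   where cst_1 is a fresh SSA name (see the "cst_" prefix passed to
   vect_get_new_ssa_name below); the name is illustrative only.  */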
1272 tree
1273 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1275 gimple *init_stmt;
1276 tree new_temp;
1278 /* We abuse this function to push something to an SSA name with the initial value 'val'. */
1279 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1281 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1282 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1284 /* A scalar boolean value should be transformed into
1285 an all-zeros or all-ones value before building a vector. */
1286 if (VECTOR_BOOLEAN_TYPE_P (type))
1288 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1289 tree false_val = build_zero_cst (TREE_TYPE (type));
1291 if (CONSTANT_CLASS_P (val))
1292 val = integer_zerop (val) ? false_val : true_val;
1293 else
1295 new_temp = make_ssa_name (TREE_TYPE (type));
1296 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1297 val, true_val, false_val);
1298 vect_init_vector_1 (stmt, init_stmt, gsi);
1299 val = new_temp;
1302 else if (CONSTANT_CLASS_P (val))
1303 val = fold_convert (TREE_TYPE (type), val);
1304 else
1306 new_temp = make_ssa_name (TREE_TYPE (type));
1307 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1308 init_stmt = gimple_build_assign (new_temp,
1309 fold_build1 (VIEW_CONVERT_EXPR,
1310 TREE_TYPE (type),
1311 val));
1312 else
1313 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1314 vect_init_vector_1 (stmt, init_stmt, gsi);
1315 val = new_temp;
1318 val = build_vector_from_val (type, val);
1321 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1322 init_stmt = gimple_build_assign (new_temp, val);
1323 vect_init_vector_1 (stmt, init_stmt, gsi);
1324 return new_temp;
1327 /* Function vect_get_vec_def_for_operand_1.
1329 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1330 DT that will be used in the vectorized stmt. */
1332 tree
1333 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1335 tree vec_oprnd;
1336 gimple *vec_stmt;
1337 stmt_vec_info def_stmt_info = NULL;
1339 switch (dt)
1341 /* operand is a constant or a loop invariant. */
1342 case vect_constant_def:
1343 case vect_external_def:
1344 /* Code should use vect_get_vec_def_for_operand. */
1345 gcc_unreachable ();
1347 /* operand is defined inside the loop. */
1348 case vect_internal_def:
1350 /* Get the def from the vectorized stmt. */
1351 def_stmt_info = vinfo_for_stmt (def_stmt);
1353 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1354 /* Get vectorized pattern statement. */
1355 if (!vec_stmt
1356 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1357 && !STMT_VINFO_RELEVANT (def_stmt_info))
1358 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1359 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1360 gcc_assert (vec_stmt);
1361 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1362 vec_oprnd = PHI_RESULT (vec_stmt);
1363 else if (is_gimple_call (vec_stmt))
1364 vec_oprnd = gimple_call_lhs (vec_stmt);
1365 else
1366 vec_oprnd = gimple_assign_lhs (vec_stmt);
1367 return vec_oprnd;
1370 /* operand is defined by a loop header phi - reduction */
1371 case vect_reduction_def:
1372 case vect_double_reduction_def:
1373 case vect_nested_cycle:
1374 /* Code should use get_initial_def_for_reduction. */
1375 gcc_unreachable ();
1377 /* operand is defined by loop-header phi - induction. */
1378 case vect_induction_def:
1380 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1382 /* Get the def from the vectorized stmt. */
1383 def_stmt_info = vinfo_for_stmt (def_stmt);
1384 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1385 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1386 vec_oprnd = PHI_RESULT (vec_stmt);
1387 else
1388 vec_oprnd = gimple_get_lhs (vec_stmt);
1389 return vec_oprnd;
1392 default:
1393 gcc_unreachable ();
1398 /* Function vect_get_vec_def_for_operand.
1400 OP is an operand in STMT. This function returns a (vector) def that will be
1401 used in the vectorized stmt for STMT.
1403 In the case that OP is an SSA_NAME which is defined in the loop, then
1404 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1406 In case OP is an invariant or constant, a new stmt that creates a vector def
1407 needs to be introduced. VECTYPE may be used to specify a required type for
1408 vector invariant. */
1410 tree
1411 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1413 gimple *def_stmt;
1414 enum vect_def_type dt;
1415 bool is_simple_use;
1416 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1417 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1419 if (dump_enabled_p ())
1421 dump_printf_loc (MSG_NOTE, vect_location,
1422 "vect_get_vec_def_for_operand: ");
1423 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1424 dump_printf (MSG_NOTE, "\n");
1427 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1428 gcc_assert (is_simple_use);
1429 if (def_stmt && dump_enabled_p ())
1431 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1432 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1435 if (dt == vect_constant_def || dt == vect_external_def)
1437 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1438 tree vector_type;
1440 if (vectype)
1441 vector_type = vectype;
1442 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1443 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1444 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1445 else
1446 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1448 gcc_assert (vector_type);
1449 return vect_init_vector (stmt, op, vector_type, NULL);
1451 else
1452 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1456 /* Function vect_get_vec_def_for_stmt_copy
1458 Return a vector-def for an operand. This function is used when the
1459 vectorized stmt to be created (by the caller to this function) is a "copy"
1460 created in case the vectorized result cannot fit in one vector, and several
1461 copies of the vector-stmt are required. In this case the vector-def is
1462 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1463 of the stmt that defines VEC_OPRND.
1464 DT is the type of the vector def VEC_OPRND.
1466 Context:
1467 In case the vectorization factor (VF) is bigger than the number
1468 of elements that can fit in a vectype (nunits), we have to generate
1469 more than one vector stmt to vectorize the scalar stmt. This situation
1470 arises when there are multiple data-types operated upon in the loop; the
1471 smallest data-type determines the VF, and as a result, when vectorizing
1472 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1473 vector stmt (each computing a vector of 'nunits' results, and together
1474 computing 'VF' results in each iteration). This function is called when
1475 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1476 which VF=16 and nunits=4, so the number of copies required is 4):
1478 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1480 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1481 VS1.1: vx.1 = memref1 VS1.2
1482 VS1.2: vx.2 = memref2 VS1.3
1483 VS1.3: vx.3 = memref3
1485 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1486 VSnew.1: vz1 = vx.1 + ... VSnew.2
1487 VSnew.2: vz2 = vx.2 + ... VSnew.3
1488 VSnew.3: vz3 = vx.3 + ...
1490 The vectorization of S1 is explained in vectorizable_load.
1491 The vectorization of S2:
1492 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1493 the function 'vect_get_vec_def_for_operand' is called to
1494 get the relevant vector-def for each operand of S2. For operand x it
1495 returns the vector-def 'vx.0'.
1497 To create the remaining copies of the vector-stmt (VSnew.j), this
1498 function is called to get the relevant vector-def for each operand. It is
1499 obtained from the respective VS1.j stmt, which is recorded in the
1500 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1502 For example, to obtain the vector-def 'vx.1' in order to create the
1503 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1504 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1505 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1506 and return its def ('vx.1').
1507 Overall, to create the above sequence this function will be called 3 times:
1508 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1509 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1510 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1512 tree
1513 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1515 gimple *vec_stmt_for_operand;
1516 stmt_vec_info def_stmt_info;
1518 /* Do nothing; can reuse same def. */
1519 if (dt == vect_external_def || dt == vect_constant_def )
1520 return vec_oprnd;
1522 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1523 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1524 gcc_assert (def_stmt_info);
1525 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1526 gcc_assert (vec_stmt_for_operand);
1527 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1528 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1529 else
1530 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1531 return vec_oprnd;
1535 /* Get vectorized definitions for the operands to create a copy of an original
1536 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1538 static void
1539 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1540 vec<tree> *vec_oprnds0,
1541 vec<tree> *vec_oprnds1)
1543 tree vec_oprnd = vec_oprnds0->pop ();
1545 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1546 vec_oprnds0->quick_push (vec_oprnd);
1548 if (vec_oprnds1 && vec_oprnds1->length ())
1550 vec_oprnd = vec_oprnds1->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1552 vec_oprnds1->quick_push (vec_oprnd);
1557 /* Get vectorized definitions for OP0 and OP1, either from SLP_NODE
1558 or by calling vect_get_vec_def_for_operand on each of them. */
1561 static void
1562 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1563 vec<tree> *vec_oprnds0,
1564 vec<tree> *vec_oprnds1,
1565 slp_tree slp_node)
1567 if (slp_node)
1569 int nops = (op1 == NULL_TREE) ? 1 : 2;
1570 auto_vec<tree> ops (nops);
1571 auto_vec<vec<tree> > vec_defs (nops);
1573 ops.quick_push (op0);
1574 if (op1)
1575 ops.quick_push (op1);
1577 vect_get_slp_defs (ops, slp_node, &vec_defs);
1579 *vec_oprnds0 = vec_defs[0];
1580 if (op1)
1581 *vec_oprnds1 = vec_defs[1];
1583 else
1585 tree vec_oprnd;
1587 vec_oprnds0->create (1);
1588 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1589 vec_oprnds0->quick_push (vec_oprnd);
1591 if (op1)
1593 vec_oprnds1->create (1);
1594 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1595 vec_oprnds1->quick_push (vec_oprnd);
1601 /* Function vect_finish_stmt_generation.
1603 Insert a new stmt. */
1605 void
1606 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1607 gimple_stmt_iterator *gsi)
1609 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1610 vec_info *vinfo = stmt_info->vinfo;
1612 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1614 if (!gsi_end_p (*gsi)
1615 && gimple_has_mem_ops (vec_stmt))
1617 gimple *at_stmt = gsi_stmt (*gsi);
1618 tree vuse = gimple_vuse (at_stmt);
1619 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1621 tree vdef = gimple_vdef (at_stmt);
1622 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1623 /* If we have an SSA vuse and insert a store, update virtual
1624 SSA form to avoid triggering the renamer. Do so only
1625 if we can easily see all uses - which is what almost always
1626 happens with the way vectorized stmts are inserted. */
1627 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1628 && ((is_gimple_assign (vec_stmt)
1629 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1630 || (is_gimple_call (vec_stmt)
1631 && !(gimple_call_flags (vec_stmt)
1632 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1634 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1635 gimple_set_vdef (vec_stmt, new_vdef);
1636 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1640 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1642 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1644 if (dump_enabled_p ())
1646 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1650 gimple_set_location (vec_stmt, gimple_location (stmt));
1652 /* While EH edges will generally prevent vectorization, stmt might
1653 e.g. be in a must-not-throw region. Ensure newly created stmts
1654 that could throw are part of the same region. */
1655 int lp_nr = lookup_stmt_eh_lp (stmt);
1656 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1657 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1660 /* We want to vectorize a call to combined function CFN with function
1661 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1662 as the types of all inputs. Check whether this is possible using
1663 an internal function, returning its code if so or IFN_LAST if not. */
1665 static internal_fn
1666 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1667 tree vectype_out, tree vectype_in)
1669 internal_fn ifn;
1670 if (internal_fn_p (cfn))
1671 ifn = as_internal_fn (cfn);
1672 else
1673 ifn = associated_internal_fn (fndecl);
1674 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1676 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1677 if (info.vectorizable)
1679 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1680 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1681 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1682 OPTIMIZE_FOR_SPEED))
1683 return ifn;
1686 return IFN_LAST;
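/* For example, a call to the sqrt built-in has IFN_SQRT as its associated
   internal function; it is returned here only when the target supports
   IFN_SQRT directly for the given vector types, otherwise IFN_LAST tells
   the caller to vectorize the call some other way.  */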
1690 static tree permute_vec_elements (tree, tree, tree, gimple *,
1691 gimple_stmt_iterator *);
1693 /* STMT is a non-strided load or store, meaning that it accesses
1694 elements with a known constant step. Return -1 if that step
1695 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1697 static int
1698 compare_step_with_zero (gimple *stmt)
1700 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1701 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1702 tree step;
1703 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1704 step = STMT_VINFO_DR_STEP (stmt_info);
1705 else
1706 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1707 return tree_int_cst_compare (step, size_zero_node);
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1713 static tree
1714 perm_mask_for_reverse (tree vectype)
1716 int i, nunits;
1717 unsigned char *sel;
1719 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1720 sel = XALLOCAVEC (unsigned char, nunits);
1722 for (i = 0; i < nunits; ++i)
1723 sel[i] = nunits - 1 - i;
1725 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1726 return NULL_TREE;
1727 return vect_gen_perm_mask_checked (vectype, sel);
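/* For instance, for a 4-element vector type the selector built above is
   { 3, 2, 1, 0 }, i.e. element I of the result is element NUNITS - 1 - I
   of the input; NULL_TREE is returned if can_vec_perm_p rejects it.  */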
1730 /* A subroutine of get_load_store_type, with a subset of the same
1731 arguments. Handle the case where STMT is part of a grouped load
1732 or store.
1734 For stores, the statements in the group are all consecutive
1735 and there is no gap at the end. For loads, the statements in the
1736 group might not be consecutive; there can be gaps between statements
1737 as well as at the end. */
1739 static bool
1740 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1741 vec_load_store_type vls_type,
1742 vect_memory_access_type *memory_access_type)
1744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1745 vec_info *vinfo = stmt_info->vinfo;
1746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1747 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1748 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1749 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1750 bool single_element_p = (stmt == first_stmt
1751 && !GROUP_NEXT_ELEMENT (stmt_info));
1752 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1753 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1755 /* True if the vectorized statements would access beyond the last
1756 statement in the group. */
1757 bool overrun_p = false;
1759 /* True if we can cope with such overrun by peeling for gaps, so that
1760 there is at least one final scalar iteration after the vector loop. */
1761 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1763 /* There can only be a gap at the end of the group if the stride is
1764 known at compile time. */
1765 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1767 /* Stores can't yet have gaps. */
1768 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1770 if (slp)
1772 if (STMT_VINFO_STRIDED_P (stmt_info))
1774 /* Try to use consecutive accesses of GROUP_SIZE elements,
1775 separated by the stride, until we have a complete vector.
1776 Fall back to scalar accesses if that isn't possible. */
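/* For instance, a group of 2 accesses with an 8-element vector type uses
   VMAT_STRIDED_SLP (8 % 2 == 0, so each vector covers whole groups),
   whereas a group of 3 with a 4-element vector type falls back to
   VMAT_ELEMENTWISE.  */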
1777 if (nunits % group_size == 0)
1778 *memory_access_type = VMAT_STRIDED_SLP;
1779 else
1780 *memory_access_type = VMAT_ELEMENTWISE;
1782 else
1784 overrun_p = loop_vinfo && gap != 0;
1785 if (overrun_p && vls_type != VLS_LOAD)
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 "Grouped store with gaps requires"
1789 " non-consecutive accesses\n");
1790 return false;
1792 /* If the access is aligned, an overrun is fine. */
1793 if (overrun_p
1794 && aligned_access_p
1795 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1796 overrun_p = false;
1797 if (overrun_p && !can_overrun_p)
1799 if (dump_enabled_p ())
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "Peeling for outer loop is not supported\n");
1802 return false;
1804 *memory_access_type = VMAT_CONTIGUOUS;
1807 else
1809 /* We can always handle this case using elementwise accesses,
1810 but see if something more efficient is available. */
1811 *memory_access_type = VMAT_ELEMENTWISE;
1813 /* If there is a gap at the end of the group then these optimizations
1814 would access excess elements in the last iteration. */
1815 bool would_overrun_p = (gap != 0);
1816 /* If the access is aligned, an overrun is fine, but only if the
1817 overrun is not inside an unused vector (if the gap is as large
1818 or larger than a vector). */
1819 if (would_overrun_p
1820 && gap < nunits
1821 && aligned_access_p
1822 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1823 would_overrun_p = false;
1824 if (!STMT_VINFO_STRIDED_P (stmt_info)
1825 && (can_overrun_p || !would_overrun_p)
1826 && compare_step_with_zero (stmt) > 0)
1828 /* First try using LOAD/STORE_LANES. */
1829 if (vls_type == VLS_LOAD
1830 ? vect_load_lanes_supported (vectype, group_size)
1831 : vect_store_lanes_supported (vectype, group_size))
1833 *memory_access_type = VMAT_LOAD_STORE_LANES;
1834 overrun_p = would_overrun_p;
1837 /* If that fails, try using permuting loads. */
1838 if (*memory_access_type == VMAT_ELEMENTWISE
1839 && (vls_type == VLS_LOAD
1840 ? vect_grouped_load_supported (vectype, single_element_p,
1841 group_size)
1842 : vect_grouped_store_supported (vectype, group_size)))
1844 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1845 overrun_p = would_overrun_p;
1850 if (vls_type != VLS_LOAD && first_stmt == stmt)
1852 /* STMT is the leader of the group. Check the operands of all the
1853 stmts of the group. */
1854 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1855 while (next_stmt)
1857 gcc_assert (gimple_assign_single_p (next_stmt));
1858 tree op = gimple_assign_rhs1 (next_stmt);
1859 gimple *def_stmt;
1860 enum vect_def_type dt;
1861 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1865 "use not simple.\n");
1866 return false;
1868 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1872 if (overrun_p)
1874 gcc_assert (can_overrun_p);
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1877 "Data access with gaps requires scalar "
1878 "epilogue loop\n");
1879 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1882 return true;
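/* For the non-SLP grouped case above the resulting preference order is
   therefore VMAT_LOAD_STORE_LANES, then VMAT_CONTIGUOUS_PERMUTE, with
   VMAT_ELEMENTWISE left as the fallback when neither is supported or the
   step is not known to be positive.  */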
1885 /* A subroutine of get_load_store_type, with a subset of the same
1886 arguments. Handle the case where STMT is a load or store that
1887 accesses consecutive elements with a negative step. */
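/* For example, a single-copy load from a[n - i] on a target that can handle
   the (mis)alignment and that supports reversing a vector gets
   VMAT_CONTIGUOUS_REVERSE; an invariant-source store gets
   VMAT_CONTIGUOUS_DOWN since no permute is needed; anything else falls back
   to VMAT_ELEMENTWISE.  */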
1889 static vect_memory_access_type
1890 get_negative_load_store_type (gimple *stmt, tree vectype,
1891 vec_load_store_type vls_type,
1892 unsigned int ncopies)
1894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1895 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1896 dr_alignment_support alignment_support_scheme;
1898 if (ncopies > 1)
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1902 "multiple types with negative step.\n");
1903 return VMAT_ELEMENTWISE;
1906 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1907 if (alignment_support_scheme != dr_aligned
1908 && alignment_support_scheme != dr_unaligned_supported)
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 "negative step but alignment required.\n");
1913 return VMAT_ELEMENTWISE;
1916 if (vls_type == VLS_STORE_INVARIANT)
1918 if (dump_enabled_p ())
1919 dump_printf_loc (MSG_NOTE, vect_location,
1920 "negative step with invariant source;"
1921 " no permute needed.\n");
1922 return VMAT_CONTIGUOUS_DOWN;
1925 if (!perm_mask_for_reverse (vectype))
1927 if (dump_enabled_p ())
1928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1929 "negative step and reversing not supported.\n");
1930 return VMAT_ELEMENTWISE;
1933 return VMAT_CONTIGUOUS_REVERSE;
1936 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1937 if there is a memory access type that the vectorized form can use,
1938 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1939 or scatters, fill in GS_INFO accordingly.
1941 SLP says whether we're performing SLP rather than loop vectorization.
1942 VECTYPE is the vector type that the vectorized statements will use.
1943 NCOPIES is the number of vector statements that will be needed. */
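/* For a plain (non-gather/scatter, ungrouped, unstrided) reference the
   classification below is driven by the sign of the step: a negative step
   is handled by get_negative_load_store_type, a zero step is only valid
   for loads and yields VMAT_INVARIANT, and a positive step yields
   VMAT_CONTIGUOUS.  */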
1945 static bool
1946 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1947 vec_load_store_type vls_type, unsigned int ncopies,
1948 vect_memory_access_type *memory_access_type,
1949 gather_scatter_info *gs_info)
1951 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1952 vec_info *vinfo = stmt_info->vinfo;
1953 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1954 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1956 *memory_access_type = VMAT_GATHER_SCATTER;
1957 gimple *def_stmt;
1958 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1959 gcc_unreachable ();
1960 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1961 &gs_info->offset_dt,
1962 &gs_info->offset_vectype))
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "%s index use not simple.\n",
1967 vls_type == VLS_LOAD ? "gather" : "scatter");
1968 return false;
1971 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1973 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1974 memory_access_type))
1975 return false;
1977 else if (STMT_VINFO_STRIDED_P (stmt_info))
1979 gcc_assert (!slp);
1980 *memory_access_type = VMAT_ELEMENTWISE;
1982 else
1984 int cmp = compare_step_with_zero (stmt);
1985 if (cmp < 0)
1986 *memory_access_type = get_negative_load_store_type
1987 (stmt, vectype, vls_type, ncopies);
1988 else if (cmp == 0)
1990 gcc_assert (vls_type == VLS_LOAD);
1991 *memory_access_type = VMAT_INVARIANT;
1993 else
1994 *memory_access_type = VMAT_CONTIGUOUS;
1997 /* FIXME: At the moment the cost model seems to underestimate the
1998 cost of using elementwise accesses. This check preserves the
1999 traditional behavior until that can be fixed. */
2000 if (*memory_access_type == VMAT_ELEMENTWISE
2001 && !STMT_VINFO_STRIDED_P (stmt_info))
2003 if (dump_enabled_p ())
2004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2005 "not falling back to elementwise accesses\n");
2006 return false;
2008 return true;
2011 /* Function vectorizable_mask_load_store.
2013 Check if STMT performs a conditional load or store that can be vectorized.
2014 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2015 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2016 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2018 static bool
2019 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2020 gimple **vec_stmt, slp_tree slp_node)
2022 tree vec_dest = NULL;
2023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2024 stmt_vec_info prev_stmt_info;
2025 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2026 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2027 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2028 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2029 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2030 tree rhs_vectype = NULL_TREE;
2031 tree mask_vectype;
2032 tree elem_type;
2033 gimple *new_stmt;
2034 tree dummy;
2035 tree dataref_ptr = NULL_TREE;
2036 gimple *ptr_incr;
2037 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2038 int ncopies;
2039 int i, j;
2040 bool inv_p;
2041 gather_scatter_info gs_info;
2042 vec_load_store_type vls_type;
2043 tree mask;
2044 gimple *def_stmt;
2045 enum vect_def_type dt;
2047 if (slp_node != NULL)
2048 return false;
2050 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2051 gcc_assert (ncopies >= 1);
2053 mask = gimple_call_arg (stmt, 2);
2055 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2056 return false;
2058 /* FORNOW. This restriction should be relaxed. */
2059 if (nested_in_vect_loop && ncopies > 1)
2061 if (dump_enabled_p ())
2062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2063 "multiple types in nested loop.");
2064 return false;
2067 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2068 return false;
2070 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2071 && ! vec_stmt)
2072 return false;
2074 if (!STMT_VINFO_DATA_REF (stmt_info))
2075 return false;
2077 elem_type = TREE_TYPE (vectype);
2079 if (TREE_CODE (mask) != SSA_NAME)
2080 return false;
2082 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2083 return false;
2085 if (!mask_vectype)
2086 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2088 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2089 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2090 return false;
2092 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2094 tree rhs = gimple_call_arg (stmt, 3);
2095 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2096 return false;
2097 if (dt == vect_constant_def || dt == vect_external_def)
2098 vls_type = VLS_STORE_INVARIANT;
2099 else
2100 vls_type = VLS_STORE;
2102 else
2103 vls_type = VLS_LOAD;
2105 vect_memory_access_type memory_access_type;
2106 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2107 &memory_access_type, &gs_info))
2108 return false;
2110 if (memory_access_type == VMAT_GATHER_SCATTER)
2112 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2113 tree masktype
2114 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2115 if (TREE_CODE (masktype) == INTEGER_TYPE)
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2119 "masked gather with integer mask not supported.");
2120 return false;
2123 else if (memory_access_type != VMAT_CONTIGUOUS)
2125 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2127 "unsupported access type for masked %s.\n",
2128 vls_type == VLS_LOAD ? "load" : "store");
2129 return false;
2131 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2132 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2133 TYPE_MODE (mask_vectype),
2134 vls_type == VLS_LOAD)
2135 || (rhs_vectype
2136 && !useless_type_conversion_p (vectype, rhs_vectype)))
2137 return false;
2139 if (!vec_stmt) /* transformation not required. */
2141 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2142 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2143 if (vls_type == VLS_LOAD)
2144 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2145 NULL, NULL, NULL);
2146 else
2147 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2148 dt, NULL, NULL, NULL);
2149 return true;
2151 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2153 /* Transform. */
2155 if (memory_access_type == VMAT_GATHER_SCATTER)
2157 tree vec_oprnd0 = NULL_TREE, op;
2158 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2159 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2160 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2161 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2162 tree mask_perm_mask = NULL_TREE;
2163 edge pe = loop_preheader_edge (loop);
2164 gimple_seq seq;
2165 basic_block new_bb;
2166 enum { NARROW, NONE, WIDEN } modifier;
2167 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2169 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2170 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2171 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2172 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2173 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2174 scaletype = TREE_VALUE (arglist);
2175 gcc_checking_assert (types_compatible_p (srctype, rettype)
2176 && types_compatible_p (srctype, masktype));
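/* Decide how the data vector width relates to the width of the gather
   offset vector.  For example, with nunits == 4 and gather_off_nunits == 8
   this is the WIDEN case and the selector built below is
   { 4, 5, 6, 7, 4, 5, 6, 7 }, which on the odd-numbered copies moves the
   high half of the offset vector into the low half.  */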
2178 if (nunits == gather_off_nunits)
2179 modifier = NONE;
2180 else if (nunits == gather_off_nunits / 2)
2182 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2183 modifier = WIDEN;
2185 for (i = 0; i < gather_off_nunits; ++i)
2186 sel[i] = i | nunits;
2188 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2190 else if (nunits == gather_off_nunits * 2)
2192 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2193 modifier = NARROW;
2195 for (i = 0; i < nunits; ++i)
2196 sel[i] = i < gather_off_nunits
2197 ? i : i + nunits - gather_off_nunits;
2199 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2200 ncopies *= 2;
2201 for (i = 0; i < nunits; ++i)
2202 sel[i] = i | gather_off_nunits;
2203 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2205 else
2206 gcc_unreachable ();
2208 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2210 ptr = fold_convert (ptrtype, gs_info.base);
2211 if (!is_gimple_min_invariant (ptr))
2213 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2214 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2215 gcc_assert (!new_bb);
2218 scale = build_int_cst (scaletype, gs_info.scale);
2220 prev_stmt_info = NULL;
2221 for (j = 0; j < ncopies; ++j)
2223 if (modifier == WIDEN && (j & 1))
2224 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2225 perm_mask, stmt, gsi);
2226 else if (j == 0)
2227 op = vec_oprnd0
2228 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2229 else
2230 op = vec_oprnd0
2231 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2233 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2235 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2236 == TYPE_VECTOR_SUBPARTS (idxtype));
2237 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2238 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2239 new_stmt
2240 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2241 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2242 op = var;
2245 if (mask_perm_mask && (j & 1))
2246 mask_op = permute_vec_elements (mask_op, mask_op,
2247 mask_perm_mask, stmt, gsi);
2248 else
2250 if (j == 0)
2251 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2252 else
2254 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2255 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2258 mask_op = vec_mask;
2259 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2261 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2262 == TYPE_VECTOR_SUBPARTS (masktype));
2263 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2264 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2265 new_stmt
2266 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2267 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2268 mask_op = var;
2272 new_stmt
2273 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2274 scale);
2276 if (!useless_type_conversion_p (vectype, rettype))
2278 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2279 == TYPE_VECTOR_SUBPARTS (rettype));
2280 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2281 gimple_call_set_lhs (new_stmt, op);
2282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2283 var = make_ssa_name (vec_dest);
2284 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2285 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2287 else
2289 var = make_ssa_name (vec_dest, new_stmt);
2290 gimple_call_set_lhs (new_stmt, var);
2293 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2295 if (modifier == NARROW)
2297 if ((j & 1) == 0)
2299 prev_res = var;
2300 continue;
2302 var = permute_vec_elements (prev_res, var,
2303 perm_mask, stmt, gsi);
2304 new_stmt = SSA_NAME_DEF_STMT (var);
2307 if (prev_stmt_info == NULL)
2308 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2309 else
2310 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2311 prev_stmt_info = vinfo_for_stmt (new_stmt);
2314 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2315 from the IL. */
2316 if (STMT_VINFO_RELATED_STMT (stmt_info))
2318 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2319 stmt_info = vinfo_for_stmt (stmt);
2321 tree lhs = gimple_call_lhs (stmt);
2322 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2323 set_vinfo_for_stmt (new_stmt, stmt_info);
2324 set_vinfo_for_stmt (stmt, NULL);
2325 STMT_VINFO_STMT (stmt_info) = new_stmt;
2326 gsi_replace (gsi, new_stmt, true);
2327 return true;
2329 else if (vls_type != VLS_LOAD)
2331 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2332 prev_stmt_info = NULL;
2333 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2334 for (i = 0; i < ncopies; i++)
2336 unsigned align, misalign;
2338 if (i == 0)
2340 tree rhs = gimple_call_arg (stmt, 3);
2341 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2342 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2343 /* We should have caught mismatched types earlier. */
2344 gcc_assert (useless_type_conversion_p (vectype,
2345 TREE_TYPE (vec_rhs)));
2346 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2347 NULL_TREE, &dummy, gsi,
2348 &ptr_incr, false, &inv_p);
2349 gcc_assert (!inv_p);
2351 else
2353 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2354 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2355 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2356 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2357 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2358 TYPE_SIZE_UNIT (vectype));
2361 align = TYPE_ALIGN_UNIT (vectype);
2362 if (aligned_access_p (dr))
2363 misalign = 0;
2364 else if (DR_MISALIGNMENT (dr) == -1)
2366 align = TYPE_ALIGN_UNIT (elem_type);
2367 misalign = 0;
2369 else
2370 misalign = DR_MISALIGNMENT (dr);
2371 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2372 misalign);
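/* The value passed as the second argument of the masked-store call is
   either the alignment computed above or, for a known nonzero misalignment,
   its lowest set bit: e.g. misalign == 4 gives least_bit_hwi (4) == 4,
   misalign == 6 gives 2.  */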
2373 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2374 misalign ? least_bit_hwi (misalign) : align);
2375 new_stmt
2376 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2377 ptr, vec_mask, vec_rhs);
2378 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2379 if (i == 0)
2380 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2381 else
2382 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2383 prev_stmt_info = vinfo_for_stmt (new_stmt);
2386 else
2388 tree vec_mask = NULL_TREE;
2389 prev_stmt_info = NULL;
2390 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2391 for (i = 0; i < ncopies; i++)
2393 unsigned align, misalign;
2395 if (i == 0)
2397 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2398 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2399 NULL_TREE, &dummy, gsi,
2400 &ptr_incr, false, &inv_p);
2401 gcc_assert (!inv_p);
2403 else
2405 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2406 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2407 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2408 TYPE_SIZE_UNIT (vectype));
2411 align = TYPE_ALIGN_UNIT (vectype);
2412 if (aligned_access_p (dr))
2413 misalign = 0;
2414 else if (DR_MISALIGNMENT (dr) == -1)
2416 align = TYPE_ALIGN_UNIT (elem_type);
2417 misalign = 0;
2419 else
2420 misalign = DR_MISALIGNMENT (dr);
2421 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2422 misalign);
2423 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2424 misalign ? least_bit_hwi (misalign) : align);
2425 new_stmt
2426 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2427 ptr, vec_mask);
2428 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2429 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2430 if (i == 0)
2431 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2432 else
2433 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2434 prev_stmt_info = vinfo_for_stmt (new_stmt);
2438 if (vls_type == VLS_LOAD)
2440 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2441 from the IL. */
2442 if (STMT_VINFO_RELATED_STMT (stmt_info))
2444 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2445 stmt_info = vinfo_for_stmt (stmt);
2447 tree lhs = gimple_call_lhs (stmt);
2448 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2449 set_vinfo_for_stmt (new_stmt, stmt_info);
2450 set_vinfo_for_stmt (stmt, NULL);
2451 STMT_VINFO_STMT (stmt_info) = new_stmt;
2452 gsi_replace (gsi, new_stmt, true);
2455 return true;
2458 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2460 static bool
2461 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2462 gimple **vec_stmt, slp_tree slp_node,
2463 tree vectype_in, enum vect_def_type *dt)
2465 tree op, vectype;
2466 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2467 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2468 unsigned ncopies, nunits;
2470 op = gimple_call_arg (stmt, 0);
2471 vectype = STMT_VINFO_VECTYPE (stmt_info);
2472 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2474 /* Multiple types in SLP are handled by creating the appropriate number of
2475 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2476 case of SLP. */
2477 if (slp_node)
2478 ncopies = 1;
2479 else
2480 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2482 gcc_assert (ncopies >= 1);
2484 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2485 if (! char_vectype)
2486 return false;
2488 unsigned char *elts
2489 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2490 unsigned char *elt = elts;
2491 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2492 for (unsigned i = 0; i < nunits; ++i)
2493 for (unsigned j = 0; j < word_bytes; ++j)
2494 *elt++ = (i + 1) * word_bytes - j - 1;
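/* For example, for __builtin_bswap32 with four 32-bit elements per vector
   (word_bytes == 4) the loop above builds the byte selector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. the bytes
   within each 32-bit lane are reversed while the lanes stay in place.  */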
2496 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2497 return false;
2499 if (! vec_stmt)
2501 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2502 if (dump_enabled_p ())
2503 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2504 "\n");
2505 if (! PURE_SLP_STMT (stmt_info))
2507 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2508 1, vector_stmt, stmt_info, 0, vect_prologue);
2509 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2510 ncopies, vec_perm, stmt_info, 0, vect_body);
2512 return true;
2515 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2516 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2517 telts[i] = build_int_cst (char_type_node, elts[i]);
2518 tree bswap_vconst = build_vector (char_vectype, telts);
2520 /* Transform. */
2521 vec<tree> vec_oprnds = vNULL;
2522 gimple *new_stmt = NULL;
2523 stmt_vec_info prev_stmt_info = NULL;
2524 for (unsigned j = 0; j < ncopies; j++)
2526 /* Handle uses. */
2527 if (j == 0)
2528 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2529 else
2530 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2532 /* Arguments are ready. Create the new vector stmt. */
2533 unsigned i;
2534 tree vop;
2535 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2537 tree tem = make_ssa_name (char_vectype);
2538 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2539 char_vectype, vop));
2540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2541 tree tem2 = make_ssa_name (char_vectype);
2542 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2543 tem, tem, bswap_vconst);
2544 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2545 tem = make_ssa_name (vectype);
2546 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2547 vectype, tem2));
2548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2549 if (slp_node)
2550 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2553 if (slp_node)
2554 continue;
2556 if (j == 0)
2557 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2558 else
2559 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2561 prev_stmt_info = vinfo_for_stmt (new_stmt);
2564 vec_oprnds.release ();
2565 return true;
2568 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2569 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2570 in a single step. On success, store the binary pack code in
2571 *CONVERT_CODE. */
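/* For example, narrowing a vector of 32-bit integers to a vector of 16-bit
   integers is a single pack operation and succeeds, whereas narrowing
   32-bit to 8-bit integers would need an intermediate step
   (multi_step_cvt != 0) and is rejected.  */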
2573 static bool
2574 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2575 tree_code *convert_code)
2577 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2578 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2579 return false;
2581 tree_code code;
2582 int multi_step_cvt = 0;
2583 auto_vec <tree, 8> interm_types;
2584 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2585 &code, &multi_step_cvt,
2586 &interm_types)
2587 || multi_step_cvt)
2588 return false;
2590 *convert_code = code;
2591 return true;
2594 /* Function vectorizable_call.
2596 Check if GS performs a function call that can be vectorized.
2597 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2598 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2599 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2601 static bool
2602 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2603 slp_tree slp_node)
2605 gcall *stmt;
2606 tree vec_dest;
2607 tree scalar_dest;
2608 tree op, type;
2609 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2610 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2611 tree vectype_out, vectype_in;
2612 int nunits_in;
2613 int nunits_out;
2614 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2615 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2616 vec_info *vinfo = stmt_info->vinfo;
2617 tree fndecl, new_temp, rhs_type;
2618 gimple *def_stmt;
2619 enum vect_def_type dt[3]
2620 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2621 int ndts = 3;
2622 gimple *new_stmt = NULL;
2623 int ncopies, j;
2624 vec<tree> vargs = vNULL;
2625 enum { NARROW, NONE, WIDEN } modifier;
2626 size_t i, nargs;
2627 tree lhs;
2629 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2630 return false;
2632 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2633 && ! vec_stmt)
2634 return false;
2636 /* Is GS a vectorizable call? */
2637 stmt = dyn_cast <gcall *> (gs);
2638 if (!stmt)
2639 return false;
2641 if (gimple_call_internal_p (stmt)
2642 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2643 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2644 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2645 slp_node);
2647 if (gimple_call_lhs (stmt) == NULL_TREE
2648 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2649 return false;
2651 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2653 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2655 /* Process function arguments. */
2656 rhs_type = NULL_TREE;
2657 vectype_in = NULL_TREE;
2658 nargs = gimple_call_num_args (stmt);
2660 /* Bail out if the function has more than three arguments; we do not have
2661 interesting builtin functions to vectorize with more than two arguments
2662 except for fma. Calls with no arguments are not handled either. */
2663 if (nargs == 0 || nargs > 3)
2664 return false;
2666 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2667 if (gimple_call_internal_p (stmt)
2668 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2670 nargs = 0;
2671 rhs_type = unsigned_type_node;
2674 for (i = 0; i < nargs; i++)
2676 tree opvectype;
2678 op = gimple_call_arg (stmt, i);
2680 /* We can only handle calls with arguments of the same type. */
2681 if (rhs_type
2682 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2684 if (dump_enabled_p ())
2685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2686 "argument types differ.\n");
2687 return false;
2689 if (!rhs_type)
2690 rhs_type = TREE_TYPE (op);
2692 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2694 if (dump_enabled_p ())
2695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2696 "use not simple.\n");
2697 return false;
2700 if (!vectype_in)
2701 vectype_in = opvectype;
2702 else if (opvectype
2703 && opvectype != vectype_in)
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2707 "argument vector types differ.\n");
2708 return false;
2711 /* If all arguments are external or constant defs use a vector type with
2712 the same size as the output vector type. */
2713 if (!vectype_in)
2714 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2715 if (vec_stmt)
2716 gcc_assert (vectype_in);
2717 if (!vectype_in)
2719 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2722 "no vectype for scalar type ");
2723 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2724 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2727 return false;
2730 /* FORNOW */
2731 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2732 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2733 if (nunits_in == nunits_out / 2)
2734 modifier = NARROW;
2735 else if (nunits_out == nunits_in)
2736 modifier = NONE;
2737 else if (nunits_out == nunits_in / 2)
2738 modifier = WIDEN;
2739 else
2740 return false;
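/* For example, a call taking V8HI operands and producing a V4SI result
   (nunits_in == 8, nunits_out == 4) is a WIDEN case, while V4SI operands
   producing a V8HI result is a NARROW case; any other ratio is rejected.  */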
2742 /* We only handle functions that do not read or clobber memory. */
2743 if (gimple_vuse (stmt))
2745 if (dump_enabled_p ())
2746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2747 "function reads from or writes to memory.\n");
2748 return false;
2751 /* For now, we only vectorize functions if a target specific builtin
2752 is available. TODO -- in some cases, it might be profitable to
2753 insert the calls for pieces of the vector, in order to be able
2754 to vectorize other operations in the loop. */
2755 fndecl = NULL_TREE;
2756 internal_fn ifn = IFN_LAST;
2757 combined_fn cfn = gimple_call_combined_fn (stmt);
2758 tree callee = gimple_call_fndecl (stmt);
2760 /* First try using an internal function. */
2761 tree_code convert_code = ERROR_MARK;
2762 if (cfn != CFN_LAST
2763 && (modifier == NONE
2764 || (modifier == NARROW
2765 && simple_integer_narrowing (vectype_out, vectype_in,
2766 &convert_code))))
2767 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2768 vectype_in);
2770 /* If that fails, try asking for a target-specific built-in function. */
2771 if (ifn == IFN_LAST)
2773 if (cfn != CFN_LAST)
2774 fndecl = targetm.vectorize.builtin_vectorized_function
2775 (cfn, vectype_out, vectype_in);
2776 else
2777 fndecl = targetm.vectorize.builtin_md_vectorized_function
2778 (callee, vectype_out, vectype_in);
2781 if (ifn == IFN_LAST && !fndecl)
2783 if (cfn == CFN_GOMP_SIMD_LANE
2784 && !slp_node
2785 && loop_vinfo
2786 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2787 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2788 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2789 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2791 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2792 { 0, 1, 2, ... vf - 1 } vector. */
2793 gcc_assert (nargs == 0);
2795 else if (modifier == NONE
2796 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2797 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2798 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2799 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2800 vectype_in, dt);
2801 else
2803 if (dump_enabled_p ())
2804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2805 "function is not vectorizable.\n");
2806 return false;
2810 if (slp_node)
2811 ncopies = 1;
2812 else if (modifier == NARROW && ifn == IFN_LAST)
2813 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2814 else
2815 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2817 /* Sanity check: make sure that at least one copy of the vectorized stmt
2818 needs to be generated. */
2819 gcc_assert (ncopies >= 1);
2821 if (!vec_stmt) /* transformation not required. */
2823 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2824 if (dump_enabled_p ())
2825 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2826 "\n");
2827 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2828 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2829 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2830 vec_promote_demote, stmt_info, 0, vect_body);
2832 return true;
2835 /* Transform. */
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2840 /* Handle def. */
2841 scalar_dest = gimple_call_lhs (stmt);
2842 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2844 prev_stmt_info = NULL;
2845 if (modifier == NONE || ifn != IFN_LAST)
2847 tree prev_res = NULL_TREE;
2848 for (j = 0; j < ncopies; ++j)
2850 /* Build argument list for the vectorized call. */
2851 if (j == 0)
2852 vargs.create (nargs);
2853 else
2854 vargs.truncate (0);
2856 if (slp_node)
2858 auto_vec<vec<tree> > vec_defs (nargs);
2859 vec<tree> vec_oprnds0;
2861 for (i = 0; i < nargs; i++)
2862 vargs.quick_push (gimple_call_arg (stmt, i));
2863 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2864 vec_oprnds0 = vec_defs[0];
2866 /* Arguments are ready. Create the new vector stmt. */
2867 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2869 size_t k;
2870 for (k = 0; k < nargs; k++)
2872 vec<tree> vec_oprndsk = vec_defs[k];
2873 vargs[k] = vec_oprndsk[i];
2875 if (modifier == NARROW)
2877 tree half_res = make_ssa_name (vectype_in);
2878 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2879 gimple_call_set_lhs (new_stmt, half_res);
2880 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2881 if ((i & 1) == 0)
2883 prev_res = half_res;
2884 continue;
2886 new_temp = make_ssa_name (vec_dest);
2887 new_stmt = gimple_build_assign (new_temp, convert_code,
2888 prev_res, half_res);
2890 else
2892 if (ifn != IFN_LAST)
2893 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2894 else
2895 new_stmt = gimple_build_call_vec (fndecl, vargs);
2896 new_temp = make_ssa_name (vec_dest, new_stmt);
2897 gimple_call_set_lhs (new_stmt, new_temp);
2899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2900 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2903 for (i = 0; i < nargs; i++)
2905 vec<tree> vec_oprndsi = vec_defs[i];
2906 vec_oprndsi.release ();
2908 continue;
2911 for (i = 0; i < nargs; i++)
2913 op = gimple_call_arg (stmt, i);
2914 if (j == 0)
2915 vec_oprnd0
2916 = vect_get_vec_def_for_operand (op, stmt);
2917 else
2919 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2920 vec_oprnd0
2921 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2924 vargs.quick_push (vec_oprnd0);
2927 if (gimple_call_internal_p (stmt)
2928 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2930 tree *v = XALLOCAVEC (tree, nunits_out);
2931 int k;
2932 for (k = 0; k < nunits_out; ++k)
2933 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2934 tree cst = build_vector (vectype_out, v);
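/* For example, with nunits_out == 4 the copy for j == 0 gets the constant
   vector { 0, 1, 2, 3 }, the copy for j == 1 gets { 4, 5, 6, 7 }, and so
   on, so together the copies enumerate the simd lanes.  */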
2935 tree new_var
2936 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2937 gimple *init_stmt = gimple_build_assign (new_var, cst);
2938 vect_init_vector_1 (stmt, init_stmt, NULL);
2939 new_temp = make_ssa_name (vec_dest);
2940 new_stmt = gimple_build_assign (new_temp, new_var);
2942 else if (modifier == NARROW)
2944 tree half_res = make_ssa_name (vectype_in);
2945 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2946 gimple_call_set_lhs (new_stmt, half_res);
2947 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2948 if ((j & 1) == 0)
2950 prev_res = half_res;
2951 continue;
2953 new_temp = make_ssa_name (vec_dest);
2954 new_stmt = gimple_build_assign (new_temp, convert_code,
2955 prev_res, half_res);
2957 else
2959 if (ifn != IFN_LAST)
2960 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2961 else
2962 new_stmt = gimple_build_call_vec (fndecl, vargs);
2963 new_temp = make_ssa_name (vec_dest, new_stmt);
2964 gimple_call_set_lhs (new_stmt, new_temp);
2966 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2968 if (j == (modifier == NARROW ? 1 : 0))
2969 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2970 else
2971 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2973 prev_stmt_info = vinfo_for_stmt (new_stmt);
2976 else if (modifier == NARROW)
2978 for (j = 0; j < ncopies; ++j)
2980 /* Build argument list for the vectorized call. */
2981 if (j == 0)
2982 vargs.create (nargs * 2);
2983 else
2984 vargs.truncate (0);
2986 if (slp_node)
2988 auto_vec<vec<tree> > vec_defs (nargs);
2989 vec<tree> vec_oprnds0;
2991 for (i = 0; i < nargs; i++)
2992 vargs.quick_push (gimple_call_arg (stmt, i));
2993 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2994 vec_oprnds0 = vec_defs[0];
2996 /* Arguments are ready. Create the new vector stmt. */
2997 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2999 size_t k;
3000 vargs.truncate (0);
3001 for (k = 0; k < nargs; k++)
3003 vec<tree> vec_oprndsk = vec_defs[k];
3004 vargs.quick_push (vec_oprndsk[i]);
3005 vargs.quick_push (vec_oprndsk[i + 1]);
3007 if (ifn != IFN_LAST)
3008 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
3009 else
3010 new_stmt = gimple_build_call_vec (fndecl, vargs);
3011 new_temp = make_ssa_name (vec_dest, new_stmt);
3012 gimple_call_set_lhs (new_stmt, new_temp);
3013 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3014 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3017 for (i = 0; i < nargs; i++)
3019 vec<tree> vec_oprndsi = vec_defs[i];
3020 vec_oprndsi.release ();
3022 continue;
3025 for (i = 0; i < nargs; i++)
3027 op = gimple_call_arg (stmt, i);
3028 if (j == 0)
3030 vec_oprnd0
3031 = vect_get_vec_def_for_operand (op, stmt);
3032 vec_oprnd1
3033 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3035 else
3037 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3038 vec_oprnd0
3039 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3040 vec_oprnd1
3041 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3044 vargs.quick_push (vec_oprnd0);
3045 vargs.quick_push (vec_oprnd1);
3048 new_stmt = gimple_build_call_vec (fndecl, vargs);
3049 new_temp = make_ssa_name (vec_dest, new_stmt);
3050 gimple_call_set_lhs (new_stmt, new_temp);
3051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3053 if (j == 0)
3054 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3055 else
3056 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3058 prev_stmt_info = vinfo_for_stmt (new_stmt);
3061 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3063 else
3064 /* No current target implements this case. */
3065 return false;
3067 vargs.release ();
3069 /* The call in STMT might prevent it from being removed in dce.
3070 However, we cannot remove it here because of the way the ssa name
3071 it defines is mapped to the new definition. So just replace the
3072 rhs of the statement with something harmless. */
3074 if (slp_node)
3075 return true;
3077 type = TREE_TYPE (scalar_dest);
3078 if (is_pattern_stmt_p (stmt_info))
3079 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3080 else
3081 lhs = gimple_call_lhs (stmt);
3083 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3084 set_vinfo_for_stmt (new_stmt, stmt_info);
3085 set_vinfo_for_stmt (stmt, NULL);
3086 STMT_VINFO_STMT (stmt_info) = new_stmt;
3087 gsi_replace (gsi, new_stmt, false);
3089 return true;
3093 struct simd_call_arg_info
3095 tree vectype;
3096 tree op;
3097 HOST_WIDE_INT linear_step;
3098 enum vect_def_type dt;
3099 unsigned int align;
3100 bool simd_lane_linear;
3103 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3104 is linear within simd lane (but not within whole loop), note it in
3105 *ARGINFO. */
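/* For example, when OP is defined as BASE p+ OFF and OFF is the simd lane
   index (the result of a GOMP_SIMD_LANE call for this loop's simduid)
   multiplied by the constant 16, the walk below records BASE as
   arginfo->op and 16 as arginfo->linear_step.  */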
3107 static void
3108 vect_simd_lane_linear (tree op, struct loop *loop,
3109 struct simd_call_arg_info *arginfo)
3111 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3113 if (!is_gimple_assign (def_stmt)
3114 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3115 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3116 return;
3118 tree base = gimple_assign_rhs1 (def_stmt);
3119 HOST_WIDE_INT linear_step = 0;
3120 tree v = gimple_assign_rhs2 (def_stmt);
3121 while (TREE_CODE (v) == SSA_NAME)
3123 tree t;
3124 def_stmt = SSA_NAME_DEF_STMT (v);
3125 if (is_gimple_assign (def_stmt))
3126 switch (gimple_assign_rhs_code (def_stmt))
3128 case PLUS_EXPR:
3129 t = gimple_assign_rhs2 (def_stmt);
3130 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3131 return;
3132 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3133 v = gimple_assign_rhs1 (def_stmt);
3134 continue;
3135 case MULT_EXPR:
3136 t = gimple_assign_rhs2 (def_stmt);
3137 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3138 return;
3139 linear_step = tree_to_shwi (t);
3140 v = gimple_assign_rhs1 (def_stmt);
3141 continue;
3142 CASE_CONVERT:
3143 t = gimple_assign_rhs1 (def_stmt);
3144 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3145 || (TYPE_PRECISION (TREE_TYPE (v))
3146 < TYPE_PRECISION (TREE_TYPE (t))))
3147 return;
3148 if (!linear_step)
3149 linear_step = 1;
3150 v = t;
3151 continue;
3152 default:
3153 return;
3155 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3156 && loop->simduid
3157 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3158 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3159 == loop->simduid))
3161 if (!linear_step)
3162 linear_step = 1;
3163 arginfo->linear_step = linear_step;
3164 arginfo->op = base;
3165 arginfo->simd_lane_linear = true;
3166 return;
3171 /* Function vectorizable_simd_clone_call.
3173 Check if STMT performs a function call that can be vectorized
3174 by calling a simd clone of the function.
3175 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3176 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3177 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3179 static bool
3180 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3181 gimple **vec_stmt, slp_tree slp_node)
3183 tree vec_dest;
3184 tree scalar_dest;
3185 tree op, type;
3186 tree vec_oprnd0 = NULL_TREE;
3187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3188 tree vectype;
3189 unsigned int nunits;
3190 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3191 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3192 vec_info *vinfo = stmt_info->vinfo;
3193 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3194 tree fndecl, new_temp;
3195 gimple *def_stmt;
3196 gimple *new_stmt = NULL;
3197 int ncopies, j;
3198 auto_vec<simd_call_arg_info> arginfo;
3199 vec<tree> vargs = vNULL;
3200 size_t i, nargs;
3201 tree lhs, rtype, ratype;
3202 vec<constructor_elt, va_gc> *ret_ctor_elts;
3204 /* Is STMT a vectorizable call? */
3205 if (!is_gimple_call (stmt))
3206 return false;
3208 fndecl = gimple_call_fndecl (stmt);
3209 if (fndecl == NULL_TREE)
3210 return false;
3212 struct cgraph_node *node = cgraph_node::get (fndecl);
3213 if (node == NULL || node->simd_clones == NULL)
3214 return false;
3216 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3217 return false;
3219 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3220 && ! vec_stmt)
3221 return false;
3223 if (gimple_call_lhs (stmt)
3224 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3225 return false;
3227 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3229 vectype = STMT_VINFO_VECTYPE (stmt_info);
3231 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3232 return false;
3234 /* FORNOW */
3235 if (slp_node)
3236 return false;
3238 /* Process function arguments. */
3239 nargs = gimple_call_num_args (stmt);
3241 /* Bail out if the function has zero arguments. */
3242 if (nargs == 0)
3243 return false;
3245 arginfo.reserve (nargs, true);
3247 for (i = 0; i < nargs; i++)
3249 simd_call_arg_info thisarginfo;
3250 affine_iv iv;
3252 thisarginfo.linear_step = 0;
3253 thisarginfo.align = 0;
3254 thisarginfo.op = NULL_TREE;
3255 thisarginfo.simd_lane_linear = false;
3257 op = gimple_call_arg (stmt, i);
3258 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3259 &thisarginfo.vectype)
3260 || thisarginfo.dt == vect_uninitialized_def)
3262 if (dump_enabled_p ())
3263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3264 "use not simple.\n");
3265 return false;
3268 if (thisarginfo.dt == vect_constant_def
3269 || thisarginfo.dt == vect_external_def)
3270 gcc_assert (thisarginfo.vectype == NULL_TREE);
3271 else
3272 gcc_assert (thisarginfo.vectype != NULL_TREE);
3274 /* For linear arguments, the analyze phase should have saved
3275 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3276 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3277 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3279 gcc_assert (vec_stmt);
3280 thisarginfo.linear_step
3281 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3282 thisarginfo.op
3283 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3284 thisarginfo.simd_lane_linear
3285 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3286 == boolean_true_node);
3287 /* If the loop has been peeled for alignment, we need to adjust it. */
3288 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3289 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3290 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3292 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3293 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3294 tree opt = TREE_TYPE (thisarginfo.op);
3295 bias = fold_convert (TREE_TYPE (step), bias);
3296 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3297 thisarginfo.op
3298 = fold_build2 (POINTER_TYPE_P (opt)
3299 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3300 thisarginfo.op, bias);
3303 else if (!vec_stmt
3304 && thisarginfo.dt != vect_constant_def
3305 && thisarginfo.dt != vect_external_def
3306 && loop_vinfo
3307 && TREE_CODE (op) == SSA_NAME
3308 && simple_iv (loop, loop_containing_stmt (stmt), op,
3309 &iv, false)
3310 && tree_fits_shwi_p (iv.step))
3312 thisarginfo.linear_step = tree_to_shwi (iv.step);
3313 thisarginfo.op = iv.base;
3315 else if ((thisarginfo.dt == vect_constant_def
3316 || thisarginfo.dt == vect_external_def)
3317 && POINTER_TYPE_P (TREE_TYPE (op)))
3318 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3319 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3320 linear too. */
3321 if (POINTER_TYPE_P (TREE_TYPE (op))
3322 && !thisarginfo.linear_step
3323 && !vec_stmt
3324 && thisarginfo.dt != vect_constant_def
3325 && thisarginfo.dt != vect_external_def
3326 && loop_vinfo
3327 && !slp_node
3328 && TREE_CODE (op) == SSA_NAME)
3329 vect_simd_lane_linear (op, loop, &thisarginfo);
3331 arginfo.quick_push (thisarginfo);
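/* Choose the best simd clone by accumulating a badness score for each
   candidate: for example, with a vectorization factor of 8 a clone with
   simdlen 4 costs (log2 (8) - log2 (4)) * 1024 == 1024, a target-disfavored
   variant adds 512 per unit of target badness, and each uniform or linear
   argument that would have to be passed in a vector argument adds 64; the
   candidate with the lowest total is chosen.  */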
3334 unsigned int badness = 0;
3335 struct cgraph_node *bestn = NULL;
3336 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3337 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3338 else
3339 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3340 n = n->simdclone->next_clone)
3342 unsigned int this_badness = 0;
3343 if (n->simdclone->simdlen
3344 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3345 || n->simdclone->nargs != nargs)
3346 continue;
3347 if (n->simdclone->simdlen
3348 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3349 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3350 - exact_log2 (n->simdclone->simdlen)) * 1024;
3351 if (n->simdclone->inbranch)
3352 this_badness += 2048;
3353 int target_badness = targetm.simd_clone.usable (n);
3354 if (target_badness < 0)
3355 continue;
3356 this_badness += target_badness * 512;
3357 /* FORNOW: Have to add code to add the mask argument. */
3358 if (n->simdclone->inbranch)
3359 continue;
3360 for (i = 0; i < nargs; i++)
3362 switch (n->simdclone->args[i].arg_type)
3364 case SIMD_CLONE_ARG_TYPE_VECTOR:
3365 if (!useless_type_conversion_p
3366 (n->simdclone->args[i].orig_type,
3367 TREE_TYPE (gimple_call_arg (stmt, i))))
3368 i = -1;
3369 else if (arginfo[i].dt == vect_constant_def
3370 || arginfo[i].dt == vect_external_def
3371 || arginfo[i].linear_step)
3372 this_badness += 64;
3373 break;
3374 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3375 if (arginfo[i].dt != vect_constant_def
3376 && arginfo[i].dt != vect_external_def)
3377 i = -1;
3378 break;
3379 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3380 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3381 if (arginfo[i].dt == vect_constant_def
3382 || arginfo[i].dt == vect_external_def
3383 || (arginfo[i].linear_step
3384 != n->simdclone->args[i].linear_step))
3385 i = -1;
3386 break;
3387 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3388 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3389 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3390 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3391 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3392 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3393 /* FORNOW */
3394 i = -1;
3395 break;
3396 case SIMD_CLONE_ARG_TYPE_MASK:
3397 gcc_unreachable ();
3399 if (i == (size_t) -1)
3400 break;
3401 if (n->simdclone->args[i].alignment > arginfo[i].align)
3403 i = -1;
3404 break;
3406 if (arginfo[i].align)
3407 this_badness += (exact_log2 (arginfo[i].align)
3408 - exact_log2 (n->simdclone->args[i].alignment));
3410 if (i == (size_t) -1)
3411 continue;
3412 if (bestn == NULL || this_badness < badness)
3414 bestn = n;
3415 badness = this_badness;
3419 if (bestn == NULL)
3420 return false;
3422 for (i = 0; i < nargs; i++)
3423 if ((arginfo[i].dt == vect_constant_def
3424 || arginfo[i].dt == vect_external_def)
3425 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3427 arginfo[i].vectype
3428 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3429 i)));
3430 if (arginfo[i].vectype == NULL
3431 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3432 > bestn->simdclone->simdlen))
3433 return false;
3436 fndecl = bestn->decl;
3437 nunits = bestn->simdclone->simdlen;
3438 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3440 /* If the function isn't const, only allow it in simd loops where the
3441 user has asserted that at least nunits consecutive iterations can be
3442 performed using SIMD instructions. */
3443 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3444 && gimple_vuse (stmt))
3445 return false;
3447 /* Sanity check: make sure that at least one copy of the vectorized stmt
3448 needs to be generated. */
3449 gcc_assert (ncopies >= 1);
3451 if (!vec_stmt) /* transformation not required. */
3453 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3454 for (i = 0; i < nargs; i++)
3455 if ((bestn->simdclone->args[i].arg_type
3456 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3457 || (bestn->simdclone->args[i].arg_type
3458 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3460 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3461 + 1);
3462 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3463 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3464 ? size_type_node : TREE_TYPE (arginfo[i].op);
3465 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3466 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3467 tree sll = arginfo[i].simd_lane_linear
3468 ? boolean_true_node : boolean_false_node;
3469 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3471 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3472 if (dump_enabled_p ())
3473 dump_printf_loc (MSG_NOTE, vect_location,
3474 "=== vectorizable_simd_clone_call ===\n");
3475 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3476 return true;
3479 /* Transform. */
3481 if (dump_enabled_p ())
3482 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3484 /* Handle def. */
3485 scalar_dest = gimple_call_lhs (stmt);
3486 vec_dest = NULL_TREE;
3487 rtype = NULL_TREE;
3488 ratype = NULL_TREE;
3489 if (scalar_dest)
3491 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3492 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3493 if (TREE_CODE (rtype) == ARRAY_TYPE)
3495 ratype = rtype;
3496 rtype = TREE_TYPE (ratype);
3500 prev_stmt_info = NULL;
3501 for (j = 0; j < ncopies; ++j)
3503 /* Build argument list for the vectorized call. */
3504 if (j == 0)
3505 vargs.create (nargs);
3506 else
3507 vargs.truncate (0);
3509 for (i = 0; i < nargs; i++)
3511 unsigned int k, l, m, o;
3512 tree atype;
3513 op = gimple_call_arg (stmt, i);
3514 switch (bestn->simdclone->args[i].arg_type)
3516 case SIMD_CLONE_ARG_TYPE_VECTOR:
3517 atype = bestn->simdclone->args[i].vector_type;
3518 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3519 for (m = j * o; m < (j + 1) * o; m++)
3521 if (TYPE_VECTOR_SUBPARTS (atype)
3522 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3524 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3525 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3526 / TYPE_VECTOR_SUBPARTS (atype));
3527 gcc_assert ((k & (k - 1)) == 0);
3528 if (m == 0)
3529 vec_oprnd0
3530 = vect_get_vec_def_for_operand (op, stmt);
3531 else
3533 vec_oprnd0 = arginfo[i].op;
3534 if ((m & (k - 1)) == 0)
3535 vec_oprnd0
3536 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3537 vec_oprnd0);
3539 arginfo[i].op = vec_oprnd0;
3540 vec_oprnd0
3541 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3542 size_int (prec),
3543 bitsize_int ((m & (k - 1)) * prec));
3544 new_stmt
3545 = gimple_build_assign (make_ssa_name (atype),
3546 vec_oprnd0);
3547 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3548 vargs.safe_push (gimple_assign_lhs (new_stmt));
3550 else
3552 k = (TYPE_VECTOR_SUBPARTS (atype)
3553 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3554 gcc_assert ((k & (k - 1)) == 0);
3555 vec<constructor_elt, va_gc> *ctor_elts;
3556 if (k != 1)
3557 vec_alloc (ctor_elts, k);
3558 else
3559 ctor_elts = NULL;
3560 for (l = 0; l < k; l++)
3562 if (m == 0 && l == 0)
3563 vec_oprnd0
3564 = vect_get_vec_def_for_operand (op, stmt);
3565 else
3566 vec_oprnd0
3567 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3568 arginfo[i].op);
3569 arginfo[i].op = vec_oprnd0;
3570 if (k == 1)
3571 break;
3572 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3573 vec_oprnd0);
3575 if (k == 1)
3576 vargs.safe_push (vec_oprnd0);
3577 else
3579 vec_oprnd0 = build_constructor (atype, ctor_elts);
3580 new_stmt
3581 = gimple_build_assign (make_ssa_name (atype),
3582 vec_oprnd0);
3583 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3584 vargs.safe_push (gimple_assign_lhs (new_stmt));
3588 break;
3589 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3590 vargs.safe_push (op);
3591 break;
3592 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3593 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3594 if (j == 0)
3596 gimple_seq stmts;
3597 arginfo[i].op
3598 = force_gimple_operand (arginfo[i].op, &stmts, true,
3599 NULL_TREE);
3600 if (stmts != NULL)
3602 basic_block new_bb;
3603 edge pe = loop_preheader_edge (loop);
3604 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3605 gcc_assert (!new_bb);
3607 if (arginfo[i].simd_lane_linear)
3609 vargs.safe_push (arginfo[i].op);
3610 break;
3612 tree phi_res = copy_ssa_name (op);
3613 gphi *new_phi = create_phi_node (phi_res, loop->header);
3614 set_vinfo_for_stmt (new_phi,
3615 new_stmt_vec_info (new_phi, loop_vinfo));
3616 add_phi_arg (new_phi, arginfo[i].op,
3617 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3618 enum tree_code code
3619 = POINTER_TYPE_P (TREE_TYPE (op))
3620 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3621 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3622 ? sizetype : TREE_TYPE (op);
3623 widest_int cst
3624 = wi::mul (bestn->simdclone->args[i].linear_step,
3625 ncopies * nunits);
3626 tree tcst = wide_int_to_tree (type, cst);
3627 tree phi_arg = copy_ssa_name (op);
3628 new_stmt
3629 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3630 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3631 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3632 set_vinfo_for_stmt (new_stmt,
3633 new_stmt_vec_info (new_stmt, loop_vinfo));
3634 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3635 UNKNOWN_LOCATION);
3636 arginfo[i].op = phi_res;
3637 vargs.safe_push (phi_res);
3639 else
3641 enum tree_code code
3642 = POINTER_TYPE_P (TREE_TYPE (op))
3643 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3644 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3645 ? sizetype : TREE_TYPE (op);
3646 widest_int cst
3647 = wi::mul (bestn->simdclone->args[i].linear_step,
3648 j * nunits);
3649 tree tcst = wide_int_to_tree (type, cst);
3650 new_temp = make_ssa_name (TREE_TYPE (op));
3651 new_stmt = gimple_build_assign (new_temp, code,
3652 arginfo[i].op, tcst);
3653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3654 vargs.safe_push (new_temp);
3656 break;
3657 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3658 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3659 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3660 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3661 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3662 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3663 default:
3664 gcc_unreachable ();
3668 new_stmt = gimple_build_call_vec (fndecl, vargs);
3669 if (vec_dest)
3671 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3672 if (ratype)
3673 new_temp = create_tmp_var (ratype);
3674 else if (TYPE_VECTOR_SUBPARTS (vectype)
3675 == TYPE_VECTOR_SUBPARTS (rtype))
3676 new_temp = make_ssa_name (vec_dest, new_stmt);
3677 else
3678 new_temp = make_ssa_name (rtype, new_stmt);
3679 gimple_call_set_lhs (new_stmt, new_temp);
3681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3683 if (vec_dest)
3685 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3687 unsigned int k, l;
3688 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3689 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3690 gcc_assert ((k & (k - 1)) == 0);
3691 for (l = 0; l < k; l++)
3693 tree t;
3694 if (ratype)
3696 t = build_fold_addr_expr (new_temp);
3697 t = build2 (MEM_REF, vectype, t,
3698 build_int_cst (TREE_TYPE (t),
3699 l * prec / BITS_PER_UNIT));
3701 else
3702 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3703 size_int (prec), bitsize_int (l * prec));
3704 new_stmt
3705 = gimple_build_assign (make_ssa_name (vectype), t);
3706 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3707 if (j == 0 && l == 0)
3708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3709 else
3710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3712 prev_stmt_info = vinfo_for_stmt (new_stmt);
3715 if (ratype)
3717 tree clobber = build_constructor (ratype, NULL);
3718 TREE_THIS_VOLATILE (clobber) = 1;
3719 new_stmt = gimple_build_assign (new_temp, clobber);
3720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3722 continue;
3724 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3726 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3727 / TYPE_VECTOR_SUBPARTS (rtype));
3728 gcc_assert ((k & (k - 1)) == 0);
3729 if ((j & (k - 1)) == 0)
3730 vec_alloc (ret_ctor_elts, k);
3731 if (ratype)
3733 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3734 for (m = 0; m < o; m++)
3736 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3737 size_int (m), NULL_TREE, NULL_TREE);
3738 new_stmt
3739 = gimple_build_assign (make_ssa_name (rtype), tem);
3740 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3741 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3742 gimple_assign_lhs (new_stmt));
3744 tree clobber = build_constructor (ratype, NULL);
3745 TREE_THIS_VOLATILE (clobber) = 1;
3746 new_stmt = gimple_build_assign (new_temp, clobber);
3747 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3749 else
3750 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3751 if ((j & (k - 1)) != k - 1)
3752 continue;
3753 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3754 new_stmt
3755 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3756 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3758 if ((unsigned) j == k - 1)
3759 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3760 else
3761 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3763 prev_stmt_info = vinfo_for_stmt (new_stmt);
3764 continue;
3766 else if (ratype)
3768 tree t = build_fold_addr_expr (new_temp);
3769 t = build2 (MEM_REF, vectype, t,
3770 build_int_cst (TREE_TYPE (t), 0));
3771 new_stmt
3772 = gimple_build_assign (make_ssa_name (vec_dest), t);
3773 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3774 tree clobber = build_constructor (ratype, NULL);
3775 TREE_THIS_VOLATILE (clobber) = 1;
3776 vect_finish_stmt_generation (stmt,
3777 gimple_build_assign (new_temp,
3778 clobber), gsi);
3782 if (j == 0)
3783 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3784 else
3785 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3787 prev_stmt_info = vinfo_for_stmt (new_stmt);
3790 vargs.release ();
3792 /* The call in STMT might prevent it from being removed in dce.
3793 However, we cannot remove it here, due to the way the ssa name
3794 it defines is mapped to the new definition. So just replace the
3795 rhs of the statement with something harmless. */
3797 if (slp_node)
3798 return true;
3800 if (scalar_dest)
3802 type = TREE_TYPE (scalar_dest);
3803 if (is_pattern_stmt_p (stmt_info))
3804 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3805 else
3806 lhs = gimple_call_lhs (stmt);
3807 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3809 else
3810 new_stmt = gimple_build_nop ();
3811 set_vinfo_for_stmt (new_stmt, stmt_info);
3812 set_vinfo_for_stmt (stmt, NULL);
3813 STMT_VINFO_STMT (stmt_info) = new_stmt;
3814 gsi_replace (gsi, new_stmt, true);
3815 unlink_stmt_vdef (stmt);
3817 return true;
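/* Illustrative example (hypothetical SSA names): after the SIMD clone call
   has been vectorized, a scalar call such as

       x_1 = foo (a_2);

   is not deleted here; its rhs is merely replaced with a harmless constant,

       x_1 = 0;

   (or the whole statement becomes a nop when there is no lhs), leaving the
   actual removal to a later DCE pass.  */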
3821 /* Function vect_gen_widened_results_half
3823 Create a vector stmt whose code, number of arguments, and result
3824 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3825 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3826 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3827 needs to be created (DECL is a function-decl of a target-builtin).
3828 STMT is the original scalar stmt that we are vectorizing. */
3830 static gimple *
3831 vect_gen_widened_results_half (enum tree_code code,
3832 tree decl,
3833 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3834 tree vec_dest, gimple_stmt_iterator *gsi,
3835 gimple *stmt)
3837 gimple *new_stmt;
3838 tree new_temp;
3840 /* Generate half of the widened result: */
3841 if (code == CALL_EXPR)
3843 /* Target specific support */
3844 if (op_type == binary_op)
3845 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3846 else
3847 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3848 new_temp = make_ssa_name (vec_dest, new_stmt);
3849 gimple_call_set_lhs (new_stmt, new_temp);
3851 else
3853 /* Generic support */
3854 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3855 if (op_type != binary_op)
3856 vec_oprnd1 = NULL;
3857 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3858 new_temp = make_ssa_name (vec_dest, new_stmt);
3859 gimple_assign_set_lhs (new_stmt, new_temp);
3861 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3863 return new_stmt;
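/* Illustrative example (hypothetical names): for a generic widening code the
   helper emits an assignment such as

       vect_lo_5 = [vec_unpack_lo_expr] vect_x_3;

   whereas for CODE == CALL_EXPR it emits a call to the target builtin DECL
   with one or two vector arguments, depending on OP_TYPE.  */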
3867 /* Get vectorized definitions for loop-based vectorization. For the first
3868 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3869 the scalar operand), and for the rest we get a copy with
3870 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3871 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3872 The vectors are collected into VEC_OPRNDS. */
3874 static void
3875 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3876 vec<tree> *vec_oprnds, int multi_step_cvt)
3878 tree vec_oprnd;
3880 /* Get first vector operand. */
3881 /* All the vector operands except the very first one (which is the
3882 scalar operand) are stmt copies. */
3883 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3884 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3885 else
3886 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3888 vec_oprnds->quick_push (vec_oprnd);
3890 /* Get second vector operand. */
3891 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3892 vec_oprnds->quick_push (vec_oprnd);
3894 *oprnd = vec_oprnd;
3896 /* For conversion in multiple steps, continue to get operands
3897 recursively. */
3898 if (multi_step_cvt)
3899 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
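/* For example, when the caller passes MULTI_STEP_CVT == 1 the function
   recurses once and collects four vector defs in VEC_OPRNDS: on the first
   copy, the def obtained for the scalar operand plus three successive stmt
   copies, which is what a two-step narrowing ultimately consumes per result
   vector.  */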
3903 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3904 For multi-step conversions store the resulting vectors and call the function
3905 recursively. */
3907 static void
3908 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3909 int multi_step_cvt, gimple *stmt,
3910 vec<tree> vec_dsts,
3911 gimple_stmt_iterator *gsi,
3912 slp_tree slp_node, enum tree_code code,
3913 stmt_vec_info *prev_stmt_info)
3915 unsigned int i;
3916 tree vop0, vop1, new_tmp, vec_dest;
3917 gimple *new_stmt;
3918 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3920 vec_dest = vec_dsts.pop ();
3922 for (i = 0; i < vec_oprnds->length (); i += 2)
3924 /* Create demotion operation. */
3925 vop0 = (*vec_oprnds)[i];
3926 vop1 = (*vec_oprnds)[i + 1];
3927 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3928 new_tmp = make_ssa_name (vec_dest, new_stmt);
3929 gimple_assign_set_lhs (new_stmt, new_tmp);
3930 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3932 if (multi_step_cvt)
3933 /* Store the resulting vector for next recursive call. */
3934 (*vec_oprnds)[i/2] = new_tmp;
3935 else
3937 /* This is the last step of the conversion sequence. Store the
3938 vectors in SLP_NODE or in the vector info of the scalar statement
3939 (or in the STMT_VINFO_RELATED_STMT chain). */
3940 if (slp_node)
3941 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3942 else
3944 if (!*prev_stmt_info)
3945 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3946 else
3947 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3949 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3954 /* For multi-step demotion operations we first generate demotion operations
3955 from the source type to the intermediate types, and then combine the
3956 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3957 type. */
3958 if (multi_step_cvt)
3960 /* At each level of recursion we have half of the operands we had at the
3961 previous level. */
3962 vec_oprnds->truncate ((i+1)/2);
3963 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3964 stmt, vec_dsts, gsi, slp_node,
3965 VEC_PACK_TRUNC_EXPR,
3966 prev_stmt_info);
3969 vec_dsts.quick_push (vec_dest);
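/* Illustrative example (hypothetical SSA names): a single-step demotion
   packs the operands pairwise, e.g. two V4SI vectors into one V8HI vector:

       vect_t_7 = VEC_PACK_TRUNC_EXPR <vect_x_3, vect_x_4>;

   For a multi-step demotion the packed results are stored back into
   VEC_OPRNDS and the function recurses with VEC_PACK_TRUNC_EXPR until the
   destination vector type is reached.  */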
3973 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3974 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3975 the resulting vectors and call the function recursively. */
3977 static void
3978 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3979 vec<tree> *vec_oprnds1,
3980 gimple *stmt, tree vec_dest,
3981 gimple_stmt_iterator *gsi,
3982 enum tree_code code1,
3983 enum tree_code code2, tree decl1,
3984 tree decl2, int op_type)
3986 int i;
3987 tree vop0, vop1, new_tmp1, new_tmp2;
3988 gimple *new_stmt1, *new_stmt2;
3989 vec<tree> vec_tmp = vNULL;
3991 vec_tmp.create (vec_oprnds0->length () * 2);
3992 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3994 if (op_type == binary_op)
3995 vop1 = (*vec_oprnds1)[i];
3996 else
3997 vop1 = NULL_TREE;
3999 /* Generate the two halves of promotion operation. */
4000 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4001 op_type, vec_dest, gsi, stmt);
4002 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4003 op_type, vec_dest, gsi, stmt);
4004 if (is_gimple_call (new_stmt1))
4006 new_tmp1 = gimple_call_lhs (new_stmt1);
4007 new_tmp2 = gimple_call_lhs (new_stmt2);
4009 else
4011 new_tmp1 = gimple_assign_lhs (new_stmt1);
4012 new_tmp2 = gimple_assign_lhs (new_stmt2);
4015 /* Store the results for the next step. */
4016 vec_tmp.quick_push (new_tmp1);
4017 vec_tmp.quick_push (new_tmp2);
4020 vec_oprnds0->release ();
4021 *vec_oprnds0 = vec_tmp;
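/* Illustrative example (hypothetical SSA names): promoting each V8HI operand
   to V4SI yields two halves per input vector,

       vect_lo_5 = [vec_unpack_lo_expr] vect_x_3;
       vect_hi_6 = [vec_unpack_hi_expr] vect_x_3;

   so on return VEC_OPRNDS0 holds twice as many defs as on entry.  */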
4025 /* Check if STMT performs a conversion operation, that can be vectorized.
4026 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4027 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4028 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4030 static bool
4031 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4032 gimple **vec_stmt, slp_tree slp_node)
4034 tree vec_dest;
4035 tree scalar_dest;
4036 tree op0, op1 = NULL_TREE;
4037 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4039 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4040 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4041 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4042 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4043 tree new_temp;
4044 gimple *def_stmt;
4045 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4046 int ndts = 2;
4047 gimple *new_stmt = NULL;
4048 stmt_vec_info prev_stmt_info;
4049 int nunits_in;
4050 int nunits_out;
4051 tree vectype_out, vectype_in;
4052 int ncopies, i, j;
4053 tree lhs_type, rhs_type;
4054 enum { NARROW, NONE, WIDEN } modifier;
4055 vec<tree> vec_oprnds0 = vNULL;
4056 vec<tree> vec_oprnds1 = vNULL;
4057 tree vop0;
4058 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4059 vec_info *vinfo = stmt_info->vinfo;
4060 int multi_step_cvt = 0;
4061 vec<tree> interm_types = vNULL;
4062 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4063 int op_type;
4064 machine_mode rhs_mode;
4065 unsigned short fltsz;
4067 /* Is STMT a vectorizable conversion? */
4069 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4070 return false;
4072 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4073 && ! vec_stmt)
4074 return false;
4076 if (!is_gimple_assign (stmt))
4077 return false;
4079 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4080 return false;
4082 code = gimple_assign_rhs_code (stmt);
4083 if (!CONVERT_EXPR_CODE_P (code)
4084 && code != FIX_TRUNC_EXPR
4085 && code != FLOAT_EXPR
4086 && code != WIDEN_MULT_EXPR
4087 && code != WIDEN_LSHIFT_EXPR)
4088 return false;
4090 op_type = TREE_CODE_LENGTH (code);
4092 /* Check types of lhs and rhs. */
4093 scalar_dest = gimple_assign_lhs (stmt);
4094 lhs_type = TREE_TYPE (scalar_dest);
4095 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4097 op0 = gimple_assign_rhs1 (stmt);
4098 rhs_type = TREE_TYPE (op0);
4100 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4101 && !((INTEGRAL_TYPE_P (lhs_type)
4102 && INTEGRAL_TYPE_P (rhs_type))
4103 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4104 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4105 return false;
4107 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4108 && ((INTEGRAL_TYPE_P (lhs_type)
4109 && (TYPE_PRECISION (lhs_type)
4110 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4111 || (INTEGRAL_TYPE_P (rhs_type)
4112 && (TYPE_PRECISION (rhs_type)
4113 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4115 if (dump_enabled_p ())
4116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4117 "type conversion to/from bit-precision unsupported."
4118 "\n");
4119 return false;
4122 /* Check the operands of the operation. */
4123 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4125 if (dump_enabled_p ())
4126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4127 "use not simple.\n");
4128 return false;
4130 if (op_type == binary_op)
4132 bool ok;
4134 op1 = gimple_assign_rhs2 (stmt);
4135 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4136 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4137 OP1. */
4138 if (CONSTANT_CLASS_P (op0))
4139 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4140 else
4141 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4143 if (!ok)
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4147 "use not simple.\n");
4148 return false;
4152 /* If op0 is an external or constant def, use a vector type of
4153 the same size as the output vector type. */
4154 if (!vectype_in)
4155 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4156 if (vec_stmt)
4157 gcc_assert (vectype_in);
4158 if (!vectype_in)
4160 if (dump_enabled_p ())
4162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4163 "no vectype for scalar type ");
4164 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4165 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4168 return false;
4171 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4172 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4174 if (dump_enabled_p ())
4176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4177 "can't convert between boolean and non "
4178 "boolean vectors");
4179 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4180 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4183 return false;
4186 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4187 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4188 if (nunits_in < nunits_out)
4189 modifier = NARROW;
4190 else if (nunits_out == nunits_in)
4191 modifier = NONE;
4192 else
4193 modifier = WIDEN;
4195 /* Multiple types in SLP are handled by creating the appropriate number of
4196 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4197 case of SLP. */
4198 if (slp_node)
4199 ncopies = 1;
4200 else if (modifier == NARROW)
4201 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4202 else
4203 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4205 /* Sanity check: make sure that at least one copy of the vectorized stmt
4206 needs to be generated. */
4207 gcc_assert (ncopies >= 1);
4209 /* Supportable by target? */
4210 switch (modifier)
4212 case NONE:
4213 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4214 return false;
4215 if (supportable_convert_operation (code, vectype_out, vectype_in,
4216 &decl1, &code1))
4217 break;
4218 /* FALLTHRU */
4219 unsupported:
4220 if (dump_enabled_p ())
4221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4222 "conversion not supported by target.\n");
4223 return false;
4225 case WIDEN:
4226 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4227 &code1, &code2, &multi_step_cvt,
4228 &interm_types))
4230 /* Binary widening operation can only be supported directly by the
4231 architecture. */
4232 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4233 break;
4236 if (code != FLOAT_EXPR
4237 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4238 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4239 goto unsupported;
4241 rhs_mode = TYPE_MODE (rhs_type);
4242 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4243 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4244 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4245 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4247 cvt_type
4248 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4249 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4250 if (cvt_type == NULL_TREE)
4251 goto unsupported;
4253 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4255 if (!supportable_convert_operation (code, vectype_out,
4256 cvt_type, &decl1, &codecvt1))
4257 goto unsupported;
4259 else if (!supportable_widening_operation (code, stmt, vectype_out,
4260 cvt_type, &codecvt1,
4261 &codecvt2, &multi_step_cvt,
4262 &interm_types))
4263 continue;
4264 else
4265 gcc_assert (multi_step_cvt == 0);
4267 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4268 vectype_in, &code1, &code2,
4269 &multi_step_cvt, &interm_types))
4270 break;
4273 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4274 goto unsupported;
4276 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4277 codecvt2 = ERROR_MARK;
4278 else
4280 multi_step_cvt++;
4281 interm_types.safe_push (cvt_type);
4282 cvt_type = NULL_TREE;
4284 break;
4286 case NARROW:
4287 gcc_assert (op_type == unary_op);
4288 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4289 &code1, &multi_step_cvt,
4290 &interm_types))
4291 break;
4293 if (code != FIX_TRUNC_EXPR
4294 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4295 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4296 goto unsupported;
4298 rhs_mode = TYPE_MODE (rhs_type);
4299 cvt_type
4300 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4301 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4302 if (cvt_type == NULL_TREE)
4303 goto unsupported;
4304 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4305 &decl1, &codecvt1))
4306 goto unsupported;
4307 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4308 &code1, &multi_step_cvt,
4309 &interm_types))
4310 break;
4311 goto unsupported;
4313 default:
4314 gcc_unreachable ();
4317 if (!vec_stmt) /* transformation not required. */
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_NOTE, vect_location,
4321 "=== vectorizable_conversion ===\n");
4322 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4324 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4325 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4327 else if (modifier == NARROW)
4329 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4330 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4332 else
4334 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4335 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4337 interm_types.release ();
4338 return true;
4341 /* Transform. */
4342 if (dump_enabled_p ())
4343 dump_printf_loc (MSG_NOTE, vect_location,
4344 "transform conversion. ncopies = %d.\n", ncopies);
4346 if (op_type == binary_op)
4348 if (CONSTANT_CLASS_P (op0))
4349 op0 = fold_convert (TREE_TYPE (op1), op0);
4350 else if (CONSTANT_CLASS_P (op1))
4351 op1 = fold_convert (TREE_TYPE (op0), op1);
4354 /* In case of multi-step conversion, we first generate conversion operations
4355 to the intermediate types, and then from those types to the final one.
4356 We create vector destinations for the intermediate types (TYPES) received
4357 from supportable_*_operation, and store them in the correct order
4358 for future use in vect_create_vectorized_*_stmts (). */
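/* For example (illustrative only), narrowing int to char on a target that
   can only pack one step at a time is a multi-step conversion: the int
   vectors are first packed into an intermediate short vector type, and
   those results are packed again into the final char vector type, each
   step using its own destination from VEC_DSTS.  */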
4359 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4360 vec_dest = vect_create_destination_var (scalar_dest,
4361 (cvt_type && modifier == WIDEN)
4362 ? cvt_type : vectype_out);
4363 vec_dsts.quick_push (vec_dest);
4365 if (multi_step_cvt)
4367 for (i = interm_types.length () - 1;
4368 interm_types.iterate (i, &intermediate_type); i--)
4370 vec_dest = vect_create_destination_var (scalar_dest,
4371 intermediate_type);
4372 vec_dsts.quick_push (vec_dest);
4376 if (cvt_type)
4377 vec_dest = vect_create_destination_var (scalar_dest,
4378 modifier == WIDEN
4379 ? vectype_out : cvt_type);
4381 if (!slp_node)
4383 if (modifier == WIDEN)
4385 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4386 if (op_type == binary_op)
4387 vec_oprnds1.create (1);
4389 else if (modifier == NARROW)
4390 vec_oprnds0.create (
4391 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4393 else if (code == WIDEN_LSHIFT_EXPR)
4394 vec_oprnds1.create (slp_node->vec_stmts_size);
4396 last_oprnd = op0;
4397 prev_stmt_info = NULL;
4398 switch (modifier)
4400 case NONE:
4401 for (j = 0; j < ncopies; j++)
4403 if (j == 0)
4404 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4405 else
4406 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4408 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4410 /* Arguments are ready, create the new vector stmt. */
4411 if (code1 == CALL_EXPR)
4413 new_stmt = gimple_build_call (decl1, 1, vop0);
4414 new_temp = make_ssa_name (vec_dest, new_stmt);
4415 gimple_call_set_lhs (new_stmt, new_temp);
4417 else
4419 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4420 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4421 new_temp = make_ssa_name (vec_dest, new_stmt);
4422 gimple_assign_set_lhs (new_stmt, new_temp);
4425 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4426 if (slp_node)
4427 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4428 else
4430 if (!prev_stmt_info)
4431 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4432 else
4433 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4434 prev_stmt_info = vinfo_for_stmt (new_stmt);
4438 break;
4440 case WIDEN:
4441 /* In case the vectorization factor (VF) is bigger than the number
4442 of elements that we can fit in a vectype (nunits), we have to
4443 generate more than one vector stmt, i.e., we need to "unroll"
4444 the vector stmt by a factor VF/nunits. */
4445 for (j = 0; j < ncopies; j++)
4447 /* Handle uses. */
4448 if (j == 0)
4450 if (slp_node)
4452 if (code == WIDEN_LSHIFT_EXPR)
4454 unsigned int k;
4456 vec_oprnd1 = op1;
4457 /* Store vec_oprnd1 for every vector stmt to be created
4458 for SLP_NODE. We check during the analysis that all
4459 the shift arguments are the same. */
4460 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4461 vec_oprnds1.quick_push (vec_oprnd1);
4463 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4464 slp_node);
4466 else
4467 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4468 &vec_oprnds1, slp_node);
4470 else
4472 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4473 vec_oprnds0.quick_push (vec_oprnd0);
4474 if (op_type == binary_op)
4476 if (code == WIDEN_LSHIFT_EXPR)
4477 vec_oprnd1 = op1;
4478 else
4479 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4480 vec_oprnds1.quick_push (vec_oprnd1);
4484 else
4486 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4487 vec_oprnds0.truncate (0);
4488 vec_oprnds0.quick_push (vec_oprnd0);
4489 if (op_type == binary_op)
4491 if (code == WIDEN_LSHIFT_EXPR)
4492 vec_oprnd1 = op1;
4493 else
4494 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4495 vec_oprnd1);
4496 vec_oprnds1.truncate (0);
4497 vec_oprnds1.quick_push (vec_oprnd1);
4501 /* Arguments are ready. Create the new vector stmts. */
4502 for (i = multi_step_cvt; i >= 0; i--)
4504 tree this_dest = vec_dsts[i];
4505 enum tree_code c1 = code1, c2 = code2;
4506 if (i == 0 && codecvt2 != ERROR_MARK)
4508 c1 = codecvt1;
4509 c2 = codecvt2;
4511 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4512 &vec_oprnds1,
4513 stmt, this_dest, gsi,
4514 c1, c2, decl1, decl2,
4515 op_type);
4518 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4520 if (cvt_type)
4522 if (codecvt1 == CALL_EXPR)
4524 new_stmt = gimple_build_call (decl1, 1, vop0);
4525 new_temp = make_ssa_name (vec_dest, new_stmt);
4526 gimple_call_set_lhs (new_stmt, new_temp);
4528 else
4530 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4531 new_temp = make_ssa_name (vec_dest);
4532 new_stmt = gimple_build_assign (new_temp, codecvt1,
4533 vop0);
4536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4538 else
4539 new_stmt = SSA_NAME_DEF_STMT (vop0);
4541 if (slp_node)
4542 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4543 else
4545 if (!prev_stmt_info)
4546 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4547 else
4548 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4549 prev_stmt_info = vinfo_for_stmt (new_stmt);
4554 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4555 break;
4557 case NARROW:
4558 /* In case the vectorization factor (VF) is bigger than the number
4559 of elements that we can fit in a vectype (nunits), we have to
4560 generate more than one vector stmt, i.e., we need to "unroll"
4561 the vector stmt by a factor VF/nunits. */
4562 for (j = 0; j < ncopies; j++)
4564 /* Handle uses. */
4565 if (slp_node)
4566 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4567 slp_node);
4568 else
4570 vec_oprnds0.truncate (0);
4571 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4572 vect_pow2 (multi_step_cvt) - 1);
4575 /* Arguments are ready. Create the new vector stmts. */
4576 if (cvt_type)
4577 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4579 if (codecvt1 == CALL_EXPR)
4581 new_stmt = gimple_build_call (decl1, 1, vop0);
4582 new_temp = make_ssa_name (vec_dest, new_stmt);
4583 gimple_call_set_lhs (new_stmt, new_temp);
4585 else
4587 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4588 new_temp = make_ssa_name (vec_dest);
4589 new_stmt = gimple_build_assign (new_temp, codecvt1,
4590 vop0);
4593 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4594 vec_oprnds0[i] = new_temp;
4597 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4598 stmt, vec_dsts, gsi,
4599 slp_node, code1,
4600 &prev_stmt_info);
4603 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4604 break;
4607 vec_oprnds0.release ();
4608 vec_oprnds1.release ();
4609 interm_types.release ();
4611 return true;
4615 /* Function vectorizable_assignment.
4617 Check if STMT performs an assignment (copy) that can be vectorized.
4618 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4619 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4620 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4622 static bool
4623 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4624 gimple **vec_stmt, slp_tree slp_node)
4626 tree vec_dest;
4627 tree scalar_dest;
4628 tree op;
4629 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4630 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4631 tree new_temp;
4632 gimple *def_stmt;
4633 enum vect_def_type dt[1] = {vect_unknown_def_type};
4634 int ndts = 1;
4635 int ncopies;
4636 int i, j;
4637 vec<tree> vec_oprnds = vNULL;
4638 tree vop;
4639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4640 vec_info *vinfo = stmt_info->vinfo;
4641 gimple *new_stmt = NULL;
4642 stmt_vec_info prev_stmt_info = NULL;
4643 enum tree_code code;
4644 tree vectype_in;
4646 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4647 return false;
4649 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4650 && ! vec_stmt)
4651 return false;
4653 /* Is vectorizable assignment? */
4654 if (!is_gimple_assign (stmt))
4655 return false;
4657 scalar_dest = gimple_assign_lhs (stmt);
4658 if (TREE_CODE (scalar_dest) != SSA_NAME)
4659 return false;
4661 code = gimple_assign_rhs_code (stmt);
4662 if (gimple_assign_single_p (stmt)
4663 || code == PAREN_EXPR
4664 || CONVERT_EXPR_CODE_P (code))
4665 op = gimple_assign_rhs1 (stmt);
4666 else
4667 return false;
4669 if (code == VIEW_CONVERT_EXPR)
4670 op = TREE_OPERAND (op, 0);
4672 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4673 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4675 /* Multiple types in SLP are handled by creating the appropriate number of
4676 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4677 case of SLP. */
4678 if (slp_node)
4679 ncopies = 1;
4680 else
4681 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4683 gcc_assert (ncopies >= 1);
4685 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4689 "use not simple.\n");
4690 return false;
4693 /* We can handle NOP_EXPR conversions that do not change the number
4694 of elements or the vector size. */
4695 if ((CONVERT_EXPR_CODE_P (code)
4696 || code == VIEW_CONVERT_EXPR)
4697 && (!vectype_in
4698 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4699 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4700 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4701 return false;
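/* For example (hypothetical names), a sign-changing copy such as
   "int_2 = (int) uint_1" passes the check above, since the source and
   destination vectors have the same number of elements and the same size;
   it is vectorized below as a plain vector copy through a
   VIEW_CONVERT_EXPR.  */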
4703 /* We do not handle bit-precision changes. */
4704 if ((CONVERT_EXPR_CODE_P (code)
4705 || code == VIEW_CONVERT_EXPR)
4706 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4707 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4708 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4709 || ((TYPE_PRECISION (TREE_TYPE (op))
4710 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4711 /* But a conversion that does not change the bit-pattern is ok. */
4712 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4713 > TYPE_PRECISION (TREE_TYPE (op)))
4714 && TYPE_UNSIGNED (TREE_TYPE (op)))
4715 /* Conversion between boolean types of different sizes is
4716 a simple assignment in case their vectypes are the same
4717 boolean vectors. */
4718 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4719 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4721 if (dump_enabled_p ())
4722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4723 "type conversion to/from bit-precision "
4724 "unsupported.\n");
4725 return false;
4728 if (!vec_stmt) /* transformation not required. */
4730 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_NOTE, vect_location,
4733 "=== vectorizable_assignment ===\n");
4734 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4735 return true;
4738 /* Transform. */
4739 if (dump_enabled_p ())
4740 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4742 /* Handle def. */
4743 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4745 /* Handle use. */
4746 for (j = 0; j < ncopies; j++)
4748 /* Handle uses. */
4749 if (j == 0)
4750 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4751 else
4752 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4754 /* Arguments are ready. create the new vector stmt. */
4755 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4757 if (CONVERT_EXPR_CODE_P (code)
4758 || code == VIEW_CONVERT_EXPR)
4759 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4760 new_stmt = gimple_build_assign (vec_dest, vop);
4761 new_temp = make_ssa_name (vec_dest, new_stmt);
4762 gimple_assign_set_lhs (new_stmt, new_temp);
4763 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4764 if (slp_node)
4765 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4768 if (slp_node)
4769 continue;
4771 if (j == 0)
4772 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4773 else
4774 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4776 prev_stmt_info = vinfo_for_stmt (new_stmt);
4779 vec_oprnds.release ();
4780 return true;
4784 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE,
4785 either as a shift by a scalar or as a shift by a vector. */
4787 bool
4788 vect_supportable_shift (enum tree_code code, tree scalar_type)
4791 machine_mode vec_mode;
4792 optab optab;
4793 int icode;
4794 tree vectype;
4796 vectype = get_vectype_for_scalar_type (scalar_type);
4797 if (!vectype)
4798 return false;
4800 optab = optab_for_tree_code (code, vectype, optab_scalar);
4801 if (!optab
4802 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4804 optab = optab_for_tree_code (code, vectype, optab_vector);
4805 if (!optab
4806 || (optab_handler (optab, TYPE_MODE (vectype))
4807 == CODE_FOR_nothing))
4808 return false;
4811 vec_mode = TYPE_MODE (vectype);
4812 icode = (int) optab_handler (optab, vec_mode);
4813 if (icode == CODE_FOR_nothing)
4814 return false;
4816 return true;
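/* Usage sketch: a caller (for instance the pattern recognizer) can ask

       if (vect_supportable_shift (LSHIFT_EXPR, type))
         ...

   to find out whether a shift of TYPE can be vectorized at all, accepting
   either the vector-by-scalar or the vector-by-vector shift optab.  */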
4820 /* Function vectorizable_shift.
4822 Check if STMT performs a shift operation that can be vectorized.
4823 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4824 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4825 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4827 static bool
4828 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4829 gimple **vec_stmt, slp_tree slp_node)
4831 tree vec_dest;
4832 tree scalar_dest;
4833 tree op0, op1 = NULL;
4834 tree vec_oprnd1 = NULL_TREE;
4835 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4836 tree vectype;
4837 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4838 enum tree_code code;
4839 machine_mode vec_mode;
4840 tree new_temp;
4841 optab optab;
4842 int icode;
4843 machine_mode optab_op2_mode;
4844 gimple *def_stmt;
4845 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4846 int ndts = 2;
4847 gimple *new_stmt = NULL;
4848 stmt_vec_info prev_stmt_info;
4849 int nunits_in;
4850 int nunits_out;
4851 tree vectype_out;
4852 tree op1_vectype;
4853 int ncopies;
4854 int j, i;
4855 vec<tree> vec_oprnds0 = vNULL;
4856 vec<tree> vec_oprnds1 = vNULL;
4857 tree vop0, vop1;
4858 unsigned int k;
4859 bool scalar_shift_arg = true;
4860 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4861 vec_info *vinfo = stmt_info->vinfo;
4862 int vf;
4864 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4865 return false;
4867 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4868 && ! vec_stmt)
4869 return false;
4871 /* Is STMT a vectorizable binary/unary operation? */
4872 if (!is_gimple_assign (stmt))
4873 return false;
4875 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4876 return false;
4878 code = gimple_assign_rhs_code (stmt);
4880 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4881 || code == RROTATE_EXPR))
4882 return false;
4884 scalar_dest = gimple_assign_lhs (stmt);
4885 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4886 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4887 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4889 if (dump_enabled_p ())
4890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4891 "bit-precision shifts not supported.\n");
4892 return false;
4895 op0 = gimple_assign_rhs1 (stmt);
4896 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4898 if (dump_enabled_p ())
4899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4900 "use not simple.\n");
4901 return false;
4903 /* If op0 is an external or constant def use a vector type with
4904 the same size as the output vector type. */
4905 if (!vectype)
4906 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4907 if (vec_stmt)
4908 gcc_assert (vectype);
4909 if (!vectype)
4911 if (dump_enabled_p ())
4912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4913 "no vectype for scalar type\n");
4914 return false;
4917 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4918 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4919 if (nunits_out != nunits_in)
4920 return false;
4922 op1 = gimple_assign_rhs2 (stmt);
4923 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4925 if (dump_enabled_p ())
4926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4927 "use not simple.\n");
4928 return false;
4931 if (loop_vinfo)
4932 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4933 else
4934 vf = 1;
4936 /* Multiple types in SLP are handled by creating the appropriate number of
4937 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4938 case of SLP. */
4939 if (slp_node)
4940 ncopies = 1;
4941 else
4942 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4944 gcc_assert (ncopies >= 1);
4946 /* Determine whether the shift amount is a vector or a scalar. If the
4947 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4949 if ((dt[1] == vect_internal_def
4950 || dt[1] == vect_induction_def)
4951 && !slp_node)
4952 scalar_shift_arg = false;
4953 else if (dt[1] == vect_constant_def
4954 || dt[1] == vect_external_def
4955 || dt[1] == vect_internal_def)
4957 /* In SLP, we need to check whether the shift count is the same for
4958 all statements; in loops, if it is a constant or invariant, it is
4959 always a scalar shift. */
4960 if (slp_node)
4962 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4963 gimple *slpstmt;
4965 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4966 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4967 scalar_shift_arg = false;
4970 /* If the shift amount is computed by a pattern stmt we cannot
4971 use the scalar amount directly thus give up and use a vector
4972 shift. */
4973 if (dt[1] == vect_internal_def)
4975 gimple *def = SSA_NAME_DEF_STMT (op1);
4976 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4977 scalar_shift_arg = false;
4980 else
4982 if (dump_enabled_p ())
4983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4984 "operand mode requires invariant argument.\n");
4985 return false;
4988 /* Vector shifted by vector. */
4989 if (!scalar_shift_arg)
4991 optab = optab_for_tree_code (code, vectype, optab_vector);
4992 if (dump_enabled_p ())
4993 dump_printf_loc (MSG_NOTE, vect_location,
4994 "vector/vector shift/rotate found.\n");
4996 if (!op1_vectype)
4997 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4998 if (op1_vectype == NULL_TREE
4999 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5001 if (dump_enabled_p ())
5002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5003 "unusable type for last operand in"
5004 " vector/vector shift/rotate.\n");
5005 return false;
5008 /* See if the machine has a vector shifted by scalar insn and if not
5009 then see if it has a vector shifted by vector insn. */
5010 else
5012 optab = optab_for_tree_code (code, vectype, optab_scalar);
5013 if (optab
5014 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE, vect_location,
5018 "vector/scalar shift/rotate found.\n");
5020 else
5022 optab = optab_for_tree_code (code, vectype, optab_vector);
5023 if (optab
5024 && (optab_handler (optab, TYPE_MODE (vectype))
5025 != CODE_FOR_nothing))
5027 scalar_shift_arg = false;
5029 if (dump_enabled_p ())
5030 dump_printf_loc (MSG_NOTE, vect_location,
5031 "vector/vector shift/rotate found.\n");
5033 /* Unlike the other binary operators, shifts/rotates have
5034 an rhs of type int rather than the same type as the lhs,
5035 so make sure the scalar is of the right type if we are
5036 dealing with vectors of long long/long/short/char. */
5037 if (dt[1] == vect_constant_def)
5038 op1 = fold_convert (TREE_TYPE (vectype), op1);
5039 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5040 TREE_TYPE (op1)))
5042 if (slp_node
5043 && TYPE_MODE (TREE_TYPE (vectype))
5044 != TYPE_MODE (TREE_TYPE (op1)))
5046 if (dump_enabled_p ())
5047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5048 "unusable type for last operand in"
5049 " vector/vector shift/rotate.\n");
5050 return false;
5052 if (vec_stmt && !slp_node)
5054 op1 = fold_convert (TREE_TYPE (vectype), op1);
5055 op1 = vect_init_vector (stmt, op1,
5056 TREE_TYPE (vectype), NULL);
5063 /* Supportable by target? */
5064 if (!optab)
5066 if (dump_enabled_p ())
5067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5068 "no optab.\n");
5069 return false;
5071 vec_mode = TYPE_MODE (vectype);
5072 icode = (int) optab_handler (optab, vec_mode);
5073 if (icode == CODE_FOR_nothing)
5075 if (dump_enabled_p ())
5076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5077 "op not supported by target.\n");
5078 /* Check only during analysis. */
5079 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5080 || (vf < vect_min_worthwhile_factor (code)
5081 && !vec_stmt))
5082 return false;
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_NOTE, vect_location,
5085 "proceeding using word mode.\n");
5088 /* Worthwhile without SIMD support? Check only during analysis. */
5089 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5090 && vf < vect_min_worthwhile_factor (code)
5091 && !vec_stmt)
5093 if (dump_enabled_p ())
5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5095 "not worthwhile without SIMD support.\n");
5096 return false;
5099 if (!vec_stmt) /* transformation not required. */
5101 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_NOTE, vect_location,
5104 "=== vectorizable_shift ===\n");
5105 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5106 return true;
5109 /* Transform. */
5111 if (dump_enabled_p ())
5112 dump_printf_loc (MSG_NOTE, vect_location,
5113 "transform binary/unary operation.\n");
5115 /* Handle def. */
5116 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5118 prev_stmt_info = NULL;
5119 for (j = 0; j < ncopies; j++)
5121 /* Handle uses. */
5122 if (j == 0)
5124 if (scalar_shift_arg)
5126 /* Vector shl and shr insn patterns can be defined with a scalar
5127 operand 2 (the shift operand). In this case, use the constant or
5128 loop-invariant op1 directly, without extending it to vector mode
5129 first. */
5130 optab_op2_mode = insn_data[icode].operand[2].mode;
5131 if (!VECTOR_MODE_P (optab_op2_mode))
5133 if (dump_enabled_p ())
5134 dump_printf_loc (MSG_NOTE, vect_location,
5135 "operand 1 using scalar mode.\n");
5136 vec_oprnd1 = op1;
5137 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5138 vec_oprnds1.quick_push (vec_oprnd1);
5139 if (slp_node)
5141 /* Store vec_oprnd1 for every vector stmt to be created
5142 for SLP_NODE. We check during the analysis that all
5143 the shift arguments are the same.
5144 TODO: Allow different constants for different vector
5145 stmts generated for an SLP instance. */
5146 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5147 vec_oprnds1.quick_push (vec_oprnd1);
5152 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5153 (a special case for certain kinds of vector shifts); otherwise,
5154 operand 1 should be of a vector type (the usual case). */
5155 if (vec_oprnd1)
5156 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5157 slp_node);
5158 else
5159 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5160 slp_node);
5162 else
5163 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5165 /* Arguments are ready. Create the new vector stmt. */
5166 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5168 vop1 = vec_oprnds1[i];
5169 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5170 new_temp = make_ssa_name (vec_dest, new_stmt);
5171 gimple_assign_set_lhs (new_stmt, new_temp);
5172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5173 if (slp_node)
5174 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5177 if (slp_node)
5178 continue;
5180 if (j == 0)
5181 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5182 else
5183 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5184 prev_stmt_info = vinfo_for_stmt (new_stmt);
5187 vec_oprnds0.release ();
5188 vec_oprnds1.release ();
5190 return true;
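/* Illustrative example (hypothetical names): for "x_3 = a_2 << 3" the
   constant shift amount stays scalar and the vector-by-scalar optab is
   used, while for "x_3 = a_2 << b_4" with a loop-varying b_4 the shift
   amount is itself vectorized and the vector-by-vector optab is used.  */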
5194 /* Function vectorizable_operation.
5196 Check if STMT performs a binary, unary or ternary operation that can
5197 be vectorized.
5198 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5199 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5200 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5202 static bool
5203 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5204 gimple **vec_stmt, slp_tree slp_node)
5206 tree vec_dest;
5207 tree scalar_dest;
5208 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5210 tree vectype;
5211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5212 enum tree_code code;
5213 machine_mode vec_mode;
5214 tree new_temp;
5215 int op_type;
5216 optab optab;
5217 bool target_support_p;
5218 gimple *def_stmt;
5219 enum vect_def_type dt[3]
5220 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5221 int ndts = 3;
5222 gimple *new_stmt = NULL;
5223 stmt_vec_info prev_stmt_info;
5224 int nunits_in;
5225 int nunits_out;
5226 tree vectype_out;
5227 int ncopies;
5228 int j, i;
5229 vec<tree> vec_oprnds0 = vNULL;
5230 vec<tree> vec_oprnds1 = vNULL;
5231 vec<tree> vec_oprnds2 = vNULL;
5232 tree vop0, vop1, vop2;
5233 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5234 vec_info *vinfo = stmt_info->vinfo;
5235 int vf;
5237 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5238 return false;
5240 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5241 && ! vec_stmt)
5242 return false;
5244 /* Is STMT a vectorizable binary/unary operation? */
5245 if (!is_gimple_assign (stmt))
5246 return false;
5248 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5249 return false;
5251 code = gimple_assign_rhs_code (stmt);
5253 /* For pointer addition, we should use the normal plus for
5254 the vector addition. */
5255 if (code == POINTER_PLUS_EXPR)
5256 code = PLUS_EXPR;
5258 /* Support only unary, binary and ternary operations. */
5259 op_type = TREE_CODE_LENGTH (code);
5260 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5262 if (dump_enabled_p ())
5263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5264 "num. args = %d (not unary/binary/ternary op).\n",
5265 op_type);
5266 return false;
5269 scalar_dest = gimple_assign_lhs (stmt);
5270 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5272 /* Most operations cannot handle bit-precision types without extra
5273 truncations. */
5274 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5275 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5276 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5277 /* Exception are bitwise binary operations. */
5278 && code != BIT_IOR_EXPR
5279 && code != BIT_XOR_EXPR
5280 && code != BIT_AND_EXPR)
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5284 "bit-precision arithmetic not supported.\n");
5285 return false;
5288 op0 = gimple_assign_rhs1 (stmt);
5289 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5291 if (dump_enabled_p ())
5292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5293 "use not simple.\n");
5294 return false;
5296 /* If op0 is an external or constant def use a vector type with
5297 the same size as the output vector type. */
5298 if (!vectype)
5300 /* For a boolean type we cannot determine the vectype from the
5301 invariant value (we don't know whether it is a vector
5302 of booleans or a vector of integers). We use the output
5303 vectype because operations on booleans don't change
5304 the type. */
5305 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5307 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5309 if (dump_enabled_p ())
5310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5311 "not supported operation on bool value.\n");
5312 return false;
5314 vectype = vectype_out;
5316 else
5317 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5319 if (vec_stmt)
5320 gcc_assert (vectype);
5321 if (!vectype)
5323 if (dump_enabled_p ())
5325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5326 "no vectype for scalar type ");
5327 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5328 TREE_TYPE (op0));
5329 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5332 return false;
5335 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5336 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5337 if (nunits_out != nunits_in)
5338 return false;
5340 if (op_type == binary_op || op_type == ternary_op)
5342 op1 = gimple_assign_rhs2 (stmt);
5343 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5345 if (dump_enabled_p ())
5346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5347 "use not simple.\n");
5348 return false;
5351 if (op_type == ternary_op)
5353 op2 = gimple_assign_rhs3 (stmt);
5354 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5356 if (dump_enabled_p ())
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5358 "use not simple.\n");
5359 return false;
5363 if (loop_vinfo)
5364 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5365 else
5366 vf = 1;
5368 /* Multiple types in SLP are handled by creating the appropriate number of
5369 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5370 case of SLP. */
5371 if (slp_node)
5372 ncopies = 1;
5373 else
5374 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5376 gcc_assert (ncopies >= 1);
5378 /* Shifts are handled in vectorizable_shift (). */
5379 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5380 || code == RROTATE_EXPR)
5381 return false;
5383 /* Supportable by target? */
5385 vec_mode = TYPE_MODE (vectype);
5386 if (code == MULT_HIGHPART_EXPR)
5387 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5388 else
5390 optab = optab_for_tree_code (code, vectype, optab_default);
5391 if (!optab)
5393 if (dump_enabled_p ())
5394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5395 "no optab.\n");
5396 return false;
5398 target_support_p = (optab_handler (optab, vec_mode)
5399 != CODE_FOR_nothing);
5402 if (!target_support_p)
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "op not supported by target.\n");
5407 /* Check only during analysis. */
5408 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5409 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5410 return false;
5411 if (dump_enabled_p ())
5412 dump_printf_loc (MSG_NOTE, vect_location,
5413 "proceeding using word mode.\n");
5416 /* Worthwhile without SIMD support? Check only during analysis. */
5417 if (!VECTOR_MODE_P (vec_mode)
5418 && !vec_stmt
5419 && vf < vect_min_worthwhile_factor (code))
5421 if (dump_enabled_p ())
5422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5423 "not worthwhile without SIMD support.\n");
5424 return false;
5427 if (!vec_stmt) /* transformation not required. */
5429 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_NOTE, vect_location,
5432 "=== vectorizable_operation ===\n");
5433 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5434 return true;
5437 /* Transform. */
5439 if (dump_enabled_p ())
5440 dump_printf_loc (MSG_NOTE, vect_location,
5441 "transform binary/unary operation.\n");
5443 /* Handle def. */
5444 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5446 /* In case the vectorization factor (VF) is bigger than the number
5447 of elements that we can fit in a vectype (nunits), we have to generate
5448 more than one vector stmt, i.e., we need to "unroll" the
5449 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5450 from one copy of the vector stmt to the next, in the field
5451 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5452 stages to find the correct vector defs to be used when vectorizing
5453 stmts that use the defs of the current stmt. The example below
5454 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5455 we need to create 4 vectorized stmts):
5457 before vectorization:
5458 RELATED_STMT VEC_STMT
5459 S1: x = memref - -
5460 S2: z = x + 1 - -
5462 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5463 there):
5464 RELATED_STMT VEC_STMT
5465 VS1_0: vx0 = memref0 VS1_1 -
5466 VS1_1: vx1 = memref1 VS1_2 -
5467 VS1_2: vx2 = memref2 VS1_3 -
5468 VS1_3: vx3 = memref3 - -
5469 S1: x = load - VS1_0
5470 S2: z = x + 1 - -
5472 step 2: vectorize stmt S2 (done here):
5473 To vectorize stmt S2 we first need to find the relevant vector
5474 def for the first operand 'x'. This is, as usual, obtained from
5475 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5476 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5477 relevant vector def 'vx0'. Having found 'vx0' we can generate
5478 the vector stmt VS2_0, and as usual, record it in the
5479 STMT_VINFO_VEC_STMT of stmt S2.
5480 When creating the second copy (VS2_1), we obtain the relevant vector
5481 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5482 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5483 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5484 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5485 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5486 chain of stmts and pointers:
5487 RELATED_STMT VEC_STMT
5488 VS1_0: vx0 = memref0 VS1_1 -
5489 VS1_1: vx1 = memref1 VS1_2 -
5490 VS1_2: vx2 = memref2 VS1_3 -
5491 VS1_3: vx3 = memref3 - -
5492 S1: x = load - VS1_0
5493 VS2_0: vz0 = vx0 + v1 VS2_1 -
5494 VS2_1: vz1 = vx1 + v1 VS2_2 -
5495 VS2_2: vz2 = vx2 + v1 VS2_3 -
5496 VS2_3: vz3 = vx3 + v1 - -
5497 S2: z = x + 1 - VS2_0 */
5499 prev_stmt_info = NULL;
5500 for (j = 0; j < ncopies; j++)
5502 /* Handle uses. */
5503 if (j == 0)
5505 if (op_type == binary_op || op_type == ternary_op)
5506 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5507 slp_node);
5508 else
5509 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5510 slp_node);
5511 if (op_type == ternary_op)
5512 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5513 slp_node);
5515 else
5517 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5518 if (op_type == ternary_op)
5520 tree vec_oprnd = vec_oprnds2.pop ();
5521 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5522 vec_oprnd));
5526 /* Arguments are ready. Create the new vector stmt. */
5527 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5529 vop1 = ((op_type == binary_op || op_type == ternary_op)
5530 ? vec_oprnds1[i] : NULL_TREE);
5531 vop2 = ((op_type == ternary_op)
5532 ? vec_oprnds2[i] : NULL_TREE);
5533 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5534 new_temp = make_ssa_name (vec_dest, new_stmt);
5535 gimple_assign_set_lhs (new_stmt, new_temp);
5536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5537 if (slp_node)
5538 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5541 if (slp_node)
5542 continue;
5544 if (j == 0)
5545 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5546 else
5547 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5548 prev_stmt_info = vinfo_for_stmt (new_stmt);
5551 vec_oprnds0.release ();
5552 vec_oprnds1.release ();
5553 vec_oprnds2.release ();
5555 return true;
5558 /* A helper function to ensure data reference DR's base alignment
5559 for STMT_INFO. */
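/* For example (a sketch, exact alignments depend on the target): if the
   vector type requires 32-byte alignment but the base declaration of DR was
   recorded as misaligned during analysis, the declaration's alignment is
   raised to TYPE_ALIGN (vectype), through the symbol table for declarations
   with symtab entries or directly via DECL_ALIGN otherwise.  */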
5561 static void
5562 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5564 if (!dr->aux)
5565 return;
5567 if (DR_VECT_AUX (dr)->base_misaligned)
5569 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5570 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5572 if (decl_in_symtab_p (base_decl))
5573 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5574 else
5576 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5577 DECL_USER_ALIGN (base_decl) = 1;
5579 DR_VECT_AUX (dr)->base_misaligned = false;
5584 /* Function get_group_alias_ptr_type.
5586 Return the alias type for the group starting at FIRST_STMT. */
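/* In other words: if every data reference in the group has the same alias
   set as the first one, the alias pointer type of the first reference is
   used; as soon as one member's alias set differs, ptr_type_node is
   returned so the generated accesses are treated as aliasing anything (a
   conservative but safe fallback).  */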
5588 static tree
5589 get_group_alias_ptr_type (gimple *first_stmt)
5591 struct data_reference *first_dr, *next_dr;
5592 gimple *next_stmt;
5594 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5595 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5596 while (next_stmt)
5598 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5599 if (get_alias_set (DR_REF (first_dr))
5600 != get_alias_set (DR_REF (next_dr)))
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_NOTE, vect_location,
5604 "conflicting alias set types.\n");
5605 return ptr_type_node;
5607 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5609 return reference_alias_ptr_type (DR_REF (first_dr));
5613 /* Function vectorizable_store.
5615 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5616 can be vectorized.
5617 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5618 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5619 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5621 static bool
5622 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5623 slp_tree slp_node)
5625 tree scalar_dest;
5626 tree data_ref;
5627 tree op;
5628 tree vec_oprnd = NULL_TREE;
5629 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5630 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5631 tree elem_type;
5632 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5633 struct loop *loop = NULL;
5634 machine_mode vec_mode;
5635 tree dummy;
5636 enum dr_alignment_support alignment_support_scheme;
5637 gimple *def_stmt;
5638 enum vect_def_type dt;
5639 stmt_vec_info prev_stmt_info = NULL;
5640 tree dataref_ptr = NULL_TREE;
5641 tree dataref_offset = NULL_TREE;
5642 gimple *ptr_incr = NULL;
5643 int ncopies;
5644 int j;
5645 gimple *next_stmt, *first_stmt;
5646 bool grouped_store;
5647 unsigned int group_size, i;
5648 vec<tree> oprnds = vNULL;
5649 vec<tree> result_chain = vNULL;
5650 bool inv_p;
5651 tree offset = NULL_TREE;
5652 vec<tree> vec_oprnds = vNULL;
5653 bool slp = (slp_node != NULL);
5654 unsigned int vec_num;
5655 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5656 vec_info *vinfo = stmt_info->vinfo;
5657 tree aggr_type;
5658 gather_scatter_info gs_info;
5659 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5660 gimple *new_stmt;
5661 int vf;
5662 vec_load_store_type vls_type;
5663 tree ref_type;
5665 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5666 return false;
5668 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5669 && ! vec_stmt)
5670 return false;
5672 /* Is vectorizable store? */
5674 if (!is_gimple_assign (stmt))
5675 return false;
5677 scalar_dest = gimple_assign_lhs (stmt);
5678 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5679 && is_pattern_stmt_p (stmt_info))
5680 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5681 if (TREE_CODE (scalar_dest) != ARRAY_REF
5682 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5683 && TREE_CODE (scalar_dest) != INDIRECT_REF
5684 && TREE_CODE (scalar_dest) != COMPONENT_REF
5685 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5686 && TREE_CODE (scalar_dest) != REALPART_EXPR
5687 && TREE_CODE (scalar_dest) != MEM_REF)
5688 return false;
5690 /* Cannot have hybrid store SLP -- that would mean storing to the
5691 same location twice. */
5692 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5694 gcc_assert (gimple_assign_single_p (stmt));
5696 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5697 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5699 if (loop_vinfo)
5701 loop = LOOP_VINFO_LOOP (loop_vinfo);
5702 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5704 else
5705 vf = 1;
5707 /* Multiple types in SLP are handled by creating the appropriate number of
5708 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5709 case of SLP. */
5710 if (slp)
5711 ncopies = 1;
5712 else
5713 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5715 gcc_assert (ncopies >= 1);
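/* For example, with VF == 16 and a 4-element vectype this gives
   ncopies == 4, i.e. four copies of each vector stmt chained through
   STMT_VINFO_RELATED_STMT; with VF == 8 and an 8-element vectype a single
   copy suffices.  (Illustrative numbers only.)  */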
5717 /* FORNOW. This restriction should be relaxed. */
5718 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5720 if (dump_enabled_p ())
5721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5722 "multiple types in nested loop.\n");
5723 return false;
5726 op = gimple_assign_rhs1 (stmt);
5728 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5730 if (dump_enabled_p ())
5731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5732 "use not simple.\n");
5733 return false;
5736 if (dt == vect_constant_def || dt == vect_external_def)
5737 vls_type = VLS_STORE_INVARIANT;
5738 else
5739 vls_type = VLS_STORE;
5741 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5742 return false;
5744 elem_type = TREE_TYPE (vectype);
5745 vec_mode = TYPE_MODE (vectype);
5747 /* FORNOW. In some cases can vectorize even if data-type not supported
5748 (e.g. - array initialization with 0). */
5749 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5750 return false;
5752 if (!STMT_VINFO_DATA_REF (stmt_info))
5753 return false;
5755 vect_memory_access_type memory_access_type;
5756 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5757 &memory_access_type, &gs_info))
5758 return false;
5760 if (!vec_stmt) /* transformation not required. */
5762 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5763 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5764 /* The SLP costs are calculated during SLP analysis. */
5765 if (!PURE_SLP_STMT (stmt_info))
5766 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5767 NULL, NULL, NULL);
5768 return true;
5770 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5772 /* Transform. */
5774 ensure_base_align (stmt_info, dr);
5776 if (memory_access_type == VMAT_GATHER_SCATTER)
5778 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5779 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5780 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5781 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5782 edge pe = loop_preheader_edge (loop);
5783 gimple_seq seq;
5784 basic_block new_bb;
5785 enum { NARROW, NONE, WIDEN } modifier;
5786 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5788 if (nunits == (unsigned int) scatter_off_nunits)
5789 modifier = NONE;
5790 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5792 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5793 modifier = WIDEN;
5795 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5796 sel[i] = i | nunits;
5798 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5799 gcc_assert (perm_mask != NULL_TREE);
5801 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5803 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5804 modifier = NARROW;
5806 for (i = 0; i < (unsigned int) nunits; ++i)
5807 sel[i] = i | scatter_off_nunits;
5809 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5810 gcc_assert (perm_mask != NULL_TREE);
5811 ncopies *= 2;
5813 else
5814 gcc_unreachable ();
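/* A sketch of the masks built above, with made-up vector sizes: for
   nunits == 4 and scatter_off_nunits == 8 (WIDEN) the selector is
   { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the high half of the offset vector
   replicated, which is what the odd-numbered copies below permute into
   place; for nunits == 8 and scatter_off_nunits == 4 (NARROW) the same
   pattern is applied to the data vector instead and NCOPIES is doubled so
   that each scalar store produces two scatter calls.  */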
5816 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5817 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5818 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5819 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5820 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5821 scaletype = TREE_VALUE (arglist);
5823 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5824 && TREE_CODE (rettype) == VOID_TYPE);
5826 ptr = fold_convert (ptrtype, gs_info.base);
5827 if (!is_gimple_min_invariant (ptr))
5829 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5830 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5831 gcc_assert (!new_bb);
5834 /* Currently we support only unconditional scatter stores,
5835 so mask should be all ones. */
5836 mask = build_int_cst (masktype, -1);
5837 mask = vect_init_vector (stmt, mask, masktype, NULL);
5839 scale = build_int_cst (scaletype, gs_info.scale);
5841 prev_stmt_info = NULL;
5842 for (j = 0; j < ncopies; ++j)
5844 if (j == 0)
5846 src = vec_oprnd1
5847 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5848 op = vec_oprnd0
5849 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5851 else if (modifier != NONE && (j & 1))
5853 if (modifier == WIDEN)
5855 src = vec_oprnd1
5856 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5857 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5858 stmt, gsi);
5860 else if (modifier == NARROW)
5862 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5863 stmt, gsi);
5864 op = vec_oprnd0
5865 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5866 vec_oprnd0);
5868 else
5869 gcc_unreachable ();
5871 else
5873 src = vec_oprnd1
5874 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5875 op = vec_oprnd0
5876 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5877 vec_oprnd0);
5880 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5882 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5883 == TYPE_VECTOR_SUBPARTS (srctype));
5884 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5885 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5886 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5887 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5888 src = var;
5891 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5893 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5894 == TYPE_VECTOR_SUBPARTS (idxtype));
5895 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5896 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5897 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5898 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5899 op = var;
5902 new_stmt
5903 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5905 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5907 if (prev_stmt_info == NULL)
5908 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5909 else
5910 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5911 prev_stmt_info = vinfo_for_stmt (new_stmt);
5913 return true;
5916 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5917 if (grouped_store)
5919 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5920 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5921 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5923 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5925 /* FORNOW */
5926 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5928 /* We vectorize all the stmts of the interleaving group when we
5929 reach the last stmt in the group. */
5930 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5931 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5932 && !slp)
5934 *vec_stmt = NULL;
5935 return true;
5938 if (slp)
5940 grouped_store = false;
5941 /* VEC_NUM is the number of vect stmts to be created for this
5942 group. */
5943 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5944 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5945 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5946 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5947 op = gimple_assign_rhs1 (first_stmt);
5949 else
5950 /* VEC_NUM is the number of vect stmts to be created for this
5951 group. */
5952 vec_num = group_size;
5954 ref_type = get_group_alias_ptr_type (first_stmt);
5956 else
5958 first_stmt = stmt;
5959 first_dr = dr;
5960 group_size = vec_num = 1;
5961 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5964 if (dump_enabled_p ())
5965 dump_printf_loc (MSG_NOTE, vect_location,
5966 "transform store. ncopies = %d\n", ncopies);
5968 if (memory_access_type == VMAT_ELEMENTWISE
5969 || memory_access_type == VMAT_STRIDED_SLP)
5971 gimple_stmt_iterator incr_gsi;
5972 bool insert_after;
5973 gimple *incr;
5974 tree offvar;
5975 tree ivstep;
5976 tree running_off;
5977 gimple_seq stmts = NULL;
5978 tree stride_base, stride_step, alias_off;
5979 tree vec_oprnd;
5980 unsigned int g;
5982 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5984 stride_base
5985 = fold_build_pointer_plus
5986 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5987 size_binop (PLUS_EXPR,
5988 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5989 convert_to_ptrofftype (DR_INIT (first_dr))));
5990 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5992 /* For a store with a loop-invariant stride other than a power of 2
5993 (i.e. not a grouped access) like so:
5995 for (i = 0; i < n; i += stride)
5996 array[i] = ...;
5998 we generate a new induction variable and new stores from
5999 the components of the (vectorized) rhs:
6001 for (j = 0; ; j += VF*stride)
6002 vectemp = ...;
6003 tmp1 = vectemp[0];
6004 array[j] = tmp1;
6005 tmp2 = vectemp[1];
6006 array[j + stride] = tmp2;
6010 unsigned nstores = nunits;
6011 unsigned lnel = 1;
6012 tree ltype = elem_type;
6013 if (slp)
6015 if (group_size < nunits
6016 && nunits % group_size == 0)
6018 nstores = nunits / group_size;
6019 lnel = group_size;
6020 ltype = build_vector_type (elem_type, group_size);
6022 else if (group_size >= nunits
6023 && group_size % nunits == 0)
6025 nstores = 1;
6026 lnel = nunits;
6027 ltype = vectype;
6029 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6030 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
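/* Illustrative numbers: for group_size == 2 and an 8-element vectype,
   nstores == 4 and lnel == 2, so each vectorized operand is written out as
   four 2-element pieces, with RUNNING_OFF bumped by STRIDE_STEP after each
   piece; for group_size == nunits the whole vector is stored at once
   (nstores == 1).  */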
6033 ivstep = stride_step;
6034 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6035 build_int_cst (TREE_TYPE (ivstep), vf));
6037 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6039 create_iv (stride_base, ivstep, NULL,
6040 loop, &incr_gsi, insert_after,
6041 &offvar, NULL);
6042 incr = gsi_stmt (incr_gsi);
6043 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6045 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6046 if (stmts)
6047 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6049 prev_stmt_info = NULL;
6050 alias_off = build_int_cst (ref_type, 0);
6051 next_stmt = first_stmt;
6052 for (g = 0; g < group_size; g++)
6054 running_off = offvar;
6055 if (g)
6057 tree size = TYPE_SIZE_UNIT (ltype);
6058 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6059 size);
6060 tree newoff = copy_ssa_name (running_off, NULL);
6061 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6062 running_off, pos);
6063 vect_finish_stmt_generation (stmt, incr, gsi);
6064 running_off = newoff;
6066 unsigned int group_el = 0;
6067 unsigned HOST_WIDE_INT
6068 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6069 for (j = 0; j < ncopies; j++)
6071 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6072 and first_stmt == stmt. */
6073 if (j == 0)
6075 if (slp)
6077 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6078 slp_node);
6079 vec_oprnd = vec_oprnds[0];
6081 else
6083 gcc_assert (gimple_assign_single_p (next_stmt));
6084 op = gimple_assign_rhs1 (next_stmt);
6085 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6088 else
6090 if (slp)
6091 vec_oprnd = vec_oprnds[j];
6092 else
6094 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6095 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6099 for (i = 0; i < nstores; i++)
6101 tree newref, newoff;
6102 gimple *incr, *assign;
6103 tree size = TYPE_SIZE (ltype);
6104 /* Extract the i'th component. */
6105 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6106 bitsize_int (i), size);
6107 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6108 size, pos);
6110 elem = force_gimple_operand_gsi (gsi, elem, true,
6111 NULL_TREE, true,
6112 GSI_SAME_STMT);
6114 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6115 group_el * elsz);
6116 newref = build2 (MEM_REF, ltype,
6117 running_off, this_off);
6119 /* And store it to *running_off. */
6120 assign = gimple_build_assign (newref, elem);
6121 vect_finish_stmt_generation (stmt, assign, gsi);
6123 group_el += lnel;
6124 if (! slp
6125 || group_el == group_size)
6127 newoff = copy_ssa_name (running_off, NULL);
6128 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6129 running_off, stride_step);
6130 vect_finish_stmt_generation (stmt, incr, gsi);
6132 running_off = newoff;
6133 group_el = 0;
6135 if (g == group_size - 1
6136 && !slp)
6138 if (j == 0 && i == 0)
6139 STMT_VINFO_VEC_STMT (stmt_info)
6140 = *vec_stmt = assign;
6141 else
6142 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6143 prev_stmt_info = vinfo_for_stmt (assign);
6147 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6148 if (slp)
6149 break;
6152 vec_oprnds.release ();
6153 return true;
6156 auto_vec<tree> dr_chain (group_size);
6157 oprnds.create (group_size);
6159 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6160 gcc_assert (alignment_support_scheme);
6161 /* Targets with store-lane instructions must not require explicit
6162 realignment. */
6163 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6164 || alignment_support_scheme == dr_aligned
6165 || alignment_support_scheme == dr_unaligned_supported);
6167 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6168 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6169 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6171 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6172 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6173 else
6174 aggr_type = vectype;
6176 /* In case the vectorization factor (VF) is bigger than the number
6177 of elements that we can fit in a vectype (nunits), we have to generate
6178 more than one vector stmt - i.e - we need to "unroll" the
6179 vector stmt by a factor VF/nunits. For more details see documentation in
6180 vect_get_vec_def_for_copy_stmt. */
6182 /* In case of interleaving (non-unit grouped access):
6184 S1: &base + 2 = x2
6185 S2: &base = x0
6186 S3: &base + 1 = x1
6187 S4: &base + 3 = x3
6189 We create vectorized stores starting from the base address (the access of the
6190 first stmt in the chain, S2 in the above example), when the last store stmt
6191 of the chain (S4) is reached:
6193 VS1: &base = vx2
6194 VS2: &base + vec_size*1 = vx0
6195 VS3: &base + vec_size*2 = vx1
6196 VS4: &base + vec_size*3 = vx3
6198 Then permutation statements are generated:
6200 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6201 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6204 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6205 (the order of the data-refs in the output of vect_permute_store_chain
6206 corresponds to the order of scalar stmts in the interleaving chain - see
6207 the documentation of vect_permute_store_chain()).
6209 In case of both multiple types and interleaving, the above vector stores and
6210 permutation stmts are created for every copy. The result vector stmts are
6211 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6212 STMT_VINFO_RELATED_STMT for the next copies.
6215 prev_stmt_info = NULL;
6216 for (j = 0; j < ncopies; j++)
6219 if (j == 0)
6221 if (slp)
6223 /* Get vectorized arguments for SLP_NODE. */
6224 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6225 NULL, slp_node);
6227 vec_oprnd = vec_oprnds[0];
6229 else
6231 /* For interleaved stores we collect vectorized defs for all the
6232 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6233 used as an input to vect_permute_store_chain(), and OPRNDS as
6234 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6236 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6237 OPRNDS are of size 1. */
6238 next_stmt = first_stmt;
6239 for (i = 0; i < group_size; i++)
6241 /* Since gaps are not supported for interleaved stores,
6242 GROUP_SIZE is the exact number of stmts in the chain.
6243 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6244 there is no interleaving, GROUP_SIZE is 1, and only one
6245 iteration of the loop will be executed. */
6246 gcc_assert (next_stmt
6247 && gimple_assign_single_p (next_stmt));
6248 op = gimple_assign_rhs1 (next_stmt);
6250 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6251 dr_chain.quick_push (vec_oprnd);
6252 oprnds.quick_push (vec_oprnd);
6253 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6257 /* We should have caught mismatched types earlier. */
6258 gcc_assert (useless_type_conversion_p (vectype,
6259 TREE_TYPE (vec_oprnd)));
6260 bool simd_lane_access_p
6261 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6262 if (simd_lane_access_p
6263 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6264 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6265 && integer_zerop (DR_OFFSET (first_dr))
6266 && integer_zerop (DR_INIT (first_dr))
6267 && alias_sets_conflict_p (get_alias_set (aggr_type),
6268 get_alias_set (TREE_TYPE (ref_type))))
6270 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6271 dataref_offset = build_int_cst (ref_type, 0);
6272 inv_p = false;
6274 else
6275 dataref_ptr
6276 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6277 simd_lane_access_p ? loop : NULL,
6278 offset, &dummy, gsi, &ptr_incr,
6279 simd_lane_access_p, &inv_p);
6280 gcc_assert (bb_vinfo || !inv_p);
6282 else
6284 /* For interleaved stores we created vectorized defs for all the
6285 defs stored in OPRNDS in the previous iteration (previous copy).
6286 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6287 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6288 next copy.
6289 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6290 OPRNDS are of size 1. */
6291 for (i = 0; i < group_size; i++)
6293 op = oprnds[i];
6294 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6295 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6296 dr_chain[i] = vec_oprnd;
6297 oprnds[i] = vec_oprnd;
6299 if (dataref_offset)
6300 dataref_offset
6301 = int_const_binop (PLUS_EXPR, dataref_offset,
6302 TYPE_SIZE_UNIT (aggr_type));
6303 else
6304 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6305 TYPE_SIZE_UNIT (aggr_type));
6308 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6310 tree vec_array;
6312 /* Combine all the vectors into an array. */
6313 vec_array = create_vector_array (vectype, vec_num);
6314 for (i = 0; i < vec_num; i++)
6316 vec_oprnd = dr_chain[i];
6317 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6320 /* Emit:
6321 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6322 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6323 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6324 gimple_call_set_lhs (new_stmt, data_ref);
6325 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6327 else
6329 new_stmt = NULL;
6330 if (grouped_store)
6332 if (j == 0)
6333 result_chain.create (group_size);
6334 /* Permute. */
6335 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6336 &result_chain);
6339 next_stmt = first_stmt;
6340 for (i = 0; i < vec_num; i++)
6342 unsigned align, misalign;
6344 if (i > 0)
6345 /* Bump the vector pointer. */
6346 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6347 stmt, NULL_TREE);
6349 if (slp)
6350 vec_oprnd = vec_oprnds[i];
6351 else if (grouped_store)
6352 /* For grouped stores vectorized defs are interleaved in
6353 vect_permute_store_chain(). */
6354 vec_oprnd = result_chain[i];
6356 data_ref = fold_build2 (MEM_REF, vectype,
6357 dataref_ptr,
6358 dataref_offset
6359 ? dataref_offset
6360 : build_int_cst (ref_type, 0));
6361 align = TYPE_ALIGN_UNIT (vectype);
6362 if (aligned_access_p (first_dr))
6363 misalign = 0;
6364 else if (DR_MISALIGNMENT (first_dr) == -1)
6366 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6367 align = TYPE_ALIGN_UNIT (elem_type);
6368 else
6369 align = get_object_alignment (DR_REF (first_dr))
6370 / BITS_PER_UNIT;
6371 misalign = 0;
6372 TREE_TYPE (data_ref)
6373 = build_aligned_type (TREE_TYPE (data_ref),
6374 align * BITS_PER_UNIT);
6376 else
6378 TREE_TYPE (data_ref)
6379 = build_aligned_type (TREE_TYPE (data_ref),
6380 TYPE_ALIGN (elem_type));
6381 misalign = DR_MISALIGNMENT (first_dr);
6383 if (dataref_offset == NULL_TREE
6384 && TREE_CODE (dataref_ptr) == SSA_NAME)
6385 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6386 misalign);
6388 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6390 tree perm_mask = perm_mask_for_reverse (vectype);
6391 tree perm_dest
6392 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6393 vectype);
6394 tree new_temp = make_ssa_name (perm_dest);
6396 /* Generate the permute statement. */
6397 gimple *perm_stmt
6398 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6399 vec_oprnd, perm_mask);
6400 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6402 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6403 vec_oprnd = new_temp;
6406 /* Arguments are ready. Create the new vector stmt. */
6407 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6408 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6410 if (slp)
6411 continue;
6413 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6414 if (!next_stmt)
6415 break;
6418 if (!slp)
6420 if (j == 0)
6421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6422 else
6423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6424 prev_stmt_info = vinfo_for_stmt (new_stmt);
6428 oprnds.release ();
6429 result_chain.release ();
6430 vec_oprnds.release ();
6432 return true;
6435 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6436 VECTOR_CST mask. No checks are made that the target platform supports the
6437 mask, so callers may wish to test can_vec_perm_p separately, or use
6438 vect_gen_perm_mask_checked. */
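/* For example (illustrative values), for a 4-element vector type and
   SEL == { 1, 0, 3, 2 } the result is the VECTOR_CST { 1, 0, 3, 2 } of the
   matching integer vector type; used as the third operand of a
   VEC_PERM_EXPR whose two inputs are the same vector it swaps adjacent
   elements.  */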
6440 tree
6441 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6443 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6444 int i, nunits;
6446 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6448 mask_elt_type = lang_hooks.types.type_for_mode
6449 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6450 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6452 mask_elts = XALLOCAVEC (tree, nunits);
6453 for (i = nunits - 1; i >= 0; i--)
6454 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6455 mask_vec = build_vector (mask_type, mask_elts);
6457 return mask_vec;
6460 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6461 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6463 tree
6464 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6466 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6467 return vect_gen_perm_mask_any (vectype, sel);
6470 /* Given vector variables X and Y that were generated for the scalar
6471 STMT, generate instructions to permute the vector elements of X and Y
6472 using permutation mask MASK_VEC, insert them at *GSI and return the
6473 permuted vector variable. */
6475 static tree
6476 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6477 gimple_stmt_iterator *gsi)
6479 tree vectype = TREE_TYPE (x);
6480 tree perm_dest, data_ref;
6481 gimple *perm_stmt;
6483 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6484 data_ref = make_ssa_name (perm_dest);
6486 /* Generate the permute statement. */
6487 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6488 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6490 return data_ref;
6493 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6494 inserting them on the loop's preheader edge. Returns true if we
6495 were successful in doing so (and thus STMT can then be moved),
6496 otherwise returns false. */
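/* A typical use (a sketch): STMT is a load from a loop-invariant address
   whose address computation is itself emitted inside LOOP; moving those
   defining stmts to the preheader makes it possible to subsequently hoist
   the load itself out of the loop.  */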
6498 static bool
6499 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6501 ssa_op_iter i;
6502 tree op;
6503 bool any = false;
6505 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6507 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6508 if (!gimple_nop_p (def_stmt)
6509 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6511 /* Make sure we don't need to recurse. While we could do
6512 so in simple cases, when there are more complex use webs
6513 we don't have an easy way to preserve stmt order to fulfil
6514 dependencies within them. */
6515 tree op2;
6516 ssa_op_iter i2;
6517 if (gimple_code (def_stmt) == GIMPLE_PHI)
6518 return false;
6519 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6521 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6522 if (!gimple_nop_p (def_stmt2)
6523 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6524 return false;
6526 any = true;
6530 if (!any)
6531 return true;
6533 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6535 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6536 if (!gimple_nop_p (def_stmt)
6537 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6539 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6540 gsi_remove (&gsi, false);
6541 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6545 return true;
6548 /* vectorizable_load.
6550 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6551 can be vectorized.
6552 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6553 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6554 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6556 static bool
6557 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6558 slp_tree slp_node, slp_instance slp_node_instance)
6560 tree scalar_dest;
6561 tree vec_dest = NULL;
6562 tree data_ref = NULL;
6563 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6564 stmt_vec_info prev_stmt_info;
6565 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6566 struct loop *loop = NULL;
6567 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6568 bool nested_in_vect_loop = false;
6569 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6570 tree elem_type;
6571 tree new_temp;
6572 machine_mode mode;
6573 gimple *new_stmt = NULL;
6574 tree dummy;
6575 enum dr_alignment_support alignment_support_scheme;
6576 tree dataref_ptr = NULL_TREE;
6577 tree dataref_offset = NULL_TREE;
6578 gimple *ptr_incr = NULL;
6579 int ncopies;
6580 int i, j, group_size, group_gap_adj;
6581 tree msq = NULL_TREE, lsq;
6582 tree offset = NULL_TREE;
6583 tree byte_offset = NULL_TREE;
6584 tree realignment_token = NULL_TREE;
6585 gphi *phi = NULL;
6586 vec<tree> dr_chain = vNULL;
6587 bool grouped_load = false;
6588 gimple *first_stmt;
6589 gimple *first_stmt_for_drptr = NULL;
6590 bool inv_p;
6591 bool compute_in_loop = false;
6592 struct loop *at_loop;
6593 int vec_num;
6594 bool slp = (slp_node != NULL);
6595 bool slp_perm = false;
6596 enum tree_code code;
6597 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6598 int vf;
6599 tree aggr_type;
6600 gather_scatter_info gs_info;
6601 vec_info *vinfo = stmt_info->vinfo;
6602 tree ref_type;
6604 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6605 return false;
6607 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6608 && ! vec_stmt)
6609 return false;
6611 /* Is vectorizable load? */
6612 if (!is_gimple_assign (stmt))
6613 return false;
6615 scalar_dest = gimple_assign_lhs (stmt);
6616 if (TREE_CODE (scalar_dest) != SSA_NAME)
6617 return false;
6619 code = gimple_assign_rhs_code (stmt);
6620 if (code != ARRAY_REF
6621 && code != BIT_FIELD_REF
6622 && code != INDIRECT_REF
6623 && code != COMPONENT_REF
6624 && code != IMAGPART_EXPR
6625 && code != REALPART_EXPR
6626 && code != MEM_REF
6627 && TREE_CODE_CLASS (code) != tcc_declaration)
6628 return false;
6630 if (!STMT_VINFO_DATA_REF (stmt_info))
6631 return false;
6633 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6634 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6636 if (loop_vinfo)
6638 loop = LOOP_VINFO_LOOP (loop_vinfo);
6639 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6640 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6642 else
6643 vf = 1;
6645 /* Multiple types in SLP are handled by creating the appropriate number of
6646 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6647 case of SLP. */
6648 if (slp)
6649 ncopies = 1;
6650 else
6651 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6653 gcc_assert (ncopies >= 1);
6655 /* FORNOW. This restriction should be relaxed. */
6656 if (nested_in_vect_loop && ncopies > 1)
6658 if (dump_enabled_p ())
6659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6660 "multiple types in nested loop.\n");
6661 return false;
6664 /* Invalidate assumptions made by dependence analysis when vectorization
6665 on the unrolled body effectively re-orders stmts. */
6666 if (ncopies > 1
6667 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6668 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6669 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6671 if (dump_enabled_p ())
6672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6673 "cannot perform implicit CSE when unrolling "
6674 "with negative dependence distance\n");
6675 return false;
6678 elem_type = TREE_TYPE (vectype);
6679 mode = TYPE_MODE (vectype);
6681 /* FORNOW. In some cases can vectorize even if data-type not supported
6682 (e.g. - data copies). */
6683 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6685 if (dump_enabled_p ())
6686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6687 "Aligned load, but unsupported type.\n");
6688 return false;
6691 /* Check if the load is a part of an interleaving chain. */
6692 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6694 grouped_load = true;
6695 /* FORNOW */
6696 gcc_assert (!nested_in_vect_loop);
6697 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6699 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6700 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6702 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6703 slp_perm = true;
6705 /* Invalidate assumptions made by dependence analysis when vectorization
6706 on the unrolled body effectively re-orders stmts. */
6707 if (!PURE_SLP_STMT (stmt_info)
6708 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6709 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6710 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6712 if (dump_enabled_p ())
6713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6714 "cannot perform implicit CSE when performing "
6715 "group loads with negative dependence distance\n");
6716 return false;
6719 /* Similarly when the stmt is a load that is both part of a SLP
6720 instance and a loop vectorized stmt via the same-dr mechanism,
6721 we have to give up. */
6722 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6723 && (STMT_SLP_TYPE (stmt_info)
6724 != STMT_SLP_TYPE (vinfo_for_stmt
6725 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6727 if (dump_enabled_p ())
6728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6729 "conflicting SLP types for CSEd load\n");
6730 return false;
6734 vect_memory_access_type memory_access_type;
6735 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6736 &memory_access_type, &gs_info))
6737 return false;
6739 if (!vec_stmt) /* transformation not required. */
6741 if (!slp)
6742 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6743 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6744 /* The SLP costs are calculated during SLP analysis. */
6745 if (!PURE_SLP_STMT (stmt_info))
6746 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6747 NULL, NULL, NULL);
6748 return true;
6751 if (!slp)
6752 gcc_assert (memory_access_type
6753 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6755 if (dump_enabled_p ())
6756 dump_printf_loc (MSG_NOTE, vect_location,
6757 "transform load. ncopies = %d\n", ncopies);
6759 /* Transform. */
6761 ensure_base_align (stmt_info, dr);
6763 if (memory_access_type == VMAT_GATHER_SCATTER)
6765 tree vec_oprnd0 = NULL_TREE, op;
6766 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6767 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6768 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6769 edge pe = loop_preheader_edge (loop);
6770 gimple_seq seq;
6771 basic_block new_bb;
6772 enum { NARROW, NONE, WIDEN } modifier;
6773 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6775 if (nunits == gather_off_nunits)
6776 modifier = NONE;
6777 else if (nunits == gather_off_nunits / 2)
6779 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6780 modifier = WIDEN;
6782 for (i = 0; i < gather_off_nunits; ++i)
6783 sel[i] = i | nunits;
6785 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6787 else if (nunits == gather_off_nunits * 2)
6789 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6790 modifier = NARROW;
6792 for (i = 0; i < nunits; ++i)
6793 sel[i] = i < gather_off_nunits
6794 ? i : i + nunits - gather_off_nunits;
6796 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6797 ncopies *= 2;
6799 else
6800 gcc_unreachable ();
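/* A sketch with made-up vector sizes: for nunits == 8 and
   gather_off_nunits == 4 (NARROW) the selector is
   { 0, 1, 2, 3, 8, 9, 10, 11 }, which concatenates the meaningful low
   halves of two consecutive gather results, hence NCOPIES is doubled and
   pairs of results are combined by the permutation below; for nunits == 4
   and gather_off_nunits == 8 (WIDEN) the selector { 4, 5, 6, 7, 4, 5, 6, 7 }
   replicates the high half of the offset vector for the odd copies.  */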
6802 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6803 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6804 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6805 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6806 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6807 scaletype = TREE_VALUE (arglist);
6808 gcc_checking_assert (types_compatible_p (srctype, rettype));
6810 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6812 ptr = fold_convert (ptrtype, gs_info.base);
6813 if (!is_gimple_min_invariant (ptr))
6815 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6816 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6817 gcc_assert (!new_bb);
6820 /* Currently we support only unconditional gather loads,
6821 so mask should be all ones. */
6822 if (TREE_CODE (masktype) == INTEGER_TYPE)
6823 mask = build_int_cst (masktype, -1);
6824 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6826 mask = build_int_cst (TREE_TYPE (masktype), -1);
6827 mask = build_vector_from_val (masktype, mask);
6828 mask = vect_init_vector (stmt, mask, masktype, NULL);
6830 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6832 REAL_VALUE_TYPE r;
6833 long tmp[6];
6834 for (j = 0; j < 6; ++j)
6835 tmp[j] = -1;
6836 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6837 mask = build_real (TREE_TYPE (masktype), r);
6838 mask = build_vector_from_val (masktype, mask);
6839 mask = vect_init_vector (stmt, mask, masktype, NULL);
6841 else
6842 gcc_unreachable ();
6844 scale = build_int_cst (scaletype, gs_info.scale);
6846 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6847 merge = build_int_cst (TREE_TYPE (rettype), 0);
6848 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6850 REAL_VALUE_TYPE r;
6851 long tmp[6];
6852 for (j = 0; j < 6; ++j)
6853 tmp[j] = 0;
6854 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6855 merge = build_real (TREE_TYPE (rettype), r);
6857 else
6858 gcc_unreachable ();
6859 merge = build_vector_from_val (rettype, merge);
6860 merge = vect_init_vector (stmt, merge, rettype, NULL);
6862 prev_stmt_info = NULL;
6863 for (j = 0; j < ncopies; ++j)
6865 if (modifier == WIDEN && (j & 1))
6866 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6867 perm_mask, stmt, gsi);
6868 else if (j == 0)
6869 op = vec_oprnd0
6870 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6871 else
6872 op = vec_oprnd0
6873 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6875 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6877 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6878 == TYPE_VECTOR_SUBPARTS (idxtype));
6879 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6880 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6881 new_stmt
6882 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6884 op = var;
6887 new_stmt
6888 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6890 if (!useless_type_conversion_p (vectype, rettype))
6892 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6893 == TYPE_VECTOR_SUBPARTS (rettype));
6894 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6895 gimple_call_set_lhs (new_stmt, op);
6896 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6897 var = make_ssa_name (vec_dest);
6898 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6899 new_stmt
6900 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6902 else
6904 var = make_ssa_name (vec_dest, new_stmt);
6905 gimple_call_set_lhs (new_stmt, var);
6908 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6910 if (modifier == NARROW)
6912 if ((j & 1) == 0)
6914 prev_res = var;
6915 continue;
6917 var = permute_vec_elements (prev_res, var,
6918 perm_mask, stmt, gsi);
6919 new_stmt = SSA_NAME_DEF_STMT (var);
6922 if (prev_stmt_info == NULL)
6923 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6924 else
6925 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6926 prev_stmt_info = vinfo_for_stmt (new_stmt);
6928 return true;
6931 if (memory_access_type == VMAT_ELEMENTWISE
6932 || memory_access_type == VMAT_STRIDED_SLP)
6934 gimple_stmt_iterator incr_gsi;
6935 bool insert_after;
6936 gimple *incr;
6937 tree offvar;
6938 tree ivstep;
6939 tree running_off;
6940 vec<constructor_elt, va_gc> *v = NULL;
6941 gimple_seq stmts = NULL;
6942 tree stride_base, stride_step, alias_off;
6944 gcc_assert (!nested_in_vect_loop);
6946 if (slp && grouped_load)
6948 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6949 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6950 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6951 ref_type = get_group_alias_ptr_type (first_stmt);
6953 else
6955 first_stmt = stmt;
6956 first_dr = dr;
6957 group_size = 1;
6958 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6961 stride_base
6962 = fold_build_pointer_plus
6963 (DR_BASE_ADDRESS (first_dr),
6964 size_binop (PLUS_EXPR,
6965 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6966 convert_to_ptrofftype (DR_INIT (first_dr))));
6967 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6969 /* For a load with a loop-invariant stride other than a power of 2
6970 (i.e. not a grouped access) like so:
6972 for (i = 0; i < n; i += stride)
6973 ... = array[i];
6975 we generate a new induction variable and new accesses to
6976 form a new vector (or vectors, depending on ncopies):
6978 for (j = 0; ; j += VF*stride)
6979 tmp1 = array[j];
6980 tmp2 = array[j + stride];
6982 vectemp = {tmp1, tmp2, ...}
6985 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6986 build_int_cst (TREE_TYPE (stride_step), vf));
6988 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6990 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6991 loop, &incr_gsi, insert_after,
6992 &offvar, NULL);
6993 incr = gsi_stmt (incr_gsi);
6994 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6996 stride_step = force_gimple_operand (unshare_expr (stride_step),
6997 &stmts, true, NULL_TREE);
6998 if (stmts)
6999 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7001 prev_stmt_info = NULL;
7002 running_off = offvar;
7003 alias_off = build_int_cst (ref_type, 0);
7004 int nloads = nunits;
7005 int lnel = 1;
7006 tree ltype = TREE_TYPE (vectype);
7007 tree lvectype = vectype;
7008 auto_vec<tree> dr_chain;
7009 if (memory_access_type == VMAT_STRIDED_SLP)
7011 if (group_size < nunits)
7013 /* Avoid emitting a constructor of vector elements by performing
7014 the loads using an integer type of the same size,
7015 constructing a vector of those and then re-interpreting it
7016 as the original vector type. This works around the fact
7017 that the vec_init optab was only designed for scalar
7018 element modes and thus expansion goes through memory.
7019 This avoids a huge runtime penalty due to the general
7020 inability to perform store forwarding from smaller stores
7021 to a larger load. */
7022 unsigned lsize
7023 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7024 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7025 enum machine_mode vmode = mode_for_vector (elmode,
7026 nunits / group_size);
7027 /* If we can't construct such a vector, fall back to
7028 element loads of the original vector type. */
7029 if (VECTOR_MODE_P (vmode)
7030 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7032 nloads = nunits / group_size;
7033 lnel = group_size;
7034 ltype = build_nonstandard_integer_type (lsize, 1);
7035 lvectype = build_vector_type (ltype, nloads);
7038 else
7040 nloads = 1;
7041 lnel = nunits;
7042 ltype = vectype;
7044 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
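/* Illustrative numbers (target support for the integer vector mode is
   assumed, not guaranteed): for a group of two 32-bit elements loaded into
   an 8-element vector, lsize == 64, so each of the nloads == 4 loads reads
   one 64-bit integer covering both group members; the four integers are
   collected into LVECTYPE and the constructor is then VIEW_CONVERTed back
   to the original VECTYPE.  */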
7046 if (slp)
7048 /* For SLP permutation support we need to load the whole group,
7049 not only the number of vector stmts the permutation result
7050 fits in. */
7051 if (slp_perm)
7053 ncopies = (group_size * vf + nunits - 1) / nunits;
7054 dr_chain.create (ncopies);
7056 else
7057 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
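/* E.g. for group_size == 3, vf == 4 and nunits == 4 the permutation case
   needs ncopies == (3 * 4 + 4 - 1) / 4 == 3 vectors to cover the whole
   group, whereas without a load permutation the SLP node's own vector-stmt
   count is used directly.  */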
7059 int group_el = 0;
7060 unsigned HOST_WIDE_INT
7061 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7062 for (j = 0; j < ncopies; j++)
7064 if (nloads > 1)
7065 vec_alloc (v, nloads);
7066 for (i = 0; i < nloads; i++)
7068 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7069 group_el * elsz);
7070 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7071 build2 (MEM_REF, ltype,
7072 running_off, this_off));
7073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7074 if (nloads > 1)
7075 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7076 gimple_assign_lhs (new_stmt));
7078 group_el += lnel;
7079 if (! slp
7080 || group_el == group_size)
7082 tree newoff = copy_ssa_name (running_off);
7083 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7084 running_off, stride_step);
7085 vect_finish_stmt_generation (stmt, incr, gsi);
7087 running_off = newoff;
7088 group_el = 0;
7091 if (nloads > 1)
7093 tree vec_inv = build_constructor (lvectype, v);
7094 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7095 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7096 if (lvectype != vectype)
7098 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7099 VIEW_CONVERT_EXPR,
7100 build1 (VIEW_CONVERT_EXPR,
7101 vectype, new_temp));
7102 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7106 if (slp)
7108 if (slp_perm)
7109 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7110 else
7111 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7113 else
7115 if (j == 0)
7116 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7117 else
7118 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7119 prev_stmt_info = vinfo_for_stmt (new_stmt);
7122 if (slp_perm)
7124 unsigned n_perms;
7125 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7126 slp_node_instance, false, &n_perms);
7128 return true;
7131 if (grouped_load)
7133 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7134 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7135 /* For SLP vectorization we directly vectorize a subchain
7136 without permutation. */
7137 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7138 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7139 /* For BB vectorization always use the first stmt to base
7140 the data ref pointer on. */
7141 if (bb_vinfo)
7142 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7144 /* Check if the chain of loads is already vectorized. */
7145 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7146 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7147 ??? But we can only do so if there is exactly one
7148 as we have no way to get at the rest. Leave the CSE
7149 opportunity alone.
7150 ??? With the group load eventually participating
7151 in multiple different permutations (having multiple
7152 slp nodes which refer to the same group) the CSE
7153 would even generate wrong code. See PR56270. */
7154 && !slp)
7156 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7157 return true;
7159 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7160 group_gap_adj = 0;
7162 /* VEC_NUM is the number of vect stmts to be created for this group. */
7163 if (slp)
7165 grouped_load = false;
7166 /* For SLP permutation support we need to load the whole group,
7167 not only the number of vector stmts the permutation result
7168 fits in. */
7169 if (slp_perm)
7170 vec_num = (group_size * vf + nunits - 1) / nunits;
7171 else
7172 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7173 group_gap_adj = vf * group_size - nunits * vec_num;
7175 else
7176 vec_num = group_size;
7178 ref_type = get_group_alias_ptr_type (first_stmt);
7180 else
7182 first_stmt = stmt;
7183 first_dr = dr;
7184 group_size = vec_num = 1;
7185 group_gap_adj = 0;
7186 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7189 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7190 gcc_assert (alignment_support_scheme);
7191 /* Targets with load-lane instructions must not require explicit
7192 realignment. */
7193 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7194 || alignment_support_scheme == dr_aligned
7195 || alignment_support_scheme == dr_unaligned_supported);
7197 /* In case the vectorization factor (VF) is bigger than the number
7198 of elements that we can fit in a vectype (nunits), we have to generate
7199 more than one vector stmt - i.e - we need to "unroll" the
7200 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7201 from one copy of the vector stmt to the next, in the field
7202 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7203 stages to find the correct vector defs to be used when vectorizing
7204 stmts that use the defs of the current stmt. The example below
7205 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7206 need to create 4 vectorized stmts):
7208 before vectorization:
7209 RELATED_STMT VEC_STMT
7210 S1: x = memref - -
7211 S2: z = x + 1 - -
7213 step 1: vectorize stmt S1:
7214 We first create the vector stmt VS1_0, and, as usual, record a
7215 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7216 Next, we create the vector stmt VS1_1, and record a pointer to
7217 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7218 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7219 stmts and pointers:
7220 RELATED_STMT VEC_STMT
7221 VS1_0: vx0 = memref0 VS1_1 -
7222 VS1_1: vx1 = memref1 VS1_2 -
7223 VS1_2: vx2 = memref2 VS1_3 -
7224 VS1_3: vx3 = memref3 - -
7225 S1: x = load - VS1_0
7226 S2: z = x + 1 - -
7228 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7229 information we recorded in RELATED_STMT field is used to vectorize
7230 stmt S2. */
7232 /* In case of interleaving (non-unit grouped access):
7234 S1: x2 = &base + 2
7235 S2: x0 = &base
7236 S3: x1 = &base + 1
7237 S4: x3 = &base + 3
7239 Vectorized loads are created in the order of memory accesses
7240 starting from the access of the first stmt of the chain:
7242 VS1: vx0 = &base
7243 VS2: vx1 = &base + vec_size*1
7244 VS3: vx3 = &base + vec_size*2
7245 VS4: vx4 = &base + vec_size*3
7247 Then permutation statements are generated:
7249 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7250 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7253 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7254 (the order of the data-refs in the output of vect_permute_load_chain
7255 corresponds to the order of scalar stmts in the interleaving chain - see
7256 the documentation of vect_permute_load_chain()).
7257 The generation of permutation stmts and recording them in
7258 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7260 In case of both multiple types and interleaving, the vector loads and
7261 permutation stmts above are created for every copy. The result vector
7262 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7263 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7265 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7266 on a target that supports unaligned accesses (dr_unaligned_supported)
7267 we generate the following code:
7268 p = initial_addr;
7269 indx = 0;
7270 loop {
7271 p = p + indx * vectype_size;
7272 vec_dest = *(p);
7273 indx = indx + 1;
7276 Otherwise, the data reference is potentially unaligned on a target that
7277 does not support unaligned accesses (dr_explicit_realign_optimized) -
7278 then generate the following code, in which the data in each iteration is
7279 obtained by two vector loads, one from the previous iteration, and one
7280 from the current iteration:
7281 p1 = initial_addr;
7282 msq_init = *(floor(p1))
7283 p2 = initial_addr + VS - 1;
7284 realignment_token = call target_builtin;
7285 indx = 0;
7286 loop {
7287 p2 = p2 + indx * vectype_size
7288 lsq = *(floor(p2))
7289 vec_dest = realign_load (msq, lsq, realignment_token)
7290 indx = indx + 1;
7291 msq = lsq;
7292 } */
7294 /* If the misalignment remains the same throughout the execution of the
7295 loop, we can create the init_addr and permutation mask at the loop
7296 preheader. Otherwise, it needs to be created inside the loop.
7297 This can only occur when vectorizing memory accesses in the inner-loop
7298 nested within an outer-loop that is being vectorized. */
7300 if (nested_in_vect_loop
7301 && (TREE_INT_CST_LOW (DR_STEP (dr))
7302 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7304 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7305 compute_in_loop = true;
7308 if ((alignment_support_scheme == dr_explicit_realign_optimized
7309 || alignment_support_scheme == dr_explicit_realign)
7310 && !compute_in_loop)
7312 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7313 alignment_support_scheme, NULL_TREE,
7314 &at_loop);
7315 if (alignment_support_scheme == dr_explicit_realign_optimized)
7317 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7318 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7319 size_one_node);
7322 else
7323 at_loop = loop;
7325 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7326 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7328 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7329 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7330 else
7331 aggr_type = vectype;
7333 prev_stmt_info = NULL;
7334 for (j = 0; j < ncopies; j++)
7336 /* 1. Create the vector or array pointer update chain. */
7337 if (j == 0)
7339 bool simd_lane_access_p
7340 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7341 if (simd_lane_access_p
7342 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7343 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7344 && integer_zerop (DR_OFFSET (first_dr))
7345 && integer_zerop (DR_INIT (first_dr))
7346 && alias_sets_conflict_p (get_alias_set (aggr_type),
7347 get_alias_set (TREE_TYPE (ref_type)))
7348 && (alignment_support_scheme == dr_aligned
7349 || alignment_support_scheme == dr_unaligned_supported))
7351 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7352 dataref_offset = build_int_cst (ref_type, 0);
7353 inv_p = false;
7355 else if (first_stmt_for_drptr
7356 && first_stmt != first_stmt_for_drptr)
7358 dataref_ptr
7359 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7360 at_loop, offset, &dummy, gsi,
7361 &ptr_incr, simd_lane_access_p,
7362 &inv_p, byte_offset);
7363 /* Adjust the pointer by the difference to first_stmt. */
7364 data_reference_p ptrdr
7365 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7366 tree diff = fold_convert (sizetype,
7367 size_binop (MINUS_EXPR,
7368 DR_INIT (first_dr),
7369 DR_INIT (ptrdr)));
7370 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7371 stmt, diff);
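	  /* Illustrative numbers: if DR_INIT (first_dr) is 8 and
	     DR_INIT (ptrdr) is 0, the pointer created for
	     FIRST_STMT_FOR_DRPTR is bumped by 8 bytes so that it addresses
	     FIRST_STMT's element.  */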
7373 else
7374 dataref_ptr
7375 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7376 offset, &dummy, gsi, &ptr_incr,
7377 simd_lane_access_p, &inv_p,
7378 byte_offset);
7380 else if (dataref_offset)
7381 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7382 TYPE_SIZE_UNIT (aggr_type));
7383 else
7384 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7385 TYPE_SIZE_UNIT (aggr_type));
7387 if (grouped_load || slp_perm)
7388 dr_chain.create (vec_num);
7390 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7392 tree vec_array;
7394 vec_array = create_vector_array (vectype, vec_num);
7396 /* Emit:
7397 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7398 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7399 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7400 gimple_call_set_lhs (new_stmt, vec_array);
7401 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7403 /* Extract each vector into an SSA_NAME. */
7404 for (i = 0; i < vec_num; i++)
7406 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7407 vec_array, i);
7408 dr_chain.quick_push (new_temp);
7411 /* Record the mapping between SSA_NAMEs and statements. */
7412 vect_record_grouped_load_vectors (stmt, dr_chain);
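	  /* Sketch of what is emitted above for a two-vector V4SI group
	     (names invented for illustration):
	       vect_array = .LOAD_LANES (MEM_REF on dataref_ptr);
	       vect1 = vect_array[0];
	       vect2 = vect_array[1];
	     dr_chain then holds { vect1, vect2 }.  */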
7414 else
7416 for (i = 0; i < vec_num; i++)
7418 if (i > 0)
7419 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7420 stmt, NULL_TREE);
7422 /* 2. Create the vector-load in the loop. */
7423 switch (alignment_support_scheme)
7425 case dr_aligned:
7426 case dr_unaligned_supported:
7428 unsigned int align, misalign;
7430 data_ref
7431 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7432 dataref_offset
7433 ? dataref_offset
7434 : build_int_cst (ref_type, 0));
7435 align = TYPE_ALIGN_UNIT (vectype);
7436 if (alignment_support_scheme == dr_aligned)
7438 gcc_assert (aligned_access_p (first_dr));
7439 misalign = 0;
7441 else if (DR_MISALIGNMENT (first_dr) == -1)
7443 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7444 align = TYPE_ALIGN_UNIT (elem_type);
7445 else
7446 align = (get_object_alignment (DR_REF (first_dr))
7447 / BITS_PER_UNIT);
7448 misalign = 0;
7449 TREE_TYPE (data_ref)
7450 = build_aligned_type (TREE_TYPE (data_ref),
7451 align * BITS_PER_UNIT);
7453 else
7455 TREE_TYPE (data_ref)
7456 = build_aligned_type (TREE_TYPE (data_ref),
7457 TYPE_ALIGN (elem_type));
7458 misalign = DR_MISALIGNMENT (first_dr);
7460 if (dataref_offset == NULL_TREE
7461 && TREE_CODE (dataref_ptr) == SSA_NAME)
7462 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7463 align, misalign);
7464 break;
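	    /* E.g. a V4SF load with DR_MISALIGNMENT == 8 takes the last
	       branch above: the MEM_REF type keeps only float alignment,
	       while the pointer's align/misalign info is recorded as 16/8
	       (assuming 16-byte vector alignment; numbers are
	       illustrative).  */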
7466 case dr_explicit_realign:
7468 tree ptr, bump;
7470 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7472 if (compute_in_loop)
7473 msq = vect_setup_realignment (first_stmt, gsi,
7474 &realignment_token,
7475 dr_explicit_realign,
7476 dataref_ptr, NULL);
7478 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7479 ptr = copy_ssa_name (dataref_ptr);
7480 else
7481 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7482 new_stmt = gimple_build_assign
7483 (ptr, BIT_AND_EXPR, dataref_ptr,
7484 build_int_cst
7485 (TREE_TYPE (dataref_ptr),
7486 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7487 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7488 data_ref
7489 = build2 (MEM_REF, vectype, ptr,
7490 build_int_cst (ref_type, 0));
7491 vec_dest = vect_create_destination_var (scalar_dest,
7492 vectype);
7493 new_stmt = gimple_build_assign (vec_dest, data_ref);
7494 new_temp = make_ssa_name (vec_dest, new_stmt);
7495 gimple_assign_set_lhs (new_stmt, new_temp);
7496 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7497 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7499 msq = new_temp;
7501 bump = size_binop (MULT_EXPR, vs,
7502 TYPE_SIZE_UNIT (elem_type));
7503 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7504 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7505 new_stmt = gimple_build_assign
7506 (NULL_TREE, BIT_AND_EXPR, ptr,
7507 build_int_cst
7508 (TREE_TYPE (ptr),
7509 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7510 ptr = copy_ssa_name (ptr, new_stmt);
7511 gimple_assign_set_lhs (new_stmt, ptr);
7512 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7513 data_ref
7514 = build2 (MEM_REF, vectype, ptr,
7515 build_int_cst (ref_type, 0));
7516 break;
7518 case dr_explicit_realign_optimized:
7519 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7520 new_temp = copy_ssa_name (dataref_ptr);
7521 else
7522 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7523 new_stmt = gimple_build_assign
7524 (new_temp, BIT_AND_EXPR, dataref_ptr,
7525 build_int_cst
7526 (TREE_TYPE (dataref_ptr),
7527 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7529 data_ref
7530 = build2 (MEM_REF, vectype, new_temp,
7531 build_int_cst (ref_type, 0));
7532 break;
7533 default:
7534 gcc_unreachable ();
7536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7537 new_stmt = gimple_build_assign (vec_dest, data_ref);
7538 new_temp = make_ssa_name (vec_dest, new_stmt);
7539 gimple_assign_set_lhs (new_stmt, new_temp);
7540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7542 /* 3. Handle explicit realignment if necessary/supported.
7543 Create in loop:
7544 vec_dest = realign_load (msq, lsq, realignment_token) */
7545 if (alignment_support_scheme == dr_explicit_realign_optimized
7546 || alignment_support_scheme == dr_explicit_realign)
7548 lsq = gimple_assign_lhs (new_stmt);
7549 if (!realignment_token)
7550 realignment_token = dataref_ptr;
7551 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7552 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7553 msq, lsq, realignment_token);
7554 new_temp = make_ssa_name (vec_dest, new_stmt);
7555 gimple_assign_set_lhs (new_stmt, new_temp);
7556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7558 if (alignment_support_scheme == dr_explicit_realign_optimized)
7560 gcc_assert (phi);
7561 if (i == vec_num - 1 && j == ncopies - 1)
7562 add_phi_arg (phi, lsq,
7563 loop_latch_edge (containing_loop),
7564 UNKNOWN_LOCATION);
7565 msq = lsq;
7569 /* 4. Handle invariant-load. */
7570 if (inv_p && !bb_vinfo)
7572 gcc_assert (!grouped_load);
7573 /* If we have versioned for aliasing or the loop doesn't
7574 have any data dependencies that would preclude this,
7575 then we are sure this is a loop invariant load and
7576 thus we can insert it on the preheader edge. */
7577 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7578 && !nested_in_vect_loop
7579 && hoist_defs_of_uses (stmt, loop))
7581 if (dump_enabled_p ())
7583 dump_printf_loc (MSG_NOTE, vect_location,
7584 "hoisting out of the vectorized "
7585 "loop: ");
7586 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7588 tree tem = copy_ssa_name (scalar_dest);
7589 gsi_insert_on_edge_immediate
7590 (loop_preheader_edge (loop),
7591 gimple_build_assign (tem,
7592 unshare_expr
7593 (gimple_assign_rhs1 (stmt))));
7594 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7595 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7596 set_vinfo_for_stmt (new_stmt,
7597 new_stmt_vec_info (new_stmt, vinfo));
7599 else
7601 gimple_stmt_iterator gsi2 = *gsi;
7602 gsi_next (&gsi2);
7603 new_temp = vect_init_vector (stmt, scalar_dest,
7604 vectype, &gsi2);
7605 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7609 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7611 tree perm_mask = perm_mask_for_reverse (vectype);
7612 new_temp = permute_vec_elements (new_temp, new_temp,
7613 perm_mask, stmt, gsi);
7614 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7617 /* Collect vector loads and later create their permutation in
7618 vect_transform_grouped_load (). */
7619 if (grouped_load || slp_perm)
7620 dr_chain.quick_push (new_temp);
7622 /* Store vector loads in the corresponding SLP_NODE. */
7623 if (slp && !slp_perm)
7624 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7626 /* Bump the vector pointer to account for a gap or for excess
7627 elements loaded for a permuted SLP load. */
7628 if (group_gap_adj != 0)
7630 bool ovf;
7631 tree bump
7632 = wide_int_to_tree (sizetype,
7633 wi::smul (TYPE_SIZE_UNIT (elem_type),
7634 group_gap_adj, &ovf));
7635 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7636 stmt, bump);
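	  /* Illustrative numbers: with 4-byte elements and
	     group_gap_adj == 2 the bump above is 8 bytes, i.e. the pointer
	     advances past two elements' worth of gap or excess before the
	     next vector is loaded.  */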
7640 if (slp && !slp_perm)
7641 continue;
7643 if (slp_perm)
7645 unsigned n_perms;
7646 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7647 slp_node_instance, false,
7648 &n_perms))
7650 dr_chain.release ();
7651 return false;
7654 else
7656 if (grouped_load)
7658 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7659 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7660 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7662 else
7664 if (j == 0)
7665 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7666 else
7667 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7668 prev_stmt_info = vinfo_for_stmt (new_stmt);
7671 dr_chain.release ();
7674 return true;
7677 /* Function vect_is_simple_cond.
7679 Input:
7680 LOOP - the loop that is being vectorized.
7681 COND - Condition that is checked for simple use.
7683 Output:
7684 *COMP_VECTYPE - the vector type for the comparison.
7685 *DTS - The def types for the arguments of the comparison.
7687 Returns whether COND can be vectorized. Checks whether the
7688 condition operands are supportable using vect_is_simple_use. */
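/* E.g. a condition such as a_5 < b_7 with both operands defined inside the
   region being vectorized is a simple cond; *COMP_VECTYPE is taken from
   whichever operand provided a vector type.  (SSA names are illustrative.)  */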
7690 static bool
7691 vect_is_simple_cond (tree cond, vec_info *vinfo,
7692 tree *comp_vectype, enum vect_def_type *dts)
7694 tree lhs, rhs;
7695 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7697 /* Mask case. */
7698 if (TREE_CODE (cond) == SSA_NAME
7699 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7701 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7702 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7703 &dts[0], comp_vectype)
7704 || !*comp_vectype
7705 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7706 return false;
7707 return true;
7710 if (!COMPARISON_CLASS_P (cond))
7711 return false;
7713 lhs = TREE_OPERAND (cond, 0);
7714 rhs = TREE_OPERAND (cond, 1);
7716 if (TREE_CODE (lhs) == SSA_NAME)
7718 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7719 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7720 return false;
7722 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7723 || TREE_CODE (lhs) == FIXED_CST)
7724 dts[0] = vect_constant_def;
7725 else
7726 return false;
7728 if (TREE_CODE (rhs) == SSA_NAME)
7730 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7731 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7732 return false;
7734 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7735 || TREE_CODE (rhs) == FIXED_CST)
7736 dts[1] = vect_constant_def;
7737 else
7738 return false;
7740 if (vectype1 && vectype2
7741 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7742 return false;
7744 *comp_vectype = vectype1 ? vectype1 : vectype2;
7745 return true;
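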
7748 /* vectorizable_condition.
7750 Check if STMT is a conditional modify expression that can be vectorized.
7751 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7752 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7753 at GSI.
7755 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7756 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7757 the else clause if it is 2).
7759 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7761 bool
7762 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7763 gimple **vec_stmt, tree reduc_def, int reduc_index,
7764 slp_tree slp_node)
7766 tree scalar_dest = NULL_TREE;
7767 tree vec_dest = NULL_TREE;
7768 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7769 tree then_clause, else_clause;
7770 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7771 tree comp_vectype = NULL_TREE;
7772 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7773 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7774 tree vec_compare;
7775 tree new_temp;
7776 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7777 enum vect_def_type dts[4]
7778 = {vect_unknown_def_type, vect_unknown_def_type,
7779 vect_unknown_def_type, vect_unknown_def_type};
7780 int ndts = 4;
7781 int ncopies;
7782 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7783 stmt_vec_info prev_stmt_info = NULL;
7784 int i, j;
7785 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7786 vec<tree> vec_oprnds0 = vNULL;
7787 vec<tree> vec_oprnds1 = vNULL;
7788 vec<tree> vec_oprnds2 = vNULL;
7789 vec<tree> vec_oprnds3 = vNULL;
7790 tree vec_cmp_type;
7791 bool masked = false;
7793 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7794 return false;
7796 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7798 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7799 return false;
7801 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7802 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7803 && reduc_def))
7804 return false;
7806 /* FORNOW: not yet supported. */
7807 if (STMT_VINFO_LIVE_P (stmt_info))
7809 if (dump_enabled_p ())
7810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7811 "value used after loop.\n");
7812 return false;
7816 /* Is this a vectorizable conditional operation? */
7817 if (!is_gimple_assign (stmt))
7818 return false;
7820 code = gimple_assign_rhs_code (stmt);
7822 if (code != COND_EXPR)
7823 return false;
7825 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7826 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7827 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7829 if (slp_node)
7830 ncopies = 1;
7831 else
7832 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7834 gcc_assert (ncopies >= 1);
7835 if (reduc_index && ncopies > 1)
7836 return false; /* FORNOW */
7838 cond_expr = gimple_assign_rhs1 (stmt);
7839 then_clause = gimple_assign_rhs2 (stmt);
7840 else_clause = gimple_assign_rhs3 (stmt);
7842 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7843 &comp_vectype, &dts[0])
7844 || !comp_vectype)
7845 return false;
7847 gimple *def_stmt;
7848 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7849 &vectype1))
7850 return false;
7851 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7852 &vectype2))
7853 return false;
7855 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7856 return false;
7858 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7859 return false;
7861 masked = !COMPARISON_CLASS_P (cond_expr);
7862 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7864 if (vec_cmp_type == NULL_TREE)
7865 return false;
7867 cond_code = TREE_CODE (cond_expr);
7868 if (!masked)
7870 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7871 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7874 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7876 /* Boolean values may have another representation in vectors
7877 and therefore we prefer bit operations over comparison for
7878 them (which also works for scalar masks). We store opcodes
7879 to use in bitop1 and bitop2. Statement is vectorized as
7880 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7881 depending on bitop1 and bitop2 arity. */
7882 switch (cond_code)
7884 case GT_EXPR:
7885 bitop1 = BIT_NOT_EXPR;
7886 bitop2 = BIT_AND_EXPR;
7887 break;
7888 case GE_EXPR:
7889 bitop1 = BIT_NOT_EXPR;
7890 bitop2 = BIT_IOR_EXPR;
7891 break;
7892 case LT_EXPR:
7893 bitop1 = BIT_NOT_EXPR;
7894 bitop2 = BIT_AND_EXPR;
7895 std::swap (cond_expr0, cond_expr1);
7896 break;
7897 case LE_EXPR:
7898 bitop1 = BIT_NOT_EXPR;
7899 bitop2 = BIT_IOR_EXPR;
7900 std::swap (cond_expr0, cond_expr1);
7901 break;
7902 case NE_EXPR:
7903 bitop1 = BIT_XOR_EXPR;
7904 break;
7905 case EQ_EXPR:
7906 bitop1 = BIT_XOR_EXPR;
7907 bitop2 = BIT_NOT_EXPR;
7908 break;
7909 default:
7910 return false;
7912 cond_code = SSA_NAME;
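	  /* Illustrative mapping for boolean operands: a > b is computed as
	     a & ~b (bitop1 = BIT_NOT_EXPR on the second operand, bitop2 =
	     BIT_AND_EXPR), while a == b is computed as a ^ b with the THEN
	     and ELSE clauses swapped below instead of materializing the
	     BIT_NOT_EXPR.  */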
7915 if (!vec_stmt)
7917 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7918 if (bitop1 != NOP_EXPR)
7920 machine_mode mode = TYPE_MODE (comp_vectype);
7921 optab optab;
7923 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7924 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7925 return false;
7927 if (bitop2 != NOP_EXPR)
7929 optab = optab_for_tree_code (bitop2, comp_vectype,
7930 optab_default);
7931 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7932 return false;
7935 if (expand_vec_cond_expr_p (vectype, comp_vectype,
7936 cond_code))
7938 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
7939 return true;
7941 return false;
7944 /* Transform. */
7946 if (!slp_node)
7948 vec_oprnds0.create (1);
7949 vec_oprnds1.create (1);
7950 vec_oprnds2.create (1);
7951 vec_oprnds3.create (1);
7954 /* Handle def. */
7955 scalar_dest = gimple_assign_lhs (stmt);
7956 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7958 /* Handle cond expr. */
7959 for (j = 0; j < ncopies; j++)
7961 gassign *new_stmt = NULL;
7962 if (j == 0)
7964 if (slp_node)
7966 auto_vec<tree, 4> ops;
7967 auto_vec<vec<tree>, 4> vec_defs;
7969 if (masked)
7970 ops.safe_push (cond_expr);
7971 else
7973 ops.safe_push (cond_expr0);
7974 ops.safe_push (cond_expr1);
7976 ops.safe_push (then_clause);
7977 ops.safe_push (else_clause);
7978 vect_get_slp_defs (ops, slp_node, &vec_defs);
7979 vec_oprnds3 = vec_defs.pop ();
7980 vec_oprnds2 = vec_defs.pop ();
7981 if (!masked)
7982 vec_oprnds1 = vec_defs.pop ();
7983 vec_oprnds0 = vec_defs.pop ();
7985 else
7987 gimple *gtemp;
7988 if (masked)
7990 vec_cond_lhs
7991 = vect_get_vec_def_for_operand (cond_expr, stmt,
7992 comp_vectype);
7993 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7994 &gtemp, &dts[0]);
7996 else
7998 vec_cond_lhs
7999 = vect_get_vec_def_for_operand (cond_expr0,
8000 stmt, comp_vectype);
8001 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8003 vec_cond_rhs
8004 = vect_get_vec_def_for_operand (cond_expr1,
8005 stmt, comp_vectype);
8006 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8008 if (reduc_index == 1)
8009 vec_then_clause = reduc_def;
8010 else
8012 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8013 stmt);
8014 vect_is_simple_use (then_clause, loop_vinfo,
8015 &gtemp, &dts[2]);
8017 if (reduc_index == 2)
8018 vec_else_clause = reduc_def;
8019 else
8021 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8022 stmt);
8023 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8027 else
8029 vec_cond_lhs
8030 = vect_get_vec_def_for_stmt_copy (dts[0],
8031 vec_oprnds0.pop ());
8032 if (!masked)
8033 vec_cond_rhs
8034 = vect_get_vec_def_for_stmt_copy (dts[1],
8035 vec_oprnds1.pop ());
8037 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8038 vec_oprnds2.pop ());
8039 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8040 vec_oprnds3.pop ());
8043 if (!slp_node)
8045 vec_oprnds0.quick_push (vec_cond_lhs);
8046 if (!masked)
8047 vec_oprnds1.quick_push (vec_cond_rhs);
8048 vec_oprnds2.quick_push (vec_then_clause);
8049 vec_oprnds3.quick_push (vec_else_clause);
8052 /* Arguments are ready. Create the new vector stmt. */
8053 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8055 vec_then_clause = vec_oprnds2[i];
8056 vec_else_clause = vec_oprnds3[i];
8058 if (masked)
8059 vec_compare = vec_cond_lhs;
8060 else
8062 vec_cond_rhs = vec_oprnds1[i];
8063 if (bitop1 == NOP_EXPR)
8064 vec_compare = build2 (cond_code, vec_cmp_type,
8065 vec_cond_lhs, vec_cond_rhs);
8066 else
8068 new_temp = make_ssa_name (vec_cmp_type);
8069 if (bitop1 == BIT_NOT_EXPR)
8070 new_stmt = gimple_build_assign (new_temp, bitop1,
8071 vec_cond_rhs);
8072 else
8073 new_stmt
8074 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8075 vec_cond_rhs);
8076 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8077 if (bitop2 == NOP_EXPR)
8078 vec_compare = new_temp;
8079 else if (bitop2 == BIT_NOT_EXPR)
8081 /* Instead of doing ~x ? y : z do x ? z : y. */
8082 vec_compare = new_temp;
8083 std::swap (vec_then_clause, vec_else_clause);
8085 else
8087 vec_compare = make_ssa_name (vec_cmp_type);
8088 new_stmt
8089 = gimple_build_assign (vec_compare, bitop2,
8090 vec_cond_lhs, new_temp);
8091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8095 new_temp = make_ssa_name (vec_dest);
8096 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8097 vec_compare, vec_then_clause,
8098 vec_else_clause);
8099 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8100 if (slp_node)
8101 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8104 if (slp_node)
8105 continue;
8107 if (j == 0)
8108 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8109 else
8110 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8112 prev_stmt_info = vinfo_for_stmt (new_stmt);
8115 vec_oprnds0.release ();
8116 vec_oprnds1.release ();
8117 vec_oprnds2.release ();
8118 vec_oprnds3.release ();
8120 return true;
8123 /* vectorizable_comparison.
8125 Check if STMT is a comparison expression that can be vectorized.
8126 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8127 comparison, put it in VEC_STMT, and insert it at GSI.
8129 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8131 static bool
8132 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8133 gimple **vec_stmt, tree reduc_def,
8134 slp_tree slp_node)
8136 tree lhs, rhs1, rhs2;
8137 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8138 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8139 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8140 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8141 tree new_temp;
8142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8143 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8144 int ndts = 2;
8145 unsigned nunits;
8146 int ncopies;
8147 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8148 stmt_vec_info prev_stmt_info = NULL;
8149 int i, j;
8150 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8151 vec<tree> vec_oprnds0 = vNULL;
8152 vec<tree> vec_oprnds1 = vNULL;
8153 gimple *def_stmt;
8154 tree mask_type;
8155 tree mask;
8157 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8158 return false;
8160 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8161 return false;
8163 mask_type = vectype;
8164 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8166 if (slp_node)
8167 ncopies = 1;
8168 else
8169 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8171 gcc_assert (ncopies >= 1);
8172 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8173 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8174 && reduc_def))
8175 return false;
8177 if (STMT_VINFO_LIVE_P (stmt_info))
8179 if (dump_enabled_p ())
8180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8181 "value used after loop.\n");
8182 return false;
8185 if (!is_gimple_assign (stmt))
8186 return false;
8188 code = gimple_assign_rhs_code (stmt);
8190 if (TREE_CODE_CLASS (code) != tcc_comparison)
8191 return false;
8193 rhs1 = gimple_assign_rhs1 (stmt);
8194 rhs2 = gimple_assign_rhs2 (stmt);
8196 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8197 &dts[0], &vectype1))
8198 return false;
8200 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8201 &dts[1], &vectype2))
8202 return false;
8204 if (vectype1 && vectype2
8205 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8206 return false;
8208 vectype = vectype1 ? vectype1 : vectype2;
8210 /* Invariant comparison. */
8211 if (!vectype)
8213 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8214 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8215 return false;
8217 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8218 return false;
8220 /* Can't compare mask and non-mask types. */
8221 if (vectype1 && vectype2
8222 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8223 return false;
8225 /* Boolean values may have another representation in vectors
8226 and therefore we prefer bit operations over comparison for
8227 them (which also works for scalar masks). We store opcodes
8228 to use in bitop1 and bitop2. Statement is vectorized as
8229 BITOP2 (rhs1 BITOP1 rhs2) or
8230 rhs1 BITOP2 (BITOP1 rhs2)
8231 depending on bitop1 and bitop2 arity. */
8232 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8234 if (code == GT_EXPR)
8236 bitop1 = BIT_NOT_EXPR;
8237 bitop2 = BIT_AND_EXPR;
8239 else if (code == GE_EXPR)
8241 bitop1 = BIT_NOT_EXPR;
8242 bitop2 = BIT_IOR_EXPR;
8244 else if (code == LT_EXPR)
8246 bitop1 = BIT_NOT_EXPR;
8247 bitop2 = BIT_AND_EXPR;
8248 std::swap (rhs1, rhs2);
8249 std::swap (dts[0], dts[1]);
8251 else if (code == LE_EXPR)
8253 bitop1 = BIT_NOT_EXPR;
8254 bitop2 = BIT_IOR_EXPR;
8255 std::swap (rhs1, rhs2);
8256 std::swap (dts[0], dts[1]);
8258 else
8260 bitop1 = BIT_XOR_EXPR;
8261 if (code == EQ_EXPR)
8262 bitop2 = BIT_NOT_EXPR;
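      /* Illustrative examples: a <= b is computed as b | ~a after the
	 operand swap above, and a == b as ~(a ^ b); unlike
	 vectorizable_condition there are no THEN/ELSE clauses to swap, so
	 the BIT_NOT_EXPR is emitted explicitly.  */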
8266 if (!vec_stmt)
8268 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8269 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8270 dts, ndts, NULL, NULL);
8271 if (bitop1 == NOP_EXPR)
8272 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8273 else
8275 machine_mode mode = TYPE_MODE (vectype);
8276 optab optab;
8278 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8279 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8280 return false;
8282 if (bitop2 != NOP_EXPR)
8284 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8285 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8286 return false;
8288 return true;
8292 /* Transform. */
8293 if (!slp_node)
8295 vec_oprnds0.create (1);
8296 vec_oprnds1.create (1);
8299 /* Handle def. */
8300 lhs = gimple_assign_lhs (stmt);
8301 mask = vect_create_destination_var (lhs, mask_type);
8303 /* Handle cmp expr. */
8304 for (j = 0; j < ncopies; j++)
8306 gassign *new_stmt = NULL;
8307 if (j == 0)
8309 if (slp_node)
8311 auto_vec<tree, 2> ops;
8312 auto_vec<vec<tree>, 2> vec_defs;
8314 ops.safe_push (rhs1);
8315 ops.safe_push (rhs2);
8316 vect_get_slp_defs (ops, slp_node, &vec_defs);
8317 vec_oprnds1 = vec_defs.pop ();
8318 vec_oprnds0 = vec_defs.pop ();
8320 else
8322 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8323 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8326 else
8328 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8329 vec_oprnds0.pop ());
8330 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8331 vec_oprnds1.pop ());
8334 if (!slp_node)
8336 vec_oprnds0.quick_push (vec_rhs1);
8337 vec_oprnds1.quick_push (vec_rhs2);
8340 /* Arguments are ready. Create the new vector stmt. */
8341 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8343 vec_rhs2 = vec_oprnds1[i];
8345 new_temp = make_ssa_name (mask);
8346 if (bitop1 == NOP_EXPR)
8348 new_stmt = gimple_build_assign (new_temp, code,
8349 vec_rhs1, vec_rhs2);
8350 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8352 else
8354 if (bitop1 == BIT_NOT_EXPR)
8355 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8356 else
8357 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8358 vec_rhs2);
8359 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8360 if (bitop2 != NOP_EXPR)
8362 tree res = make_ssa_name (mask);
8363 if (bitop2 == BIT_NOT_EXPR)
8364 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8365 else
8366 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8367 new_temp);
8368 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8371 if (slp_node)
8372 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8375 if (slp_node)
8376 continue;
8378 if (j == 0)
8379 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8380 else
8381 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8383 prev_stmt_info = vinfo_for_stmt (new_stmt);
8386 vec_oprnds0.release ();
8387 vec_oprnds1.release ();
8389 return true;
8392 /* Make sure the statement is vectorizable. */
8394 bool
8395 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8397 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8398 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8399 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8400 bool ok;
8401 tree scalar_type, vectype;
8402 gimple *pattern_stmt;
8403 gimple_seq pattern_def_seq;
8405 if (dump_enabled_p ())
8407 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8408 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8411 if (gimple_has_volatile_ops (stmt))
8413 if (dump_enabled_p ())
8414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8415 "not vectorized: stmt has volatile operands\n");
8417 return false;
8420 /* Skip stmts that do not need to be vectorized. In loops this is expected
8421 to include:
8422 - the COND_EXPR which is the loop exit condition
8423 - any LABEL_EXPRs in the loop
8424 - computations that are used only for array indexing or loop control.
8425 In basic blocks we only analyze statements that are a part of some SLP
8426 instance, therefore, all the statements are relevant.
8428 A pattern statement needs to be analyzed instead of the original statement
8429 if the original statement is not relevant. Otherwise, we analyze both
8430 statements. In basic blocks we are called from some SLP instance
8431 traversal; we don't analyze pattern stmts separately, as the pattern stmts
8432 will already be part of the SLP instance. */
8434 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8435 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8436 && !STMT_VINFO_LIVE_P (stmt_info))
8438 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8439 && pattern_stmt
8440 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8441 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8443 /* Analyze PATTERN_STMT instead of the original stmt. */
8444 stmt = pattern_stmt;
8445 stmt_info = vinfo_for_stmt (pattern_stmt);
8446 if (dump_enabled_p ())
8448 dump_printf_loc (MSG_NOTE, vect_location,
8449 "==> examining pattern statement: ");
8450 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8453 else
8455 if (dump_enabled_p ())
8456 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8458 return true;
8461 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8462 && node == NULL
8463 && pattern_stmt
8464 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8465 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8467 /* Analyze PATTERN_STMT too. */
8468 if (dump_enabled_p ())
8470 dump_printf_loc (MSG_NOTE, vect_location,
8471 "==> examining pattern statement: ");
8472 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8475 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8476 return false;
8479 if (is_pattern_stmt_p (stmt_info)
8480 && node == NULL
8481 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8483 gimple_stmt_iterator si;
8485 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8487 gimple *pattern_def_stmt = gsi_stmt (si);
8488 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8489 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8491 /* Analyze def stmt of STMT if it's a pattern stmt. */
8492 if (dump_enabled_p ())
8494 dump_printf_loc (MSG_NOTE, vect_location,
8495 "==> examining pattern def statement: ");
8496 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8499 if (!vect_analyze_stmt (pattern_def_stmt,
8500 need_to_vectorize, node))
8501 return false;
8506 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8508 case vect_internal_def:
8509 break;
8511 case vect_reduction_def:
8512 case vect_nested_cycle:
8513 gcc_assert (!bb_vinfo
8514 && (relevance == vect_used_in_outer
8515 || relevance == vect_used_in_outer_by_reduction
8516 || relevance == vect_used_by_reduction
8517 || relevance == vect_unused_in_scope
8518 || relevance == vect_used_only_live));
8519 break;
8521 case vect_induction_def:
8522 gcc_assert (!bb_vinfo);
8523 break;
8525 case vect_constant_def:
8526 case vect_external_def:
8527 case vect_unknown_def_type:
8528 default:
8529 gcc_unreachable ();
8532 if (bb_vinfo)
8534 gcc_assert (PURE_SLP_STMT (stmt_info));
8536 /* Memory accesses already got their vector type assigned
8537 in vect_analyze_data_refs. */
8538 if (! STMT_VINFO_DATA_REF (stmt_info))
8540 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8541 if (dump_enabled_p ())
8543 dump_printf_loc (MSG_NOTE, vect_location,
8544 "get vectype for scalar type: ");
8545 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8546 dump_printf (MSG_NOTE, "\n");
8549 vectype = get_vectype_for_scalar_type (scalar_type);
8550 if (!vectype)
8552 if (dump_enabled_p ())
8554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8555 "not SLPed: unsupported data-type ");
8556 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8557 scalar_type);
8558 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8560 return false;
8563 if (dump_enabled_p ())
8565 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8566 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8567 dump_printf (MSG_NOTE, "\n");
8570 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8574 if (STMT_VINFO_RELEVANT_P (stmt_info))
8576 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8577 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8578 || (is_gimple_call (stmt)
8579 && gimple_call_lhs (stmt) == NULL_TREE));
8580 *need_to_vectorize = true;
8583 if (PURE_SLP_STMT (stmt_info) && !node)
8585 dump_printf_loc (MSG_NOTE, vect_location,
8586 "handled only by SLP analysis\n");
8587 return true;
8590 ok = true;
8591 if (!bb_vinfo
8592 && (STMT_VINFO_RELEVANT_P (stmt_info)
8593 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8594 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8595 || vectorizable_conversion (stmt, NULL, NULL, node)
8596 || vectorizable_shift (stmt, NULL, NULL, node)
8597 || vectorizable_operation (stmt, NULL, NULL, node)
8598 || vectorizable_assignment (stmt, NULL, NULL, node)
8599 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8600 || vectorizable_call (stmt, NULL, NULL, node)
8601 || vectorizable_store (stmt, NULL, NULL, node)
8602 || vectorizable_reduction (stmt, NULL, NULL, node)
8603 || vectorizable_induction (stmt, NULL, NULL, node)
8604 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8605 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8606 else
8608 if (bb_vinfo)
8609 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8610 || vectorizable_conversion (stmt, NULL, NULL, node)
8611 || vectorizable_shift (stmt, NULL, NULL, node)
8612 || vectorizable_operation (stmt, NULL, NULL, node)
8613 || vectorizable_assignment (stmt, NULL, NULL, node)
8614 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8615 || vectorizable_call (stmt, NULL, NULL, node)
8616 || vectorizable_store (stmt, NULL, NULL, node)
8617 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8618 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8621 if (!ok)
8623 if (dump_enabled_p ())
8625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8626 "not vectorized: relevant stmt not ");
8627 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8628 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8631 return false;
8634 if (bb_vinfo)
8635 return true;
8637 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
8638 need extra handling, except for vectorizable reductions. */
8639 if (STMT_VINFO_LIVE_P (stmt_info)
8640 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8641 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8643 if (!ok)
8645 if (dump_enabled_p ())
8647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8648 "not vectorized: live stmt not ");
8649 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8650 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8653 return false;
8656 return true;
8660 /* Function vect_transform_stmt.
8662 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8664 bool
8665 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8666 bool *grouped_store, slp_tree slp_node,
8667 slp_instance slp_node_instance)
8669 bool is_store = false;
8670 gimple *vec_stmt = NULL;
8671 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8672 bool done;
8674 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8675 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8677 switch (STMT_VINFO_TYPE (stmt_info))
8679 case type_demotion_vec_info_type:
8680 case type_promotion_vec_info_type:
8681 case type_conversion_vec_info_type:
8682 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8683 gcc_assert (done);
8684 break;
8686 case induc_vec_info_type:
8687 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8688 gcc_assert (done);
8689 break;
8691 case shift_vec_info_type:
8692 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8693 gcc_assert (done);
8694 break;
8696 case op_vec_info_type:
8697 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8698 gcc_assert (done);
8699 break;
8701 case assignment_vec_info_type:
8702 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8703 gcc_assert (done);
8704 break;
8706 case load_vec_info_type:
8707 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8708 slp_node_instance);
8709 gcc_assert (done);
8710 break;
8712 case store_vec_info_type:
8713 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8714 gcc_assert (done);
8715 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8717 /* In case of interleaving, the whole chain is vectorized when the
8718 last store in the chain is reached. Store stmts before the last
8719 one are skipped, and their vec_stmt_info shouldn't be freed
8720 in the meantime. */
8721 *grouped_store = true;
8722 if (STMT_VINFO_VEC_STMT (stmt_info))
8723 is_store = true;
8725 else
8726 is_store = true;
8727 break;
8729 case condition_vec_info_type:
8730 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8731 gcc_assert (done);
8732 break;
8734 case comparison_vec_info_type:
8735 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8736 gcc_assert (done);
8737 break;
8739 case call_vec_info_type:
8740 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8741 stmt = gsi_stmt (*gsi);
8742 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8743 is_store = true;
8744 break;
8746 case call_simd_clone_vec_info_type:
8747 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8748 stmt = gsi_stmt (*gsi);
8749 break;
8751 case reduc_vec_info_type:
8752 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8753 gcc_assert (done);
8754 break;
8756 default:
8757 if (!STMT_VINFO_LIVE_P (stmt_info))
8759 if (dump_enabled_p ())
8760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8761 "stmt not supported.\n");
8762 gcc_unreachable ();
8766 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8767 This would break hybrid SLP vectorization. */
8768 if (slp_node)
8769 gcc_assert (!vec_stmt
8770 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8772 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8773 is being vectorized, but outside the immediately enclosing loop. */
8774 if (vec_stmt
8775 && STMT_VINFO_LOOP_VINFO (stmt_info)
8776 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8777 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8778 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8779 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8780 || STMT_VINFO_RELEVANT (stmt_info) ==
8781 vect_used_in_outer_by_reduction))
8783 struct loop *innerloop = LOOP_VINFO_LOOP (
8784 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8785 imm_use_iterator imm_iter;
8786 use_operand_p use_p;
8787 tree scalar_dest;
8788 gimple *exit_phi;
8790 if (dump_enabled_p ())
8791 dump_printf_loc (MSG_NOTE, vect_location,
8792 "Record the vdef for outer-loop vectorization.\n");
8794 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8795 (to be used when vectorizing outer-loop stmts that use the DEF of
8796 STMT). */
8797 if (gimple_code (stmt) == GIMPLE_PHI)
8798 scalar_dest = PHI_RESULT (stmt);
8799 else
8800 scalar_dest = gimple_assign_lhs (stmt);
8802 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8804 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8806 exit_phi = USE_STMT (use_p);
8807 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8812 /* Handle stmts whose DEF is used outside the loop-nest that is
8813 being vectorized. */
8814 if (slp_node)
8816 gimple *slp_stmt;
8817 int i;
8818 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8820 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8821 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8822 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8824 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8825 &vec_stmt);
8826 gcc_assert (done);
8830 else if (STMT_VINFO_LIVE_P (stmt_info)
8831 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8833 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8834 gcc_assert (done);
8837 if (vec_stmt)
8838 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8840 return is_store;
8844 /* Remove a group of stores (for SLP or interleaving), free their
8845 stmt_vec_info. */
8847 void
8848 vect_remove_stores (gimple *first_stmt)
8850 gimple *next = first_stmt;
8851 gimple *tmp;
8852 gimple_stmt_iterator next_si;
8854 while (next)
8856 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8858 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8859 if (is_pattern_stmt_p (stmt_info))
8860 next = STMT_VINFO_RELATED_STMT (stmt_info);
8861 /* Free the attached stmt_vec_info and remove the stmt. */
8862 next_si = gsi_for_stmt (next);
8863 unlink_stmt_vdef (next);
8864 gsi_remove (&next_si, true);
8865 release_defs (next);
8866 free_stmt_vec_info (next);
8867 next = tmp;
8872 /* Function new_stmt_vec_info.
8874 Create and initialize a new stmt_vec_info struct for STMT. */
8876 stmt_vec_info
8877 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8879 stmt_vec_info res;
8880 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8882 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8883 STMT_VINFO_STMT (res) = stmt;
8884 res->vinfo = vinfo;
8885 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8886 STMT_VINFO_LIVE_P (res) = false;
8887 STMT_VINFO_VECTYPE (res) = NULL;
8888 STMT_VINFO_VEC_STMT (res) = NULL;
8889 STMT_VINFO_VECTORIZABLE (res) = true;
8890 STMT_VINFO_IN_PATTERN_P (res) = false;
8891 STMT_VINFO_RELATED_STMT (res) = NULL;
8892 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8893 STMT_VINFO_DATA_REF (res) = NULL;
8894 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8895 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8897 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8898 STMT_VINFO_DR_OFFSET (res) = NULL;
8899 STMT_VINFO_DR_INIT (res) = NULL;
8900 STMT_VINFO_DR_STEP (res) = NULL;
8901 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8903 if (gimple_code (stmt) == GIMPLE_PHI
8904 && is_loop_header_bb_p (gimple_bb (stmt)))
8905 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8906 else
8907 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8909 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8910 STMT_SLP_TYPE (res) = loop_vect;
8911 STMT_VINFO_NUM_SLP_USES (res) = 0;
8913 GROUP_FIRST_ELEMENT (res) = NULL;
8914 GROUP_NEXT_ELEMENT (res) = NULL;
8915 GROUP_SIZE (res) = 0;
8916 GROUP_STORE_COUNT (res) = 0;
8917 GROUP_GAP (res) = 0;
8918 GROUP_SAME_DR_STMT (res) = NULL;
8920 return res;
8924 /* Create a vector for stmt_vec_info. */
8926 void
8927 init_stmt_vec_info_vec (void)
8929 gcc_assert (!stmt_vec_info_vec.exists ());
8930 stmt_vec_info_vec.create (50);
8934 /* Free the vector of stmt_vec_info. */
8936 void
8937 free_stmt_vec_info_vec (void)
8939 unsigned int i;
8940 stmt_vec_info info;
8941 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8942 if (info != NULL)
8943 free_stmt_vec_info (STMT_VINFO_STMT (info));
8944 gcc_assert (stmt_vec_info_vec.exists ());
8945 stmt_vec_info_vec.release ();
8949 /* Free stmt vectorization related info. */
8951 void
8952 free_stmt_vec_info (gimple *stmt)
8954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8956 if (!stmt_info)
8957 return;
8959 /* Check if this statement has a related "pattern stmt"
8960 (introduced by the vectorizer during the pattern recognition
8961 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8962 too. */
8963 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8965 stmt_vec_info patt_info
8966 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8967 if (patt_info)
8969 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8970 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8971 gimple_set_bb (patt_stmt, NULL);
8972 tree lhs = gimple_get_lhs (patt_stmt);
8973 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8974 release_ssa_name (lhs);
8975 if (seq)
8977 gimple_stmt_iterator si;
8978 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8980 gimple *seq_stmt = gsi_stmt (si);
8981 gimple_set_bb (seq_stmt, NULL);
8982 lhs = gimple_get_lhs (seq_stmt);
8983 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8984 release_ssa_name (lhs);
8985 free_stmt_vec_info (seq_stmt);
8988 free_stmt_vec_info (patt_stmt);
8992 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8993 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8994 set_vinfo_for_stmt (stmt, NULL);
8995 free (stmt_info);
8999 /* Function get_vectype_for_scalar_type_and_size.
9001 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9002 by the target. */
9004 static tree
9005 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9007 tree orig_scalar_type = scalar_type;
9008 machine_mode inner_mode = TYPE_MODE (scalar_type);
9009 machine_mode simd_mode;
9010 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9011 int nunits;
9012 tree vectype;
9014 if (nbytes == 0)
9015 return NULL_TREE;
9017 if (GET_MODE_CLASS (inner_mode) != MODE_INT
9018 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
9019 return NULL_TREE;
9021 /* For vector types of elements whose mode precision doesn't
9022 match their type's precision we use an element type of mode
9023 precision. The vectorization routines will have to make sure
9024 they support the proper result truncation/extension.
9025 We also make sure to build vector types with INTEGER_TYPE
9026 component type only. */
9027 if (INTEGRAL_TYPE_P (scalar_type)
9028 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9029 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9030 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9031 TYPE_UNSIGNED (scalar_type));
9033 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9034 When the component mode passes the above test simply use a type
9035 corresponding to that mode. The theory is that any use that
9036 would cause problems with this will disable vectorization anyway. */
9037 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9038 && !INTEGRAL_TYPE_P (scalar_type))
9039 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9041 /* We can't build a vector type of elements with alignment bigger than
9042 their size. */
9043 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9044 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9045 TYPE_UNSIGNED (scalar_type));
9047 /* If we fell back to using the mode, fail if there was
9048 no scalar type for it. */
9049 if (scalar_type == NULL_TREE)
9050 return NULL_TREE;
9052 /* If no size was supplied use the mode the target prefers. Otherwise
9053 lookup a vector mode of the specified size. */
9054 if (size == 0)
9055 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9056 else
9057 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9058 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9059 if (nunits <= 1)
9060 return NULL_TREE;
9062 vectype = build_vector_type (scalar_type, nunits);
9064 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9065 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9066 return NULL_TREE;
9068 /* Re-attach the address-space qualifier if we canonicalized the scalar
9069 type. */
9070 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9071 return build_qualified_type
9072 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9074 return vectype;
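/* For instance, on a target whose preferred SImode SIMD mode is V4SImode,
   calling the function above for a 32-bit int type with SIZE == 0 yields a
   4-element vector of int, while SIZE == 32 would instead look for an
   8-element 32-byte vector mode.  (Illustrative; entirely target
   dependent.)  */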
9077 unsigned int current_vector_size;
9079 /* Function get_vectype_for_scalar_type.
9081 Returns the vector type corresponding to SCALAR_TYPE as supported
9082 by the target. */
9084 tree
9085 get_vectype_for_scalar_type (tree scalar_type)
9087 tree vectype;
9088 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9089 current_vector_size);
9090 if (vectype
9091 && current_vector_size == 0)
9092 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9093 return vectype;
9096 /* Function get_mask_type_for_scalar_type.
9098 Returns the mask type corresponding to a result of comparison
9099 of vectors of specified SCALAR_TYPE as supported by target. */
9101 tree
9102 get_mask_type_for_scalar_type (tree scalar_type)
9104 tree vectype = get_vectype_for_scalar_type (scalar_type);
9106 if (!vectype)
9107 return NULL;
9109 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9110 current_vector_size);
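/* E.g. with current_vector_size == 16 and a 32-bit int SCALAR_TYPE this
   returns a 4-element boolean vector type: a scalar mask type on targets
   with AVX-512-style masks, otherwise a vector of 0/-1 integer elements.
   (Illustrative; the representation is chosen by the target.)  */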
9113 /* Function get_same_sized_vectype
9115 Returns a vector type corresponding to SCALAR_TYPE of size
9116 VECTOR_TYPE if supported by the target. */
9118 tree
9119 get_same_sized_vectype (tree scalar_type, tree vector_type)
9121 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9122 return build_same_sized_truth_vector_type (vector_type);
9124 return get_vectype_for_scalar_type_and_size
9125 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9128 /* Function vect_is_simple_use.
9130 Input:
9131 VINFO - the vect info of the loop or basic block that is being vectorized.
9132 OPERAND - operand in the loop or bb.
9133 Output:
9134 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9135 DT - the type of definition
9137 Returns whether a stmt with OPERAND can be vectorized.
9138 For loops, supportable operands are constants, loop invariants, and operands
9139 that are defined by the current iteration of the loop. Unsupportable
9140 operands are those that are defined by a previous iteration of the loop (as
9141 is the case in reduction/induction computations).
9142 For basic blocks, supportable operands are constants and bb invariants.
9143 For now, operands defined outside the basic block are not supported. */
9145 bool
9146 vect_is_simple_use (tree operand, vec_info *vinfo,
9147 gimple **def_stmt, enum vect_def_type *dt)
9149 *def_stmt = NULL;
9150 *dt = vect_unknown_def_type;
9152 if (dump_enabled_p ())
9154 dump_printf_loc (MSG_NOTE, vect_location,
9155 "vect_is_simple_use: operand ");
9156 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9157 dump_printf (MSG_NOTE, "\n");
9160 if (CONSTANT_CLASS_P (operand))
9162 *dt = vect_constant_def;
9163 return true;
9166 if (is_gimple_min_invariant (operand))
9168 *dt = vect_external_def;
9169 return true;
9172 if (TREE_CODE (operand) != SSA_NAME)
9174 if (dump_enabled_p ())
9175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9176 "not ssa-name.\n");
9177 return false;
9180 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9182 *dt = vect_external_def;
9183 return true;
9186 *def_stmt = SSA_NAME_DEF_STMT (operand);
9187 if (dump_enabled_p ())
9189 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9190 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9193 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9194 *dt = vect_external_def;
9195 else
9197 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9198 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9201 if (dump_enabled_p ())
9203 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9204 switch (*dt)
9206 case vect_uninitialized_def:
9207 dump_printf (MSG_NOTE, "uninitialized\n");
9208 break;
9209 case vect_constant_def:
9210 dump_printf (MSG_NOTE, "constant\n");
9211 break;
9212 case vect_external_def:
9213 dump_printf (MSG_NOTE, "external\n");
9214 break;
9215 case vect_internal_def:
9216 dump_printf (MSG_NOTE, "internal\n");
9217 break;
9218 case vect_induction_def:
9219 dump_printf (MSG_NOTE, "induction\n");
9220 break;
9221 case vect_reduction_def:
9222 dump_printf (MSG_NOTE, "reduction\n");
9223 break;
9224 case vect_double_reduction_def:
9225 dump_printf (MSG_NOTE, "double reduction\n");
9226 break;
9227 case vect_nested_cycle:
9228 dump_printf (MSG_NOTE, "nested cycle\n");
9229 break;
9230 case vect_unknown_def_type:
9231 dump_printf (MSG_NOTE, "unknown\n");
9232 break;
9236 if (*dt == vect_unknown_def_type)
9238 if (dump_enabled_p ())
9239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9240 "Unsupported pattern.\n");
9241 return false;
9244 switch (gimple_code (*def_stmt))
9246 case GIMPLE_PHI:
9247 case GIMPLE_ASSIGN:
9248 case GIMPLE_CALL:
9249 break;
9250 default:
9251 if (dump_enabled_p ())
9252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9253 "unsupported defining stmt:\n");
9254 return false;
9257 return true;
9260 /* Function vect_is_simple_use.
9262 Same as vect_is_simple_use but also determines the vector operand
9263 type of OPERAND and stores it to *VECTYPE. If the definition of
9264 OPERAND is vect_uninitialized_def, vect_constant_def or
9265 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9266 is responsible for computing the best suited vector type for the
9267 scalar operand. */
9269 bool
9270 vect_is_simple_use (tree operand, vec_info *vinfo,
9271 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9273 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9274 return false;
9276 /* Now get a vector type if the def is internal, otherwise supply
9277 NULL_TREE and leave it up to the caller to figure out a proper
9278 type for the use stmt. */
9279 if (*dt == vect_internal_def
9280 || *dt == vect_induction_def
9281 || *dt == vect_reduction_def
9282 || *dt == vect_double_reduction_def
9283 || *dt == vect_nested_cycle)
9285 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9287 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9288 && !STMT_VINFO_RELEVANT (stmt_info)
9289 && !STMT_VINFO_LIVE_P (stmt_info))
9290 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9292 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9293 gcc_assert (*vectype != NULL_TREE);
9295 else if (*dt == vect_uninitialized_def
9296 || *dt == vect_constant_def
9297 || *dt == vect_external_def)
9298 *vectype = NULL_TREE;
9299 else
9300 gcc_unreachable ();
9302 return true;
9306 /* Function supportable_widening_operation
9308 Check whether an operation represented by the code CODE is a
9309 widening operation that is supported by the target platform in
9310 vector form (i.e., when operating on arguments of type VECTYPE_IN
9311 producing a result of type VECTYPE_OUT).
9313 Widening operations we currently support are NOP (CONVERT), FLOAT
9314 and WIDEN_MULT. This function checks if these operations are supported
9315 by the target platform either directly (via vector tree-codes), or via
9316 target builtins.
9318 Output:
9319 - CODE1 and CODE2 are codes of vector operations to be used when
9320 vectorizing the operation, if available.
9321 - MULTI_STEP_CVT determines the number of required intermediate steps in
9322 case of multi-step conversion (like char->short->int - in that case
9323 MULTI_STEP_CVT will be 1).
9324 - INTERM_TYPES contains the intermediate type required to perform the
9325 widening operation (short in the above example). */
9327 bool
9328 supportable_widening_operation (enum tree_code code, gimple *stmt,
9329 tree vectype_out, tree vectype_in,
9330 enum tree_code *code1, enum tree_code *code2,
9331 int *multi_step_cvt,
9332 vec<tree> *interm_types)
9334 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9335 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9336 struct loop *vect_loop = NULL;
9337 machine_mode vec_mode;
9338 enum insn_code icode1, icode2;
9339 optab optab1, optab2;
9340 tree vectype = vectype_in;
9341 tree wide_vectype = vectype_out;
9342 enum tree_code c1, c2;
9343 int i;
9344 tree prev_type, intermediate_type;
9345 machine_mode intermediate_mode, prev_mode;
9346 optab optab3, optab4;
9348 *multi_step_cvt = 0;
9349 if (loop_info)
9350 vect_loop = LOOP_VINFO_LOOP (loop_info);
9352 switch (code)
9354 case WIDEN_MULT_EXPR:
9355 /* The result of a vectorized widening operation usually requires
9356 two vectors (because the widened results do not fit into one vector).
9357 The vector results would normally be expected to be
9358 generated in the same order as in the original scalar computation,
9359 i.e. if 8 results are generated in each vector iteration, they are
9360 to be organized as follows:
9361 vect1: [res1,res2,res3,res4],
9362 vect2: [res5,res6,res7,res8].
9364 However, in the special case that the result of the widening
9365 operation is used in a reduction computation only, the order doesn't
9366 matter (because when vectorizing a reduction we change the order of
9367 the computation). Some targets can take advantage of this and
9368 generate more efficient code. For example, targets like Altivec,
9369 that support widen_mult using a sequence of {mult_even,mult_odd}
9370 generate the following vectors:
9371 vect1: [res1,res3,res5,res7],
9372 vect2: [res2,res4,res6,res8].
9374 When vectorizing outer-loops, we execute the inner-loop sequentially
9375 (each vectorized inner-loop iteration contributes to VF outer-loop
9376 iterations in parallel). We therefore don't allow changing the
9377 order of the computation in the inner-loop during outer-loop
9378 vectorization. */
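/* For instance, when the widened products only feed a sum reduction, the
   even/odd layout above is as good as the in-order one, since the
   reduction epilogue combines all the lanes anyway. */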
9379 /* TODO: Another case in which order doesn't *really* matter is when we
9380 widen and then contract again, e.g. (short)((int)x * y >> 8).
9381 Normally, pack_trunc performs an even/odd permute, whereas the
9382 repack from an even/odd expansion would be an interleave, which
9383 would be significantly simpler for e.g. AVX2. */
9384 /* In any case, in order to avoid duplicating the code below, recurse
9385 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9386 are properly set up for the caller. If we fail, we'll continue with
9387 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9388 if (vect_loop
9389 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9390 && !nested_in_vect_loop_p (vect_loop, stmt)
9391 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9392 stmt, vectype_out, vectype_in,
9393 code1, code2, multi_step_cvt,
9394 interm_types))
9396 /* Elements in a vector with the vect_used_by_reduction property cannot
9397 be reordered if the use chain with this property does not have the
9398 same operation. One such example is s += a * b, where elements
9399 in a and b cannot be reordered. Here we check if the vector defined
9400 by STMT is only directly used in the reduction statement. */
9401 tree lhs = gimple_assign_lhs (stmt);
9402 use_operand_p dummy;
9403 gimple *use_stmt;
9404 stmt_vec_info use_stmt_info = NULL;
9405 if (single_imm_use (lhs, &dummy, &use_stmt)
9406 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9407 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9408 return true;
9410 c1 = VEC_WIDEN_MULT_LO_EXPR;
9411 c2 = VEC_WIDEN_MULT_HI_EXPR;
9412 break;
9414 case DOT_PROD_EXPR:
9415 c1 = DOT_PROD_EXPR;
9416 c2 = DOT_PROD_EXPR;
9417 break;
9419 case SAD_EXPR:
9420 c1 = SAD_EXPR;
9421 c2 = SAD_EXPR;
9422 break;
9424 case VEC_WIDEN_MULT_EVEN_EXPR:
9425 /* Support the recursion induced just above. */
9426 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9427 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9428 break;
9430 case WIDEN_LSHIFT_EXPR:
9431 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9432 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9433 break;
9435 CASE_CONVERT:
9436 c1 = VEC_UNPACK_LO_EXPR;
9437 c2 = VEC_UNPACK_HI_EXPR;
9438 break;
9440 case FLOAT_EXPR:
9441 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9442 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9443 break;
9445 case FIX_TRUNC_EXPR:
9446 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9447 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9448 computing the operation. */
9449 return false;
9451 default:
9452 gcc_unreachable ();
9455 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9456 std::swap (c1, c2);
9458 if (code == FIX_TRUNC_EXPR)
9460 /* The signedness is determined from the output operand. */
9461 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9462 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9464 else
9466 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9467 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9470 if (!optab1 || !optab2)
9471 return false;
9473 vec_mode = TYPE_MODE (vectype);
9474 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9475 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9476 return false;
9478 *code1 = c1;
9479 *code2 = c2;
9481 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9482 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9483 /* For scalar masks we may have different boolean
9484 vector types having the same QImode. Thus we
9485 add an additional check for the number of elements. */
9486 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9487 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9488 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9490 /* Check if it's a multi-step conversion that can be done using intermediate
9491 types. */
9493 prev_type = vectype;
9494 prev_mode = vec_mode;
9496 if (!CONVERT_EXPR_CODE_P (code))
9497 return false;
9499 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9500 intermediate steps in the promotion sequence. We try
9501 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9502 not. */
9503 interm_types->create (MAX_INTERM_CVT_STEPS);
9504 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
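/* Each step widens PREV_TYPE/PREV_MODE by one level: the result mode of
   the handlers found so far becomes the intermediate mode, the matching
   intermediate vector type is built, and both the unpacking out of
   PREV_MODE and the further unpacking out of the intermediate mode must
   be supported before the step is recorded. */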
9506 intermediate_mode = insn_data[icode1].operand[0].mode;
9507 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9509 intermediate_type
9510 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9511 current_vector_size);
9512 if (intermediate_mode != TYPE_MODE (intermediate_type))
9513 return false;
9515 else
9516 intermediate_type
9517 = lang_hooks.types.type_for_mode (intermediate_mode,
9518 TYPE_UNSIGNED (prev_type));
9520 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9521 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9523 if (!optab3 || !optab4
9524 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9525 || insn_data[icode1].operand[0].mode != intermediate_mode
9526 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9527 || insn_data[icode2].operand[0].mode != intermediate_mode
9528 || ((icode1 = optab_handler (optab3, intermediate_mode))
9529 == CODE_FOR_nothing)
9530 || ((icode2 = optab_handler (optab4, intermediate_mode))
9531 == CODE_FOR_nothing))
9532 break;
9534 interm_types->quick_push (intermediate_type);
9535 (*multi_step_cvt)++;
9537 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9538 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9539 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9540 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9541 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9543 prev_type = intermediate_type;
9544 prev_mode = intermediate_mode;
9547 interm_types->release ();
9548 return false;
9552 /* Function supportable_narrowing_operation
9554 Check whether an operation represented by the code CODE is a
9555 narrowing operation that is supported by the target platform in
9556 vector form (i.e., when operating on arguments of type VECTYPE_IN
9557 and producing a result of type VECTYPE_OUT).
9559 Narrowing operations we currently support are NOP (CONVERT) and
9560 FIX_TRUNC. This function checks if these operations are supported by
9561 the target platform directly via vector tree-codes.
9563 Output:
9564 - CODE1 is the code of a vector operation to be used when
9565 vectorizing the operation, if available.
9566 - MULTI_STEP_CVT determines the number of required intermediate steps in
9567 case of multi-step conversion (like int->short->char - in that case
9568 MULTI_STEP_CVT will be 1).
9569 - INTERM_TYPES contains the intermediate type required to perform the
9570 narrowing operation (short in the above example). */
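/* A hypothetical example of the outputs, with placeholder vector types
   int_vectype (VECTYPE_IN) and char_vectype (VECTYPE_OUT):

     enum tree_code c1;
     int steps;
     vec<tree> interm_types = vNULL;
     supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
				      &c1, &steps, &interm_types);

   would, on a target with the usual pack-trunc optabs, return true with
   c1 == VEC_PACK_TRUNC_EXPR, steps == 1 and the intermediate vector short
   type pushed into INTERM_TYPES, i.e. the int -> char narrowing is done
   in two packing levels via vector short. */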
9572 bool
9573 supportable_narrowing_operation (enum tree_code code,
9574 tree vectype_out, tree vectype_in,
9575 enum tree_code *code1, int *multi_step_cvt,
9576 vec<tree> *interm_types)
9578 machine_mode vec_mode;
9579 enum insn_code icode1;
9580 optab optab1, interm_optab;
9581 tree vectype = vectype_in;
9582 tree narrow_vectype = vectype_out;
9583 enum tree_code c1;
9584 tree intermediate_type, prev_type;
9585 machine_mode intermediate_mode, prev_mode;
9586 int i;
9587 bool uns;
9589 *multi_step_cvt = 0;
9590 switch (code)
9592 CASE_CONVERT:
9593 c1 = VEC_PACK_TRUNC_EXPR;
9594 break;
9596 case FIX_TRUNC_EXPR:
9597 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9598 break;
9600 case FLOAT_EXPR:
9601 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9602 tree code and optabs used for computing the operation. */
9603 return false;
9605 default:
9606 gcc_unreachable ();
9609 if (code == FIX_TRUNC_EXPR)
9610 /* The signedness is determined from the output operand. */
9611 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9612 else
9613 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9615 if (!optab1)
9616 return false;
9618 vec_mode = TYPE_MODE (vectype);
9619 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9620 return false;
9622 *code1 = c1;
9624 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9625 /* For scalar masks we may have different boolean
9626 vector types having the same QImode. Thus we
9627 add an additional check for the number of elements. */
9628 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9629 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9630 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9632 /* Check if it's a multi-step conversion that can be done using intermediate
9633 types. */
9634 prev_mode = vec_mode;
9635 prev_type = vectype;
9636 if (code == FIX_TRUNC_EXPR)
9637 uns = TYPE_UNSIGNED (vectype_out);
9638 else
9639 uns = TYPE_UNSIGNED (vectype);
9641 /* For multi-step FIX_TRUNC_EXPR prefer signed floating-point to integer
9642 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9643 costly than signed. */
9644 if (code == FIX_TRUNC_EXPR && uns)
9646 enum insn_code icode2;
9648 intermediate_type
9649 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9650 interm_optab
9651 = optab_for_tree_code (c1, intermediate_type, optab_default);
9652 if (interm_optab != unknown_optab
9653 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9654 && insn_data[icode1].operand[0].mode
9655 == insn_data[icode2].operand[0].mode)
9657 uns = false;
9658 optab1 = interm_optab;
9659 icode1 = icode2;
9663 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9664 intermediate steps in the demotion sequence. We try
9665 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9666 interm_types->create (MAX_INTERM_CVT_STEPS);
9667 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
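/* Each step narrows PREV_TYPE/PREV_MODE by one level: the result mode of
   the handler found so far becomes the intermediate mode, the matching
   intermediate vector type is built, and both the packing out of
   PREV_MODE and the further VEC_PACK_TRUNC_EXPR packing out of the
   intermediate mode must be supported before the step is recorded. */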
9669 intermediate_mode = insn_data[icode1].operand[0].mode;
9670 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9672 intermediate_type
9673 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9674 current_vector_size);
9675 if (intermediate_mode != TYPE_MODE (intermediate_type))
9676 return false;
9678 else
9679 intermediate_type
9680 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9681 interm_optab
9682 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9683 optab_default);
9684 if (!interm_optab
9685 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9686 || insn_data[icode1].operand[0].mode != intermediate_mode
9687 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9688 == CODE_FOR_nothing))
9689 break;
9691 interm_types->quick_push (intermediate_type);
9692 (*multi_step_cvt)++;
9694 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9695 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9696 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9697 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9699 prev_mode = intermediate_mode;
9700 prev_type = intermediate_type;
9701 optab1 = interm_optab;
9704 interm_types->release ();
9705 return false;