gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
57 tree
58 stmt_vectype (struct _stmt_vec_info *stmt_info)
60 return STMT_VINFO_VECTYPE (stmt_info);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
65 bool
66 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
68 gimple *stmt = STMT_VINFO_STMT (stmt_info);
69 basic_block bb = gimple_bb (stmt);
70 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
71 struct loop* loop;
73 if (!loop_vinfo)
74 return false;
76 loop = LOOP_VINFO_LOOP (loop_vinfo);
78 return (bb->loop_father == loop->inner);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
85 unsigned
86 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
87 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
88 int misalign, enum vect_cost_model_location where)
90 if (body_cost_vec)
92 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
93 stmt_info_for_cost si = { count, kind,
94 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
95 misalign };
96 body_cost_vec->safe_push (si);
97 return (unsigned)
98 (builtin_vectorization_cost (kind, vectype, misalign) * count);
100 else
101 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
102 count, kind, stmt_info, misalign, where);
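/* For example, the body cost of NCOPIES copies of a simple vector
   statement is recorded elsewhere in this file as

     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   With a non-NULL cost vector the entry is merely queued and a rough
   builtin_vectorization_cost estimate is returned; otherwise the cost is
   passed straight to the target through add_stmt_cost.  */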
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple *new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple *new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
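/* Taken together, read_vector_array and write_vector_array emit plain
   array accesses on the temporary created by create_vector_array, e.g.

     vect_array[2] = vx.2_5;     <-- write_vector_array
     vx.3_7 = vect_array[3];     <-- read_vector_array

   where the SSA names shown are purely illustrative.  */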
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
167 tree mem_ref;
169 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
170 /* Arrays have the same alignment as their type. */
171 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
172 return mem_ref;
175 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
177 /* Function vect_mark_relevant.
179 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
181 static void
182 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
183 enum vect_relevant relevant, bool live_p)
185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
186 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
187 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
188 gimple *pattern_stmt;
190 if (dump_enabled_p ())
192 dump_printf_loc (MSG_NOTE, vect_location,
193 "mark relevant %d, live %d: ", relevant, live_p);
194 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
197 /* If this stmt is an original stmt in a pattern, we might need to mark its
198 related pattern stmt instead of the original stmt. However, such stmts
199 may have their own uses that are not in any pattern; in such cases the
200 stmt itself should be marked. */
201 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
203 /* This is the last stmt in a sequence that was detected as a
204 pattern that can potentially be vectorized. Don't mark the stmt
205 as relevant/live because it's not going to be vectorized.
206 Instead mark the pattern-stmt that replaces it. */
208 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE, vect_location,
212 "last stmt in pattern. don't mark"
213 " relevant/live.\n");
214 stmt_info = vinfo_for_stmt (pattern_stmt);
215 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
216 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
217 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
218 stmt = pattern_stmt;
221 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
222 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
223 STMT_VINFO_RELEVANT (stmt_info) = relevant;
225 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
226 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
228 if (dump_enabled_p ())
229 dump_printf_loc (MSG_NOTE, vect_location,
230 "already marked relevant/live.\n");
231 return;
234 worklist->safe_push (stmt);
238 /* Function vect_stmt_relevant_p.
240 Return true if STMT in loop that is represented by LOOP_VINFO is
241 "relevant for vectorization".
243 A stmt is considered "relevant for vectorization" if:
244 - it has uses outside the loop.
245 - it has vdefs (it alters memory).
246 - control stmts in the loop (except for the exit condition).
248 CHECKME: what other side effects would the vectorizer allow? */
250 static bool
251 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
252 enum vect_relevant *relevant, bool *live_p)
254 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
255 ssa_op_iter op_iter;
256 imm_use_iterator imm_iter;
257 use_operand_p use_p;
258 def_operand_p def_p;
260 *relevant = vect_unused_in_scope;
261 *live_p = false;
263 /* cond stmt other than loop exit cond. */
264 if (is_ctrl_stmt (stmt)
265 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
266 != loop_exit_ctrl_vec_info_type)
267 *relevant = vect_used_in_scope;
269 /* changing memory. */
270 if (gimple_code (stmt) != GIMPLE_PHI)
271 if (gimple_vdef (stmt)
272 && !gimple_clobber_p (stmt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_NOTE, vect_location,
276 "vec_stmt_relevant_p: stmt has vdefs.\n");
277 *relevant = vect_used_in_scope;
280 /* uses outside the loop. */
281 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
283 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
285 basic_block bb = gimple_bb (USE_STMT (use_p));
286 if (!flow_bb_inside_loop_p (loop, bb))
288 if (dump_enabled_p ())
289 dump_printf_loc (MSG_NOTE, vect_location,
290 "vec_stmt_relevant_p: used out of loop.\n");
292 if (is_gimple_debug (USE_STMT (use_p)))
293 continue;
295 /* We expect all such uses to be in the loop exit phis
296 (because of loop closed form) */
297 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
298 gcc_assert (bb == single_exit (loop)->dest);
300 *live_p = true;
305 return (*live_p || *relevant);
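/* For example, in

     for (i = 0; i < n; i++)
       {
         t_1 = b[i] + c[i];
         a[i] = t_1;      <-- has a vdef: marked vect_used_in_scope
         last_2 = t_1;    <-- last_2 read after the loop: live
       }

   the store is relevant because it alters memory, and the definition of
   last_2 is live because it has a use outside the loop (expected to be
   in a loop-exit phi).  The names above are illustrative only.  */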
309 /* Function exist_non_indexing_operands_for_use_p
311 USE is one of the uses attached to STMT. Check if USE is
312 used in STMT for anything other than indexing an array. */
314 static bool
315 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
317 tree operand;
318 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
320 /* USE corresponds to some operand in STMT. If there is no data
321 reference in STMT, then any operand that corresponds to USE
322 is not indexing an array. */
323 if (!STMT_VINFO_DATA_REF (stmt_info))
324 return true;
326 /* STMT has a data_ref. FORNOW this means that it's of one of
327 the following forms:
328 -1- ARRAY_REF = var
329 -2- var = ARRAY_REF
330 (This should have been verified in analyze_data_refs).
332 'var' in the second case corresponds to a def, not a use,
333 so USE cannot correspond to any operands that are not used
334 for array indexing.
336 Therefore, all we need to check is if STMT falls into the
337 first case, and whether var corresponds to USE. */
339 if (!gimple_assign_copy_p (stmt))
341 if (is_gimple_call (stmt)
342 && gimple_call_internal_p (stmt))
343 switch (gimple_call_internal_fn (stmt))
345 case IFN_MASK_STORE:
346 operand = gimple_call_arg (stmt, 3);
347 if (operand == use)
348 return true;
349 /* FALLTHRU */
350 case IFN_MASK_LOAD:
351 operand = gimple_call_arg (stmt, 2);
352 if (operand == use)
353 return true;
354 break;
355 default:
356 break;
358 return false;
361 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
362 return false;
363 operand = gimple_assign_rhs1 (stmt);
364 if (TREE_CODE (operand) != SSA_NAME)
365 return false;
367 if (operand == use)
368 return true;
370 return false;
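/* E.g. for the store "a[i_5] = x_3" only the use of x_3 needs to be
   vectorized, so this returns true for x_3 and false for i_5, which
   merely indexes the array.  For IFN_MASK_LOAD / IFN_MASK_STORE calls
   the mask (and, for stores, the stored value) arguments are the
   non-indexing uses.  */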
375 /* Function process_use.
377 Inputs:
378 - a USE in STMT in a loop represented by LOOP_VINFO
379 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
380 that defined USE. This is done by calling mark_relevant and passing it
381 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
382 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
383 be performed.
385 Outputs:
386 Generally, LIVE_P and RELEVANT are used to define the liveness and
387 relevance info of the DEF_STMT of this USE:
388 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
389 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
390 Exceptions:
391 - case 1: If USE is used only for address computations (e.g. array indexing),
392 which does not need to be directly vectorized, then the liveness/relevance
393 of the respective DEF_STMT is left unchanged.
394 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
395 skip DEF_STMT because it has already been processed.
396 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
397 be modified accordingly.
399 Return true if everything is as expected. Return false otherwise. */
401 static bool
402 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
403 enum vect_relevant relevant, vec<gimple *> *worklist,
404 bool force)
406 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
407 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
408 stmt_vec_info dstmt_vinfo;
409 basic_block bb, def_bb;
410 gimple *def_stmt;
411 enum vect_def_type dt;
413 /* case 1: we are only interested in uses that need to be vectorized. Uses
414 that are used for address computation are not considered relevant. */
415 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
416 return true;
418 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
422 "not vectorized: unsupported use in stmt.\n");
423 return false;
426 if (!def_stmt || gimple_nop_p (def_stmt))
427 return true;
429 def_bb = gimple_bb (def_stmt);
430 if (!flow_bb_inside_loop_p (loop, def_bb))
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
434 return true;
437 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
438 DEF_STMT must have already been processed, because this should be the
439 only way that STMT, which is a reduction-phi, was put in the worklist,
440 as there should be no other uses for DEF_STMT in the loop. So we just
441 check that everything is as expected, and we are done. */
442 dstmt_vinfo = vinfo_for_stmt (def_stmt);
443 bb = gimple_bb (stmt);
444 if (gimple_code (stmt) == GIMPLE_PHI
445 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
446 && gimple_code (def_stmt) != GIMPLE_PHI
447 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
448 && bb->loop_father == def_bb->loop_father)
450 if (dump_enabled_p ())
451 dump_printf_loc (MSG_NOTE, vect_location,
452 "reduc-stmt defining reduc-phi in the same nest.\n");
453 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
454 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
455 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
456 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
457 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
458 return true;
461 /* case 3a: outer-loop stmt defining an inner-loop stmt:
462 outer-loop-header-bb:
463 d = def_stmt
464 inner-loop:
465 stmt # use (d)
466 outer-loop-tail-bb:
467 ... */
468 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_NOTE, vect_location,
472 "outer-loop def-stmt defining inner-loop stmt.\n");
474 switch (relevant)
476 case vect_unused_in_scope:
477 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
478 vect_used_in_scope : vect_unused_in_scope;
479 break;
481 case vect_used_in_outer_by_reduction:
482 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
483 relevant = vect_used_by_reduction;
484 break;
486 case vect_used_in_outer:
487 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
488 relevant = vect_used_in_scope;
489 break;
491 case vect_used_in_scope:
492 break;
494 default:
495 gcc_unreachable ();
499 /* case 3b: inner-loop stmt defining an outer-loop stmt:
500 outer-loop-header-bb:
502 inner-loop:
503 d = def_stmt
504 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
505 stmt # use (d) */
506 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location,
510 "inner-loop def-stmt defining outer-loop stmt.\n");
512 switch (relevant)
514 case vect_unused_in_scope:
515 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
516 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
517 vect_used_in_outer_by_reduction : vect_unused_in_scope;
518 break;
520 case vect_used_by_reduction:
521 relevant = vect_used_in_outer_by_reduction;
522 break;
524 case vect_used_in_scope:
525 relevant = vect_used_in_outer;
526 break;
528 default:
529 gcc_unreachable ();
533 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
534 return true;
538 /* Function vect_mark_stmts_to_be_vectorized.
540 Not all stmts in the loop need to be vectorized. For example:
542 for i...
543 for j...
544 1. T0 = i + j
545 2. T1 = a[T0]
547 3. j = j + 1
549 Stmts 1 and 3 do not need to be vectorized, because loop control and
550 addressing of vectorized data-refs are handled differently.
552 This pass detects such stmts. */
554 bool
555 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
557 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
558 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
559 unsigned int nbbs = loop->num_nodes;
560 gimple_stmt_iterator si;
561 gimple *stmt;
562 unsigned int i;
563 stmt_vec_info stmt_vinfo;
564 basic_block bb;
565 gimple *phi;
566 bool live_p;
567 enum vect_relevant relevant, tmp_relevant;
568 enum vect_def_type def_type;
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE, vect_location,
572 "=== vect_mark_stmts_to_be_vectorized ===\n");
574 auto_vec<gimple *, 64> worklist;
576 /* 1. Init worklist. */
577 for (i = 0; i < nbbs; i++)
579 bb = bbs[i];
580 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
582 phi = gsi_stmt (si);
583 if (dump_enabled_p ())
585 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
586 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
589 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
590 vect_mark_relevant (&worklist, phi, relevant, live_p);
592 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
594 stmt = gsi_stmt (si);
595 if (dump_enabled_p ())
597 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
598 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
601 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
602 vect_mark_relevant (&worklist, stmt, relevant, live_p);
606 /* 2. Process_worklist */
607 while (worklist.length () > 0)
609 use_operand_p use_p;
610 ssa_op_iter iter;
612 stmt = worklist.pop ();
613 if (dump_enabled_p ())
615 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
616 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
619 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
620 (DEF_STMT) as relevant/irrelevant and live/dead according to the
621 liveness and relevance properties of STMT. */
622 stmt_vinfo = vinfo_for_stmt (stmt);
623 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
624 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
626 /* Generally, the liveness and relevance properties of STMT are
627 propagated as is to the DEF_STMTs of its USEs:
628 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
629 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
631 One exception is when STMT has been identified as defining a reduction
632 variable; in this case we set the liveness/relevance as follows:
633 live_p = false
634 relevant = vect_used_by_reduction
635 This is because we distinguish between two kinds of relevant stmts -
636 those that are used by a reduction computation, and those that are
637 (also) used by a regular computation. This allows us later on to
638 identify stmts that are used solely by a reduction, and therefore the
639 order of the results that they produce does not have to be kept. */
641 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
642 tmp_relevant = relevant;
643 switch (def_type)
645 case vect_reduction_def:
646 switch (tmp_relevant)
648 case vect_unused_in_scope:
649 relevant = vect_used_by_reduction;
650 break;
652 case vect_used_by_reduction:
653 if (gimple_code (stmt) == GIMPLE_PHI)
654 break;
655 /* fall through */
657 default:
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
660 "unsupported use of reduction.\n");
661 return false;
664 live_p = false;
665 break;
667 case vect_nested_cycle:
668 if (tmp_relevant != vect_unused_in_scope
669 && tmp_relevant != vect_used_in_outer_by_reduction
670 && tmp_relevant != vect_used_in_outer)
672 if (dump_enabled_p ())
673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
674 "unsupported use of nested cycle.\n");
676 return false;
679 live_p = false;
680 break;
682 case vect_double_reduction_def:
683 if (tmp_relevant != vect_unused_in_scope
684 && tmp_relevant != vect_used_by_reduction)
686 if (dump_enabled_p ())
687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
688 "unsupported use of double reduction.\n");
690 return false;
693 live_p = false;
694 break;
696 default:
697 break;
700 if (is_pattern_stmt_p (stmt_vinfo))
702 /* Pattern statements are not inserted into the code, so
703 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
704 have to scan the RHS or function arguments instead. */
705 if (is_gimple_assign (stmt))
707 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
708 tree op = gimple_assign_rhs1 (stmt);
710 i = 1;
711 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
713 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
714 live_p, relevant, &worklist, false)
715 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
716 live_p, relevant, &worklist, false))
717 return false;
718 i = 2;
720 for (; i < gimple_num_ops (stmt); i++)
722 op = gimple_op (stmt, i);
723 if (TREE_CODE (op) == SSA_NAME
724 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
725 &worklist, false))
726 return false;
729 else if (is_gimple_call (stmt))
731 for (i = 0; i < gimple_call_num_args (stmt); i++)
733 tree arg = gimple_call_arg (stmt, i);
734 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
735 &worklist, false))
736 return false;
740 else
741 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
743 tree op = USE_FROM_PTR (use_p);
744 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
745 &worklist, false))
746 return false;
749 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
751 tree off;
752 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
753 gcc_assert (decl);
754 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
755 &worklist, true))
756 return false;
758 } /* while worklist */
760 return true;
764 /* Function vect_model_simple_cost.
766 Models cost for simple operations, i.e. those that only emit ncopies of a
767 single op. Right now, this does not account for multiple insns that could
768 be generated for the single vector op. We will handle that shortly. */
770 void
771 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
772 enum vect_def_type *dt,
773 stmt_vector_for_cost *prologue_cost_vec,
774 stmt_vector_for_cost *body_cost_vec)
776 int i;
777 int inside_cost = 0, prologue_cost = 0;
779 /* The SLP costs were already calculated during SLP tree build. */
780 if (PURE_SLP_STMT (stmt_info))
781 return;
783 /* FORNOW: Assuming maximum 2 args per stmt. */
784 for (i = 0; i < 2; i++)
785 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
786 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
787 stmt_info, 0, vect_prologue);
789 /* Pass the inside-of-loop statements to the target-specific cost model. */
790 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
791 stmt_info, 0, vect_body);
793 if (dump_enabled_p ())
794 dump_printf_loc (MSG_NOTE, vect_location,
795 "vect_model_simple_cost: inside_cost = %d, "
796 "prologue_cost = %d .\n", inside_cost, prologue_cost);
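/* E.g. a vector addition with one invariant operand and ncopies == 2 is
   costed as one vector_stmt in the prologue (building the invariant
   vector) plus two vector_stmt entries in the loop body.  */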
800 /* Model cost for type demotion and promotion operations. PWR is normally
801 zero for single-step promotions and demotions. It will be one if
802 two-step promotion/demotion is required, and so on. Each additional
803 step doubles the number of instructions required. */
805 static void
806 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
807 enum vect_def_type *dt, int pwr)
809 int i, tmp;
810 int inside_cost = 0, prologue_cost = 0;
811 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
812 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
813 void *target_cost_data;
815 /* The SLP costs were already calculated during SLP tree build. */
816 if (PURE_SLP_STMT (stmt_info))
817 return;
819 if (loop_vinfo)
820 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
821 else
822 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
824 for (i = 0; i < pwr + 1; i++)
826 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
827 (i + 1) : i;
828 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
829 vec_promote_demote, stmt_info, 0,
830 vect_body);
833 /* FORNOW: Assuming maximum 2 args per stmt. */
834 for (i = 0; i < 2; i++)
835 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
837 stmt_info, 0, vect_prologue);
839 if (dump_enabled_p ())
840 dump_printf_loc (MSG_NOTE, vect_location,
841 "vect_model_promotion_demotion_cost: inside_cost = %d, "
842 "prologue_cost = %d .\n", inside_cost, prologue_cost);
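/* E.g. a two-step promotion (PWR == 1) adds vect_pow2 (1) + vect_pow2 (2)
   = 2 + 4 = 6 vec_promote_demote stmts to the body cost, while the
   corresponding two-step demotion adds vect_pow2 (0) + vect_pow2 (1)
   = 1 + 2 = 3, reflecting that each extra step doubles the count.  */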
845 /* Function vect_cost_group_size
847 For grouped load or store, return the group_size only if it is the first
848 load or store of a group, else return 1. This ensures that group size is
849 only returned once per group. */
851 static int
852 vect_cost_group_size (stmt_vec_info stmt_info)
854 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
856 if (first_stmt == STMT_VINFO_STMT (stmt_info))
857 return GROUP_SIZE (stmt_info);
859 return 1;
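/* E.g. for an interleaved group of four stores, the cost call made for
   the first store sees a group size of 4 while the remaining three see
   1, so the interleaving overhead is charged only once per group.  */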
863 /* Function vect_model_store_cost
865 Models cost for stores. In the case of grouped accesses, one access
866 has the overhead of the grouped access attributed to it. */
868 void
869 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
870 bool store_lanes_p, enum vect_def_type dt,
871 slp_tree slp_node,
872 stmt_vector_for_cost *prologue_cost_vec,
873 stmt_vector_for_cost *body_cost_vec)
875 int group_size;
876 unsigned int inside_cost = 0, prologue_cost = 0;
877 struct data_reference *first_dr;
878 gimple *first_stmt;
880 if (dt == vect_constant_def || dt == vect_external_def)
881 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
882 stmt_info, 0, vect_prologue);
884 /* Grouped access? */
885 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
887 if (slp_node)
889 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
890 group_size = 1;
892 else
894 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
895 group_size = vect_cost_group_size (stmt_info);
898 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
900 /* Not a grouped access. */
901 else
903 group_size = 1;
904 first_dr = STMT_VINFO_DATA_REF (stmt_info);
907 /* We assume that the cost of a single store-lanes instruction is
908 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
909 access is instead being provided by a permute-and-store operation,
910 include the cost of the permutes. */
911 if (!store_lanes_p && group_size > 1
912 && !STMT_VINFO_STRIDED_P (stmt_info))
914 /* Uses high and low interleave or shuffle operations for each
915 needed permute. */
916 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
917 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
918 stmt_info, 0, vect_body);
920 if (dump_enabled_p ())
921 dump_printf_loc (MSG_NOTE, vect_location,
922 "vect_model_store_cost: strided group_size = %d .\n",
923 group_size);
926 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
927 /* Costs of the stores. */
928 if (STMT_VINFO_STRIDED_P (stmt_info)
929 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
931 /* N scalar stores plus extracting the elements. */
932 inside_cost += record_stmt_cost (body_cost_vec,
933 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
934 scalar_store, stmt_info, 0, vect_body);
936 else
937 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
939 if (STMT_VINFO_STRIDED_P (stmt_info))
940 inside_cost += record_stmt_cost (body_cost_vec,
941 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
942 vec_to_scalar, stmt_info, 0, vect_body);
944 if (dump_enabled_p ())
945 dump_printf_loc (MSG_NOTE, vect_location,
946 "vect_model_store_cost: inside_cost = %d, "
947 "prologue_cost = %d .\n", inside_cost, prologue_cost);
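/* E.g. an interleaved store group of size 4 with ncopies == 1 that is
   implemented by permutes is charged ncopies * ceil_log2 (4) * 4 = 8
   vec_perm stmts on top of the stores themselves, whereas a store-lanes
   variant is assumed to cost the same as four independent stores.  */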
951 /* Calculate cost of DR's memory access. */
952 void
953 vect_get_store_cost (struct data_reference *dr, int ncopies,
954 unsigned int *inside_cost,
955 stmt_vector_for_cost *body_cost_vec)
957 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
958 gimple *stmt = DR_STMT (dr);
959 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
961 switch (alignment_support_scheme)
963 case dr_aligned:
965 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
966 vector_store, stmt_info, 0,
967 vect_body);
969 if (dump_enabled_p ())
970 dump_printf_loc (MSG_NOTE, vect_location,
971 "vect_model_store_cost: aligned.\n");
972 break;
975 case dr_unaligned_supported:
977 /* Here, we assign an additional cost for the unaligned store. */
978 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
979 unaligned_store, stmt_info,
980 DR_MISALIGNMENT (dr), vect_body);
981 if (dump_enabled_p ())
982 dump_printf_loc (MSG_NOTE, vect_location,
983 "vect_model_store_cost: unaligned supported by "
984 "hardware.\n");
985 break;
988 case dr_unaligned_unsupported:
990 *inside_cost = VECT_MAX_COST;
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
994 "vect_model_store_cost: unsupported access.\n");
995 break;
998 default:
999 gcc_unreachable ();
1004 /* Function vect_model_load_cost
1006 Models cost for loads. In the case of grouped accesses, the last access
1007 has the overhead of the grouped access attributed to it. Since unaligned
1008 accesses are supported for loads, we also account for the costs of the
1009 access scheme chosen. */
1011 void
1012 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1013 bool load_lanes_p, slp_tree slp_node,
1014 stmt_vector_for_cost *prologue_cost_vec,
1015 stmt_vector_for_cost *body_cost_vec)
1017 int group_size;
1018 gimple *first_stmt;
1019 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1020 unsigned int inside_cost = 0, prologue_cost = 0;
1022 /* Grouped accesses? */
1023 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1024 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1026 group_size = vect_cost_group_size (stmt_info);
1027 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1029 /* Not a grouped access. */
1030 else
1032 group_size = 1;
1033 first_dr = dr;
1036 /* We assume that the cost of a single load-lanes instruction is
1037 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1038 access is instead being provided by a load-and-permute operation,
1039 include the cost of the permutes. */
1040 if (!load_lanes_p && group_size > 1
1041 && !STMT_VINFO_STRIDED_P (stmt_info))
1043 /* Uses even and odd extract or shuffle operations
1044 for each needed permute. */
1045 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1046 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1047 stmt_info, 0, vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_load_cost: strided group_size = %d .\n",
1052 group_size);
1055 /* The loads themselves. */
1056 if (STMT_VINFO_STRIDED_P (stmt_info)
1057 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1059 /* N scalar loads plus gathering them into a vector. */
1060 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1061 inside_cost += record_stmt_cost (body_cost_vec,
1062 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1063 scalar_load, stmt_info, 0, vect_body);
1065 else
1066 vect_get_load_cost (first_dr, ncopies,
1067 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1068 || group_size > 1 || slp_node),
1069 &inside_cost, &prologue_cost,
1070 prologue_cost_vec, body_cost_vec, true);
1071 if (STMT_VINFO_STRIDED_P (stmt_info))
1072 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1073 stmt_info, 0, vect_body);
1075 if (dump_enabled_p ())
1076 dump_printf_loc (MSG_NOTE, vect_location,
1077 "vect_model_load_cost: inside_cost = %d, "
1078 "prologue_cost = %d .\n", inside_cost, prologue_cost);
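/* E.g. a strided, non-grouped load of a four-element vector with
   ncopies == 1 is costed as four scalar_load entries plus a single
   vec_construct for assembling the elements back into a vector.  */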
1082 /* Calculate cost of DR's memory access. */
1083 void
1084 vect_get_load_cost (struct data_reference *dr, int ncopies,
1085 bool add_realign_cost, unsigned int *inside_cost,
1086 unsigned int *prologue_cost,
1087 stmt_vector_for_cost *prologue_cost_vec,
1088 stmt_vector_for_cost *body_cost_vec,
1089 bool record_prologue_costs)
1091 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1092 gimple *stmt = DR_STMT (dr);
1093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1095 switch (alignment_support_scheme)
1097 case dr_aligned:
1099 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1100 stmt_info, 0, vect_body);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_load_cost: aligned.\n");
1106 break;
1108 case dr_unaligned_supported:
1110 /* Here, we assign an additional cost for the unaligned load. */
1111 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1112 unaligned_load, stmt_info,
1113 DR_MISALIGNMENT (dr), vect_body);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE, vect_location,
1117 "vect_model_load_cost: unaligned supported by "
1118 "hardware.\n");
1120 break;
1122 case dr_explicit_realign:
1124 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1125 vector_load, stmt_info, 0, vect_body);
1126 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1127 vec_perm, stmt_info, 0, vect_body);
1129 /* FIXME: If the misalignment remains fixed across the iterations of
1130 the containing loop, the following cost should be added to the
1131 prologue costs. */
1132 if (targetm.vectorize.builtin_mask_for_load)
1133 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1134 stmt_info, 0, vect_body);
1136 if (dump_enabled_p ())
1137 dump_printf_loc (MSG_NOTE, vect_location,
1138 "vect_model_load_cost: explicit realign\n");
1140 break;
1142 case dr_explicit_realign_optimized:
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: unaligned software "
1147 "pipelined.\n");
1149 /* Unaligned software pipeline has a load of an address, an initial
1150 load, and possibly a mask operation to "prime" the loop. However,
1151 if this is an access in a group of loads, which provide grouped
1152 access, then the above cost should only be considered for one
1153 access in the group. Inside the loop, there is a load op
1154 and a realignment op. */
1156 if (add_realign_cost && record_prologue_costs)
1158 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1159 vector_stmt, stmt_info,
1160 0, vect_prologue);
1161 if (targetm.vectorize.builtin_mask_for_load)
1162 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1163 vector_stmt, stmt_info,
1164 0, vect_prologue);
1167 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1168 stmt_info, 0, vect_body);
1169 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1170 stmt_info, 0, vect_body);
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "vect_model_load_cost: explicit realign optimized"
1175 "\n");
1177 break;
1180 case dr_unaligned_unsupported:
1182 *inside_cost = VECT_MAX_COST;
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1186 "vect_model_load_cost: unsupported access.\n");
1187 break;
1190 default:
1191 gcc_unreachable ();
1195 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1196 the loop preheader for the vectorized stmt STMT. */
1198 static void
1199 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1201 if (gsi)
1202 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1203 else
1205 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1206 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1208 if (loop_vinfo)
1210 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1211 basic_block new_bb;
1212 edge pe;
1214 if (nested_in_vect_loop_p (loop, stmt))
1215 loop = loop->inner;
1217 pe = loop_preheader_edge (loop);
1218 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1219 gcc_assert (!new_bb);
1221 else
1223 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1224 basic_block bb;
1225 gimple_stmt_iterator gsi_bb_start;
1227 gcc_assert (bb_vinfo);
1228 bb = BB_VINFO_BB (bb_vinfo);
1229 gsi_bb_start = gsi_after_labels (bb);
1230 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1234 if (dump_enabled_p ())
1236 dump_printf_loc (MSG_NOTE, vect_location,
1237 "created new init_stmt: ");
1238 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1242 /* Function vect_init_vector.
1244 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1245 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1246 vector type, a vector with all elements equal to VAL is created first.
1247 Place the initialization at GSI if it is not NULL. Otherwise, place the
1248 initialization at the loop preheader.
1249 Return the DEF of INIT_STMT.
1250 It will be used in the vectorization of STMT. */
1252 tree
1253 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1255 gimple *init_stmt;
1256 tree new_temp;
1258 /* We also abuse this function to initialize a plain (non-vector) SSA name with the value VAL. */
1259 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1261 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1262 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1264 /* Scalar boolean value should be transformed into
1265 all zeros or all ones value before building a vector. */
1266 if (VECTOR_BOOLEAN_TYPE_P (type))
1268 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1269 tree false_val = build_zero_cst (TREE_TYPE (type));
1271 if (CONSTANT_CLASS_P (val))
1272 val = integer_zerop (val) ? false_val : true_val;
1273 else
1275 new_temp = make_ssa_name (TREE_TYPE (type));
1276 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1277 val, true_val, false_val);
1278 vect_init_vector_1 (stmt, init_stmt, gsi);
1279 val = new_temp;
1282 else if (CONSTANT_CLASS_P (val))
1283 val = fold_convert (TREE_TYPE (type), val);
1284 else
1286 new_temp = make_ssa_name (TREE_TYPE (type));
1287 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1288 init_stmt = gimple_build_assign (new_temp,
1289 fold_build1 (VIEW_CONVERT_EXPR,
1290 TREE_TYPE (type),
1291 val));
1292 else
1293 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1294 vect_init_vector_1 (stmt, init_stmt, gsi);
1295 val = new_temp;
1298 val = build_vector_from_val (type, val);
1301 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1302 init_stmt = gimple_build_assign (new_temp, val);
1303 vect_init_vector_1 (stmt, init_stmt, gsi);
1304 return new_temp;
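/* E.g. asked to initialize a four-element integer vector from the
   scalar constant 3 with GSI == NULL, this emits

     cst_1 = { 3, 3, 3, 3 };

   on the loop preheader edge and returns cst_1 (the SSA name shown is
   illustrative); non-constant or boolean scalars are first converted
   with additional init stmts.  */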
1308 /* Function vect_get_vec_def_for_operand.
1310 OP is an operand in STMT. This function returns a (vector) def that will be
1311 used in the vectorized stmt for STMT.
1313 In the case that OP is an SSA_NAME which is defined in the loop, then
1314 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1316 In case OP is an invariant or constant, a new stmt that creates a vector def
1317 needs to be introduced. VECTYPE may be used to specify a required type for
1318 vector invariant. */
1320 tree
1321 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1323 tree vec_oprnd;
1324 gimple *vec_stmt;
1325 gimple *def_stmt;
1326 stmt_vec_info def_stmt_info = NULL;
1327 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1328 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1329 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1330 enum vect_def_type dt;
1331 bool is_simple_use;
1332 tree vector_type;
1334 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE, vect_location,
1337 "vect_get_vec_def_for_operand: ");
1338 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1339 dump_printf (MSG_NOTE, "\n");
1342 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1343 gcc_assert (is_simple_use);
1344 if (dump_enabled_p ())
1346 int loc_printed = 0;
1347 if (def_stmt)
1349 if (loc_printed)
1350 dump_printf (MSG_NOTE, " def_stmt = ");
1351 else
1352 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1353 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1357 switch (dt)
1359 /* operand is a constant or a loop invariant. */
1360 case vect_constant_def:
1361 case vect_external_def:
1363 if (vectype)
1364 vector_type = vectype;
1365 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1366 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1367 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1368 else
1369 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1371 gcc_assert (vector_type);
1372 return vect_init_vector (stmt, op, vector_type, NULL);
1375 /* operand is defined inside the loop. */
1376 case vect_internal_def:
1378 /* Get the def from the vectorized stmt. */
1379 def_stmt_info = vinfo_for_stmt (def_stmt);
1381 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1382 /* Get vectorized pattern statement. */
1383 if (!vec_stmt
1384 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1385 && !STMT_VINFO_RELEVANT (def_stmt_info))
1386 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1387 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1388 gcc_assert (vec_stmt);
1389 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1390 vec_oprnd = PHI_RESULT (vec_stmt);
1391 else if (is_gimple_call (vec_stmt))
1392 vec_oprnd = gimple_call_lhs (vec_stmt);
1393 else
1394 vec_oprnd = gimple_assign_lhs (vec_stmt);
1395 return vec_oprnd;
1398 /* operand is defined by a loop header phi - reduction */
1399 case vect_reduction_def:
1400 case vect_double_reduction_def:
1401 case vect_nested_cycle:
1402 /* Code should use get_initial_def_for_reduction. */
1403 gcc_unreachable ();
1405 /* operand is defined by loop-header phi - induction. */
1406 case vect_induction_def:
1408 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1410 /* Get the def from the vectorized stmt. */
1411 def_stmt_info = vinfo_for_stmt (def_stmt);
1412 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1413 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1414 vec_oprnd = PHI_RESULT (vec_stmt);
1415 else
1416 vec_oprnd = gimple_get_lhs (vec_stmt);
1417 return vec_oprnd;
1420 default:
1421 gcc_unreachable ();
1426 /* Function vect_get_vec_def_for_stmt_copy
1428 Return a vector-def for an operand. This function is used when the
1429 vectorized stmt to be created (by the caller to this function) is a "copy"
1430 created in case the vectorized result cannot fit in one vector, and several
1431 copies of the vector-stmt are required. In this case the vector-def is
1432 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1433 of the stmt that defines VEC_OPRND.
1434 DT is the type of the vector def VEC_OPRND.
1436 Context:
1437 In case the vectorization factor (VF) is bigger than the number
1438 of elements that can fit in a vectype (nunits), we have to generate
1439 more than one vector stmt to vectorize the scalar stmt. This situation
1440 arises when there are multiple data-types operated upon in the loop; the
1441 smallest data-type determines the VF, and as a result, when vectorizing
1442 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1443 vector stmt (each computing a vector of 'nunits' results, and together
1444 computing 'VF' results in each iteration). This function is called when
1445 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1446 which VF=16 and nunits=4, so the number of copies required is 4):
1448 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1450 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1451 VS1.1: vx.1 = memref1 VS1.2
1452 VS1.2: vx.2 = memref2 VS1.3
1453 VS1.3: vx.3 = memref3
1455 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1456 VSnew.1: vz1 = vx.1 + ... VSnew.2
1457 VSnew.2: vz2 = vx.2 + ... VSnew.3
1458 VSnew.3: vz3 = vx.3 + ...
1460 The vectorization of S1 is explained in vectorizable_load.
1461 The vectorization of S2:
1462 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1463 the function 'vect_get_vec_def_for_operand' is called to
1464 get the relevant vector-def for each operand of S2. For operand x it
1465 returns the vector-def 'vx.0'.
1467 To create the remaining copies of the vector-stmt (VSnew.j), this
1468 function is called to get the relevant vector-def for each operand. It is
1469 obtained from the respective VS1.j stmt, which is recorded in the
1470 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1472 For example, to obtain the vector-def 'vx.1' in order to create the
1473 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1474 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1475 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1476 and return its def ('vx.1').
1477 Overall, to create the above sequence this function will be called 3 times:
1478 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1479 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1480 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1482 tree
1483 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1485 gimple *vec_stmt_for_operand;
1486 stmt_vec_info def_stmt_info;
1488 /* Do nothing; can reuse same def. */
1489 if (dt == vect_external_def || dt == vect_constant_def )
1490 return vec_oprnd;
1492 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1493 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1494 gcc_assert (def_stmt_info);
1495 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1496 gcc_assert (vec_stmt_for_operand);
1497 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1498 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1499 else
1500 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1501 return vec_oprnd;
1505 /* Get vectorized definitions for the operands to create a copy of an original
1506 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1508 static void
1509 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1510 vec<tree> *vec_oprnds0,
1511 vec<tree> *vec_oprnds1)
1513 tree vec_oprnd = vec_oprnds0->pop ();
1515 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1516 vec_oprnds0->quick_push (vec_oprnd);
1518 if (vec_oprnds1 && vec_oprnds1->length ())
1520 vec_oprnd = vec_oprnds1->pop ();
1521 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1522 vec_oprnds1->quick_push (vec_oprnd);
1527 /* Get vectorized definitions for OP0 and OP1.
1528 REDUC_INDEX is the index of reduction operand in case of reduction,
1529 and -1 otherwise. */
1531 void
1532 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1533 vec<tree> *vec_oprnds0,
1534 vec<tree> *vec_oprnds1,
1535 slp_tree slp_node, int reduc_index)
1537 if (slp_node)
1539 int nops = (op1 == NULL_TREE) ? 1 : 2;
1540 auto_vec<tree> ops (nops);
1541 auto_vec<vec<tree> > vec_defs (nops);
1543 ops.quick_push (op0);
1544 if (op1)
1545 ops.quick_push (op1);
1547 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1549 *vec_oprnds0 = vec_defs[0];
1550 if (op1)
1551 *vec_oprnds1 = vec_defs[1];
1553 else
1555 tree vec_oprnd;
1557 vec_oprnds0->create (1);
1558 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1559 vec_oprnds0->quick_push (vec_oprnd);
1561 if (op1)
1563 vec_oprnds1->create (1);
1564 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1565 vec_oprnds1->quick_push (vec_oprnd);
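/* E.g. a typical two-operand caller uses

     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        slp_node, -1);

   and gets back either the complete SLP vector defs or, in the non-SLP
   case, a single vector def per operand.  */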
1571 /* Function vect_finish_stmt_generation.
1573 Insert a new stmt. */
1575 void
1576 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1577 gimple_stmt_iterator *gsi)
1579 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1580 vec_info *vinfo = stmt_info->vinfo;
1582 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1584 if (!gsi_end_p (*gsi)
1585 && gimple_has_mem_ops (vec_stmt))
1587 gimple *at_stmt = gsi_stmt (*gsi);
1588 tree vuse = gimple_vuse (at_stmt);
1589 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1591 tree vdef = gimple_vdef (at_stmt);
1592 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1593 /* If we have an SSA vuse and insert a store, update virtual
1594 SSA form to avoid triggering the renamer. Do so only
1595 if we can easily see all uses - which is what almost always
1596 happens with the way vectorized stmts are inserted. */
1597 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1598 && ((is_gimple_assign (vec_stmt)
1599 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1600 || (is_gimple_call (vec_stmt)
1601 && !(gimple_call_flags (vec_stmt)
1602 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1604 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1605 gimple_set_vdef (vec_stmt, new_vdef);
1606 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1610 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1612 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614 if (dump_enabled_p ())
1616 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1617 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1620 gimple_set_location (vec_stmt, gimple_location (stmt));
1622 /* While EH edges will generally prevent vectorization, stmt might
1623 e.g. be in a must-not-throw region. Ensure newly created stmts
1624 that could throw are part of the same region. */
1625 int lp_nr = lookup_stmt_eh_lp (stmt);
1626 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1627 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1630 /* We want to vectorize a call to combined function CFN with function
1631 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1632 as the types of all inputs. Check whether this is possible using
1633 an internal function, returning its code if so or IFN_LAST if not. */
1635 static internal_fn
1636 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1637 tree vectype_out, tree vectype_in)
1639 internal_fn ifn;
1640 if (internal_fn_p (cfn))
1641 ifn = as_internal_fn (cfn);
1642 else
1643 ifn = associated_internal_fn (fndecl);
1644 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1646 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1647 if (info.vectorizable)
1649 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1650 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1651 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1652 OPTIMIZE_FOR_SPEED))
1653 return ifn;
1656 return IFN_LAST;
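/* E.g. a sqrt call in the loop maps to CFN_SQRT; if the target supports
   IFN_SQRT directly on the chosen vector type this returns IFN_SQRT,
   otherwise IFN_LAST and the caller must vectorize the call some other
   way.  */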
1660 static tree permute_vec_elements (tree, tree, tree, gimple *,
1661 gimple_stmt_iterator *);
1664 /* Function vectorizable_mask_load_store.
1666 Check if STMT performs a conditional load or store that can be vectorized.
1667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1668 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1669 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1671 static bool
1672 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1673 gimple **vec_stmt, slp_tree slp_node)
1675 tree vec_dest = NULL;
1676 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1677 stmt_vec_info prev_stmt_info;
1678 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1679 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1680 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1681 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1682 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1683 tree rhs_vectype = NULL_TREE;
1684 tree mask_vectype;
1685 tree elem_type;
1686 gimple *new_stmt;
1687 tree dummy;
1688 tree dataref_ptr = NULL_TREE;
1689 gimple *ptr_incr;
1690 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1691 int ncopies;
1692 int i, j;
1693 bool inv_p;
1694 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1695 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1696 int gather_scale = 1;
1697 enum vect_def_type gather_dt = vect_unknown_def_type;
1698 bool is_store;
1699 tree mask;
1700 gimple *def_stmt;
1701 enum vect_def_type dt;
1703 if (slp_node != NULL)
1704 return false;
1706 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1707 gcc_assert (ncopies >= 1);
1709 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1710 mask = gimple_call_arg (stmt, 2);
1712 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
1713 return false;
1715 /* FORNOW. This restriction should be relaxed. */
1716 if (nested_in_vect_loop && ncopies > 1)
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "multiple types in nested loop.");
1721 return false;
1724 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1725 return false;
1727 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
1728 && ! vec_stmt)
1729 return false;
1731 if (!STMT_VINFO_DATA_REF (stmt_info))
1732 return false;
1734 elem_type = TREE_TYPE (vectype);
1736 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1737 return false;
1739 if (STMT_VINFO_STRIDED_P (stmt_info))
1740 return false;
1742 if (TREE_CODE (mask) != SSA_NAME)
1743 return false;
1745 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
1746 return false;
1748 if (!mask_vectype)
1749 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
1751 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
1752 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
1753 return false;
1755 if (is_store)
1757 tree rhs = gimple_call_arg (stmt, 3);
1758 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
1759 return false;
1762 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1764 gimple *def_stmt;
1765 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1766 &gather_off, &gather_scale);
1767 gcc_assert (gather_decl);
1768 if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
1769 &gather_off_vectype))
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1773 "gather index use not simple.");
1774 return false;
1777 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1778 tree masktype
1779 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1780 if (TREE_CODE (masktype) == INTEGER_TYPE)
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1784 "masked gather with integer mask not supported.");
1785 return false;
1788 else if (tree_int_cst_compare (nested_in_vect_loop
1789 ? STMT_VINFO_DR_STEP (stmt_info)
1790 : DR_STEP (dr), size_zero_node) <= 0)
1791 return false;
1792 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1793 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
1794 TYPE_MODE (mask_vectype),
1795 !is_store)
1796 || (rhs_vectype
1797 && !useless_type_conversion_p (vectype, rhs_vectype)))
1798 return false;
1800 if (!vec_stmt) /* transformation not required. */
1802 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1803 if (is_store)
1804 vect_model_store_cost (stmt_info, ncopies, false, dt,
1805 NULL, NULL, NULL);
1806 else
1807 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1808 return true;
1811 /** Transform. **/
1813 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1815 tree vec_oprnd0 = NULL_TREE, op;
1816 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1817 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1818 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1819 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1820 tree mask_perm_mask = NULL_TREE;
1821 edge pe = loop_preheader_edge (loop);
1822 gimple_seq seq;
1823 basic_block new_bb;
1824 enum { NARROW, NONE, WIDEN } modifier;
1825 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1827 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1828 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1829 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1830 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1831 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1832 scaletype = TREE_VALUE (arglist);
1833 gcc_checking_assert (types_compatible_p (srctype, rettype)
1834 && types_compatible_p (srctype, masktype));
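/* The gather offset vector and the data vector may differ in element
   count.  NONE: they match, one gather per copy.  WIDEN: the offset
   vector has twice as many elements (e.g. V8SI offsets with V4DF data),
   so two consecutive copies share one offset vector and the odd copy
   takes its high half, selected via PERM_MASK.  NARROW: the data vector
   has twice as many elements, so NCOPIES is doubled and pairs of
   half-width gather results are glued back together with PERM_MASK.  */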
1836 if (nunits == gather_off_nunits)
1837 modifier = NONE;
1838 else if (nunits == gather_off_nunits / 2)
1840 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1841 modifier = WIDEN;
1843 for (i = 0; i < gather_off_nunits; ++i)
1844 sel[i] = i | nunits;
1846 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1848 else if (nunits == gather_off_nunits * 2)
1850 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1851 modifier = NARROW;
1853 for (i = 0; i < nunits; ++i)
1854 sel[i] = i < gather_off_nunits
1855 ? i : i + nunits - gather_off_nunits;
1857 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1858 ncopies *= 2;
1859 for (i = 0; i < nunits; ++i)
1860 sel[i] = i | gather_off_nunits;
1861 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1863 else
1864 gcc_unreachable ();
1866 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1868 ptr = fold_convert (ptrtype, gather_base);
1869 if (!is_gimple_min_invariant (ptr))
1871 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1872 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1873 gcc_assert (!new_bb);
1876 scale = build_int_cst (scaletype, gather_scale);
1878 prev_stmt_info = NULL;
1879 for (j = 0; j < ncopies; ++j)
1881 if (modifier == WIDEN && (j & 1))
1882 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1883 perm_mask, stmt, gsi);
1884 else if (j == 0)
1885 op = vec_oprnd0
1886 = vect_get_vec_def_for_operand (gather_off, stmt);
1887 else
1888 op = vec_oprnd0
1889 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1891 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1893 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1894 == TYPE_VECTOR_SUBPARTS (idxtype));
1895 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
1896 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1897 new_stmt
1898 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1900 op = var;
1903 if (mask_perm_mask && (j & 1))
1904 mask_op = permute_vec_elements (mask_op, mask_op,
1905 mask_perm_mask, stmt, gsi);
1906 else
1908 if (j == 0)
1909 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
1910 else
1912 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
1913 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1916 mask_op = vec_mask;
1917 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1919 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1920 == TYPE_VECTOR_SUBPARTS (masktype));
1921 var = vect_get_new_ssa_name (masktype, vect_simple_var);
1922 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1923 new_stmt
1924 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1925 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1926 mask_op = var;
1930 new_stmt
1931 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1932 scale);
1934 if (!useless_type_conversion_p (vectype, rettype))
1936 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1937 == TYPE_VECTOR_SUBPARTS (rettype));
1938 op = vect_get_new_ssa_name (rettype, vect_simple_var);
1939 gimple_call_set_lhs (new_stmt, op);
1940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1941 var = make_ssa_name (vec_dest);
1942 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1943 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1945 else
1947 var = make_ssa_name (vec_dest, new_stmt);
1948 gimple_call_set_lhs (new_stmt, var);
1951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1953 if (modifier == NARROW)
1955 if ((j & 1) == 0)
1957 prev_res = var;
1958 continue;
1960 var = permute_vec_elements (prev_res, var,
1961 perm_mask, stmt, gsi);
1962 new_stmt = SSA_NAME_DEF_STMT (var);
1965 if (prev_stmt_info == NULL)
1966 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1967 else
1968 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1969 prev_stmt_info = vinfo_for_stmt (new_stmt);
1972 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
1973 from the IL. */
1974 if (STMT_VINFO_RELATED_STMT (stmt_info))
1976 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1977 stmt_info = vinfo_for_stmt (stmt);
1979 tree lhs = gimple_call_lhs (stmt);
1980 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
1981 set_vinfo_for_stmt (new_stmt, stmt_info);
1982 set_vinfo_for_stmt (stmt, NULL);
1983 STMT_VINFO_STMT (stmt_info) = new_stmt;
1984 gsi_replace (gsi, new_stmt, true);
1985 return true;
1987 else if (is_store)
1989 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
1990 prev_stmt_info = NULL;
1991 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
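/* Emit one IFN_MASK_STORE per copy.  The second argument of the internal
   call encodes the guaranteed alignment of the data reference in bytes
   (or, for a known misalignment, its largest power-of-two factor).  */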
1992 for (i = 0; i < ncopies; i++)
1994 unsigned align, misalign;
1996 if (i == 0)
1998 tree rhs = gimple_call_arg (stmt, 3);
1999 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2000 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2001 /* We should have caught mismatched types earlier. */
2002 gcc_assert (useless_type_conversion_p (vectype,
2003 TREE_TYPE (vec_rhs)));
2004 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2005 NULL_TREE, &dummy, gsi,
2006 &ptr_incr, false, &inv_p);
2007 gcc_assert (!inv_p);
2009 else
2011 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2012 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2013 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2014 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2015 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2016 TYPE_SIZE_UNIT (vectype));
2019 align = TYPE_ALIGN_UNIT (vectype);
2020 if (aligned_access_p (dr))
2021 misalign = 0;
2022 else if (DR_MISALIGNMENT (dr) == -1)
2024 align = TYPE_ALIGN_UNIT (elem_type);
2025 misalign = 0;
2027 else
2028 misalign = DR_MISALIGNMENT (dr);
2029 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2030 misalign);
2031 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2032 misalign ? misalign & -misalign : align);
2033 new_stmt
2034 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2035 ptr, vec_mask, vec_rhs);
2036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2037 if (i == 0)
2038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2039 else
2040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2041 prev_stmt_info = vinfo_for_stmt (new_stmt);
2044 else
2046 tree vec_mask = NULL_TREE;
2047 prev_stmt_info = NULL;
2048 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
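/* Likewise for loads: one IFN_MASK_LOAD per copy, with the same
   alignment encoding in the second call argument.  */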
2049 for (i = 0; i < ncopies; i++)
2051 unsigned align, misalign;
2053 if (i == 0)
2055 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2056 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2057 NULL_TREE, &dummy, gsi,
2058 &ptr_incr, false, &inv_p);
2059 gcc_assert (!inv_p);
2061 else
2063 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2064 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2065 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2066 TYPE_SIZE_UNIT (vectype));
2069 align = TYPE_ALIGN_UNIT (vectype);
2070 if (aligned_access_p (dr))
2071 misalign = 0;
2072 else if (DR_MISALIGNMENT (dr) == -1)
2074 align = TYPE_ALIGN_UNIT (elem_type);
2075 misalign = 0;
2077 else
2078 misalign = DR_MISALIGNMENT (dr);
2079 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2080 misalign);
2081 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2082 misalign ? misalign & -misalign : align);
2083 new_stmt
2084 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2085 ptr, vec_mask);
2086 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2088 if (i == 0)
2089 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2090 else
2091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2092 prev_stmt_info = vinfo_for_stmt (new_stmt);
2096 if (!is_store)
2098 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2099 from the IL. */
2100 if (STMT_VINFO_RELATED_STMT (stmt_info))
2102 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2103 stmt_info = vinfo_for_stmt (stmt);
2105 tree lhs = gimple_call_lhs (stmt);
2106 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2107 set_vinfo_for_stmt (new_stmt, stmt_info);
2108 set_vinfo_for_stmt (stmt, NULL);
2109 STMT_VINFO_STMT (stmt_info) = new_stmt;
2110 gsi_replace (gsi, new_stmt, true);
2113 return true;
2116 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2117 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2118 in a single step. On success, store the binary pack code in
2119 *CONVERT_CODE. */
2121 static bool
2122 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2123 tree_code *convert_code)
2125 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2126 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2127 return false;
2129 tree_code code;
2130 int multi_step_cvt = 0;
2131 auto_vec <tree, 8> interm_types;
2132 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2133 &code, &multi_step_cvt,
2134 &interm_types)
2135 || multi_step_cvt)
2136 return false;
2138 *convert_code = code;
2139 return true;
2142 /* Function vectorizable_call.
2144 Check if GS performs a function call that can be vectorized.
2145 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2146 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2147 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2149 static bool
2150 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2151 slp_tree slp_node)
2153 gcall *stmt;
2154 tree vec_dest;
2155 tree scalar_dest;
2156 tree op, type;
2157 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2158 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2159 tree vectype_out, vectype_in;
2160 int nunits_in;
2161 int nunits_out;
2162 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2163 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2164 vec_info *vinfo = stmt_info->vinfo;
2165 tree fndecl, new_temp, rhs_type;
2166 gimple *def_stmt;
2167 enum vect_def_type dt[3]
2168 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2169 gimple *new_stmt = NULL;
2170 int ncopies, j;
2171 vec<tree> vargs = vNULL;
2172 enum { NARROW, NONE, WIDEN } modifier;
2173 size_t i, nargs;
2174 tree lhs;
2176 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2177 return false;
2179 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2180 && ! vec_stmt)
2181 return false;
2183 /* Is GS a vectorizable call? */
2184 stmt = dyn_cast <gcall *> (gs);
2185 if (!stmt)
2186 return false;
2188 if (gimple_call_internal_p (stmt)
2189 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2190 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2191 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2192 slp_node);
2194 if (gimple_call_lhs (stmt) == NULL_TREE
2195 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2196 return false;
2198 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2200 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2202 /* Process function arguments. */
2203 rhs_type = NULL_TREE;
2204 vectype_in = NULL_TREE;
2205 nargs = gimple_call_num_args (stmt);
2207 /* Bail out if the function has more than three arguments; we do not have
2208 interesting builtin functions to vectorize with more than two arguments,
2209 except for fma. Calls with no arguments are not handled either. */
2210 if (nargs == 0 || nargs > 3)
2211 return false;
2213 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2214 if (gimple_call_internal_p (stmt)
2215 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2217 nargs = 0;
2218 rhs_type = unsigned_type_node;
2221 for (i = 0; i < nargs; i++)
2223 tree opvectype;
2225 op = gimple_call_arg (stmt, i);
2227 /* We can only handle calls with arguments of the same type. */
2228 if (rhs_type
2229 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2231 if (dump_enabled_p ())
2232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2233 "argument types differ.\n");
2234 return false;
2236 if (!rhs_type)
2237 rhs_type = TREE_TYPE (op);
2239 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "use not simple.\n");
2244 return false;
2247 if (!vectype_in)
2248 vectype_in = opvectype;
2249 else if (opvectype
2250 && opvectype != vectype_in)
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 "argument vector types differ.\n");
2255 return false;
2258 /* If all arguments are external or constant defs, use a vector type with
2259 the same size as the output vector type. */
2260 if (!vectype_in)
2261 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2262 if (vec_stmt)
2263 gcc_assert (vectype_in);
2264 if (!vectype_in)
2266 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269 "no vectype for scalar type ");
2270 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2271 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2274 return false;
2277 /* FORNOW */
2278 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2279 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
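/* NARROW: the result elements are narrower, so one result vector holds
   twice as many elements as an input vector and two input vectors feed
   each result.  WIDEN: the opposite; no target currently supports it
   for calls, so it is rejected below.  */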
2280 if (nunits_in == nunits_out / 2)
2281 modifier = NARROW;
2282 else if (nunits_out == nunits_in)
2283 modifier = NONE;
2284 else if (nunits_out == nunits_in / 2)
2285 modifier = WIDEN;
2286 else
2287 return false;
2289 /* We only handle functions that do not read or clobber memory. */
2290 if (gimple_vuse (stmt))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "function reads from or writes to memory.\n");
2295 return false;
2298 /* For now, we only vectorize functions if a target specific builtin
2299 is available. TODO -- in some cases, it might be profitable to
2300 insert the calls for pieces of the vector, in order to be able
2301 to vectorize other operations in the loop. */
2302 fndecl = NULL_TREE;
2303 internal_fn ifn = IFN_LAST;
2304 combined_fn cfn = gimple_call_combined_fn (stmt);
2305 tree callee = gimple_call_fndecl (stmt);
2307 /* First try using an internal function. */
2308 tree_code convert_code = ERROR_MARK;
2309 if (cfn != CFN_LAST
2310 && (modifier == NONE
2311 || (modifier == NARROW
2312 && simple_integer_narrowing (vectype_out, vectype_in,
2313 &convert_code))))
2314 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2315 vectype_in);
2317 /* If that fails, try asking for a target-specific built-in function. */
2318 if (ifn == IFN_LAST)
2320 if (cfn != CFN_LAST)
2321 fndecl = targetm.vectorize.builtin_vectorized_function
2322 (cfn, vectype_out, vectype_in);
2323 else
2324 fndecl = targetm.vectorize.builtin_md_vectorized_function
2325 (callee, vectype_out, vectype_in);
2328 if (ifn == IFN_LAST && !fndecl)
2330 if (cfn == CFN_GOMP_SIMD_LANE
2331 && !slp_node
2332 && loop_vinfo
2333 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2334 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2335 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2336 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2338 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2339 { 0, 1, 2, ... vf - 1 } vector. */
2340 gcc_assert (nargs == 0);
2342 else
2344 if (dump_enabled_p ())
2345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2346 "function is not vectorizable.\n");
2347 return false;
2351 if (slp_node || PURE_SLP_STMT (stmt_info))
2352 ncopies = 1;
2353 else if (modifier == NARROW && ifn == IFN_LAST)
2354 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2355 else
2356 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2358 /* Sanity check: make sure that at least one copy of the vectorized stmt
2359 needs to be generated. */
2360 gcc_assert (ncopies >= 1);
2362 if (!vec_stmt) /* transformation not required. */
2364 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2367 "\n");
2368 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2369 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2370 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2371 vec_promote_demote, stmt_info, 0, vect_body);
2373 return true;
2376 /** Transform. **/
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2381 /* Handle def. */
2382 scalar_dest = gimple_call_lhs (stmt);
2383 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2385 prev_stmt_info = NULL;
2386 if (modifier == NONE || ifn != IFN_LAST)
2388 tree prev_res = NULL_TREE;
2389 for (j = 0; j < ncopies; ++j)
2391 /* Build argument list for the vectorized call. */
2392 if (j == 0)
2393 vargs.create (nargs);
2394 else
2395 vargs.truncate (0);
2397 if (slp_node)
2399 auto_vec<vec<tree> > vec_defs (nargs);
2400 vec<tree> vec_oprnds0;
2402 for (i = 0; i < nargs; i++)
2403 vargs.quick_push (gimple_call_arg (stmt, i));
2404 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2405 vec_oprnds0 = vec_defs[0];
2407 /* Arguments are ready. Create the new vector stmt. */
2408 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2410 size_t k;
2411 for (k = 0; k < nargs; k++)
2413 vec<tree> vec_oprndsk = vec_defs[k];
2414 vargs[k] = vec_oprndsk[i];
2416 if (modifier == NARROW)
2418 tree half_res = make_ssa_name (vectype_in);
2419 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2420 gimple_call_set_lhs (new_stmt, half_res);
2421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2422 if ((i & 1) == 0)
2424 prev_res = half_res;
2425 continue;
2427 new_temp = make_ssa_name (vec_dest);
2428 new_stmt = gimple_build_assign (new_temp, convert_code,
2429 prev_res, half_res);
2431 else
2433 if (ifn != IFN_LAST)
2434 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2435 else
2436 new_stmt = gimple_build_call_vec (fndecl, vargs);
2437 new_temp = make_ssa_name (vec_dest, new_stmt);
2438 gimple_call_set_lhs (new_stmt, new_temp);
2440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2441 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2444 for (i = 0; i < nargs; i++)
2446 vec<tree> vec_oprndsi = vec_defs[i];
2447 vec_oprndsi.release ();
2449 continue;
2452 for (i = 0; i < nargs; i++)
2454 op = gimple_call_arg (stmt, i);
2455 if (j == 0)
2456 vec_oprnd0
2457 = vect_get_vec_def_for_operand (op, stmt);
2458 else
2460 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2461 vec_oprnd0
2462 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2465 vargs.quick_push (vec_oprnd0);
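/* IFN_GOMP_SIMD_LANE is expanded to the constant lane-number vector for
   this copy, e.g. with nunits_out == 4 copy 0 yields { 0, 1, 2, 3 } and
   copy 1 yields { 4, 5, 6, 7 }.  */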
2468 if (gimple_call_internal_p (stmt)
2469 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2471 tree *v = XALLOCAVEC (tree, nunits_out);
2472 int k;
2473 for (k = 0; k < nunits_out; ++k)
2474 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2475 tree cst = build_vector (vectype_out, v);
2476 tree new_var
2477 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2478 gimple *init_stmt = gimple_build_assign (new_var, cst);
2479 vect_init_vector_1 (stmt, init_stmt, NULL);
2480 new_temp = make_ssa_name (vec_dest);
2481 new_stmt = gimple_build_assign (new_temp, new_var);
2483 else if (modifier == NARROW)
2485 tree half_res = make_ssa_name (vectype_in);
2486 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2487 gimple_call_set_lhs (new_stmt, half_res);
2488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2489 if ((j & 1) == 0)
2491 prev_res = half_res;
2492 continue;
2494 new_temp = make_ssa_name (vec_dest);
2495 new_stmt = gimple_build_assign (new_temp, convert_code,
2496 prev_res, half_res);
2498 else
2500 if (ifn != IFN_LAST)
2501 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2502 else
2503 new_stmt = gimple_build_call_vec (fndecl, vargs);
2504 new_temp = make_ssa_name (vec_dest, new_stmt);
2505 gimple_call_set_lhs (new_stmt, new_temp);
2507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2509 if (j == (modifier == NARROW ? 1 : 0))
2510 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2511 else
2512 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2514 prev_stmt_info = vinfo_for_stmt (new_stmt);
2517 else if (modifier == NARROW)
2519 for (j = 0; j < ncopies; ++j)
2521 /* Build argument list for the vectorized call. */
2522 if (j == 0)
2523 vargs.create (nargs * 2);
2524 else
2525 vargs.truncate (0);
2527 if (slp_node)
2529 auto_vec<vec<tree> > vec_defs (nargs);
2530 vec<tree> vec_oprnds0;
2532 for (i = 0; i < nargs; i++)
2533 vargs.quick_push (gimple_call_arg (stmt, i));
2534 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2535 vec_oprnds0 = vec_defs[0];
2537 /* Arguments are ready. Create the new vector stmt. */
2538 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2540 size_t k;
2541 vargs.truncate (0);
2542 for (k = 0; k < nargs; k++)
2544 vec<tree> vec_oprndsk = vec_defs[k];
2545 vargs.quick_push (vec_oprndsk[i]);
2546 vargs.quick_push (vec_oprndsk[i + 1]);
2548 if (ifn != IFN_LAST)
2549 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2550 else
2551 new_stmt = gimple_build_call_vec (fndecl, vargs);
2552 new_temp = make_ssa_name (vec_dest, new_stmt);
2553 gimple_call_set_lhs (new_stmt, new_temp);
2554 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2555 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2558 for (i = 0; i < nargs; i++)
2560 vec<tree> vec_oprndsi = vec_defs[i];
2561 vec_oprndsi.release ();
2563 continue;
2566 for (i = 0; i < nargs; i++)
2568 op = gimple_call_arg (stmt, i);
2569 if (j == 0)
2571 vec_oprnd0
2572 = vect_get_vec_def_for_operand (op, stmt);
2573 vec_oprnd1
2574 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2576 else
2578 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2579 vec_oprnd0
2580 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2581 vec_oprnd1
2582 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2585 vargs.quick_push (vec_oprnd0);
2586 vargs.quick_push (vec_oprnd1);
2589 new_stmt = gimple_build_call_vec (fndecl, vargs);
2590 new_temp = make_ssa_name (vec_dest, new_stmt);
2591 gimple_call_set_lhs (new_stmt, new_temp);
2592 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2594 if (j == 0)
2595 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2596 else
2597 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2599 prev_stmt_info = vinfo_for_stmt (new_stmt);
2602 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2604 else
2605 /* No current target implements this case. */
2606 return false;
2608 vargs.release ();
2610 /* The call in STMT might prevent it from being removed in dce.
2611 We however cannot remove it here, due to the way the ssa name
2612 it defines is mapped to the new definition. So just replace
2613 the rhs of the statement with something harmless. */
2615 if (slp_node)
2616 return true;
2618 type = TREE_TYPE (scalar_dest);
2619 if (is_pattern_stmt_p (stmt_info))
2620 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2621 else
2622 lhs = gimple_call_lhs (stmt);
2624 if (gimple_call_internal_p (stmt)
2625 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2627 /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
2628 with vf - 1 rather than 0, i.e. the value for the last iteration of
2629 the vectorized loop. */
2630 imm_use_iterator iter;
2631 use_operand_p use_p;
2632 gimple *use_stmt;
2633 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2635 basic_block use_bb = gimple_bb (use_stmt);
2636 if (use_bb
2637 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2639 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2640 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2641 ncopies * nunits_out - 1));
2642 update_stmt (use_stmt);
2647 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2648 set_vinfo_for_stmt (new_stmt, stmt_info);
2649 set_vinfo_for_stmt (stmt, NULL);
2650 STMT_VINFO_STMT (stmt_info) = new_stmt;
2651 gsi_replace (gsi, new_stmt, false);
2653 return true;
2657 struct simd_call_arg_info
2659 tree vectype;
2660 tree op;
2661 enum vect_def_type dt;
2662 HOST_WIDE_INT linear_step;
2663 unsigned int align;
2664 bool simd_lane_linear;
2667 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2668 is linear within simd lane (but not within whole loop), note it in
2669 *ARGINFO. */
2671 static void
2672 vect_simd_lane_linear (tree op, struct loop *loop,
2673 struct simd_call_arg_info *arginfo)
2675 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2677 if (!is_gimple_assign (def_stmt)
2678 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2679 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2680 return;
2682 tree base = gimple_assign_rhs1 (def_stmt);
2683 HOST_WIDE_INT linear_step = 0;
2684 tree v = gimple_assign_rhs2 (def_stmt);
2685 while (TREE_CODE (v) == SSA_NAME)
2687 tree t;
2688 def_stmt = SSA_NAME_DEF_STMT (v);
2689 if (is_gimple_assign (def_stmt))
2690 switch (gimple_assign_rhs_code (def_stmt))
2692 case PLUS_EXPR:
2693 t = gimple_assign_rhs2 (def_stmt);
2694 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2695 return;
2696 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2697 v = gimple_assign_rhs1 (def_stmt);
2698 continue;
2699 case MULT_EXPR:
2700 t = gimple_assign_rhs2 (def_stmt);
2701 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2702 return;
2703 linear_step = tree_to_shwi (t);
2704 v = gimple_assign_rhs1 (def_stmt);
2705 continue;
2706 CASE_CONVERT:
2707 t = gimple_assign_rhs1 (def_stmt);
2708 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2709 || (TYPE_PRECISION (TREE_TYPE (v))
2710 < TYPE_PRECISION (TREE_TYPE (t))))
2711 return;
2712 if (!linear_step)
2713 linear_step = 1;
2714 v = t;
2715 continue;
2716 default:
2717 return;
2719 else if (is_gimple_call (def_stmt)
2720 && gimple_call_internal_p (def_stmt)
2721 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2722 && loop->simduid
2723 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2724 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2725 == loop->simduid))
2727 if (!linear_step)
2728 linear_step = 1;
2729 arginfo->linear_step = linear_step;
2730 arginfo->op = base;
2731 arginfo->simd_lane_linear = true;
2732 return;
2737 /* Function vectorizable_simd_clone_call.
2739 Check if STMT performs a function call that can be vectorized
2740 by calling a simd clone of the function.
2741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2742 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2745 static bool
2746 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2747 gimple **vec_stmt, slp_tree slp_node)
2749 tree vec_dest;
2750 tree scalar_dest;
2751 tree op, type;
2752 tree vec_oprnd0 = NULL_TREE;
2753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2754 tree vectype;
2755 unsigned int nunits;
2756 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2757 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2758 vec_info *vinfo = stmt_info->vinfo;
2759 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2760 tree fndecl, new_temp;
2761 gimple *def_stmt;
2762 gimple *new_stmt = NULL;
2763 int ncopies, j;
2764 vec<simd_call_arg_info> arginfo = vNULL;
2765 vec<tree> vargs = vNULL;
2766 size_t i, nargs;
2767 tree lhs, rtype, ratype;
2768 vec<constructor_elt, va_gc> *ret_ctor_elts;
2770 /* Is STMT a vectorizable call? */
2771 if (!is_gimple_call (stmt))
2772 return false;
2774 fndecl = gimple_call_fndecl (stmt);
2775 if (fndecl == NULL_TREE)
2776 return false;
2778 struct cgraph_node *node = cgraph_node::get (fndecl);
2779 if (node == NULL || node->simd_clones == NULL)
2780 return false;
2782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2783 return false;
2785 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2786 && ! vec_stmt)
2787 return false;
2789 if (gimple_call_lhs (stmt)
2790 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2791 return false;
2793 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2795 vectype = STMT_VINFO_VECTYPE (stmt_info);
2797 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2798 return false;
2800 /* FORNOW */
2801 if (slp_node || PURE_SLP_STMT (stmt_info))
2802 return false;
2804 /* Process function arguments. */
2805 nargs = gimple_call_num_args (stmt);
2807 /* Bail out if the function has zero arguments. */
2808 if (nargs == 0)
2809 return false;
2811 arginfo.create (nargs);
2813 for (i = 0; i < nargs; i++)
2815 simd_call_arg_info thisarginfo;
2816 affine_iv iv;
2818 thisarginfo.linear_step = 0;
2819 thisarginfo.align = 0;
2820 thisarginfo.op = NULL_TREE;
2821 thisarginfo.simd_lane_linear = false;
2823 op = gimple_call_arg (stmt, i);
2824 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2825 &thisarginfo.vectype)
2826 || thisarginfo.dt == vect_uninitialized_def)
2828 if (dump_enabled_p ())
2829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2830 "use not simple.\n");
2831 arginfo.release ();
2832 return false;
2835 if (thisarginfo.dt == vect_constant_def
2836 || thisarginfo.dt == vect_external_def)
2837 gcc_assert (thisarginfo.vectype == NULL_TREE);
2838 else
2839 gcc_assert (thisarginfo.vectype != NULL_TREE);
2841 /* For linear arguments, the analyze phase should have saved
2842 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2843 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2844 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2846 gcc_assert (vec_stmt);
2847 thisarginfo.linear_step
2848 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2849 thisarginfo.op
2850 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2851 thisarginfo.simd_lane_linear
2852 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2853 == boolean_true_node);
2854 /* If loop has been peeled for alignment, we need to adjust it. */
2855 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2856 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2857 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2859 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2860 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2861 tree opt = TREE_TYPE (thisarginfo.op);
2862 bias = fold_convert (TREE_TYPE (step), bias);
2863 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2864 thisarginfo.op
2865 = fold_build2 (POINTER_TYPE_P (opt)
2866 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2867 thisarginfo.op, bias);
2870 else if (!vec_stmt
2871 && thisarginfo.dt != vect_constant_def
2872 && thisarginfo.dt != vect_external_def
2873 && loop_vinfo
2874 && TREE_CODE (op) == SSA_NAME
2875 && simple_iv (loop, loop_containing_stmt (stmt), op,
2876 &iv, false)
2877 && tree_fits_shwi_p (iv.step))
2879 thisarginfo.linear_step = tree_to_shwi (iv.step);
2880 thisarginfo.op = iv.base;
2882 else if ((thisarginfo.dt == vect_constant_def
2883 || thisarginfo.dt == vect_external_def)
2884 && POINTER_TYPE_P (TREE_TYPE (op)))
2885 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2886 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2887 linear too. */
2888 if (POINTER_TYPE_P (TREE_TYPE (op))
2889 && !thisarginfo.linear_step
2890 && !vec_stmt
2891 && thisarginfo.dt != vect_constant_def
2892 && thisarginfo.dt != vect_external_def
2893 && loop_vinfo
2894 && !slp_node
2895 && TREE_CODE (op) == SSA_NAME)
2896 vect_simd_lane_linear (op, loop, &thisarginfo);
2898 arginfo.quick_push (thisarginfo);
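/* Choose the best simd clone.  Clones whose simdlen exceeds the
   vectorization factor, inbranch clones (FORNOW), clones the target
   cannot use, and clones whose argument kinds do not match how the
   actual arguments are defined are skipped; the remaining candidates
   are ranked by a badness score that penalises a simdlen smaller than
   the VF and insufficient argument alignment.  The smallest badness
   wins.  */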
2901 unsigned int badness = 0;
2902 struct cgraph_node *bestn = NULL;
2903 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2904 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2905 else
2906 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2907 n = n->simdclone->next_clone)
2909 unsigned int this_badness = 0;
2910 if (n->simdclone->simdlen
2911 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2912 || n->simdclone->nargs != nargs)
2913 continue;
2914 if (n->simdclone->simdlen
2915 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2916 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2917 - exact_log2 (n->simdclone->simdlen)) * 1024;
2918 if (n->simdclone->inbranch)
2919 this_badness += 2048;
2920 int target_badness = targetm.simd_clone.usable (n);
2921 if (target_badness < 0)
2922 continue;
2923 this_badness += target_badness * 512;
2924 /* FORNOW: Have to add code to add the mask argument. */
2925 if (n->simdclone->inbranch)
2926 continue;
2927 for (i = 0; i < nargs; i++)
2929 switch (n->simdclone->args[i].arg_type)
2931 case SIMD_CLONE_ARG_TYPE_VECTOR:
2932 if (!useless_type_conversion_p
2933 (n->simdclone->args[i].orig_type,
2934 TREE_TYPE (gimple_call_arg (stmt, i))))
2935 i = -1;
2936 else if (arginfo[i].dt == vect_constant_def
2937 || arginfo[i].dt == vect_external_def
2938 || arginfo[i].linear_step)
2939 this_badness += 64;
2940 break;
2941 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2942 if (arginfo[i].dt != vect_constant_def
2943 && arginfo[i].dt != vect_external_def)
2944 i = -1;
2945 break;
2946 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2947 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2948 if (arginfo[i].dt == vect_constant_def
2949 || arginfo[i].dt == vect_external_def
2950 || (arginfo[i].linear_step
2951 != n->simdclone->args[i].linear_step))
2952 i = -1;
2953 break;
2954 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2957 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2958 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2959 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2960 /* FORNOW */
2961 i = -1;
2962 break;
2963 case SIMD_CLONE_ARG_TYPE_MASK:
2964 gcc_unreachable ();
2966 if (i == (size_t) -1)
2967 break;
2968 if (n->simdclone->args[i].alignment > arginfo[i].align)
2970 i = -1;
2971 break;
2973 if (arginfo[i].align)
2974 this_badness += (exact_log2 (arginfo[i].align)
2975 - exact_log2 (n->simdclone->args[i].alignment));
2977 if (i == (size_t) -1)
2978 continue;
2979 if (bestn == NULL || this_badness < badness)
2981 bestn = n;
2982 badness = this_badness;
2986 if (bestn == NULL)
2988 arginfo.release ();
2989 return false;
2992 for (i = 0; i < nargs; i++)
2993 if ((arginfo[i].dt == vect_constant_def
2994 || arginfo[i].dt == vect_external_def)
2995 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2997 arginfo[i].vectype
2998 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2999 i)));
3000 if (arginfo[i].vectype == NULL
3001 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3002 > bestn->simdclone->simdlen))
3004 arginfo.release ();
3005 return false;
3009 fndecl = bestn->decl;
3010 nunits = bestn->simdclone->simdlen;
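/* The chosen clone processes SIMDLEN lanes per call, so the scalar stmt
   is replaced by VF / SIMDLEN calls.  */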
3011 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3013 /* If the function isn't const, only allow it in simd loops where the user
3014 has asserted that at least nunits consecutive iterations can be
3015 performed using SIMD instructions. */
3016 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3017 && gimple_vuse (stmt))
3019 arginfo.release ();
3020 return false;
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies >= 1);
3027 if (!vec_stmt) /* transformation not required. */
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3030 for (i = 0; i < nargs; i++)
3031 if ((bestn->simdclone->args[i].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3033 || (bestn->simdclone->args[i].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3037 + 1);
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3039 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3040 ? size_type_node : TREE_TYPE (arginfo[i].op);
3041 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3043 tree sll = arginfo[i].simd_lane_linear
3044 ? boolean_true_node : boolean_false_node;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3047 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE, vect_location,
3050 "=== vectorizable_simd_clone_call ===\n");
3051 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3052 arginfo.release ();
3053 return true;
3056 /** Transform. **/
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3061 /* Handle def. */
3062 scalar_dest = gimple_call_lhs (stmt);
3063 vec_dest = NULL_TREE;
3064 rtype = NULL_TREE;
3065 ratype = NULL_TREE;
3066 if (scalar_dest)
3068 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3069 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3070 if (TREE_CODE (rtype) == ARRAY_TYPE)
3072 ratype = rtype;
3073 rtype = TREE_TYPE (ratype);
3077 prev_stmt_info = NULL;
3078 for (j = 0; j < ncopies; ++j)
3080 /* Build argument list for the vectorized call. */
3081 if (j == 0)
3082 vargs.create (nargs);
3083 else
3084 vargs.truncate (0);
3086 for (i = 0; i < nargs; i++)
3088 unsigned int k, l, m, o;
3089 tree atype;
3090 op = gimple_call_arg (stmt, i);
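/* Marshal the scalar argument into the form the clone expects: VECTOR
   arguments are re-sliced to the clone's argument vector type (narrower
   pieces extracted with BIT_FIELD_REF, wider ones assembled with a
   CONSTRUCTOR), UNIFORM arguments are passed through unchanged, and
   LINEAR arguments get a PHI that advances by linear_step * VF per
   vector loop iteration, with copy J offset from it by
   linear_step * J * simdlen.  */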
3091 switch (bestn->simdclone->args[i].arg_type)
3093 case SIMD_CLONE_ARG_TYPE_VECTOR:
3094 atype = bestn->simdclone->args[i].vector_type;
3095 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3096 for (m = j * o; m < (j + 1) * o; m++)
3098 if (TYPE_VECTOR_SUBPARTS (atype)
3099 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3101 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3102 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3103 / TYPE_VECTOR_SUBPARTS (atype));
3104 gcc_assert ((k & (k - 1)) == 0);
3105 if (m == 0)
3106 vec_oprnd0
3107 = vect_get_vec_def_for_operand (op, stmt);
3108 else
3110 vec_oprnd0 = arginfo[i].op;
3111 if ((m & (k - 1)) == 0)
3112 vec_oprnd0
3113 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3114 vec_oprnd0);
3116 arginfo[i].op = vec_oprnd0;
3117 vec_oprnd0
3118 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3119 size_int (prec),
3120 bitsize_int ((m & (k - 1)) * prec));
3121 new_stmt
3122 = gimple_build_assign (make_ssa_name (atype),
3123 vec_oprnd0);
3124 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3125 vargs.safe_push (gimple_assign_lhs (new_stmt));
3127 else
3129 k = (TYPE_VECTOR_SUBPARTS (atype)
3130 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3131 gcc_assert ((k & (k - 1)) == 0);
3132 vec<constructor_elt, va_gc> *ctor_elts;
3133 if (k != 1)
3134 vec_alloc (ctor_elts, k);
3135 else
3136 ctor_elts = NULL;
3137 for (l = 0; l < k; l++)
3139 if (m == 0 && l == 0)
3140 vec_oprnd0
3141 = vect_get_vec_def_for_operand (op, stmt);
3142 else
3143 vec_oprnd0
3144 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3145 arginfo[i].op);
3146 arginfo[i].op = vec_oprnd0;
3147 if (k == 1)
3148 break;
3149 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3150 vec_oprnd0);
3152 if (k == 1)
3153 vargs.safe_push (vec_oprnd0);
3154 else
3156 vec_oprnd0 = build_constructor (atype, ctor_elts);
3157 new_stmt
3158 = gimple_build_assign (make_ssa_name (atype),
3159 vec_oprnd0);
3160 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3161 vargs.safe_push (gimple_assign_lhs (new_stmt));
3165 break;
3166 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3167 vargs.safe_push (op);
3168 break;
3169 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3170 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3171 if (j == 0)
3173 gimple_seq stmts;
3174 arginfo[i].op
3175 = force_gimple_operand (arginfo[i].op, &stmts, true,
3176 NULL_TREE);
3177 if (stmts != NULL)
3179 basic_block new_bb;
3180 edge pe = loop_preheader_edge (loop);
3181 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3182 gcc_assert (!new_bb);
3184 if (arginfo[i].simd_lane_linear)
3186 vargs.safe_push (arginfo[i].op);
3187 break;
3189 tree phi_res = copy_ssa_name (op);
3190 gphi *new_phi = create_phi_node (phi_res, loop->header);
3191 set_vinfo_for_stmt (new_phi,
3192 new_stmt_vec_info (new_phi, loop_vinfo));
3193 add_phi_arg (new_phi, arginfo[i].op,
3194 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3195 enum tree_code code
3196 = POINTER_TYPE_P (TREE_TYPE (op))
3197 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3198 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3199 ? sizetype : TREE_TYPE (op);
3200 widest_int cst
3201 = wi::mul (bestn->simdclone->args[i].linear_step,
3202 ncopies * nunits);
3203 tree tcst = wide_int_to_tree (type, cst);
3204 tree phi_arg = copy_ssa_name (op);
3205 new_stmt
3206 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3207 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3208 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3209 set_vinfo_for_stmt (new_stmt,
3210 new_stmt_vec_info (new_stmt, loop_vinfo));
3211 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3212 UNKNOWN_LOCATION);
3213 arginfo[i].op = phi_res;
3214 vargs.safe_push (phi_res);
3216 else
3218 enum tree_code code
3219 = POINTER_TYPE_P (TREE_TYPE (op))
3220 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3221 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3222 ? sizetype : TREE_TYPE (op);
3223 widest_int cst
3224 = wi::mul (bestn->simdclone->args[i].linear_step,
3225 j * nunits);
3226 tree tcst = wide_int_to_tree (type, cst);
3227 new_temp = make_ssa_name (TREE_TYPE (op));
3228 new_stmt = gimple_build_assign (new_temp, code,
3229 arginfo[i].op, tcst);
3230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3231 vargs.safe_push (new_temp);
3233 break;
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3239 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3240 default:
3241 gcc_unreachable ();
3245 new_stmt = gimple_build_call_vec (fndecl, vargs);
3246 if (vec_dest)
3248 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3249 if (ratype)
3250 new_temp = create_tmp_var (ratype);
3251 else if (TYPE_VECTOR_SUBPARTS (vectype)
3252 == TYPE_VECTOR_SUBPARTS (rtype))
3253 new_temp = make_ssa_name (vec_dest, new_stmt);
3254 else
3255 new_temp = make_ssa_name (rtype, new_stmt);
3256 gimple_call_set_lhs (new_stmt, new_temp);
3258 vect_finish_stmt_generation (stmt, new_stmt, gsi);
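/* Bring the clone's return value back to VECTYPE: a wider return (or an
   array return) is split into several VECTYPE-sized pieces, several
   narrower returns are collected into a CONSTRUCTOR, and array
   temporaries are clobbered once their contents have been copied out.  */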
3260 if (vec_dest)
3262 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3264 unsigned int k, l;
3265 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3266 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3267 gcc_assert ((k & (k - 1)) == 0);
3268 for (l = 0; l < k; l++)
3270 tree t;
3271 if (ratype)
3273 t = build_fold_addr_expr (new_temp);
3274 t = build2 (MEM_REF, vectype, t,
3275 build_int_cst (TREE_TYPE (t),
3276 l * prec / BITS_PER_UNIT));
3278 else
3279 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3280 size_int (prec), bitsize_int (l * prec));
3281 new_stmt
3282 = gimple_build_assign (make_ssa_name (vectype), t);
3283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3284 if (j == 0 && l == 0)
3285 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3286 else
3287 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3289 prev_stmt_info = vinfo_for_stmt (new_stmt);
3292 if (ratype)
3294 tree clobber = build_constructor (ratype, NULL);
3295 TREE_THIS_VOLATILE (clobber) = 1;
3296 new_stmt = gimple_build_assign (new_temp, clobber);
3297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3299 continue;
3301 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3303 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3304 / TYPE_VECTOR_SUBPARTS (rtype));
3305 gcc_assert ((k & (k - 1)) == 0);
3306 if ((j & (k - 1)) == 0)
3307 vec_alloc (ret_ctor_elts, k);
3308 if (ratype)
3310 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3311 for (m = 0; m < o; m++)
3313 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3314 size_int (m), NULL_TREE, NULL_TREE);
3315 new_stmt
3316 = gimple_build_assign (make_ssa_name (rtype), tem);
3317 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3318 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3319 gimple_assign_lhs (new_stmt));
3321 tree clobber = build_constructor (ratype, NULL);
3322 TREE_THIS_VOLATILE (clobber) = 1;
3323 new_stmt = gimple_build_assign (new_temp, clobber);
3324 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3326 else
3327 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3328 if ((j & (k - 1)) != k - 1)
3329 continue;
3330 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3331 new_stmt
3332 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3333 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3335 if ((unsigned) j == k - 1)
3336 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3337 else
3338 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3340 prev_stmt_info = vinfo_for_stmt (new_stmt);
3341 continue;
3343 else if (ratype)
3345 tree t = build_fold_addr_expr (new_temp);
3346 t = build2 (MEM_REF, vectype, t,
3347 build_int_cst (TREE_TYPE (t), 0));
3348 new_stmt
3349 = gimple_build_assign (make_ssa_name (vec_dest), t);
3350 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3351 tree clobber = build_constructor (ratype, NULL);
3352 TREE_THIS_VOLATILE (clobber) = 1;
3353 vect_finish_stmt_generation (stmt,
3354 gimple_build_assign (new_temp,
3355 clobber), gsi);
3359 if (j == 0)
3360 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3361 else
3362 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3364 prev_stmt_info = vinfo_for_stmt (new_stmt);
3367 vargs.release ();
3369 /* The call in STMT might prevent it from being removed in dce.
3370 We however cannot remove it here, due to the way the ssa name
3371 it defines is mapped to the new definition. So just replace
3372 the rhs of the statement with something harmless. */
3374 if (slp_node)
3375 return true;
3377 if (scalar_dest)
3379 type = TREE_TYPE (scalar_dest);
3380 if (is_pattern_stmt_p (stmt_info))
3381 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3382 else
3383 lhs = gimple_call_lhs (stmt);
3384 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3386 else
3387 new_stmt = gimple_build_nop ();
3388 set_vinfo_for_stmt (new_stmt, stmt_info);
3389 set_vinfo_for_stmt (stmt, NULL);
3390 STMT_VINFO_STMT (stmt_info) = new_stmt;
3391 gsi_replace (gsi, new_stmt, true);
3392 unlink_stmt_vdef (stmt);
3394 return true;
3398 /* Function vect_gen_widened_results_half
3400 Create a vector stmt whose code, number of operands, and result
3401 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3402 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3403 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3404 needs to be created (DECL is a function-decl of a target-builtin).
3405 STMT is the original scalar stmt that we are vectorizing. */
3407 static gimple *
3408 vect_gen_widened_results_half (enum tree_code code,
3409 tree decl,
3410 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3411 tree vec_dest, gimple_stmt_iterator *gsi,
3412 gimple *stmt)
3414 gimple *new_stmt;
3415 tree new_temp;
3417 /* Generate half of the widened result: */
3418 if (code == CALL_EXPR)
3420 /* Target specific support */
3421 if (op_type == binary_op)
3422 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3423 else
3424 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3425 new_temp = make_ssa_name (vec_dest, new_stmt);
3426 gimple_call_set_lhs (new_stmt, new_temp);
3428 else
3430 /* Generic support */
3431 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3432 if (op_type != binary_op)
3433 vec_oprnd1 = NULL;
3434 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3435 new_temp = make_ssa_name (vec_dest, new_stmt);
3436 gimple_assign_set_lhs (new_stmt, new_temp);
3438 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3440 return new_stmt;
3444 /* Get vectorized definitions for loop-based vectorization. For the first
3445 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3446 scalar operand), and for the rest we get a copy with
3447 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3448 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3449 The vectors are collected into VEC_OPRNDS. */
3451 static void
3452 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3453 vec<tree> *vec_oprnds, int multi_step_cvt)
3455 tree vec_oprnd;
3457 /* Get first vector operand. */
3458 /* All the vector operands except the very first one (that is scalar oprnd)
3459 are stmt copies. */
3460 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3461 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3462 else
3463 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3465 vec_oprnds->quick_push (vec_oprnd);
3467 /* Get second vector operand. */
3468 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3469 vec_oprnds->quick_push (vec_oprnd);
3471 *oprnd = vec_oprnd;
3473 /* For conversion in multiple steps, continue to get operands
3474 recursively. */
3475 if (multi_step_cvt)
3476 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3480 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3481 For multi-step conversions store the resulting vectors and call the function
3482 recursively. */
3484 static void
3485 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3486 int multi_step_cvt, gimple *stmt,
3487 vec<tree> vec_dsts,
3488 gimple_stmt_iterator *gsi,
3489 slp_tree slp_node, enum tree_code code,
3490 stmt_vec_info *prev_stmt_info)
3492 unsigned int i;
3493 tree vop0, vop1, new_tmp, vec_dest;
3494 gimple *new_stmt;
3495 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3497 vec_dest = vec_dsts.pop ();
3499 for (i = 0; i < vec_oprnds->length (); i += 2)
3501 /* Create demotion operation. */
3502 vop0 = (*vec_oprnds)[i];
3503 vop1 = (*vec_oprnds)[i + 1];
3504 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3505 new_tmp = make_ssa_name (vec_dest, new_stmt);
3506 gimple_assign_set_lhs (new_stmt, new_tmp);
3507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3509 if (multi_step_cvt)
3510 /* Store the resulting vector for next recursive call. */
3511 (*vec_oprnds)[i/2] = new_tmp;
3512 else
3514 /* This is the last step of the conversion sequence. Store the
3515 vectors in SLP_NODE or in vector info of the scalar statement
3516 (or in STMT_VINFO_RELATED_STMT chain). */
3517 if (slp_node)
3518 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3519 else
3521 if (!*prev_stmt_info)
3522 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3523 else
3524 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3526 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3531 /* For multi-step demotion operations we first generate demotion operations
3532 from the source type to the intermediate types, and then combine the
3533 results (stored in VEC_OPRNDS) in a demotion operation to the
3534 destination type. */
3535 if (multi_step_cvt)
3537 /* At each level of recursion we have half of the operands we had at the
3538 previous level. */
3539 vec_oprnds->truncate ((i+1)/2);
3540 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3541 stmt, vec_dsts, gsi, slp_node,
3542 VEC_PACK_TRUNC_EXPR,
3543 prev_stmt_info);
3546 vec_dsts.quick_push (vec_dest);
3550 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3551 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3552 the resulting vectors and call the function recursively. */
3554 static void
3555 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3556 vec<tree> *vec_oprnds1,
3557 gimple *stmt, tree vec_dest,
3558 gimple_stmt_iterator *gsi,
3559 enum tree_code code1,
3560 enum tree_code code2, tree decl1,
3561 tree decl2, int op_type)
3563 int i;
3564 tree vop0, vop1, new_tmp1, new_tmp2;
3565 gimple *new_stmt1, *new_stmt2;
3566 vec<tree> vec_tmp = vNULL;
3568 vec_tmp.create (vec_oprnds0->length () * 2);
3569 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3571 if (op_type == binary_op)
3572 vop1 = (*vec_oprnds1)[i];
3573 else
3574 vop1 = NULL_TREE;
3576 /* Generate the two halves of promotion operation. */
3577 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3578 op_type, vec_dest, gsi, stmt);
3579 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3580 op_type, vec_dest, gsi, stmt);
3581 if (is_gimple_call (new_stmt1))
3583 new_tmp1 = gimple_call_lhs (new_stmt1);
3584 new_tmp2 = gimple_call_lhs (new_stmt2);
3586 else
3588 new_tmp1 = gimple_assign_lhs (new_stmt1);
3589 new_tmp2 = gimple_assign_lhs (new_stmt2);
3592 /* Store the results for the next step. */
3593 vec_tmp.quick_push (new_tmp1);
3594 vec_tmp.quick_push (new_tmp2);
3597 vec_oprnds0->release ();
3598 *vec_oprnds0 = vec_tmp;
3602 /* Check if STMT performs a conversion operation, that can be vectorized.
3603 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3604 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3605 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3607 static bool
3608 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3609 gimple **vec_stmt, slp_tree slp_node)
3611 tree vec_dest;
3612 tree scalar_dest;
3613 tree op0, op1 = NULL_TREE;
3614 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3616 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3617 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3618 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3619 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3620 tree new_temp;
3621 gimple *def_stmt;
3622 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3623 gimple *new_stmt = NULL;
3624 stmt_vec_info prev_stmt_info;
3625 int nunits_in;
3626 int nunits_out;
3627 tree vectype_out, vectype_in;
3628 int ncopies, i, j;
3629 tree lhs_type, rhs_type;
3630 enum { NARROW, NONE, WIDEN } modifier;
3631 vec<tree> vec_oprnds0 = vNULL;
3632 vec<tree> vec_oprnds1 = vNULL;
3633 tree vop0;
3634 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3635 vec_info *vinfo = stmt_info->vinfo;
3636 int multi_step_cvt = 0;
3637 vec<tree> vec_dsts = vNULL;
3638 vec<tree> interm_types = vNULL;
3639 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3640 int op_type;
3641 machine_mode rhs_mode;
3642 unsigned short fltsz;
3644 /* Is STMT a vectorizable conversion? */
3646 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3647 return false;
3649 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3650 && ! vec_stmt)
3651 return false;
3653 if (!is_gimple_assign (stmt))
3654 return false;
3656 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3657 return false;
3659 code = gimple_assign_rhs_code (stmt);
3660 if (!CONVERT_EXPR_CODE_P (code)
3661 && code != FIX_TRUNC_EXPR
3662 && code != FLOAT_EXPR
3663 && code != WIDEN_MULT_EXPR
3664 && code != WIDEN_LSHIFT_EXPR)
3665 return false;
3667 op_type = TREE_CODE_LENGTH (code);
3669 /* Check types of lhs and rhs. */
3670 scalar_dest = gimple_assign_lhs (stmt);
3671 lhs_type = TREE_TYPE (scalar_dest);
3672 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3674 op0 = gimple_assign_rhs1 (stmt);
3675 rhs_type = TREE_TYPE (op0);
3677 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3678 && !((INTEGRAL_TYPE_P (lhs_type)
3679 && INTEGRAL_TYPE_P (rhs_type))
3680 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3681 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3682 return false;
3684 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3685 && ((INTEGRAL_TYPE_P (lhs_type)
3686 && (TYPE_PRECISION (lhs_type)
3687 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3688 || (INTEGRAL_TYPE_P (rhs_type)
3689 && (TYPE_PRECISION (rhs_type)
3690 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3692 if (dump_enabled_p ())
3693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3694 "type conversion to/from bit-precision unsupported."
3695 "\n");
3696 return false;
3699 /* Check the operands of the operation. */
3700 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3702 if (dump_enabled_p ())
3703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3704 "use not simple.\n");
3705 return false;
3707 if (op_type == binary_op)
3709 bool ok;
3711 op1 = gimple_assign_rhs2 (stmt);
3712 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3713 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3714 OP1. */
3715 if (CONSTANT_CLASS_P (op0))
3716 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3717 else
3718 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3720 if (!ok)
3722 if (dump_enabled_p ())
3723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3724 "use not simple.\n");
3725 return false;
3729 /* If op0 is an external or constant def, use a vector type of
3730 the same size as the output vector type. */
3731 if (!vectype_in)
3732 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3733 if (vec_stmt)
3734 gcc_assert (vectype_in);
3735 if (!vectype_in)
3737 if (dump_enabled_p ())
3739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3740 "no vectype for scalar type ");
3741 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3742 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3745 return false;
3748 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3749 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3751 if (dump_enabled_p ())
3753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3754 "can't convert between boolean and non "
3755 "boolean vectors");
3756 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3757 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3760 return false;
3763 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3764 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3765 if (nunits_in < nunits_out)
3766 modifier = NARROW;
3767 else if (nunits_out == nunits_in)
3768 modifier = NONE;
3769 else
3770 modifier = WIDEN;
3772 /* Multiple types in SLP are handled by creating the appropriate number of
3773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3774 case of SLP. */
3775 if (slp_node || PURE_SLP_STMT (stmt_info))
3776 ncopies = 1;
3777 else if (modifier == NARROW)
3778 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3779 else
3780 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3782 /* Sanity check: make sure that at least one copy of the vectorized stmt
3783 needs to be generated. */
3784 gcc_assert (ncopies >= 1);
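/* A purely illustrative example, not taken from the surrounding code:
   with a vectorization factor of 16 and a V4SI -> V4SF conversion
   (modifier == NONE, nunits_in == nunits_out == 4), the non-SLP path
   above yields ncopies == 16 / 4 == 4 copies of the vectorized
   conversion statement; under SLP the count lives in the SLP node
   instead and ncopies stays 1.  */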
3786 /* Supportable by target? */
3787 switch (modifier)
3789 case NONE:
3790 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3791 return false;
3792 if (supportable_convert_operation (code, vectype_out, vectype_in,
3793 &decl1, &code1))
3794 break;
3795 /* FALLTHRU */
3796 unsupported:
3797 if (dump_enabled_p ())
3798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3799 "conversion not supported by target.\n");
3800 return false;
3802 case WIDEN:
3803 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3804 &code1, &code2, &multi_step_cvt,
3805 &interm_types))
3807 /* A binary widening operation can only be supported directly by the
3808 architecture. */
3809 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3810 break;
3813 if (code != FLOAT_EXPR
3814 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3815 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3816 goto unsupported;
3818 rhs_mode = TYPE_MODE (rhs_type);
3819 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3820 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3821 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3822 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3824 cvt_type
3825 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3826 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3827 if (cvt_type == NULL_TREE)
3828 goto unsupported;
3830 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3832 if (!supportable_convert_operation (code, vectype_out,
3833 cvt_type, &decl1, &codecvt1))
3834 goto unsupported;
3836 else if (!supportable_widening_operation (code, stmt, vectype_out,
3837 cvt_type, &codecvt1,
3838 &codecvt2, &multi_step_cvt,
3839 &interm_types))
3840 continue;
3841 else
3842 gcc_assert (multi_step_cvt == 0);
3844 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3845 vectype_in, &code1, &code2,
3846 &multi_step_cvt, &interm_types))
3847 break;
3850 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3851 goto unsupported;
3853 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3854 codecvt2 = ERROR_MARK;
3855 else
3857 multi_step_cvt++;
3858 interm_types.safe_push (cvt_type);
3859 cvt_type = NULL_TREE;
3861 break;
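/* Illustration only; the exact sequence is target-dependent.  A
   short -> double FLOAT_EXPR with no direct widening support may still
   be vectorized through an integer intermediate found by the loop
   above, e.g.
     short -> int        (widening NOP_EXPR, possibly multi-step)
     int   -> double     (widening FLOAT_EXPR)
   or through a full-width long long intermediate followed by a plain
   long long -> double conversion, whichever the target provides.  */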
3863 case NARROW:
3864 gcc_assert (op_type == unary_op);
3865 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3866 &code1, &multi_step_cvt,
3867 &interm_types))
3868 break;
3870 if (code != FIX_TRUNC_EXPR
3871 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3872 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3873 goto unsupported;
3875 rhs_mode = TYPE_MODE (rhs_type);
3876 cvt_type
3877 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3878 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3879 if (cvt_type == NULL_TREE)
3880 goto unsupported;
3881 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3882 &decl1, &codecvt1))
3883 goto unsupported;
3884 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3885 &code1, &multi_step_cvt,
3886 &interm_types))
3887 break;
3888 goto unsupported;
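/* Illustration only: a double -> short FIX_TRUNC_EXPR without direct
   target support may be split by the code above into
     double    -> long long   (FIX_TRUNC_EXPR to CVT_TYPE)
     long long -> short       (possibly multi-step narrowing NOP_EXPR)
   provided both halves are supportable.  */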
3890 default:
3891 gcc_unreachable ();
3894 if (!vec_stmt) /* transformation not required. */
3896 if (dump_enabled_p ())
3897 dump_printf_loc (MSG_NOTE, vect_location,
3898 "=== vectorizable_conversion ===\n");
3899 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3901 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3902 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3904 else if (modifier == NARROW)
3906 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3907 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3909 else
3911 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3912 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3914 interm_types.release ();
3915 return true;
3918 /** Transform. **/
3919 if (dump_enabled_p ())
3920 dump_printf_loc (MSG_NOTE, vect_location,
3921 "transform conversion. ncopies = %d.\n", ncopies);
3923 if (op_type == binary_op)
3925 if (CONSTANT_CLASS_P (op0))
3926 op0 = fold_convert (TREE_TYPE (op1), op0);
3927 else if (CONSTANT_CLASS_P (op1))
3928 op1 = fold_convert (TREE_TYPE (op0), op1);
3931 /* In case of multi-step conversion, we first generate conversion operations
3932 to the intermediate types, and then from those types to the final one.
3933 We create vector destinations for the intermediate type (TYPES) received
3934 from supportable_*_operation, and store them in the correct order
3935 for future use in vect_create_vectorized_*_stmts (). */
3936 vec_dsts.create (multi_step_cvt + 1);
3937 vec_dest = vect_create_destination_var (scalar_dest,
3938 (cvt_type && modifier == WIDEN)
3939 ? cvt_type : vectype_out);
3940 vec_dsts.quick_push (vec_dest);
3942 if (multi_step_cvt)
3944 for (i = interm_types.length () - 1;
3945 interm_types.iterate (i, &intermediate_type); i--)
3947 vec_dest = vect_create_destination_var (scalar_dest,
3948 intermediate_type);
3949 vec_dsts.quick_push (vec_dest);
3953 if (cvt_type)
3954 vec_dest = vect_create_destination_var (scalar_dest,
3955 modifier == WIDEN
3956 ? vectype_out : cvt_type);
3958 if (!slp_node)
3960 if (modifier == WIDEN)
3962 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3963 if (op_type == binary_op)
3964 vec_oprnds1.create (1);
3966 else if (modifier == NARROW)
3967 vec_oprnds0.create (
3968 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3970 else if (code == WIDEN_LSHIFT_EXPR)
3971 vec_oprnds1.create (slp_node->vec_stmts_size);
3973 last_oprnd = op0;
3974 prev_stmt_info = NULL;
3975 switch (modifier)
3977 case NONE:
3978 for (j = 0; j < ncopies; j++)
3980 if (j == 0)
3981 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3982 -1);
3983 else
3984 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3986 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3988 /* Arguments are ready, create the new vector stmt. */
3989 if (code1 == CALL_EXPR)
3991 new_stmt = gimple_build_call (decl1, 1, vop0);
3992 new_temp = make_ssa_name (vec_dest, new_stmt);
3993 gimple_call_set_lhs (new_stmt, new_temp);
3995 else
3997 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3998 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3999 new_temp = make_ssa_name (vec_dest, new_stmt);
4000 gimple_assign_set_lhs (new_stmt, new_temp);
4003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4004 if (slp_node)
4005 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4006 else
4008 if (!prev_stmt_info)
4009 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4010 else
4011 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4012 prev_stmt_info = vinfo_for_stmt (new_stmt);
4016 break;
4018 case WIDEN:
4019 /* In case the vectorization factor (VF) is bigger than the number
4020 of elements that we can fit in a vectype (nunits), we have to
4021 generate more than one vector stmt - i.e., we need to "unroll"
4022 the vector stmt by a factor VF/nunits. */
4023 for (j = 0; j < ncopies; j++)
4025 /* Handle uses. */
4026 if (j == 0)
4028 if (slp_node)
4030 if (code == WIDEN_LSHIFT_EXPR)
4032 unsigned int k;
4034 vec_oprnd1 = op1;
4035 /* Store vec_oprnd1 for every vector stmt to be created
4036 for SLP_NODE. We check during the analysis that all
4037 the shift arguments are the same. */
4038 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4039 vec_oprnds1.quick_push (vec_oprnd1);
4041 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4042 slp_node, -1);
4044 else
4045 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4046 &vec_oprnds1, slp_node, -1);
4048 else
4050 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4051 vec_oprnds0.quick_push (vec_oprnd0);
4052 if (op_type == binary_op)
4054 if (code == WIDEN_LSHIFT_EXPR)
4055 vec_oprnd1 = op1;
4056 else
4057 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4058 vec_oprnds1.quick_push (vec_oprnd1);
4062 else
4064 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4065 vec_oprnds0.truncate (0);
4066 vec_oprnds0.quick_push (vec_oprnd0);
4067 if (op_type == binary_op)
4069 if (code == WIDEN_LSHIFT_EXPR)
4070 vec_oprnd1 = op1;
4071 else
4072 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4073 vec_oprnd1);
4074 vec_oprnds1.truncate (0);
4075 vec_oprnds1.quick_push (vec_oprnd1);
4079 /* Arguments are ready. Create the new vector stmts. */
4080 for (i = multi_step_cvt; i >= 0; i--)
4082 tree this_dest = vec_dsts[i];
4083 enum tree_code c1 = code1, c2 = code2;
4084 if (i == 0 && codecvt2 != ERROR_MARK)
4086 c1 = codecvt1;
4087 c2 = codecvt2;
4089 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4090 &vec_oprnds1,
4091 stmt, this_dest, gsi,
4092 c1, c2, decl1, decl2,
4093 op_type);
4096 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4098 if (cvt_type)
4100 if (codecvt1 == CALL_EXPR)
4102 new_stmt = gimple_build_call (decl1, 1, vop0);
4103 new_temp = make_ssa_name (vec_dest, new_stmt);
4104 gimple_call_set_lhs (new_stmt, new_temp);
4106 else
4108 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4109 new_temp = make_ssa_name (vec_dest);
4110 new_stmt = gimple_build_assign (new_temp, codecvt1,
4111 vop0);
4114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4116 else
4117 new_stmt = SSA_NAME_DEF_STMT (vop0);
4119 if (slp_node)
4120 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4121 else
4123 if (!prev_stmt_info)
4124 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4125 else
4126 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4127 prev_stmt_info = vinfo_for_stmt (new_stmt);
4132 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4133 break;
4135 case NARROW:
4136 /* In case the vectorization factor (VF) is bigger than the number
4137 of elements that we can fit in a vectype (nunits), we have to
4138 generate more than one vector stmt - i.e., we need to "unroll"
4139 the vector stmt by a factor VF/nunits. */
4140 for (j = 0; j < ncopies; j++)
4142 /* Handle uses. */
4143 if (slp_node)
4144 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4145 slp_node, -1);
4146 else
4148 vec_oprnds0.truncate (0);
4149 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4150 vect_pow2 (multi_step_cvt) - 1);
4153 /* Arguments are ready. Create the new vector stmts. */
4154 if (cvt_type)
4155 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4157 if (codecvt1 == CALL_EXPR)
4159 new_stmt = gimple_build_call (decl1, 1, vop0);
4160 new_temp = make_ssa_name (vec_dest, new_stmt);
4161 gimple_call_set_lhs (new_stmt, new_temp);
4163 else
4165 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4166 new_temp = make_ssa_name (vec_dest);
4167 new_stmt = gimple_build_assign (new_temp, codecvt1,
4168 vop0);
4171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4172 vec_oprnds0[i] = new_temp;
4175 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4176 stmt, vec_dsts, gsi,
4177 slp_node, code1,
4178 &prev_stmt_info);
4181 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4182 break;
4185 vec_oprnds0.release ();
4186 vec_oprnds1.release ();
4187 vec_dsts.release ();
4188 interm_types.release ();
4190 return true;
4194 /* Function vectorizable_assignment.
4196 Check if STMT performs an assignment (copy) that can be vectorized.
4197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4198 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4201 static bool
4202 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4203 gimple **vec_stmt, slp_tree slp_node)
4205 tree vec_dest;
4206 tree scalar_dest;
4207 tree op;
4208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4210 tree new_temp;
4211 gimple *def_stmt;
4212 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4213 int ncopies;
4214 int i, j;
4215 vec<tree> vec_oprnds = vNULL;
4216 tree vop;
4217 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4218 vec_info *vinfo = stmt_info->vinfo;
4219 gimple *new_stmt = NULL;
4220 stmt_vec_info prev_stmt_info = NULL;
4221 enum tree_code code;
4222 tree vectype_in;
4224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4225 return false;
4227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4228 && ! vec_stmt)
4229 return false;
4231 /* Is vectorizable assignment? */
4232 if (!is_gimple_assign (stmt))
4233 return false;
4235 scalar_dest = gimple_assign_lhs (stmt);
4236 if (TREE_CODE (scalar_dest) != SSA_NAME)
4237 return false;
4239 code = gimple_assign_rhs_code (stmt);
4240 if (gimple_assign_single_p (stmt)
4241 || code == PAREN_EXPR
4242 || CONVERT_EXPR_CODE_P (code))
4243 op = gimple_assign_rhs1 (stmt);
4244 else
4245 return false;
4247 if (code == VIEW_CONVERT_EXPR)
4248 op = TREE_OPERAND (op, 0);
4250 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4251 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4253 /* Multiple types in SLP are handled by creating the appropriate number of
4254 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4255 case of SLP. */
4256 if (slp_node || PURE_SLP_STMT (stmt_info))
4257 ncopies = 1;
4258 else
4259 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4261 gcc_assert (ncopies >= 1);
4263 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4265 if (dump_enabled_p ())
4266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4267 "use not simple.\n");
4268 return false;
4271 /* We can handle NOP_EXPR conversions that do not change the number
4272 of elements or the vector size. */
4273 if ((CONVERT_EXPR_CODE_P (code)
4274 || code == VIEW_CONVERT_EXPR)
4275 && (!vectype_in
4276 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4277 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4278 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4279 return false;
4281 /* We do not handle bit-precision changes. */
4282 if ((CONVERT_EXPR_CODE_P (code)
4283 || code == VIEW_CONVERT_EXPR)
4284 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4285 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4286 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4287 || ((TYPE_PRECISION (TREE_TYPE (op))
4288 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4289 /* But a conversion that does not change the bit-pattern is ok. */
4290 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4291 > TYPE_PRECISION (TREE_TYPE (op)))
4292 && TYPE_UNSIGNED (TREE_TYPE (op)))
4293 /* Conversion between boolean types of different sizes is
4294 a simple assignment in case their vectypes are the same
4295 boolean vectors. */
4296 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4297 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4299 if (dump_enabled_p ())
4300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4301 "type conversion to/from bit-precision "
4302 "unsupported.\n");
4303 return false;
4306 if (!vec_stmt) /* transformation not required. */
4308 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4309 if (dump_enabled_p ())
4310 dump_printf_loc (MSG_NOTE, vect_location,
4311 "=== vectorizable_assignment ===\n");
4312 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4313 return true;
4316 /** Transform. **/
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4320 /* Handle def. */
4321 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4323 /* Handle use. */
4324 for (j = 0; j < ncopies; j++)
4326 /* Handle uses. */
4327 if (j == 0)
4328 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4329 else
4330 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4332 /* Arguments are ready. create the new vector stmt. */
4333 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4335 if (CONVERT_EXPR_CODE_P (code)
4336 || code == VIEW_CONVERT_EXPR)
4337 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4338 new_stmt = gimple_build_assign (vec_dest, vop);
4339 new_temp = make_ssa_name (vec_dest, new_stmt);
4340 gimple_assign_set_lhs (new_stmt, new_temp);
4341 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4342 if (slp_node)
4343 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4346 if (slp_node)
4347 continue;
4349 if (j == 0)
4350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4351 else
4352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4354 prev_stmt_info = vinfo_for_stmt (new_stmt);
4357 vec_oprnds.release ();
4358 return true;
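/* Illustration, assuming a typical target: a no-op conversion such as
     unsigned int u = (unsigned int) s;   where s has type int
   passes the checks above (same number of elements, same vector size,
   no bit-precision change) and is vectorized here as one
   VIEW_CONVERT_EXPR copy per vector rather than being handled by
   vectorizable_conversion.  */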
4362 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4363 either as shift by a scalar or by a vector. */
4365 bool
4366 vect_supportable_shift (enum tree_code code, tree scalar_type)
4369 machine_mode vec_mode;
4370 optab optab;
4371 int icode;
4372 tree vectype;
4374 vectype = get_vectype_for_scalar_type (scalar_type);
4375 if (!vectype)
4376 return false;
4378 optab = optab_for_tree_code (code, vectype, optab_scalar);
4379 if (!optab
4380 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4382 optab = optab_for_tree_code (code, vectype, optab_vector);
4383 if (!optab
4384 || (optab_handler (optab, TYPE_MODE (vectype))
4385 == CODE_FOR_nothing))
4386 return false;
4389 vec_mode = TYPE_MODE (vectype);
4390 icode = (int) optab_handler (optab, vec_mode);
4391 if (icode == CODE_FOR_nothing)
4392 return false;
4394 return true;
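/* For example (target-dependent): on a target that only provides
   vector-by-vector shifts, LSHIFT_EXPR on int is still reported as
   supportable here, since the scalar shift amount can later be
   duplicated into a vector operand.  */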
4398 /* Function vectorizable_shift.
4400 Check if STMT performs a shift operation that can be vectorized.
4401 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4402 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4403 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4405 static bool
4406 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4407 gimple **vec_stmt, slp_tree slp_node)
4409 tree vec_dest;
4410 tree scalar_dest;
4411 tree op0, op1 = NULL;
4412 tree vec_oprnd1 = NULL_TREE;
4413 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4414 tree vectype;
4415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4416 enum tree_code code;
4417 machine_mode vec_mode;
4418 tree new_temp;
4419 optab optab;
4420 int icode;
4421 machine_mode optab_op2_mode;
4422 gimple *def_stmt;
4423 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4424 gimple *new_stmt = NULL;
4425 stmt_vec_info prev_stmt_info;
4426 int nunits_in;
4427 int nunits_out;
4428 tree vectype_out;
4429 tree op1_vectype;
4430 int ncopies;
4431 int j, i;
4432 vec<tree> vec_oprnds0 = vNULL;
4433 vec<tree> vec_oprnds1 = vNULL;
4434 tree vop0, vop1;
4435 unsigned int k;
4436 bool scalar_shift_arg = true;
4437 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4438 vec_info *vinfo = stmt_info->vinfo;
4439 int vf;
4441 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4442 return false;
4444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4445 && ! vec_stmt)
4446 return false;
4448 /* Is STMT a vectorizable binary/unary operation? */
4449 if (!is_gimple_assign (stmt))
4450 return false;
4452 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4453 return false;
4455 code = gimple_assign_rhs_code (stmt);
4457 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4458 || code == RROTATE_EXPR))
4459 return false;
4461 scalar_dest = gimple_assign_lhs (stmt);
4462 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4463 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4464 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4466 if (dump_enabled_p ())
4467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4468 "bit-precision shifts not supported.\n");
4469 return false;
4472 op0 = gimple_assign_rhs1 (stmt);
4473 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4475 if (dump_enabled_p ())
4476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4477 "use not simple.\n");
4478 return false;
4480 /* If op0 is an external or constant def use a vector type with
4481 the same size as the output vector type. */
4482 if (!vectype)
4483 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4484 if (vec_stmt)
4485 gcc_assert (vectype);
4486 if (!vectype)
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4490 "no vectype for scalar type\n");
4491 return false;
4494 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4495 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4496 if (nunits_out != nunits_in)
4497 return false;
4499 op1 = gimple_assign_rhs2 (stmt);
4500 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4502 if (dump_enabled_p ())
4503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4504 "use not simple.\n");
4505 return false;
4508 if (loop_vinfo)
4509 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4510 else
4511 vf = 1;
4513 /* Multiple types in SLP are handled by creating the appropriate number of
4514 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4515 case of SLP. */
4516 if (slp_node || PURE_SLP_STMT (stmt_info))
4517 ncopies = 1;
4518 else
4519 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4521 gcc_assert (ncopies >= 1);
4523 /* Determine whether the shift amount is a vector, or scalar. If the
4524 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4526 if ((dt[1] == vect_internal_def
4527 || dt[1] == vect_induction_def)
4528 && !slp_node)
4529 scalar_shift_arg = false;
4530 else if (dt[1] == vect_constant_def
4531 || dt[1] == vect_external_def
4532 || dt[1] == vect_internal_def)
4534 /* In SLP, we need to check whether the shift count is the same;
4535 in loops, if it is a constant or invariant, it is always
4536 a scalar shift. */
4537 if (slp_node)
4539 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4540 gimple *slpstmt;
4542 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4543 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4544 scalar_shift_arg = false;
4547 /* If the shift amount is computed by a pattern stmt we cannot
4548 use the scalar amount directly, so give up and use a vector
4549 shift. */
4550 if (dt[1] == vect_internal_def)
4552 gimple *def = SSA_NAME_DEF_STMT (op1);
4553 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4554 scalar_shift_arg = false;
4557 else
4559 if (dump_enabled_p ())
4560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4561 "operand mode requires invariant argument.\n");
4562 return false;
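/* Illustrative examples of the decision above (loop vectorization,
   non-SLP):
     a[i] = b[i] << 3;     constant amount, scalar_shift_arg stays true
     a[i] = b[i] << n;     loop-invariant n, still a scalar shift
     a[i] = b[i] << c[i];  vect_internal_def amount, vector/vector shift
   The target's optabs may still override the scalar choice below.  */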
4565 /* Vector shifted by vector. */
4566 if (!scalar_shift_arg)
4568 optab = optab_for_tree_code (code, vectype, optab_vector);
4569 if (dump_enabled_p ())
4570 dump_printf_loc (MSG_NOTE, vect_location,
4571 "vector/vector shift/rotate found.\n");
4573 if (!op1_vectype)
4574 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4575 if (op1_vectype == NULL_TREE
4576 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4580 "unusable type for last operand in"
4581 " vector/vector shift/rotate.\n");
4582 return false;
4585 /* See if the machine has a vector shifted by scalar insn and if not
4586 then see if it has a vector shifted by vector insn. */
4587 else
4589 optab = optab_for_tree_code (code, vectype, optab_scalar);
4590 if (optab
4591 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4593 if (dump_enabled_p ())
4594 dump_printf_loc (MSG_NOTE, vect_location,
4595 "vector/scalar shift/rotate found.\n");
4597 else
4599 optab = optab_for_tree_code (code, vectype, optab_vector);
4600 if (optab
4601 && (optab_handler (optab, TYPE_MODE (vectype))
4602 != CODE_FOR_nothing))
4604 scalar_shift_arg = false;
4606 if (dump_enabled_p ())
4607 dump_printf_loc (MSG_NOTE, vect_location,
4608 "vector/vector shift/rotate found.\n");
4610 /* Unlike the other binary operators, shifts/rotates have
4611 the rhs being int, instead of the same type as the lhs,
4612 so make sure the scalar is the right type if we are
4613 dealing with vectors of long long/long/short/char. */
4614 if (dt[1] == vect_constant_def)
4615 op1 = fold_convert (TREE_TYPE (vectype), op1);
4616 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4617 TREE_TYPE (op1)))
4619 if (slp_node
4620 && TYPE_MODE (TREE_TYPE (vectype))
4621 != TYPE_MODE (TREE_TYPE (op1)))
4623 if (dump_enabled_p ())
4624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4625 "unusable type for last operand in"
4626 " vector/vector shift/rotate.\n");
4627 return false;
4629 if (vec_stmt && !slp_node)
4631 op1 = fold_convert (TREE_TYPE (vectype), op1);
4632 op1 = vect_init_vector (stmt, op1,
4633 TREE_TYPE (vectype), NULL);
4640 /* Supportable by target? */
4641 if (!optab)
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4645 "no optab.\n");
4646 return false;
4648 vec_mode = TYPE_MODE (vectype);
4649 icode = (int) optab_handler (optab, vec_mode);
4650 if (icode == CODE_FOR_nothing)
4652 if (dump_enabled_p ())
4653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4654 "op not supported by target.\n");
4655 /* Check only during analysis. */
4656 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4657 || (vf < vect_min_worthwhile_factor (code)
4658 && !vec_stmt))
4659 return false;
4660 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_NOTE, vect_location,
4662 "proceeding using word mode.\n");
4665 /* Worthwhile without SIMD support? Check only during analysis. */
4666 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4667 && vf < vect_min_worthwhile_factor (code)
4668 && !vec_stmt)
4670 if (dump_enabled_p ())
4671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4672 "not worthwhile without SIMD support.\n");
4673 return false;
4676 if (!vec_stmt) /* transformation not required. */
4678 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE, vect_location,
4681 "=== vectorizable_shift ===\n");
4682 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4683 return true;
4686 /** Transform. **/
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_NOTE, vect_location,
4690 "transform binary/unary operation.\n");
4692 /* Handle def. */
4693 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4695 prev_stmt_info = NULL;
4696 for (j = 0; j < ncopies; j++)
4698 /* Handle uses. */
4699 if (j == 0)
4701 if (scalar_shift_arg)
4703 /* Vector shl and shr insn patterns can be defined with scalar
4704 operand 2 (shift operand). In this case, use constant or loop
4705 invariant op1 directly, without extending it to vector mode
4706 first. */
4707 optab_op2_mode = insn_data[icode].operand[2].mode;
4708 if (!VECTOR_MODE_P (optab_op2_mode))
4710 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_NOTE, vect_location,
4712 "operand 1 using scalar mode.\n");
4713 vec_oprnd1 = op1;
4714 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4715 vec_oprnds1.quick_push (vec_oprnd1);
4716 if (slp_node)
4718 /* Store vec_oprnd1 for every vector stmt to be created
4719 for SLP_NODE. We check during the analysis that all
4720 the shift arguments are the same.
4721 TODO: Allow different constants for different vector
4722 stmts generated for an SLP instance. */
4723 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4724 vec_oprnds1.quick_push (vec_oprnd1);
4729 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4730 (a special case for certain kinds of vector shifts); otherwise,
4731 operand 1 should be of a vector type (the usual case). */
4732 if (vec_oprnd1)
4733 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4734 slp_node, -1);
4735 else
4736 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4737 slp_node, -1);
4739 else
4740 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4742 /* Arguments are ready. Create the new vector stmt. */
4743 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4745 vop1 = vec_oprnds1[i];
4746 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4747 new_temp = make_ssa_name (vec_dest, new_stmt);
4748 gimple_assign_set_lhs (new_stmt, new_temp);
4749 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4750 if (slp_node)
4751 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4754 if (slp_node)
4755 continue;
4757 if (j == 0)
4758 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4759 else
4760 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4761 prev_stmt_info = vinfo_for_stmt (new_stmt);
4764 vec_oprnds0.release ();
4765 vec_oprnds1.release ();
4767 return true;
4771 /* Function vectorizable_operation.
4773 Check if STMT performs a binary, unary or ternary operation that can
4774 be vectorized.
4775 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4776 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4777 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4779 static bool
4780 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4781 gimple **vec_stmt, slp_tree slp_node)
4783 tree vec_dest;
4784 tree scalar_dest;
4785 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4786 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4787 tree vectype;
4788 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4789 enum tree_code code;
4790 machine_mode vec_mode;
4791 tree new_temp;
4792 int op_type;
4793 optab optab;
4794 bool target_support_p;
4795 gimple *def_stmt;
4796 enum vect_def_type dt[3]
4797 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4798 gimple *new_stmt = NULL;
4799 stmt_vec_info prev_stmt_info;
4800 int nunits_in;
4801 int nunits_out;
4802 tree vectype_out;
4803 int ncopies;
4804 int j, i;
4805 vec<tree> vec_oprnds0 = vNULL;
4806 vec<tree> vec_oprnds1 = vNULL;
4807 vec<tree> vec_oprnds2 = vNULL;
4808 tree vop0, vop1, vop2;
4809 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4810 vec_info *vinfo = stmt_info->vinfo;
4811 int vf;
4813 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4814 return false;
4816 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4817 && ! vec_stmt)
4818 return false;
4820 /* Is STMT a vectorizable binary/unary operation? */
4821 if (!is_gimple_assign (stmt))
4822 return false;
4824 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4825 return false;
4827 code = gimple_assign_rhs_code (stmt);
4829 /* For pointer addition, we should use the normal plus for
4830 the vector addition. */
4831 if (code == POINTER_PLUS_EXPR)
4832 code = PLUS_EXPR;
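/* E.g. a pointer update such as q = p + 16 is therefore vectorized
   with an ordinary vector PLUS_EXPR; only the tree code is
   canonicalized here.  */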
4834 /* Support only unary, binary or ternary operations. */
4835 op_type = TREE_CODE_LENGTH (code);
4836 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4838 if (dump_enabled_p ())
4839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4840 "num. args = %d (not unary/binary/ternary op).\n",
4841 op_type);
4842 return false;
4845 scalar_dest = gimple_assign_lhs (stmt);
4846 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4848 /* Most operations cannot handle bit-precision types without extra
4849 truncations. */
4850 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4851 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4852 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4853 /* Exception are bitwise binary operations. */
4854 && code != BIT_IOR_EXPR
4855 && code != BIT_XOR_EXPR
4856 && code != BIT_AND_EXPR)
4858 if (dump_enabled_p ())
4859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4860 "bit-precision arithmetic not supported.\n");
4861 return false;
4864 op0 = gimple_assign_rhs1 (stmt);
4865 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4869 "use not simple.\n");
4870 return false;
4872 /* If op0 is an external or constant def use a vector type with
4873 the same size as the output vector type. */
4874 if (!vectype)
4876 /* For a boolean type we cannot determine the vectype from an
4877 invariant value (we don't know whether it is a vector
4878 of booleans or a vector of integers). We use the output
4879 vectype because operations on booleans don't change the
4880 type. */
4881 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4883 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4885 if (dump_enabled_p ())
4886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4887 "not supported operation on bool value.\n");
4888 return false;
4890 vectype = vectype_out;
4892 else
4893 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4895 if (vec_stmt)
4896 gcc_assert (vectype);
4897 if (!vectype)
4899 if (dump_enabled_p ())
4901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4902 "no vectype for scalar type ");
4903 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4904 TREE_TYPE (op0));
4905 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4908 return false;
4911 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4912 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4913 if (nunits_out != nunits_in)
4914 return false;
4916 if (op_type == binary_op || op_type == ternary_op)
4918 op1 = gimple_assign_rhs2 (stmt);
4919 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4921 if (dump_enabled_p ())
4922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4923 "use not simple.\n");
4924 return false;
4927 if (op_type == ternary_op)
4929 op2 = gimple_assign_rhs3 (stmt);
4930 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4934 "use not simple.\n");
4935 return false;
4939 if (loop_vinfo)
4940 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4941 else
4942 vf = 1;
4944 /* Multiple types in SLP are handled by creating the appropriate number of
4945 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4946 case of SLP. */
4947 if (slp_node || PURE_SLP_STMT (stmt_info))
4948 ncopies = 1;
4949 else
4950 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4952 gcc_assert (ncopies >= 1);
4954 /* Shifts are handled in vectorizable_shift (). */
4955 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4956 || code == RROTATE_EXPR)
4957 return false;
4959 /* Supportable by target? */
4961 vec_mode = TYPE_MODE (vectype);
4962 if (code == MULT_HIGHPART_EXPR)
4963 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4964 else
4966 optab = optab_for_tree_code (code, vectype, optab_default);
4967 if (!optab)
4969 if (dump_enabled_p ())
4970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4971 "no optab.\n");
4972 return false;
4974 target_support_p = (optab_handler (optab, vec_mode)
4975 != CODE_FOR_nothing);
4978 if (!target_support_p)
4980 if (dump_enabled_p ())
4981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4982 "op not supported by target.\n");
4983 /* Check only during analysis. */
4984 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4985 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4986 return false;
4987 if (dump_enabled_p ())
4988 dump_printf_loc (MSG_NOTE, vect_location,
4989 "proceeding using word mode.\n");
4992 /* Worthwhile without SIMD support? Check only during analysis. */
4993 if (!VECTOR_MODE_P (vec_mode)
4994 && !vec_stmt
4995 && vf < vect_min_worthwhile_factor (code))
4997 if (dump_enabled_p ())
4998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4999 "not worthwhile without SIMD support.\n");
5000 return false;
5003 if (!vec_stmt) /* transformation not required. */
5005 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE, vect_location,
5008 "=== vectorizable_operation ===\n");
5009 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5010 return true;
5013 /** Transform. **/
5015 if (dump_enabled_p ())
5016 dump_printf_loc (MSG_NOTE, vect_location,
5017 "transform binary/unary operation.\n");
5019 /* Handle def. */
5020 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5022 /* In case the vectorization factor (VF) is bigger than the number
5023 of elements that we can fit in a vectype (nunits), we have to generate
5024 more than one vector stmt - i.e., we need to "unroll" the
5025 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5026 from one copy of the vector stmt to the next, in the field
5027 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5028 stages to find the correct vector defs to be used when vectorizing
5029 stmts that use the defs of the current stmt. The example below
5030 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5031 we need to create 4 vectorized stmts):
5033 before vectorization:
5034 RELATED_STMT VEC_STMT
5035 S1: x = memref - -
5036 S2: z = x + 1 - -
5038 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5039 there):
5040 RELATED_STMT VEC_STMT
5041 VS1_0: vx0 = memref0 VS1_1 -
5042 VS1_1: vx1 = memref1 VS1_2 -
5043 VS1_2: vx2 = memref2 VS1_3 -
5044 VS1_3: vx3 = memref3 - -
5045 S1: x = load - VS1_0
5046 S2: z = x + 1 - -
5048 step2: vectorize stmt S2 (done here):
5049 To vectorize stmt S2 we first need to find the relevant vector
5050 def for the first operand 'x'. This is, as usual, obtained from
5051 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5052 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5053 relevant vector def 'vx0'. Having found 'vx0' we can generate
5054 the vector stmt VS2_0, and as usual, record it in the
5055 STMT_VINFO_VEC_STMT of stmt S2.
5056 When creating the second copy (VS2_1), we obtain the relevant vector
5057 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5058 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5059 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5060 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5061 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5062 chain of stmts and pointers:
5063 RELATED_STMT VEC_STMT
5064 VS1_0: vx0 = memref0 VS1_1 -
5065 VS1_1: vx1 = memref1 VS1_2 -
5066 VS1_2: vx2 = memref2 VS1_3 -
5067 VS1_3: vx3 = memref3 - -
5068 S1: x = load - VS1_0
5069 VS2_0: vz0 = vx0 + v1 VS2_1 -
5070 VS2_1: vz1 = vx1 + v1 VS2_2 -
5071 VS2_2: vz2 = vx2 + v1 VS2_3 -
5072 VS2_3: vz3 = vx3 + v1 - -
5073 S2: z = x + 1 - VS2_0 */
5075 prev_stmt_info = NULL;
5076 for (j = 0; j < ncopies; j++)
5078 /* Handle uses. */
5079 if (j == 0)
5081 if (op_type == binary_op || op_type == ternary_op)
5082 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5083 slp_node, -1);
5084 else
5085 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5086 slp_node, -1);
5087 if (op_type == ternary_op)
5088 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5089 slp_node, -1);
5091 else
5093 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5094 if (op_type == ternary_op)
5096 tree vec_oprnd = vec_oprnds2.pop ();
5097 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5098 vec_oprnd));
5102 /* Arguments are ready. Create the new vector stmt. */
5103 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5105 vop1 = ((op_type == binary_op || op_type == ternary_op)
5106 ? vec_oprnds1[i] : NULL_TREE);
5107 vop2 = ((op_type == ternary_op)
5108 ? vec_oprnds2[i] : NULL_TREE);
5109 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5110 new_temp = make_ssa_name (vec_dest, new_stmt);
5111 gimple_assign_set_lhs (new_stmt, new_temp);
5112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5113 if (slp_node)
5114 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5117 if (slp_node)
5118 continue;
5120 if (j == 0)
5121 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5122 else
5123 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5124 prev_stmt_info = vinfo_for_stmt (new_stmt);
5127 vec_oprnds0.release ();
5128 vec_oprnds1.release ();
5129 vec_oprnds2.release ();
5131 return true;
5134 /* A helper function to ensure data reference DR's base alignment
5135 for STMT_INFO. */
5137 static void
5138 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5140 if (!dr->aux)
5141 return;
5143 if (DR_VECT_AUX (dr)->base_misaligned)
5145 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5146 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5148 if (decl_in_symtab_p (base_decl))
5149 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5150 else
5152 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5153 DECL_USER_ALIGN (base_decl) = 1;
5155 DR_VECT_AUX (dr)->base_misaligned = false;
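/* Illustration (assumed scenario): if the alignment analysis chose to
   treat the base decl of DR as 32-byte aligned for a 256-bit VECTYPE,
   this is where DECL_ALIGN of that decl (or the symtab node's
   alignment) is actually raised, so that the aligned vector accesses
   generated later are valid.  */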
5160 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5161 reversal of the vector elements. If that is impossible to do,
5162 returns NULL. */
5164 static tree
5165 perm_mask_for_reverse (tree vectype)
5167 int i, nunits;
5168 unsigned char *sel;
5170 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5171 sel = XALLOCAVEC (unsigned char, nunits);
5173 for (i = 0; i < nunits; ++i)
5174 sel[i] = nunits - 1 - i;
5176 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5177 return NULL_TREE;
5178 return vect_gen_perm_mask_checked (vectype, sel);
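/* For instance, for a 4-element VECTYPE the mask built above is
   { 3, 2, 1, 0 }: element i of the result is element nunits - 1 - i of
   the input, i.e. the vector is reversed.  */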
5181 /* Function get_group_alias_ptr_type.
5183 Return the alias type for the group starting at FIRST_STMT. */
5185 static tree
5186 get_group_alias_ptr_type (gimple *first_stmt)
5188 struct data_reference *first_dr, *next_dr;
5189 gimple *next_stmt;
5191 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5192 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5193 while (next_stmt)
5195 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5196 if (get_alias_set (DR_REF (first_dr))
5197 != get_alias_set (DR_REF (next_dr)))
5199 if (dump_enabled_p ())
5200 dump_printf_loc (MSG_NOTE, vect_location,
5201 "conflicting alias set types.\n");
5202 return ptr_type_node;
5204 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5206 return reference_alias_ptr_type (DR_REF (first_dr));
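/* Illustration: if the members of an interleaving group do not all
   share one alias set (say one member is written through an int
   access and another through a differently typed one), ptr_type_node
   is returned so the single vector access built for the group is not
   given an overly specific alias set.  */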
5210 /* Function vectorizable_store.
5212 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5213 can be vectorized.
5214 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5215 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5216 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5218 static bool
5219 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5220 slp_tree slp_node)
5222 tree scalar_dest;
5223 tree data_ref;
5224 tree op;
5225 tree vec_oprnd = NULL_TREE;
5226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5227 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5228 tree elem_type;
5229 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5230 struct loop *loop = NULL;
5231 machine_mode vec_mode;
5232 tree dummy;
5233 enum dr_alignment_support alignment_support_scheme;
5234 gimple *def_stmt;
5235 enum vect_def_type dt;
5236 stmt_vec_info prev_stmt_info = NULL;
5237 tree dataref_ptr = NULL_TREE;
5238 tree dataref_offset = NULL_TREE;
5239 gimple *ptr_incr = NULL;
5240 int ncopies;
5241 int j;
5242 gimple *next_stmt, *first_stmt = NULL;
5243 bool grouped_store = false;
5244 bool store_lanes_p = false;
5245 unsigned int group_size, i;
5246 vec<tree> dr_chain = vNULL;
5247 vec<tree> oprnds = vNULL;
5248 vec<tree> result_chain = vNULL;
5249 bool inv_p;
5250 bool negative = false;
5251 tree offset = NULL_TREE;
5252 vec<tree> vec_oprnds = vNULL;
5253 bool slp = (slp_node != NULL);
5254 unsigned int vec_num;
5255 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5256 vec_info *vinfo = stmt_info->vinfo;
5257 tree aggr_type;
5258 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5259 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5260 int scatter_scale = 1;
5261 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5262 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5263 gimple *new_stmt;
5264 tree ref_type;
5266 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5267 return false;
5269 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5270 && ! vec_stmt)
5271 return false;
5273 /* Is vectorizable store? */
5275 if (!is_gimple_assign (stmt))
5276 return false;
5278 scalar_dest = gimple_assign_lhs (stmt);
5279 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5280 && is_pattern_stmt_p (stmt_info))
5281 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5282 if (TREE_CODE (scalar_dest) != ARRAY_REF
5283 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5284 && TREE_CODE (scalar_dest) != INDIRECT_REF
5285 && TREE_CODE (scalar_dest) != COMPONENT_REF
5286 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5287 && TREE_CODE (scalar_dest) != REALPART_EXPR
5288 && TREE_CODE (scalar_dest) != MEM_REF)
5289 return false;
5291 gcc_assert (gimple_assign_single_p (stmt));
5293 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5294 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5296 if (loop_vinfo)
5297 loop = LOOP_VINFO_LOOP (loop_vinfo);
5299 /* Multiple types in SLP are handled by creating the appropriate number of
5300 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5301 case of SLP. */
5302 if (slp || PURE_SLP_STMT (stmt_info))
5303 ncopies = 1;
5304 else
5305 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5307 gcc_assert (ncopies >= 1);
5309 /* FORNOW. This restriction should be relaxed. */
5310 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5312 if (dump_enabled_p ())
5313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5314 "multiple types in nested loop.\n");
5315 return false;
5318 op = gimple_assign_rhs1 (stmt);
5320 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "use not simple.\n");
5325 return false;
5328 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5329 return false;
5331 elem_type = TREE_TYPE (vectype);
5332 vec_mode = TYPE_MODE (vectype);
5334 /* FORNOW. In some cases we can vectorize even if the data-type is not
5335 supported (e.g. array initialization with 0). */
5336 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5337 return false;
5339 if (!STMT_VINFO_DATA_REF (stmt_info))
5340 return false;
5342 if (!STMT_VINFO_STRIDED_P (stmt_info))
5344 negative =
5345 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5346 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5347 size_zero_node) < 0;
5348 if (negative && ncopies > 1)
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5352 "multiple types with negative step.\n");
5353 return false;
5355 if (negative)
5357 gcc_assert (!grouped_store);
5358 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5359 if (alignment_support_scheme != dr_aligned
5360 && alignment_support_scheme != dr_unaligned_supported)
5362 if (dump_enabled_p ())
5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5364 "negative step but alignment required.\n");
5365 return false;
5367 if (dt != vect_constant_def
5368 && dt != vect_external_def
5369 && !perm_mask_for_reverse (vectype))
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373 "negative step and reversing not supported.\n");
5374 return false;
5379 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5381 grouped_store = true;
5382 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5383 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5384 if (!slp
5385 && !PURE_SLP_STMT (stmt_info)
5386 && !STMT_VINFO_STRIDED_P (stmt_info))
5388 if (vect_store_lanes_supported (vectype, group_size))
5389 store_lanes_p = true;
5390 else if (!vect_grouped_store_supported (vectype, group_size))
5391 return false;
5394 if (STMT_VINFO_STRIDED_P (stmt_info)
5395 && (slp || PURE_SLP_STMT (stmt_info))
5396 && (group_size > nunits
5397 || nunits % group_size != 0))
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400 "unhandled strided group store\n");
5401 return false;
5404 if (first_stmt == stmt)
5406 /* STMT is the leader of the group. Check the operands of all the
5407 stmts of the group. */
5408 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5409 while (next_stmt)
5411 gcc_assert (gimple_assign_single_p (next_stmt));
5412 op = gimple_assign_rhs1 (next_stmt);
5413 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "use not simple.\n");
5418 return false;
5420 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5425 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5427 gimple *def_stmt;
5428 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5429 &scatter_off, &scatter_scale);
5430 gcc_assert (scatter_decl);
5431 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5432 &scatter_off_vectype))
5434 if (dump_enabled_p ())
5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5436 "scatter index use not simple.");
5437 return false;
5441 if (!vec_stmt) /* transformation not required. */
5443 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5444 /* The SLP costs are calculated during SLP analysis. */
5445 if (!PURE_SLP_STMT (stmt_info))
5446 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5447 NULL, NULL, NULL);
5448 return true;
5451 /** Transform. **/
5453 ensure_base_align (stmt_info, dr);
5455 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5457 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5458 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5459 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5460 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5461 edge pe = loop_preheader_edge (loop);
5462 gimple_seq seq;
5463 basic_block new_bb;
5464 enum { NARROW, NONE, WIDEN } modifier;
5465 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5467 if (nunits == (unsigned int) scatter_off_nunits)
5468 modifier = NONE;
5469 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5471 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5472 modifier = WIDEN;
5474 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5475 sel[i] = i | nunits;
5477 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5478 gcc_assert (perm_mask != NULL_TREE);
5480 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5482 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5483 modifier = NARROW;
5485 for (i = 0; i < (unsigned int) nunits; ++i)
5486 sel[i] = i | scatter_off_nunits;
5488 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5489 gcc_assert (perm_mask != NULL_TREE);
5490 ncopies *= 2;
5492 else
5493 gcc_unreachable ();
5495 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5496 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5497 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5498 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5499 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5500 scaletype = TREE_VALUE (arglist);
5502 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5503 && TREE_CODE (rettype) == VOID_TYPE);
5505 ptr = fold_convert (ptrtype, scatter_base);
5506 if (!is_gimple_min_invariant (ptr))
5508 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5509 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5510 gcc_assert (!new_bb);
5513 /* Currently we support only unconditional scatter stores,
5514 so mask should be all ones. */
5515 mask = build_int_cst (masktype, -1);
5516 mask = vect_init_vector (stmt, mask, masktype, NULL);
5518 scale = build_int_cst (scaletype, scatter_scale);
5520 prev_stmt_info = NULL;
5521 for (j = 0; j < ncopies; ++j)
5523 if (j == 0)
5525 src = vec_oprnd1
5526 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5527 op = vec_oprnd0
5528 = vect_get_vec_def_for_operand (scatter_off, stmt);
5530 else if (modifier != NONE && (j & 1))
5532 if (modifier == WIDEN)
5534 src = vec_oprnd1
5535 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5536 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5537 stmt, gsi);
5539 else if (modifier == NARROW)
5541 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5542 stmt, gsi);
5543 op = vec_oprnd0
5544 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5546 else
5547 gcc_unreachable ();
5549 else
5551 src = vec_oprnd1
5552 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5553 op = vec_oprnd0
5554 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5557 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5559 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5560 == TYPE_VECTOR_SUBPARTS (srctype));
5561 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5562 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5563 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5565 src = var;
5568 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5570 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5571 == TYPE_VECTOR_SUBPARTS (idxtype));
5572 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5573 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5574 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5575 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5576 op = var;
5579 new_stmt
5580 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5582 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5584 if (prev_stmt_info == NULL)
5585 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5586 else
5587 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5588 prev_stmt_info = vinfo_for_stmt (new_stmt);
5590 return true;
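/* Per copy of the transformed statement, the loop above emits one call
   of the shape (illustration only; the exact builtin and its signature
   are target-specific, taken from SCATTER_DECL):
     scatter_decl (ptr, all_ones_mask, offsets, rhs, scale)
   with VIEW_CONVERT_EXPRs inserted beforehand whenever the source or
   index vector type does not match the builtin's argument types.  */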
5593 if (grouped_store)
5595 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5596 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5598 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5600 /* FORNOW */
5601 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5603 /* We vectorize all the stmts of the interleaving group when we
5604 reach the last stmt in the group. */
5605 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5606 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5607 && !slp)
5609 *vec_stmt = NULL;
5610 return true;
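/* Illustration: for a group such as { a[2*i] = x; a[2*i+1] = y; } the
   call for the first member only bumps GROUP_STORE_COUNT and returns;
   all the stores of the group are emitted once the last member is
   reached (or right away under SLP).  */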
5613 if (slp)
5615 grouped_store = false;
5616 /* VEC_NUM is the number of vect stmts to be created for this
5617 group. */
5618 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5619 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5620 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5621 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5622 op = gimple_assign_rhs1 (first_stmt);
5624 else
5625 /* VEC_NUM is the number of vect stmts to be created for this
5626 group. */
5627 vec_num = group_size;
5629 ref_type = get_group_alias_ptr_type (first_stmt);
5631 else
5633 first_stmt = stmt;
5634 first_dr = dr;
5635 group_size = vec_num = 1;
5636 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5639 if (dump_enabled_p ())
5640 dump_printf_loc (MSG_NOTE, vect_location,
5641 "transform store. ncopies = %d\n", ncopies);
5643 if (STMT_VINFO_STRIDED_P (stmt_info))
5645 gimple_stmt_iterator incr_gsi;
5646 bool insert_after;
5647 gimple *incr;
5648 tree offvar;
5649 tree ivstep;
5650 tree running_off;
5651 gimple_seq stmts = NULL;
5652 tree stride_base, stride_step, alias_off;
5653 tree vec_oprnd;
5654 unsigned int g;
5656 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5658 stride_base
5659 = fold_build_pointer_plus
5660 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5661 size_binop (PLUS_EXPR,
5662 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5663 convert_to_ptrofftype (DR_INIT (first_dr))));
5664 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5666 /* For a store with loop-invariant (but other than power-of-2)
5667 stride (i.e. not a grouped access) like so:
5669 for (i = 0; i < n; i += stride)
5670 array[i] = ...;
5672 we generate a new induction variable and new stores from
5673 the components of the (vectorized) rhs:
5675 for (j = 0; ; j += VF*stride)
5676 vectemp = ...;
5677 tmp1 = vectemp[0];
5678 array[j] = tmp1;
5679 tmp2 = vectemp[1];
5680 array[j + stride] = tmp2;
5681 ...
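For instance (an illustrative sketch only - the concrete vector length
and stride are assumptions for this example, not taken from the code
below): with nunits = 4, a single copy and stride = 3 this becomes

for (j = 0; ; j += 4*3)
vectemp = ...;
array[j] = vectemp[0];
array[j + 3] = vectemp[1];
array[j + 6] = vectemp[2];
array[j + 9] = vectemp[3];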
5684 unsigned nstores = nunits;
5685 tree ltype = elem_type;
5686 if (slp)
5688 nstores = nunits / group_size;
5689 if (group_size < nunits)
5690 ltype = build_vector_type (elem_type, group_size);
5691 else
5692 ltype = vectype;
5693 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5694 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5695 group_size = 1;
5698 ivstep = stride_step;
5699 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5700 build_int_cst (TREE_TYPE (ivstep),
5701 ncopies * nstores));
5703 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5705 create_iv (stride_base, ivstep, NULL,
5706 loop, &incr_gsi, insert_after,
5707 &offvar, NULL);
5708 incr = gsi_stmt (incr_gsi);
5709 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5711 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5712 if (stmts)
5713 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5715 prev_stmt_info = NULL;
5716 alias_off = build_int_cst (ref_type, 0);
5717 next_stmt = first_stmt;
5718 for (g = 0; g < group_size; g++)
5720 running_off = offvar;
5721 if (g)
5723 tree size = TYPE_SIZE_UNIT (ltype);
5724 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5725 size);
5726 tree newoff = copy_ssa_name (running_off, NULL);
5727 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5728 running_off, pos);
5729 vect_finish_stmt_generation (stmt, incr, gsi);
5730 running_off = newoff;
5732 for (j = 0; j < ncopies; j++)
5734 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5735 and first_stmt == stmt. */
5736 if (j == 0)
5738 if (slp)
5740 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5741 slp_node, -1);
5742 vec_oprnd = vec_oprnds[0];
5744 else
5746 gcc_assert (gimple_assign_single_p (next_stmt));
5747 op = gimple_assign_rhs1 (next_stmt);
5748 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5751 else
5753 if (slp)
5754 vec_oprnd = vec_oprnds[j];
5755 else
5757 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5758 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5762 for (i = 0; i < nstores; i++)
5764 tree newref, newoff;
5765 gimple *incr, *assign;
5766 tree size = TYPE_SIZE (ltype);
5767 /* Extract the i'th component. */
5768 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5769 bitsize_int (i), size);
5770 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5771 size, pos);
5773 elem = force_gimple_operand_gsi (gsi, elem, true,
5774 NULL_TREE, true,
5775 GSI_SAME_STMT);
5777 newref = build2 (MEM_REF, ltype,
5778 running_off, alias_off);
5780 /* And store it to *running_off. */
5781 assign = gimple_build_assign (newref, elem);
5782 vect_finish_stmt_generation (stmt, assign, gsi);
5784 newoff = copy_ssa_name (running_off, NULL);
5785 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5786 running_off, stride_step);
5787 vect_finish_stmt_generation (stmt, incr, gsi);
5789 running_off = newoff;
5790 if (g == group_size - 1
5791 && !slp)
5793 if (j == 0 && i == 0)
5794 STMT_VINFO_VEC_STMT (stmt_info)
5795 = *vec_stmt = assign;
5796 else
5797 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5798 prev_stmt_info = vinfo_for_stmt (assign);
5802 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5804 return true;
5807 dr_chain.create (group_size);
5808 oprnds.create (group_size);
5810 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5811 gcc_assert (alignment_support_scheme);
5812 /* Targets with store-lane instructions must not require explicit
5813 realignment. */
5814 gcc_assert (!store_lanes_p
5815 || alignment_support_scheme == dr_aligned
5816 || alignment_support_scheme == dr_unaligned_supported);
5818 if (negative)
5819 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5821 if (store_lanes_p)
5822 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5823 else
5824 aggr_type = vectype;
5826 /* In case the vectorization factor (VF) is bigger than the number
5827 of elements that we can fit in a vectype (nunits), we have to generate
5828 more than one vector stmt - i.e. we need to "unroll" the
5829 vector stmt by a factor VF/nunits. For more details see documentation in
5830 vect_get_vec_def_for_stmt_copy. */
5832 /* In case of interleaving (non-unit grouped access):
5834 S1: &base + 2 = x2
5835 S2: &base = x0
5836 S3: &base + 1 = x1
5837 S4: &base + 3 = x3
5839 We create vectorized stores starting from the base address (the access of
5840 the first stmt in the chain - S2 in the above example) when the last store
5841 stmt of the chain (S4) is reached:
5843 VS1: &base = vx2
5844 VS2: &base + vec_size*1 = vx0
5845 VS3: &base + vec_size*2 = vx1
5846 VS4: &base + vec_size*3 = vx3
5848 Then permutation statements are generated:
5850 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5851 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5854 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5855 (the order of the data-refs in the output of vect_permute_store_chain
5856 corresponds to the order of scalar stmts in the interleaving chain - see
5857 the documentation of vect_permute_store_chain()).
5859 In case of both multiple types and interleaving, above vector stores and
5860 permutation stmts are created for every copy. The result vector stmts are
5861 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5862 STMT_VINFO_RELATED_STMT for the next copies.
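As a concrete source-level shape for the S1..S4 chain above (an
illustrative sketch only - the array name and group size are assumptions
for this example):

for (i = 0; i < n; i++)
out[4*i + 2] = x2;   (S1)
out[4*i + 0] = x0;   (S2)
out[4*i + 1] = x1;   (S3)
out[4*i + 3] = x3;   (S4)

i.e. four stores at consecutive offsets within each iteration, written
out of order in the source.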
5865 prev_stmt_info = NULL;
5866 for (j = 0; j < ncopies; j++)
5869 if (j == 0)
5871 if (slp)
5873 /* Get vectorized arguments for SLP_NODE. */
5874 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5875 NULL, slp_node, -1);
5877 vec_oprnd = vec_oprnds[0];
5879 else
5881 /* For interleaved stores we collect vectorized defs for all the
5882 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5883 used as an input to vect_permute_store_chain(), and OPRNDS as
5884 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5886 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5887 OPRNDS are of size 1. */
5888 next_stmt = first_stmt;
5889 for (i = 0; i < group_size; i++)
5891 /* Since gaps are not supported for interleaved stores,
5892 GROUP_SIZE is the exact number of stmts in the chain.
5893 Therefore, NEXT_STMT can't be NULL_TREE. If there is
5894 no interleaving, GROUP_SIZE is 1, and only one
5895 iteration of the loop will be executed. */
5896 gcc_assert (next_stmt
5897 && gimple_assign_single_p (next_stmt));
5898 op = gimple_assign_rhs1 (next_stmt);
5900 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5901 dr_chain.quick_push (vec_oprnd);
5902 oprnds.quick_push (vec_oprnd);
5903 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5907 /* We should have caught mismatched types earlier. */
5908 gcc_assert (useless_type_conversion_p (vectype,
5909 TREE_TYPE (vec_oprnd)));
5910 bool simd_lane_access_p
5911 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5912 if (simd_lane_access_p
5913 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5914 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5915 && integer_zerop (DR_OFFSET (first_dr))
5916 && integer_zerop (DR_INIT (first_dr))
5917 && alias_sets_conflict_p (get_alias_set (aggr_type),
5918 get_alias_set (TREE_TYPE (ref_type))))
5920 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5921 dataref_offset = build_int_cst (ref_type, 0);
5922 inv_p = false;
5924 else
5925 dataref_ptr
5926 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5927 simd_lane_access_p ? loop : NULL,
5928 offset, &dummy, gsi, &ptr_incr,
5929 simd_lane_access_p, &inv_p);
5930 gcc_assert (bb_vinfo || !inv_p);
5932 else
5934 /* For interleaved stores we created vectorized defs for all the
5935 defs stored in OPRNDS in the previous iteration (previous copy).
5936 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5937 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5938 next copy.
5939 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5940 OPRNDS are of size 1. */
5941 for (i = 0; i < group_size; i++)
5943 op = oprnds[i];
5944 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5945 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5946 dr_chain[i] = vec_oprnd;
5947 oprnds[i] = vec_oprnd;
5949 if (dataref_offset)
5950 dataref_offset
5951 = int_const_binop (PLUS_EXPR, dataref_offset,
5952 TYPE_SIZE_UNIT (aggr_type));
5953 else
5954 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5955 TYPE_SIZE_UNIT (aggr_type));
5958 if (store_lanes_p)
5960 tree vec_array;
5962 /* Combine all the vectors into an array. */
5963 vec_array = create_vector_array (vectype, vec_num);
5964 for (i = 0; i < vec_num; i++)
5966 vec_oprnd = dr_chain[i];
5967 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5970 /* Emit:
5971 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5972 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
5973 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5974 gimple_call_set_lhs (new_stmt, data_ref);
5975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5977 else
5979 new_stmt = NULL;
5980 if (grouped_store)
5982 if (j == 0)
5983 result_chain.create (group_size);
5984 /* Permute. */
5985 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5986 &result_chain);
5989 next_stmt = first_stmt;
5990 for (i = 0; i < vec_num; i++)
5992 unsigned align, misalign;
5994 if (i > 0)
5995 /* Bump the vector pointer. */
5996 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5997 stmt, NULL_TREE);
5999 if (slp)
6000 vec_oprnd = vec_oprnds[i];
6001 else if (grouped_store)
6002 /* For grouped stores vectorized defs are interleaved in
6003 vect_permute_store_chain(). */
6004 vec_oprnd = result_chain[i];
6006 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
6007 dataref_ptr,
6008 dataref_offset
6009 ? dataref_offset
6010 : build_int_cst (ref_type, 0));
6011 align = TYPE_ALIGN_UNIT (vectype);
6012 if (aligned_access_p (first_dr))
6013 misalign = 0;
6014 else if (DR_MISALIGNMENT (first_dr) == -1)
6016 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6017 align = TYPE_ALIGN_UNIT (elem_type);
6018 else
6019 align = get_object_alignment (DR_REF (first_dr))
6020 / BITS_PER_UNIT;
6021 misalign = 0;
6022 TREE_TYPE (data_ref)
6023 = build_aligned_type (TREE_TYPE (data_ref),
6024 align * BITS_PER_UNIT);
6026 else
6028 TREE_TYPE (data_ref)
6029 = build_aligned_type (TREE_TYPE (data_ref),
6030 TYPE_ALIGN (elem_type));
6031 misalign = DR_MISALIGNMENT (first_dr);
6033 if (dataref_offset == NULL_TREE
6034 && TREE_CODE (dataref_ptr) == SSA_NAME)
6035 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6036 misalign);
6038 if (negative
6039 && dt != vect_constant_def
6040 && dt != vect_external_def)
6042 tree perm_mask = perm_mask_for_reverse (vectype);
6043 tree perm_dest
6044 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6045 vectype);
6046 tree new_temp = make_ssa_name (perm_dest);
6048 /* Generate the permute statement. */
6049 gimple *perm_stmt
6050 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6051 vec_oprnd, perm_mask);
6052 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6054 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6055 vec_oprnd = new_temp;
6058 /* Arguments are ready. Create the new vector stmt. */
6059 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6060 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6062 if (slp)
6063 continue;
6065 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6066 if (!next_stmt)
6067 break;
6070 if (!slp)
6072 if (j == 0)
6073 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6074 else
6075 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6076 prev_stmt_info = vinfo_for_stmt (new_stmt);
6080 dr_chain.release ();
6081 oprnds.release ();
6082 result_chain.release ();
6083 vec_oprnds.release ();
6085 return true;
6088 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6089 VECTOR_CST mask. No checks are made that the target platform supports the
6090 mask, so callers may wish to test can_vec_perm_p separately, or use
6091 vect_gen_perm_mask_checked. */
6093 tree
6094 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6096 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6097 int i, nunits;
6099 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6101 mask_elt_type = lang_hooks.types.type_for_mode
6102 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6103 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6105 mask_elts = XALLOCAVEC (tree, nunits);
6106 for (i = nunits - 1; i >= 0; i--)
6107 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6108 mask_vec = build_vector (mask_type, mask_elts);
6110 return mask_vec;
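/* For example (an illustrative sketch; the element count is an assumption
for the example): for a 4-element VECTYPE and SEL = { 3, 2, 1, 0 } - the
reversing permutation built by perm_mask_for_reverse - this returns the
VECTOR_CST { 3, 2, 1, 0 } in the corresponding integer mask vector type,
ready to be used as the selector operand of a VEC_PERM_EXPR. */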
6113 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6114 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6116 tree
6117 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6119 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6120 return vect_gen_perm_mask_any (vectype, sel);
6123 /* Given vector variables X and Y that were generated for the scalar
6124 STMT, generate instructions to permute the vector elements of X and Y
6125 using the permutation mask MASK_VEC, insert them at *GSI and return the
6126 permuted vector variable. */
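/* For example (a sketch with made-up values): with 4-element vectors
X = { x0, x1, x2, x3 }, Y = { y0, y1, y2, y3 } and
MASK_VEC = { 0, 4, 1, 5 }, the generated VEC_PERM_EXPR yields
{ x0, y0, x1, y1 } - element i of the result is element MASK_VEC[i]
of the concatenation of X and Y. */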
6128 static tree
6129 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6130 gimple_stmt_iterator *gsi)
6132 tree vectype = TREE_TYPE (x);
6133 tree perm_dest, data_ref;
6134 gimple *perm_stmt;
6136 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6137 data_ref = make_ssa_name (perm_dest);
6139 /* Generate the permute statement. */
6140 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6141 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6143 return data_ref;
6146 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6147 inserting them on the loop's preheader edge. Returns true if we
6148 were successful in doing so (and thus STMT can then be moved),
6149 otherwise returns false. */
6151 static bool
6152 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6154 ssa_op_iter i;
6155 tree op;
6156 bool any = false;
6158 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6160 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6161 if (!gimple_nop_p (def_stmt)
6162 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6164 /* Make sure we don't need to recurse. While we could do
6165 so in simple cases, when there are more complex use webs
6166 we don't have an easy way to preserve stmt order to fulfil
6167 dependencies within them. */
6168 tree op2;
6169 ssa_op_iter i2;
6170 if (gimple_code (def_stmt) == GIMPLE_PHI)
6171 return false;
6172 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6174 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6175 if (!gimple_nop_p (def_stmt2)
6176 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6177 return false;
6179 any = true;
6183 if (!any)
6184 return true;
6186 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6188 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6189 if (!gimple_nop_p (def_stmt)
6190 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6192 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6193 gsi_remove (&gsi, false);
6194 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6198 return true;
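/* For example (an illustrative sketch; the SSA names are made up): if the
loop body contains

p_1 = &s_2->f;
x_3 = *p_1;

and the load x_3 is invariant and about to be emitted on the preheader
edge, hoist_defs_of_uses first moves the definition of p_1 out of the
loop - provided p_1's own operands are already defined outside of it -
so that moving the load afterwards is legal. */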
6201 /* vectorizable_load.
6203 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6204 can be vectorized.
6205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6206 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6209 static bool
6210 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6211 slp_tree slp_node, slp_instance slp_node_instance)
6213 tree scalar_dest;
6214 tree vec_dest = NULL;
6215 tree data_ref = NULL;
6216 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6217 stmt_vec_info prev_stmt_info;
6218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6219 struct loop *loop = NULL;
6220 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6221 bool nested_in_vect_loop = false;
6222 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6223 tree elem_type;
6224 tree new_temp;
6225 machine_mode mode;
6226 gimple *new_stmt = NULL;
6227 tree dummy;
6228 enum dr_alignment_support alignment_support_scheme;
6229 tree dataref_ptr = NULL_TREE;
6230 tree dataref_offset = NULL_TREE;
6231 gimple *ptr_incr = NULL;
6232 int ncopies;
6233 int i, j, group_size, group_gap_adj;
6234 tree msq = NULL_TREE, lsq;
6235 tree offset = NULL_TREE;
6236 tree byte_offset = NULL_TREE;
6237 tree realignment_token = NULL_TREE;
6238 gphi *phi = NULL;
6239 vec<tree> dr_chain = vNULL;
6240 bool grouped_load = false;
6241 bool load_lanes_p = false;
6242 gimple *first_stmt;
6243 gimple *first_stmt_for_drptr = NULL;
6244 bool inv_p;
6245 bool negative = false;
6246 bool compute_in_loop = false;
6247 struct loop *at_loop;
6248 int vec_num;
6249 bool slp = (slp_node != NULL);
6250 bool slp_perm = false;
6251 enum tree_code code;
6252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6253 int vf;
6254 tree aggr_type;
6255 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6256 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6257 int gather_scale = 1;
6258 enum vect_def_type gather_dt = vect_unknown_def_type;
6259 vec_info *vinfo = stmt_info->vinfo;
6260 tree ref_type;
6262 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6263 return false;
6265 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6266 && ! vec_stmt)
6267 return false;
6269 /* Is vectorizable load? */
6270 if (!is_gimple_assign (stmt))
6271 return false;
6273 scalar_dest = gimple_assign_lhs (stmt);
6274 if (TREE_CODE (scalar_dest) != SSA_NAME)
6275 return false;
6277 code = gimple_assign_rhs_code (stmt);
6278 if (code != ARRAY_REF
6279 && code != BIT_FIELD_REF
6280 && code != INDIRECT_REF
6281 && code != COMPONENT_REF
6282 && code != IMAGPART_EXPR
6283 && code != REALPART_EXPR
6284 && code != MEM_REF
6285 && TREE_CODE_CLASS (code) != tcc_declaration)
6286 return false;
6288 if (!STMT_VINFO_DATA_REF (stmt_info))
6289 return false;
6291 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6292 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6294 if (loop_vinfo)
6296 loop = LOOP_VINFO_LOOP (loop_vinfo);
6297 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6298 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6300 else
6301 vf = 1;
6303 /* Multiple types in SLP are handled by creating the appropriate number of
6304 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6305 case of SLP. */
6306 if (slp || PURE_SLP_STMT (stmt_info))
6307 ncopies = 1;
6308 else
6309 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6311 gcc_assert (ncopies >= 1);
6313 /* FORNOW. This restriction should be relaxed. */
6314 if (nested_in_vect_loop && ncopies > 1)
6316 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6318 "multiple types in nested loop.\n");
6319 return false;
6322 /* Invalidate assumptions made by dependence analysis when vectorization
6323 on the unrolled body effectively re-orders stmts. */
6324 if (ncopies > 1
6325 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6326 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6327 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6329 if (dump_enabled_p ())
6330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6331 "cannot perform implicit CSE when unrolling "
6332 "with negative dependence distance\n");
6333 return false;
6336 elem_type = TREE_TYPE (vectype);
6337 mode = TYPE_MODE (vectype);
6339 /* FORNOW. In some cases we can vectorize even if the data-type is not
6340 supported (e.g. - data copies). */
6341 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6343 if (dump_enabled_p ())
6344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6345 "Aligned load, but unsupported type.\n");
6346 return false;
6349 /* Check if the load is a part of an interleaving chain. */
6350 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6352 grouped_load = true;
6353 /* FORNOW */
6354 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6356 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6358 /* If this is single-element interleaving with an element distance
6359 that leaves unused vector loads around, punt - we at least create
6360 very sub-optimal code in that case (and blow up memory,
6361 see PR65518). */
6362 bool force_peeling = false;
6363 if (first_stmt == stmt
6364 && !GROUP_NEXT_ELEMENT (stmt_info))
6366 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6368 if (dump_enabled_p ())
6369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6370 "single-element interleaving not supported "
6371 "for not adjacent vector loads\n");
6372 return false;
6375 /* Single-element interleaving requires peeling for gaps. */
6376 force_peeling = true;
6379 /* If there is a gap at the end of the group or the group size cannot
6380 be made a multiple of the vector element count then we access excess
6381 elements in the last iteration and thus need to peel that off. */
6382 if (loop_vinfo
6383 && ! STMT_VINFO_STRIDED_P (stmt_info)
6384 && (force_peeling
6385 || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6386 || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
6388 if (dump_enabled_p ())
6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6390 "Data access with gaps requires scalar "
6391 "epilogue loop\n");
6392 if (loop->inner)
6394 if (dump_enabled_p ())
6395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6396 "Peeling for outer loop is not supported\n");
6397 return false;
6400 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6403 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6404 slp_perm = true;
6406 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6408 /* ??? The following is overly pessimistic (as well as the loop
6409 case above) in the case we can statically determine the excess
6410 elements loaded are within the bounds of a decl that is accessed.
6411 Likewise, for BB vectorization, using masked loads is a possibility. */
6412 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6415 "BB vectorization with gaps at the end of a load "
6416 "is not supported\n");
6417 return false;
6420 if (!slp
6421 && !PURE_SLP_STMT (stmt_info)
6422 && !STMT_VINFO_STRIDED_P (stmt_info))
6424 if (vect_load_lanes_supported (vectype, group_size))
6425 load_lanes_p = true;
6426 else if (!vect_grouped_load_supported (vectype, group_size))
6427 return false;
6430 /* Invalidate assumptions made by dependence analysis when vectorization
6431 on the unrolled body effectively re-orders stmts. */
6432 if (!PURE_SLP_STMT (stmt_info)
6433 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6434 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6435 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6437 if (dump_enabled_p ())
6438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6439 "cannot perform implicit CSE when performing "
6440 "group loads with negative dependence distance\n");
6441 return false;
6444 /* Similarly, when the stmt is a load that is both part of an SLP
6445 instance and a loop-vectorized stmt via the same-dr mechanism,
6446 we have to give up. */
6447 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6448 && (STMT_SLP_TYPE (stmt_info)
6449 != STMT_SLP_TYPE (vinfo_for_stmt
6450 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6452 if (dump_enabled_p ())
6453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6454 "conflicting SLP types for CSEd load\n");
6455 return false;
6460 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6462 gimple *def_stmt;
6463 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6464 &gather_off, &gather_scale);
6465 gcc_assert (gather_decl);
6466 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6467 &gather_off_vectype))
6469 if (dump_enabled_p ())
6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6471 "gather index use not simple.\n");
6472 return false;
6475 else if (STMT_VINFO_STRIDED_P (stmt_info))
6477 if ((grouped_load
6478 && (slp || PURE_SLP_STMT (stmt_info)))
6479 && (group_size > nunits
6480 || nunits % group_size != 0))
6482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6483 "unhandled strided group load\n");
6484 return false;
6487 else
6489 negative = tree_int_cst_compare (nested_in_vect_loop
6490 ? STMT_VINFO_DR_STEP (stmt_info)
6491 : DR_STEP (dr),
6492 size_zero_node) < 0;
6493 if (negative && ncopies > 1)
6495 if (dump_enabled_p ())
6496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6497 "multiple types with negative step.\n");
6498 return false;
6501 if (negative)
6503 if (grouped_load)
6505 if (dump_enabled_p ())
6506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6507 "negative step for group load not supported"
6508 "\n");
6509 return false;
6511 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6512 if (alignment_support_scheme != dr_aligned
6513 && alignment_support_scheme != dr_unaligned_supported)
6515 if (dump_enabled_p ())
6516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6517 "negative step but alignment required.\n");
6518 return false;
6520 if (!perm_mask_for_reverse (vectype))
6522 if (dump_enabled_p ())
6523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6524 "negative step and reversing not supported."
6525 "\n");
6526 return false;
6531 if (!vec_stmt) /* transformation not required. */
6533 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6534 /* The SLP costs are calculated during SLP analysis. */
6535 if (!PURE_SLP_STMT (stmt_info))
6536 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6537 NULL, NULL, NULL);
6538 return true;
6541 if (dump_enabled_p ())
6542 dump_printf_loc (MSG_NOTE, vect_location,
6543 "transform load. ncopies = %d\n", ncopies);
6545 /** Transform. **/
6547 ensure_base_align (stmt_info, dr);
6549 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6551 tree vec_oprnd0 = NULL_TREE, op;
6552 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6553 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6554 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6555 edge pe = loop_preheader_edge (loop);
6556 gimple_seq seq;
6557 basic_block new_bb;
6558 enum { NARROW, NONE, WIDEN } modifier;
6559 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6561 if (nunits == gather_off_nunits)
6562 modifier = NONE;
6563 else if (nunits == gather_off_nunits / 2)
6565 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6566 modifier = WIDEN;
6568 for (i = 0; i < gather_off_nunits; ++i)
6569 sel[i] = i | nunits;
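/* E.g. for nunits == 4 and gather_off_nunits == 8 (values assumed for
illustration) this builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 },
which moves the high half of the offset vector into place for the
odd-numbered copies generated below. */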
6571 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6573 else if (nunits == gather_off_nunits * 2)
6575 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6576 modifier = NARROW;
6578 for (i = 0; i < nunits; ++i)
6579 sel[i] = i < gather_off_nunits
6580 ? i : i + nunits - gather_off_nunits;
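/* E.g. for nunits == 8 and gather_off_nunits == 4 (values assumed for
illustration) this builds the selector { 0, 1, 2, 3, 8, 9, 10, 11 },
which below combines the low halves of two consecutive gather results
into one full-width vector. */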
6582 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6583 ncopies *= 2;
6585 else
6586 gcc_unreachable ();
6588 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6589 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6590 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6591 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6592 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6593 scaletype = TREE_VALUE (arglist);
6594 gcc_checking_assert (types_compatible_p (srctype, rettype));
6596 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6598 ptr = fold_convert (ptrtype, gather_base);
6599 if (!is_gimple_min_invariant (ptr))
6601 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6602 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6603 gcc_assert (!new_bb);
6606 /* Currently we support only unconditional gather loads,
6607 so mask should be all ones. */
6608 if (TREE_CODE (masktype) == INTEGER_TYPE)
6609 mask = build_int_cst (masktype, -1);
6610 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6612 mask = build_int_cst (TREE_TYPE (masktype), -1);
6613 mask = build_vector_from_val (masktype, mask);
6614 mask = vect_init_vector (stmt, mask, masktype, NULL);
6616 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6618 REAL_VALUE_TYPE r;
6619 long tmp[6];
6620 for (j = 0; j < 6; ++j)
6621 tmp[j] = -1;
6622 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6623 mask = build_real (TREE_TYPE (masktype), r);
6624 mask = build_vector_from_val (masktype, mask);
6625 mask = vect_init_vector (stmt, mask, masktype, NULL);
6627 else
6628 gcc_unreachable ();
6630 scale = build_int_cst (scaletype, gather_scale);
6632 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6633 merge = build_int_cst (TREE_TYPE (rettype), 0);
6634 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6636 REAL_VALUE_TYPE r;
6637 long tmp[6];
6638 for (j = 0; j < 6; ++j)
6639 tmp[j] = 0;
6640 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6641 merge = build_real (TREE_TYPE (rettype), r);
6643 else
6644 gcc_unreachable ();
6645 merge = build_vector_from_val (rettype, merge);
6646 merge = vect_init_vector (stmt, merge, rettype, NULL);
6648 prev_stmt_info = NULL;
6649 for (j = 0; j < ncopies; ++j)
6651 if (modifier == WIDEN && (j & 1))
6652 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6653 perm_mask, stmt, gsi);
6654 else if (j == 0)
6655 op = vec_oprnd0
6656 = vect_get_vec_def_for_operand (gather_off, stmt);
6657 else
6658 op = vec_oprnd0
6659 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6661 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6663 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6664 == TYPE_VECTOR_SUBPARTS (idxtype));
6665 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6666 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6667 new_stmt
6668 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6669 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6670 op = var;
6673 new_stmt
6674 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6676 if (!useless_type_conversion_p (vectype, rettype))
6678 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6679 == TYPE_VECTOR_SUBPARTS (rettype));
6680 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6681 gimple_call_set_lhs (new_stmt, op);
6682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6683 var = make_ssa_name (vec_dest);
6684 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6685 new_stmt
6686 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6688 else
6690 var = make_ssa_name (vec_dest, new_stmt);
6691 gimple_call_set_lhs (new_stmt, var);
6694 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6696 if (modifier == NARROW)
6698 if ((j & 1) == 0)
6700 prev_res = var;
6701 continue;
6703 var = permute_vec_elements (prev_res, var,
6704 perm_mask, stmt, gsi);
6705 new_stmt = SSA_NAME_DEF_STMT (var);
6708 if (prev_stmt_info == NULL)
6709 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6710 else
6711 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6712 prev_stmt_info = vinfo_for_stmt (new_stmt);
6714 return true;
6716 else if (STMT_VINFO_STRIDED_P (stmt_info))
6718 gimple_stmt_iterator incr_gsi;
6719 bool insert_after;
6720 gimple *incr;
6721 tree offvar;
6722 tree ivstep;
6723 tree running_off;
6724 vec<constructor_elt, va_gc> *v = NULL;
6725 gimple_seq stmts = NULL;
6726 tree stride_base, stride_step, alias_off;
6728 gcc_assert (!nested_in_vect_loop);
6730 if (slp && grouped_load)
6732 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6733 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6734 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6735 ref_type = get_group_alias_ptr_type (first_stmt);
6737 else
6739 first_stmt = stmt;
6740 first_dr = dr;
6741 group_size = 1;
6742 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6745 stride_base
6746 = fold_build_pointer_plus
6747 (DR_BASE_ADDRESS (first_dr),
6748 size_binop (PLUS_EXPR,
6749 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6750 convert_to_ptrofftype (DR_INIT (first_dr))));
6751 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6753 /* For a load with loop-invariant (but other than power-of-2)
6754 stride (i.e. not a grouped access) like so:
6756 for (i = 0; i < n; i += stride)
6757 ... = array[i];
6759 we generate a new induction variable and new accesses to
6760 form a new vector (or vectors, depending on ncopies):
6762 for (j = 0; ; j += VF*stride)
6763 tmp1 = array[j];
6764 tmp2 = array[j + stride];
6766 vectemp = {tmp1, tmp2, ...}
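For instance (an illustrative sketch only - VF and stride are
assumptions for this example): with VF = 4 and stride = 3 the
generated accesses look like

for (j = 0; ; j += 4*3)
tmp1 = array[j];
tmp2 = array[j + 3];
tmp3 = array[j + 6];
tmp4 = array[j + 9];
vectemp = {tmp1, tmp2, tmp3, tmp4};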
6769 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6770 build_int_cst (TREE_TYPE (stride_step), vf));
6772 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6774 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6775 loop, &incr_gsi, insert_after,
6776 &offvar, NULL);
6777 incr = gsi_stmt (incr_gsi);
6778 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6780 stride_step = force_gimple_operand (unshare_expr (stride_step),
6781 &stmts, true, NULL_TREE);
6782 if (stmts)
6783 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6785 prev_stmt_info = NULL;
6786 running_off = offvar;
6787 alias_off = build_int_cst (ref_type, 0);
6788 int nloads = nunits;
6789 tree ltype = TREE_TYPE (vectype);
6790 auto_vec<tree> dr_chain;
6791 if (slp)
6793 nloads = nunits / group_size;
6794 if (group_size < nunits)
6795 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6796 else
6797 ltype = vectype;
6798 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6799 /* For SLP permutation support we need to load the whole group,
6800 not only the number of vector stmts the permutation result
6801 fits in. */
6802 if (slp_perm)
6804 ncopies = (group_size * vf + nunits - 1) / nunits;
6805 dr_chain.create (ncopies);
6807 else
6808 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6810 for (j = 0; j < ncopies; j++)
6812 tree vec_inv;
6814 if (nloads > 1)
6816 vec_alloc (v, nloads);
6817 for (i = 0; i < nloads; i++)
6819 tree newref, newoff;
6820 gimple *incr;
6821 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6823 newref = force_gimple_operand_gsi (gsi, newref, true,
6824 NULL_TREE, true,
6825 GSI_SAME_STMT);
6826 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6827 newoff = copy_ssa_name (running_off);
6828 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6829 running_off, stride_step);
6830 vect_finish_stmt_generation (stmt, incr, gsi);
6832 running_off = newoff;
6835 vec_inv = build_constructor (vectype, v);
6836 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6837 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6839 else
6841 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6842 build2 (MEM_REF, ltype,
6843 running_off, alias_off));
6844 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6846 tree newoff = copy_ssa_name (running_off);
6847 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6848 running_off, stride_step);
6849 vect_finish_stmt_generation (stmt, incr, gsi);
6851 running_off = newoff;
6854 if (slp)
6856 if (slp_perm)
6857 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6858 else
6859 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6861 else
6863 if (j == 0)
6864 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6865 else
6866 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6867 prev_stmt_info = vinfo_for_stmt (new_stmt);
6870 if (slp_perm)
6871 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6872 slp_node_instance, false);
6873 return true;
6876 if (grouped_load)
6878 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6879 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6880 /* For SLP vectorization we directly vectorize a subchain
6881 without permutation. */
6882 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6883 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6884 /* For BB vectorization always use the first stmt to base
6885 the data ref pointer on. */
6886 if (bb_vinfo)
6887 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6889 /* Check if the chain of loads is already vectorized. */
6890 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6891 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6892 ??? But we can only do so if there is exactly one
6893 as we have no way to get at the rest. Leave the CSE
6894 opportunity alone.
6895 ??? With the group load eventually participating
6896 in multiple different permutations (having multiple
6897 slp nodes which refer to the same group) the CSE
6898 is even wrong code. See PR56270. */
6899 && !slp)
6901 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6902 return true;
6904 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6905 group_gap_adj = 0;
6907 /* VEC_NUM is the number of vect stmts to be created for this group. */
6908 if (slp)
6910 grouped_load = false;
6911 /* For SLP permutation support we need to load the whole group,
6912 not only the number of vector stmts the permutation result
6913 fits in. */
6914 if (slp_perm)
6915 vec_num = (group_size * vf + nunits - 1) / nunits;
6916 else
6917 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6918 group_gap_adj = vf * group_size - nunits * vec_num;
6920 else
6921 vec_num = group_size;
6923 ref_type = get_group_alias_ptr_type (first_stmt);
6925 else
6927 first_stmt = stmt;
6928 first_dr = dr;
6929 group_size = vec_num = 1;
6930 group_gap_adj = 0;
6931 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6934 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6935 gcc_assert (alignment_support_scheme);
6936 /* Targets with load-lane instructions must not require explicit
6937 realignment. */
6938 gcc_assert (!load_lanes_p
6939 || alignment_support_scheme == dr_aligned
6940 || alignment_support_scheme == dr_unaligned_supported);
6942 /* In case the vectorization factor (VF) is bigger than the number
6943 of elements that we can fit in a vectype (nunits), we have to generate
6944 more than one vector stmt - i.e. we need to "unroll" the
6945 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6946 from one copy of the vector stmt to the next, in the field
6947 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6948 stages to find the correct vector defs to be used when vectorizing
6949 stmts that use the defs of the current stmt. The example below
6950 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6951 need to create 4 vectorized stmts):
6953 before vectorization:
6954 RELATED_STMT VEC_STMT
6955 S1: x = memref - -
6956 S2: z = x + 1 - -
6958 step 1: vectorize stmt S1:
6959 We first create the vector stmt VS1_0, and, as usual, record a
6960 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6961 Next, we create the vector stmt VS1_1, and record a pointer to
6962 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6963 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6964 stmts and pointers:
6965 RELATED_STMT VEC_STMT
6966 VS1_0: vx0 = memref0 VS1_1 -
6967 VS1_1: vx1 = memref1 VS1_2 -
6968 VS1_2: vx2 = memref2 VS1_3 -
6969 VS1_3: vx3 = memref3 - -
6970 S1: x = load - VS1_0
6971 S2: z = x + 1 - -
6973 See the documentation in vect_get_vec_def_for_stmt_copy for how the
6974 information we recorded in RELATED_STMT field is used to vectorize
6975 stmt S2. */
6977 /* In case of interleaving (non-unit grouped access):
6979 S1: x2 = &base + 2
6980 S2: x0 = &base
6981 S3: x1 = &base + 1
6982 S4: x3 = &base + 3
6984 Vectorized loads are created in the order of memory accesses
6985 starting from the access of the first stmt of the chain:
6987 VS1: vx0 = &base
6988 VS2: vx1 = &base + vec_size*1
6989 VS3: vx3 = &base + vec_size*2
6990 VS4: vx4 = &base + vec_size*3
6992 Then permutation statements are generated:
6994 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6995 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6998 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6999 (the order of the data-refs in the output of vect_permute_load_chain
7000 corresponds to the order of scalar stmts in the interleaving chain - see
7001 the documentation of vect_permute_load_chain()).
7002 The generation of permutation stmts and recording them in
7003 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7005 In case of both multiple types and interleaving, the vector loads and
7006 permutation stmts above are created for every copy. The result vector
7007 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7008 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7010 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7011 on a target that supports unaligned accesses (dr_unaligned_supported)
7012 we generate the following code:
7013 p = initial_addr;
7014 indx = 0;
7015 loop {
7016 p = p + indx * vectype_size;
7017 vec_dest = *(p);
7018 indx = indx + 1;
7021 Otherwise, the data reference is potentially unaligned on a target that
7022 does not support unaligned accesses (dr_explicit_realign_optimized) -
7023 then generate the following code, in which the data in each iteration is
7024 obtained by two vector loads, one from the previous iteration, and one
7025 from the current iteration:
7026 p1 = initial_addr;
7027 msq_init = *(floor(p1))
7028 p2 = initial_addr + VS - 1;
7029 realignment_token = call target_builtin;
7030 indx = 0;
7031 loop {
7032 p2 = p2 + indx * vectype_size
7033 lsq = *(floor(p2))
7034 vec_dest = realign_load (msq, lsq, realignment_token)
7035 indx = indx + 1;
7036 msq = lsq;
7037 } */
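/* As an illustrative sketch of the explicit realignment scheme above (the
vector size and misalignment are assumptions for this example): with
16-byte vectors and initial_addr misaligned by 4 bytes, msq = *(floor(p1))
loads the aligned 16 bytes starting 4 bytes before the requested address
and lsq = *(floor(p2)) loads the next aligned 16 bytes; realign_load then
extracts, steered by realignment_token, the 16 requested bytes from the
concatenation of msq and lsq. */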
7039 /* If the misalignment remains the same throughout the execution of the
7040 loop, we can create the init_addr and permutation mask at the loop
7041 preheader. Otherwise, it needs to be created inside the loop.
7042 This can only occur when vectorizing memory accesses in the inner-loop
7043 nested within an outer-loop that is being vectorized. */
7045 if (nested_in_vect_loop
7046 && (TREE_INT_CST_LOW (DR_STEP (dr))
7047 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7049 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7050 compute_in_loop = true;
7053 if ((alignment_support_scheme == dr_explicit_realign_optimized
7054 || alignment_support_scheme == dr_explicit_realign)
7055 && !compute_in_loop)
7057 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7058 alignment_support_scheme, NULL_TREE,
7059 &at_loop);
7060 if (alignment_support_scheme == dr_explicit_realign_optimized)
7062 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7063 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7064 size_one_node);
7067 else
7068 at_loop = loop;
7070 if (negative)
7071 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7073 if (load_lanes_p)
7074 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7075 else
7076 aggr_type = vectype;
7078 prev_stmt_info = NULL;
7079 for (j = 0; j < ncopies; j++)
7081 /* 1. Create the vector or array pointer update chain. */
7082 if (j == 0)
7084 bool simd_lane_access_p
7085 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7086 if (simd_lane_access_p
7087 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7088 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7089 && integer_zerop (DR_OFFSET (first_dr))
7090 && integer_zerop (DR_INIT (first_dr))
7091 && alias_sets_conflict_p (get_alias_set (aggr_type),
7092 get_alias_set (TREE_TYPE (ref_type)))
7093 && (alignment_support_scheme == dr_aligned
7094 || alignment_support_scheme == dr_unaligned_supported))
7096 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7097 dataref_offset = build_int_cst (ref_type, 0);
7098 inv_p = false;
7100 else if (first_stmt_for_drptr
7101 && first_stmt != first_stmt_for_drptr)
7103 dataref_ptr
7104 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7105 at_loop, offset, &dummy, gsi,
7106 &ptr_incr, simd_lane_access_p,
7107 &inv_p, byte_offset);
7108 /* Adjust the pointer by the difference to first_stmt. */
7109 data_reference_p ptrdr
7110 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7111 tree diff = fold_convert (sizetype,
7112 size_binop (MINUS_EXPR,
7113 DR_INIT (first_dr),
7114 DR_INIT (ptrdr)));
7115 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7116 stmt, diff);
7118 else
7119 dataref_ptr
7120 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7121 offset, &dummy, gsi, &ptr_incr,
7122 simd_lane_access_p, &inv_p,
7123 byte_offset);
7125 else if (dataref_offset)
7126 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7127 TYPE_SIZE_UNIT (aggr_type));
7128 else
7129 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7130 TYPE_SIZE_UNIT (aggr_type));
7132 if (grouped_load || slp_perm)
7133 dr_chain.create (vec_num);
7135 if (load_lanes_p)
7137 tree vec_array;
7139 vec_array = create_vector_array (vectype, vec_num);
7141 /* Emit:
7142 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7143 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7144 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7145 gimple_call_set_lhs (new_stmt, vec_array);
7146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7148 /* Extract each vector into an SSA_NAME. */
7149 for (i = 0; i < vec_num; i++)
7151 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7152 vec_array, i);
7153 dr_chain.quick_push (new_temp);
7156 /* Record the mapping between SSA_NAMEs and statements. */
7157 vect_record_grouped_load_vectors (stmt, dr_chain);
7159 else
7161 for (i = 0; i < vec_num; i++)
7163 if (i > 0)
7164 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7165 stmt, NULL_TREE);
7167 /* 2. Create the vector-load in the loop. */
7168 switch (alignment_support_scheme)
7170 case dr_aligned:
7171 case dr_unaligned_supported:
7173 unsigned int align, misalign;
7175 data_ref
7176 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7177 dataref_offset
7178 ? dataref_offset
7179 : build_int_cst (ref_type, 0));
7180 align = TYPE_ALIGN_UNIT (vectype);
7181 if (alignment_support_scheme == dr_aligned)
7183 gcc_assert (aligned_access_p (first_dr));
7184 misalign = 0;
7186 else if (DR_MISALIGNMENT (first_dr) == -1)
7188 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7189 align = TYPE_ALIGN_UNIT (elem_type);
7190 else
7191 align = (get_object_alignment (DR_REF (first_dr))
7192 / BITS_PER_UNIT);
7193 misalign = 0;
7194 TREE_TYPE (data_ref)
7195 = build_aligned_type (TREE_TYPE (data_ref),
7196 align * BITS_PER_UNIT);
7198 else
7200 TREE_TYPE (data_ref)
7201 = build_aligned_type (TREE_TYPE (data_ref),
7202 TYPE_ALIGN (elem_type));
7203 misalign = DR_MISALIGNMENT (first_dr);
7205 if (dataref_offset == NULL_TREE
7206 && TREE_CODE (dataref_ptr) == SSA_NAME)
7207 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7208 align, misalign);
7209 break;
7211 case dr_explicit_realign:
7213 tree ptr, bump;
7215 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7217 if (compute_in_loop)
7218 msq = vect_setup_realignment (first_stmt, gsi,
7219 &realignment_token,
7220 dr_explicit_realign,
7221 dataref_ptr, NULL);
7223 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7224 ptr = copy_ssa_name (dataref_ptr);
7225 else
7226 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7227 new_stmt = gimple_build_assign
7228 (ptr, BIT_AND_EXPR, dataref_ptr,
7229 build_int_cst
7230 (TREE_TYPE (dataref_ptr),
7231 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7233 data_ref
7234 = build2 (MEM_REF, vectype, ptr,
7235 build_int_cst (ref_type, 0));
7236 vec_dest = vect_create_destination_var (scalar_dest,
7237 vectype);
7238 new_stmt = gimple_build_assign (vec_dest, data_ref);
7239 new_temp = make_ssa_name (vec_dest, new_stmt);
7240 gimple_assign_set_lhs (new_stmt, new_temp);
7241 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7242 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7243 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7244 msq = new_temp;
7246 bump = size_binop (MULT_EXPR, vs,
7247 TYPE_SIZE_UNIT (elem_type));
7248 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7249 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7250 new_stmt = gimple_build_assign
7251 (NULL_TREE, BIT_AND_EXPR, ptr,
7252 build_int_cst
7253 (TREE_TYPE (ptr),
7254 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7255 ptr = copy_ssa_name (ptr, new_stmt);
7256 gimple_assign_set_lhs (new_stmt, ptr);
7257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7258 data_ref
7259 = build2 (MEM_REF, vectype, ptr,
7260 build_int_cst (ref_type, 0));
7261 break;
7263 case dr_explicit_realign_optimized:
7264 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7265 new_temp = copy_ssa_name (dataref_ptr);
7266 else
7267 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7268 new_stmt = gimple_build_assign
7269 (new_temp, BIT_AND_EXPR, dataref_ptr,
7270 build_int_cst
7271 (TREE_TYPE (dataref_ptr),
7272 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7273 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7274 data_ref
7275 = build2 (MEM_REF, vectype, new_temp,
7276 build_int_cst (ref_type, 0));
7277 break;
7278 default:
7279 gcc_unreachable ();
7281 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7282 new_stmt = gimple_build_assign (vec_dest, data_ref);
7283 new_temp = make_ssa_name (vec_dest, new_stmt);
7284 gimple_assign_set_lhs (new_stmt, new_temp);
7285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7287 /* 3. Handle explicit realignment if necessary/supported.
7288 Create in loop:
7289 vec_dest = realign_load (msq, lsq, realignment_token) */
7290 if (alignment_support_scheme == dr_explicit_realign_optimized
7291 || alignment_support_scheme == dr_explicit_realign)
7293 lsq = gimple_assign_lhs (new_stmt);
7294 if (!realignment_token)
7295 realignment_token = dataref_ptr;
7296 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7297 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7298 msq, lsq, realignment_token);
7299 new_temp = make_ssa_name (vec_dest, new_stmt);
7300 gimple_assign_set_lhs (new_stmt, new_temp);
7301 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7303 if (alignment_support_scheme == dr_explicit_realign_optimized)
7305 gcc_assert (phi);
7306 if (i == vec_num - 1 && j == ncopies - 1)
7307 add_phi_arg (phi, lsq,
7308 loop_latch_edge (containing_loop),
7309 UNKNOWN_LOCATION);
7310 msq = lsq;
7314 /* 4. Handle invariant-load. */
7315 if (inv_p && !bb_vinfo)
7317 gcc_assert (!grouped_load);
7318 /* If we have versioned for aliasing or the loop doesn't
7319 have any data dependencies that would preclude this,
7320 then we are sure this is a loop invariant load and
7321 thus we can insert it on the preheader edge. */
7322 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7323 && !nested_in_vect_loop
7324 && hoist_defs_of_uses (stmt, loop))
7326 if (dump_enabled_p ())
7328 dump_printf_loc (MSG_NOTE, vect_location,
7329 "hoisting out of the vectorized "
7330 "loop: ");
7331 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7333 tree tem = copy_ssa_name (scalar_dest);
7334 gsi_insert_on_edge_immediate
7335 (loop_preheader_edge (loop),
7336 gimple_build_assign (tem,
7337 unshare_expr
7338 (gimple_assign_rhs1 (stmt))));
7339 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7340 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7341 set_vinfo_for_stmt (new_stmt,
7342 new_stmt_vec_info (new_stmt, vinfo));
7344 else
7346 gimple_stmt_iterator gsi2 = *gsi;
7347 gsi_next (&gsi2);
7348 new_temp = vect_init_vector (stmt, scalar_dest,
7349 vectype, &gsi2);
7350 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7354 if (negative)
7356 tree perm_mask = perm_mask_for_reverse (vectype);
7357 new_temp = permute_vec_elements (new_temp, new_temp,
7358 perm_mask, stmt, gsi);
7359 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7362 /* Collect vector loads and later create their permutation in
7363 vect_transform_grouped_load (). */
7364 if (grouped_load || slp_perm)
7365 dr_chain.quick_push (new_temp);
7367 /* Store vector loads in the corresponding SLP_NODE. */
7368 if (slp && !slp_perm)
7369 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7371 /* Bump the vector pointer to account for a gap or for excess
7372 elements loaded for a permuted SLP load. */
7373 if (group_gap_adj != 0)
7375 bool ovf;
7376 tree bump
7377 = wide_int_to_tree (sizetype,
7378 wi::smul (TYPE_SIZE_UNIT (elem_type),
7379 group_gap_adj, &ovf));
7380 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7381 stmt, bump);
7385 if (slp && !slp_perm)
7386 continue;
7388 if (slp_perm)
7390 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7391 slp_node_instance, false))
7393 dr_chain.release ();
7394 return false;
7397 else
7399 if (grouped_load)
7401 if (!load_lanes_p)
7402 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7403 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7405 else
7407 if (j == 0)
7408 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7409 else
7410 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7411 prev_stmt_info = vinfo_for_stmt (new_stmt);
7414 dr_chain.release ();
7417 return true;
7420 /* Function vect_is_simple_cond.
7422 Input:
7423 LOOP - the loop that is being vectorized.
7424 COND - Condition that is checked for simple use.
7426 Output:
7427 *COMP_VECTYPE - the vector type for the comparison.
7429 Returns whether a COND can be vectorized. Checks whether
7430 condition operands are supportable using vect_is_simple_use. */
7432 static bool
7433 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7435 tree lhs, rhs;
7436 enum vect_def_type dt;
7437 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7439 /* Mask case. */
7440 if (TREE_CODE (cond) == SSA_NAME
7441 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7443 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7444 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7445 &dt, comp_vectype)
7446 || !*comp_vectype
7447 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7448 return false;
7449 return true;
7452 if (!COMPARISON_CLASS_P (cond))
7453 return false;
7455 lhs = TREE_OPERAND (cond, 0);
7456 rhs = TREE_OPERAND (cond, 1);
7458 if (TREE_CODE (lhs) == SSA_NAME)
7460 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7461 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7462 return false;
7464 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7465 && TREE_CODE (lhs) != FIXED_CST)
7466 return false;
7468 if (TREE_CODE (rhs) == SSA_NAME)
7470 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7471 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7472 return false;
7474 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7475 && TREE_CODE (rhs) != FIXED_CST)
7476 return false;
7478 if (vectype1 && vectype2
7479 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7480 return false;
7482 *comp_vectype = vectype1 ? vectype1 : vectype2;
7483 return true;
7486 /* vectorizable_condition.
7488 Check if STMT is a conditional modify expression that can be vectorized.
7489 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7490 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7491 at GSI.
7493 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7494 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7495 the else clause if it is 2).
7497 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
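/* Illustrative sketch only (hypothetical source and variable names): a scalar
   statement such as

       a[i] = b[i] < c[i] ? x[i] : y[i];

   carries a COND_EXPR on its RHS and is vectorized below roughly as

       vec_compare = vb < vc;                              // per-element mask
       vec_dest    = VEC_COND_EXPR <vec_compare, vx, vy>;  // per-element select

   where vb, vc, vx and vy stand for the vector defs of the operands.  */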
7499 bool
7500 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7501 gimple **vec_stmt, tree reduc_def, int reduc_index,
7502 slp_tree slp_node)
7504 tree scalar_dest = NULL_TREE;
7505 tree vec_dest = NULL_TREE;
7506 tree cond_expr, then_clause, else_clause;
7507 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7508 tree comp_vectype = NULL_TREE;
7509 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7510 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7511 tree vec_compare;
7512 tree new_temp;
7513 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7514 enum vect_def_type dt, dts[4];
7515 int ncopies;
7516 enum tree_code code;
7517 stmt_vec_info prev_stmt_info = NULL;
7518 int i, j;
7519 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7520 vec<tree> vec_oprnds0 = vNULL;
7521 vec<tree> vec_oprnds1 = vNULL;
7522 vec<tree> vec_oprnds2 = vNULL;
7523 vec<tree> vec_oprnds3 = vNULL;
7524 tree vec_cmp_type;
7525 bool masked = false;
7527 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7528 return false;
7530 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7532 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7533 return false;
7535 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7536 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7537 && reduc_def))
7538 return false;
7540 /* FORNOW: not yet supported. */
7541 if (STMT_VINFO_LIVE_P (stmt_info))
7543 if (dump_enabled_p ())
7544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7545 "value used after loop.\n");
7546 return false;
7550 /* Is this a vectorizable conditional operation? */
7551 if (!is_gimple_assign (stmt))
7552 return false;
7554 code = gimple_assign_rhs_code (stmt);
7556 if (code != COND_EXPR)
7557 return false;
7559 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7560 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7561 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7563 if (slp_node || PURE_SLP_STMT (stmt_info))
7564 ncopies = 1;
7565 else
7566 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7568 gcc_assert (ncopies >= 1);
7569 if (reduc_index && ncopies > 1)
7570 return false; /* FORNOW */
7572 cond_expr = gimple_assign_rhs1 (stmt);
7573 then_clause = gimple_assign_rhs2 (stmt);
7574 else_clause = gimple_assign_rhs3 (stmt);
7576 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7577 || !comp_vectype)
7578 return false;
7580 gimple *def_stmt;
7581 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7582 &vectype1))
7583 return false;
7584 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7585 &vectype2))
7586 return false;
7588 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7589 return false;
7591 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7592 return false;
7594 masked = !COMPARISON_CLASS_P (cond_expr);
7595 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7597 if (vec_cmp_type == NULL_TREE)
7598 return false;
7600 if (!vec_stmt)
7602 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7603 return expand_vec_cond_expr_p (vectype, comp_vectype);
7606 /* Transform. */
7608 if (!slp_node)
7610 vec_oprnds0.create (1);
7611 vec_oprnds1.create (1);
7612 vec_oprnds2.create (1);
7613 vec_oprnds3.create (1);
7616 /* Handle def. */
7617 scalar_dest = gimple_assign_lhs (stmt);
7618 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7620 /* Handle cond expr. */
7621 for (j = 0; j < ncopies; j++)
7623 gassign *new_stmt = NULL;
7624 if (j == 0)
7626 if (slp_node)
7628 auto_vec<tree, 4> ops;
7629 auto_vec<vec<tree>, 4> vec_defs;
7631 if (masked)
7632 ops.safe_push (cond_expr);
7633 else
7635 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7636 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7638 ops.safe_push (then_clause);
7639 ops.safe_push (else_clause);
7640 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7641 vec_oprnds3 = vec_defs.pop ();
7642 vec_oprnds2 = vec_defs.pop ();
7643 if (!masked)
7644 vec_oprnds1 = vec_defs.pop ();
7645 vec_oprnds0 = vec_defs.pop ();
7647 ops.release ();
7648 vec_defs.release ();
7650 else
7652 gimple *gtemp;
7653 if (masked)
7655 vec_cond_lhs
7656 = vect_get_vec_def_for_operand (cond_expr, stmt,
7657 comp_vectype);
7658 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7659 &gtemp, &dts[0]);
7661 else
7663 vec_cond_lhs =
7664 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7665 stmt, comp_vectype);
7666 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7667 loop_vinfo, &gtemp, &dts[0]);
7669 vec_cond_rhs =
7670 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7671 stmt, comp_vectype);
7672 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7673 loop_vinfo, &gtemp, &dts[1]);
7675 if (reduc_index == 1)
7676 vec_then_clause = reduc_def;
7677 else
7679 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7680 stmt);
7681 vect_is_simple_use (then_clause, loop_vinfo,
7682 &gtemp, &dts[2]);
7684 if (reduc_index == 2)
7685 vec_else_clause = reduc_def;
7686 else
7688 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7689 stmt);
7690 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7694 else
7696 vec_cond_lhs
7697 = vect_get_vec_def_for_stmt_copy (dts[0],
7698 vec_oprnds0.pop ());
7699 if (!masked)
7700 vec_cond_rhs
7701 = vect_get_vec_def_for_stmt_copy (dts[1],
7702 vec_oprnds1.pop ());
7704 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7705 vec_oprnds2.pop ());
7706 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7707 vec_oprnds3.pop ());
7710 if (!slp_node)
7712 vec_oprnds0.quick_push (vec_cond_lhs);
7713 if (!masked)
7714 vec_oprnds1.quick_push (vec_cond_rhs);
7715 vec_oprnds2.quick_push (vec_then_clause);
7716 vec_oprnds3.quick_push (vec_else_clause);
7719 /* Arguments are ready. Create the new vector stmt. */
7720 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7722 vec_then_clause = vec_oprnds2[i];
7723 vec_else_clause = vec_oprnds3[i];
7725 if (masked)
7726 vec_compare = vec_cond_lhs;
7727 else
7729 vec_cond_rhs = vec_oprnds1[i];
7730 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7731 vec_cond_lhs, vec_cond_rhs);
7733 new_temp = make_ssa_name (vec_dest);
7734 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7735 vec_compare, vec_then_clause,
7736 vec_else_clause);
7737 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7738 if (slp_node)
7739 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7742 if (slp_node)
7743 continue;
7745 if (j == 0)
7746 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7747 else
7748 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7750 prev_stmt_info = vinfo_for_stmt (new_stmt);
7753 vec_oprnds0.release ();
7754 vec_oprnds1.release ();
7755 vec_oprnds2.release ();
7756 vec_oprnds3.release ();
7758 return true;
7761 /* vectorizable_comparison.
7763 Check if STMT is a comparison expression that can be vectorized.
7764 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7765 comparison, put it in VEC_STMT, and insert it at GSI.
7767 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
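/* Illustrative sketch only (hypothetical names): a scalar comparison producing
   a boolean result, e.g.

       _m = a_1 < b_2;

   is vectorized below into a single vector comparison whose destination has a
   boolean vector (mask) type:

       vec_m = va < vb;

   Such a mask is then typically consumed by a VEC_COND_EXPR or a masked
   load/store.  */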
7769 bool
7770 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7771 gimple **vec_stmt, tree reduc_def,
7772 slp_tree slp_node)
7774 tree lhs, rhs1, rhs2;
7775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7776 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7777 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7778 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7779 tree new_temp;
7780 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7781 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7782 unsigned nunits;
7783 int ncopies;
7784 enum tree_code code;
7785 stmt_vec_info prev_stmt_info = NULL;
7786 int i, j;
7787 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7788 vec<tree> vec_oprnds0 = vNULL;
7789 vec<tree> vec_oprnds1 = vNULL;
7790 gimple *def_stmt;
7791 tree mask_type;
7792 tree mask;
7794 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7795 return false;
7797 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7798 return false;
7800 mask_type = vectype;
7801 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7803 if (slp_node || PURE_SLP_STMT (stmt_info))
7804 ncopies = 1;
7805 else
7806 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7808 gcc_assert (ncopies >= 1);
7809 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7810 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7811 && reduc_def))
7812 return false;
7814 if (STMT_VINFO_LIVE_P (stmt_info))
7816 if (dump_enabled_p ())
7817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7818 "value used after loop.\n");
7819 return false;
7822 if (!is_gimple_assign (stmt))
7823 return false;
7825 code = gimple_assign_rhs_code (stmt);
7827 if (TREE_CODE_CLASS (code) != tcc_comparison)
7828 return false;
7830 rhs1 = gimple_assign_rhs1 (stmt);
7831 rhs2 = gimple_assign_rhs2 (stmt);
7833 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7834 &dts[0], &vectype1))
7835 return false;
7837 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7838 &dts[1], &vectype2))
7839 return false;
7841 if (vectype1 && vectype2
7842 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7843 return false;
7845 vectype = vectype1 ? vectype1 : vectype2;
7847 /* Invariant comparison. */
7848 if (!vectype)
7850 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7851 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7852 return false;
7854 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7855 return false;
7857 if (!vec_stmt)
7859 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7860 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7861 return expand_vec_cmp_expr_p (vectype, mask_type);
7864 /* Transform. */
7865 if (!slp_node)
7867 vec_oprnds0.create (1);
7868 vec_oprnds1.create (1);
7871 /* Handle def. */
7872 lhs = gimple_assign_lhs (stmt);
7873 mask = vect_create_destination_var (lhs, mask_type);
7875 /* Handle cmp expr. */
7876 for (j = 0; j < ncopies; j++)
7878 gassign *new_stmt = NULL;
7879 if (j == 0)
7881 if (slp_node)
7883 auto_vec<tree, 2> ops;
7884 auto_vec<vec<tree>, 2> vec_defs;
7886 ops.safe_push (rhs1);
7887 ops.safe_push (rhs2);
7888 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7889 vec_oprnds1 = vec_defs.pop ();
7890 vec_oprnds0 = vec_defs.pop ();
7892 else
7894 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7895 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7898 else
7900 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7901 vec_oprnds0.pop ());
7902 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7903 vec_oprnds1.pop ());
7906 if (!slp_node)
7908 vec_oprnds0.quick_push (vec_rhs1);
7909 vec_oprnds1.quick_push (vec_rhs2);
7912 /* Arguments are ready. Create the new vector stmt. */
7913 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7915 vec_rhs2 = vec_oprnds1[i];
7917 new_temp = make_ssa_name (mask);
7918 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7919 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7920 if (slp_node)
7921 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7924 if (slp_node)
7925 continue;
7927 if (j == 0)
7928 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7929 else
7930 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7932 prev_stmt_info = vinfo_for_stmt (new_stmt);
7935 vec_oprnds0.release ();
7936 vec_oprnds1.release ();
7938 return true;
7941 /* Make sure the statement is vectorizable. */
7943 bool
7944 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7946 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7947 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7948 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7949 bool ok;
7950 tree scalar_type, vectype;
7951 gimple *pattern_stmt;
7952 gimple_seq pattern_def_seq;
7954 if (dump_enabled_p ())
7956 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7957 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7960 if (gimple_has_volatile_ops (stmt))
7962 if (dump_enabled_p ())
7963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7964 "not vectorized: stmt has volatile operands\n");
7966 return false;
7969 /* Skip stmts that do not need to be vectorized. In loops this is expected
7970 to include:
7971 - the COND_EXPR which is the loop exit condition
7972 - any LABEL_EXPRs in the loop
7973 - computations that are used only for array indexing or loop control.
7974 In basic blocks we only analyze statements that are a part of some SLP
7975 instance; therefore, all the statements are relevant.
7977 A pattern statement needs to be analyzed instead of the original statement
7978 if the original statement is not relevant. Otherwise, we analyze both
7979 statements. In basic blocks we are called from some SLP instance
7980 traversal; there we don't analyze pattern stmts instead, since the
7981 pattern stmts will already be part of the SLP instance. */
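/* Illustrative example (hypothetical stmts): if the pattern recognizer
   replaced the scalar sequence

       t_1 = (int) a_2;   t_3 = (int) b_4;   s_5 = t_1 * t_3;

   by a pattern stmt performing a widening multiply

       patt_6 = a_2 w* b_4;

   and the original stmts are not relevant themselves, it is patt_6 that gets
   analyzed below (along with its pattern def sequence, if any).  */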
7983 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7984 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7985 && !STMT_VINFO_LIVE_P (stmt_info))
7987 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7988 && pattern_stmt
7989 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7990 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7992 /* Analyze PATTERN_STMT instead of the original stmt. */
7993 stmt = pattern_stmt;
7994 stmt_info = vinfo_for_stmt (pattern_stmt);
7995 if (dump_enabled_p ())
7997 dump_printf_loc (MSG_NOTE, vect_location,
7998 "==> examining pattern statement: ");
7999 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8002 else
8004 if (dump_enabled_p ())
8005 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8007 return true;
8010 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8011 && node == NULL
8012 && pattern_stmt
8013 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8014 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8016 /* Analyze PATTERN_STMT too. */
8017 if (dump_enabled_p ())
8019 dump_printf_loc (MSG_NOTE, vect_location,
8020 "==> examining pattern statement: ");
8021 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8024 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8025 return false;
8028 if (is_pattern_stmt_p (stmt_info)
8029 && node == NULL
8030 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8032 gimple_stmt_iterator si;
8034 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8036 gimple *pattern_def_stmt = gsi_stmt (si);
8037 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8038 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8040 /* Analyze def stmt of STMT if it's a pattern stmt. */
8041 if (dump_enabled_p ())
8043 dump_printf_loc (MSG_NOTE, vect_location,
8044 "==> examining pattern def statement: ");
8045 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8048 if (!vect_analyze_stmt (pattern_def_stmt,
8049 need_to_vectorize, node))
8050 return false;
8055 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8057 case vect_internal_def:
8058 break;
8060 case vect_reduction_def:
8061 case vect_nested_cycle:
8062 gcc_assert (!bb_vinfo
8063 && (relevance == vect_used_in_outer
8064 || relevance == vect_used_in_outer_by_reduction
8065 || relevance == vect_used_by_reduction
8066 || relevance == vect_unused_in_scope));
8067 break;
8069 case vect_induction_def:
8070 case vect_constant_def:
8071 case vect_external_def:
8072 case vect_unknown_def_type:
8073 default:
8074 gcc_unreachable ();
8077 if (bb_vinfo)
8079 gcc_assert (PURE_SLP_STMT (stmt_info));
8081 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8082 if (dump_enabled_p ())
8084 dump_printf_loc (MSG_NOTE, vect_location,
8085 "get vectype for scalar type: ");
8086 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8087 dump_printf (MSG_NOTE, "\n");
8090 vectype = get_vectype_for_scalar_type (scalar_type);
8091 if (!vectype)
8093 if (dump_enabled_p ())
8095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8096 "not SLPed: unsupported data-type ");
8097 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8098 scalar_type);
8099 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8101 return false;
8104 if (dump_enabled_p ())
8106 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8107 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8108 dump_printf (MSG_NOTE, "\n");
8111 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8114 if (STMT_VINFO_RELEVANT_P (stmt_info))
8116 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8117 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8118 || (is_gimple_call (stmt)
8119 && gimple_call_lhs (stmt) == NULL_TREE));
8120 *need_to_vectorize = true;
8123 if (PURE_SLP_STMT (stmt_info) && !node)
8125 dump_printf_loc (MSG_NOTE, vect_location,
8126 "handled only by SLP analysis\n");
8127 return true;
8130 ok = true;
8131 if (!bb_vinfo
8132 && (STMT_VINFO_RELEVANT_P (stmt_info)
8133 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8134 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8135 || vectorizable_conversion (stmt, NULL, NULL, node)
8136 || vectorizable_shift (stmt, NULL, NULL, node)
8137 || vectorizable_operation (stmt, NULL, NULL, node)
8138 || vectorizable_assignment (stmt, NULL, NULL, node)
8139 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8140 || vectorizable_call (stmt, NULL, NULL, node)
8141 || vectorizable_store (stmt, NULL, NULL, node)
8142 || vectorizable_reduction (stmt, NULL, NULL, node)
8143 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8144 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8145 else
8147 if (bb_vinfo)
8148 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8149 || vectorizable_conversion (stmt, NULL, NULL, node)
8150 || vectorizable_shift (stmt, NULL, NULL, node)
8151 || vectorizable_operation (stmt, NULL, NULL, node)
8152 || vectorizable_assignment (stmt, NULL, NULL, node)
8153 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8154 || vectorizable_call (stmt, NULL, NULL, node)
8155 || vectorizable_store (stmt, NULL, NULL, node)
8156 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8157 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8160 if (!ok)
8162 if (dump_enabled_p ())
8164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8165 "not vectorized: relevant stmt not ");
8166 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8167 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8170 return false;
8173 if (bb_vinfo)
8174 return true;
8176 /* Stmts that are (also) "live" (i.e., used outside the loop)
8177 need extra handling, except for vectorizable reductions. */
8178 if (STMT_VINFO_LIVE_P (stmt_info)
8179 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8180 ok = vectorizable_live_operation (stmt, NULL, NULL);
8182 if (!ok)
8184 if (dump_enabled_p ())
8186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8187 "not vectorized: live stmt not ");
8188 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8189 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8192 return false;
8195 return true;
8199 /* Function vect_transform_stmt.
8201 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8203 bool
8204 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8205 bool *grouped_store, slp_tree slp_node,
8206 slp_instance slp_node_instance)
8208 bool is_store = false;
8209 gimple *vec_stmt = NULL;
8210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8211 bool done;
8213 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8215 switch (STMT_VINFO_TYPE (stmt_info))
8217 case type_demotion_vec_info_type:
8218 case type_promotion_vec_info_type:
8219 case type_conversion_vec_info_type:
8220 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8221 gcc_assert (done);
8222 break;
8224 case induc_vec_info_type:
8225 gcc_assert (!slp_node);
8226 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8227 gcc_assert (done);
8228 break;
8230 case shift_vec_info_type:
8231 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8232 gcc_assert (done);
8233 break;
8235 case op_vec_info_type:
8236 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8237 gcc_assert (done);
8238 break;
8240 case assignment_vec_info_type:
8241 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8242 gcc_assert (done);
8243 break;
8245 case load_vec_info_type:
8246 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8247 slp_node_instance);
8248 gcc_assert (done);
8249 break;
8251 case store_vec_info_type:
8252 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8253 gcc_assert (done);
8254 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8256 /* In case of interleaving, the whole chain is vectorized when the
8257 last store in the chain is reached. Store stmts before the last
8258 one are skipped, and their vec_stmt_info shouldn't be freed
8259 meanwhile. */
8260 *grouped_store = true;
8261 if (STMT_VINFO_VEC_STMT (stmt_info))
8262 is_store = true;
8264 else
8265 is_store = true;
8266 break;
8268 case condition_vec_info_type:
8269 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8270 gcc_assert (done);
8271 break;
8273 case comparison_vec_info_type:
8274 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8275 gcc_assert (done);
8276 break;
8278 case call_vec_info_type:
8279 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8280 stmt = gsi_stmt (*gsi);
8281 if (is_gimple_call (stmt)
8282 && gimple_call_internal_p (stmt)
8283 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8284 is_store = true;
8285 break;
8287 case call_simd_clone_vec_info_type:
8288 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8289 stmt = gsi_stmt (*gsi);
8290 break;
8292 case reduc_vec_info_type:
8293 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8294 gcc_assert (done);
8295 break;
8297 default:
8298 if (!STMT_VINFO_LIVE_P (stmt_info))
8300 if (dump_enabled_p ())
8301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8302 "stmt not supported.\n");
8303 gcc_unreachable ();
8307 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8308 This would break hybrid SLP vectorization. */
8309 if (slp_node)
8310 gcc_assert (!vec_stmt
8311 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8313 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8314 is being vectorized, but outside the immediately enclosing loop. */
8315 if (vec_stmt
8316 && STMT_VINFO_LOOP_VINFO (stmt_info)
8317 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8318 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8319 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8320 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8321 || STMT_VINFO_RELEVANT (stmt_info) ==
8322 vect_used_in_outer_by_reduction))
8324 struct loop *innerloop = LOOP_VINFO_LOOP (
8325 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8326 imm_use_iterator imm_iter;
8327 use_operand_p use_p;
8328 tree scalar_dest;
8329 gimple *exit_phi;
8331 if (dump_enabled_p ())
8332 dump_printf_loc (MSG_NOTE, vect_location,
8333 "Record the vdef for outer-loop vectorization.\n");
8335 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8336 (to be used when vectorizing outer-loop stmts that use the DEF of
8337 STMT). */
8338 if (gimple_code (stmt) == GIMPLE_PHI)
8339 scalar_dest = PHI_RESULT (stmt);
8340 else
8341 scalar_dest = gimple_assign_lhs (stmt);
8343 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8345 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8347 exit_phi = USE_STMT (use_p);
8348 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8353 /* Handle stmts whose DEF is used outside the loop-nest that is
8354 being vectorized. */
8355 if (STMT_VINFO_LIVE_P (stmt_info)
8356 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8358 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
8359 gcc_assert (done);
8362 if (vec_stmt)
8363 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8365 return is_store;
8369 /* Remove a group of stores (for SLP or interleaving), free their
8370 stmt_vec_info. */
8372 void
8373 vect_remove_stores (gimple *first_stmt)
8375 gimple *next = first_stmt;
8376 gimple *tmp;
8377 gimple_stmt_iterator next_si;
8379 while (next)
8381 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8383 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8384 if (is_pattern_stmt_p (stmt_info))
8385 next = STMT_VINFO_RELATED_STMT (stmt_info);
8386 /* Free the attached stmt_vec_info and remove the stmt. */
8387 next_si = gsi_for_stmt (next);
8388 unlink_stmt_vdef (next);
8389 gsi_remove (&next_si, true);
8390 release_defs (next);
8391 free_stmt_vec_info (next);
8392 next = tmp;
8397 /* Function new_stmt_vec_info.
8399 Create and initialize a new stmt_vec_info struct for STMT. */
8401 stmt_vec_info
8402 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8404 stmt_vec_info res;
8405 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8407 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8408 STMT_VINFO_STMT (res) = stmt;
8409 res->vinfo = vinfo;
8410 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8411 STMT_VINFO_LIVE_P (res) = false;
8412 STMT_VINFO_VECTYPE (res) = NULL;
8413 STMT_VINFO_VEC_STMT (res) = NULL;
8414 STMT_VINFO_VECTORIZABLE (res) = true;
8415 STMT_VINFO_IN_PATTERN_P (res) = false;
8416 STMT_VINFO_RELATED_STMT (res) = NULL;
8417 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8418 STMT_VINFO_DATA_REF (res) = NULL;
8419 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8421 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8422 STMT_VINFO_DR_OFFSET (res) = NULL;
8423 STMT_VINFO_DR_INIT (res) = NULL;
8424 STMT_VINFO_DR_STEP (res) = NULL;
8425 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8427 if (gimple_code (stmt) == GIMPLE_PHI
8428 && is_loop_header_bb_p (gimple_bb (stmt)))
8429 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8430 else
8431 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8433 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8434 STMT_SLP_TYPE (res) = loop_vect;
8435 STMT_VINFO_NUM_SLP_USES (res) = 0;
8437 GROUP_FIRST_ELEMENT (res) = NULL;
8438 GROUP_NEXT_ELEMENT (res) = NULL;
8439 GROUP_SIZE (res) = 0;
8440 GROUP_STORE_COUNT (res) = 0;
8441 GROUP_GAP (res) = 0;
8442 GROUP_SAME_DR_STMT (res) = NULL;
8444 return res;
8448 /* Create the vector that holds the stmt_vec_info structs. */
8450 void
8451 init_stmt_vec_info_vec (void)
8453 gcc_assert (!stmt_vec_info_vec.exists ());
8454 stmt_vec_info_vec.create (50);
8458 /* Free the stmt_vec_info structs and the vector that holds them. */
8460 void
8461 free_stmt_vec_info_vec (void)
8463 unsigned int i;
8464 stmt_vec_info info;
8465 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8466 if (info != NULL)
8467 free_stmt_vec_info (STMT_VINFO_STMT (info));
8468 gcc_assert (stmt_vec_info_vec.exists ());
8469 stmt_vec_info_vec.release ();
8473 /* Free stmt vectorization related info. */
8475 void
8476 free_stmt_vec_info (gimple *stmt)
8478 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8480 if (!stmt_info)
8481 return;
8483 /* Check if this statement has a related "pattern stmt"
8484 (introduced by the vectorizer during the pattern recognition
8485 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8486 too. */
8487 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8489 stmt_vec_info patt_info
8490 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8491 if (patt_info)
8493 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8494 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8495 gimple_set_bb (patt_stmt, NULL);
8496 tree lhs = gimple_get_lhs (patt_stmt);
8497 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8498 release_ssa_name (lhs);
8499 if (seq)
8501 gimple_stmt_iterator si;
8502 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8504 gimple *seq_stmt = gsi_stmt (si);
8505 gimple_set_bb (seq_stmt, NULL);
8506 lhs = gimple_get_lhs (seq_stmt);
8507 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8508 release_ssa_name (lhs);
8509 free_stmt_vec_info (seq_stmt);
8512 free_stmt_vec_info (patt_stmt);
8516 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8517 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8518 set_vinfo_for_stmt (stmt, NULL);
8519 free (stmt_info);
8523 /* Function get_vectype_for_scalar_type_and_size.
8525 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8526 by the target. */
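/* Illustrative examples only (the actual result is target dependent):
   assuming a target with 16-byte vectors,

       scalar_type = int    (4 bytes), size = 16  ->  vector(4) int
       scalar_type = short  (2 bytes), size = 16  ->  vector(8) short
       scalar_type = double (8 bytes), size = 0   ->  the vector mode the
                                                      target prefers for double.  */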
8528 static tree
8529 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8531 machine_mode inner_mode = TYPE_MODE (scalar_type);
8532 machine_mode simd_mode;
8533 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8534 int nunits;
8535 tree vectype;
8537 if (nbytes == 0)
8538 return NULL_TREE;
8540 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8541 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8542 return NULL_TREE;
8544 /* For vector types of elements whose mode precision doesn't
8545 match their type's precision we use an element type of mode
8546 precision. The vectorization routines will have to make sure
8547 they support the proper result truncation/extension.
8548 We also make sure to build vector types with INTEGER_TYPE
8549 component type only. */
8550 if (INTEGRAL_TYPE_P (scalar_type)
8551 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8552 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8553 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8554 TYPE_UNSIGNED (scalar_type));
8556 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8557 When the component mode passes the above test, simply use a type
8558 corresponding to that mode. The theory is that any use that
8559 would cause problems with this will disable vectorization anyway. */
8560 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8561 && !INTEGRAL_TYPE_P (scalar_type))
8562 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8564 /* We can't build a vector type of elements with alignment bigger than
8565 their size. */
8566 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8567 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8568 TYPE_UNSIGNED (scalar_type));
8570 /* If we fell back to using the mode, fail if there was
8571 no scalar type for it. */
8572 if (scalar_type == NULL_TREE)
8573 return NULL_TREE;
8575 /* If no size was supplied, use the mode the target prefers. Otherwise
8576 look up a vector mode of the specified size. */
8577 if (size == 0)
8578 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8579 else
8580 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8581 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8582 if (nunits <= 1)
8583 return NULL_TREE;
8585 vectype = build_vector_type (scalar_type, nunits);
8587 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8588 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8589 return NULL_TREE;
8591 return vectype;
8594 unsigned int current_vector_size;
8596 /* Function get_vectype_for_scalar_type.
8598 Returns the vector type corresponding to SCALAR_TYPE as supported
8599 by the target. */
8601 tree
8602 get_vectype_for_scalar_type (tree scalar_type)
8604 tree vectype;
8605 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8606 current_vector_size);
8607 if (vectype
8608 && current_vector_size == 0)
8609 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8610 return vectype;
8613 /* Function get_mask_type_for_scalar_type.
8615 Returns the mask type corresponding to a result of comparison
8616 of vectors of the specified SCALAR_TYPE as supported by the target. */
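/* Illustrative example only (target dependent): with 16-byte vectors and
   SCALAR_TYPE == int, comparisons operate on vector(4) int, so the returned
   mask type is a 4-element boolean vector; depending on the target this may
   be a vector of integer 0/-1 elements or a type backed by a scalar mask
   register.  */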
8618 tree
8619 get_mask_type_for_scalar_type (tree scalar_type)
8621 tree vectype = get_vectype_for_scalar_type (scalar_type);
8623 if (!vectype)
8624 return NULL;
8626 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8627 current_vector_size);
8630 /* Function get_same_sized_vectype
8632 Returns a vector type with element type SCALAR_TYPE and the same
8633 size as VECTOR_TYPE, if supported by the target. */
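/* Illustrative example only (hypothetical 16-byte vectors):

       scalar_type = short, vector_type = vector(4) int (16 bytes)
         ->  vector(8) short

   For a BOOLEAN_TYPE scalar the same-sized truth (mask) vector type is
   returned instead.  */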
8635 tree
8636 get_same_sized_vectype (tree scalar_type, tree vector_type)
8638 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8639 return build_same_sized_truth_vector_type (vector_type);
8641 return get_vectype_for_scalar_type_and_size
8642 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8645 /* Function vect_is_simple_use.
8647 Input:
8648 VINFO - the vect info of the loop or basic block that is being vectorized.
8649 OPERAND - operand in the loop or bb.
8650 Output:
8651 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8652 DT - the type of definition
8654 Returns whether a stmt with OPERAND can be vectorized.
8655 For loops, supportable operands are constants, loop invariants, and operands
8656 that are defined by the current iteration of the loop. Unsupportable
8657 operands are those that are defined by a previous iteration of the loop (as
8658 is the case in reduction/induction computations).
8659 For basic blocks, supportable operands are constants and bb invariants.
8660 For now, operands defined outside the basic block are not supported. */
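/* Illustrative classification only (hypothetical loop and names):

       for (i = 0; i < n; i++)
         a[i] = b[i] * c + 5;

   seen from the loop being vectorized:
       5                             -> vect_constant_def
       c   (defined before the loop) -> vect_external_def
       _1 = b[i]  (defined in loop)  -> vect_internal_def
   with induction/reduction/nested-cycle defs reported according to the
   def stmt's STMT_VINFO_DEF_TYPE.  */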
8662 bool
8663 vect_is_simple_use (tree operand, vec_info *vinfo,
8664 gimple **def_stmt, enum vect_def_type *dt)
8666 *def_stmt = NULL;
8667 *dt = vect_unknown_def_type;
8669 if (dump_enabled_p ())
8671 dump_printf_loc (MSG_NOTE, vect_location,
8672 "vect_is_simple_use: operand ");
8673 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8674 dump_printf (MSG_NOTE, "\n");
8677 if (CONSTANT_CLASS_P (operand))
8679 *dt = vect_constant_def;
8680 return true;
8683 if (is_gimple_min_invariant (operand))
8685 *dt = vect_external_def;
8686 return true;
8689 if (TREE_CODE (operand) != SSA_NAME)
8691 if (dump_enabled_p ())
8692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8693 "not ssa-name.\n");
8694 return false;
8697 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8699 *dt = vect_external_def;
8700 return true;
8703 *def_stmt = SSA_NAME_DEF_STMT (operand);
8704 if (dump_enabled_p ())
8706 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8707 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8710 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8711 *dt = vect_external_def;
8712 else
8714 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8715 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8718 if (dump_enabled_p ())
8720 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8721 switch (*dt)
8723 case vect_uninitialized_def:
8724 dump_printf (MSG_NOTE, "uninitialized\n");
8725 break;
8726 case vect_constant_def:
8727 dump_printf (MSG_NOTE, "constant\n");
8728 break;
8729 case vect_external_def:
8730 dump_printf (MSG_NOTE, "external\n");
8731 break;
8732 case vect_internal_def:
8733 dump_printf (MSG_NOTE, "internal\n");
8734 break;
8735 case vect_induction_def:
8736 dump_printf (MSG_NOTE, "induction\n");
8737 break;
8738 case vect_reduction_def:
8739 dump_printf (MSG_NOTE, "reduction\n");
8740 break;
8741 case vect_double_reduction_def:
8742 dump_printf (MSG_NOTE, "double reduction\n");
8743 break;
8744 case vect_nested_cycle:
8745 dump_printf (MSG_NOTE, "nested cycle\n");
8746 break;
8747 case vect_unknown_def_type:
8748 dump_printf (MSG_NOTE, "unknown\n");
8749 break;
8753 if (*dt == vect_unknown_def_type)
8755 if (dump_enabled_p ())
8756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8757 "Unsupported pattern.\n");
8758 return false;
8761 switch (gimple_code (*def_stmt))
8763 case GIMPLE_PHI:
8764 case GIMPLE_ASSIGN:
8765 case GIMPLE_CALL:
8766 break;
8767 default:
8768 if (dump_enabled_p ())
8769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8770 "unsupported defining stmt:\n");
8771 return false;
8774 return true;
8777 /* Function vect_is_simple_use.
8779 Same as vect_is_simple_use but also determines the vector operand
8780 type of OPERAND and stores it to *VECTYPE. If the definition of
8781 OPERAND is vect_uninitialized_def, vect_constant_def or
8782 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8783 is responsible for computing the best suited vector type for the
8784 scalar operand. */
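/* Illustrative example only (hypothetical): for an operand whose def stmt
   lies inside the vectorized region, *VECTYPE is taken from that stmt's
   stmt_vec_info (say vector(4) int); for a constant or external operand
   *VECTYPE is NULL_TREE and the caller chooses a suitable type itself.  */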
8786 bool
8787 vect_is_simple_use (tree operand, vec_info *vinfo,
8788 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8790 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8791 return false;
8793 /* Now get a vector type if the def is internal, otherwise supply
8794 NULL_TREE and leave it up to the caller to figure out a proper
8795 type for the use stmt. */
8796 if (*dt == vect_internal_def
8797 || *dt == vect_induction_def
8798 || *dt == vect_reduction_def
8799 || *dt == vect_double_reduction_def
8800 || *dt == vect_nested_cycle)
8802 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8804 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8805 && !STMT_VINFO_RELEVANT (stmt_info)
8806 && !STMT_VINFO_LIVE_P (stmt_info))
8807 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8809 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8810 gcc_assert (*vectype != NULL_TREE);
8812 else if (*dt == vect_uninitialized_def
8813 || *dt == vect_constant_def
8814 || *dt == vect_external_def)
8815 *vectype = NULL_TREE;
8816 else
8817 gcc_unreachable ();
8819 return true;
8823 /* Function supportable_widening_operation
8825 Check whether an operation represented by the code CODE is a
8826 widening operation that is supported by the target platform in
8827 vector form (i.e., when operating on arguments of type VECTYPE_IN
8828 producing a result of type VECTYPE_OUT).
8830 Widening operations we currently support are NOP (CONVERT), FLOAT
8831 and WIDEN_MULT. This function checks if these operations are supported
8832 by the target platform either directly (via vector tree-codes), or via
8833 target builtins.
8835 Output:
8836 - CODE1 and CODE2 are codes of vector operations to be used when
8837 vectorizing the operation, if available.
8838 - MULTI_STEP_CVT determines the number of required intermediate steps in
8839 case of multi-step conversion (like char->short->int - in that case
8840 MULTI_STEP_CVT will be 1).
8841 - INTERM_TYPES contains the intermediate type required to perform the
8842 widening operation (short in the above example). */
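/* Illustrative example only (target dependent): converting a vector of chars
   to ints usually has no single-step unpack, so the conversion is done
   char->short->int.  In that case the outputs would be

       *code1 / *code2  = VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR
                          (possibly swapped on big-endian targets)
       *multi_step_cvt  = 1
       *interm_types    = { vector of short }

   and each step doubles the element width, splitting one input vector into a
   lo/hi pair of wider vectors.  */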
8844 bool
8845 supportable_widening_operation (enum tree_code code, gimple *stmt,
8846 tree vectype_out, tree vectype_in,
8847 enum tree_code *code1, enum tree_code *code2,
8848 int *multi_step_cvt,
8849 vec<tree> *interm_types)
8851 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8852 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8853 struct loop *vect_loop = NULL;
8854 machine_mode vec_mode;
8855 enum insn_code icode1, icode2;
8856 optab optab1, optab2;
8857 tree vectype = vectype_in;
8858 tree wide_vectype = vectype_out;
8859 enum tree_code c1, c2;
8860 int i;
8861 tree prev_type, intermediate_type;
8862 machine_mode intermediate_mode, prev_mode;
8863 optab optab3, optab4;
8865 *multi_step_cvt = 0;
8866 if (loop_info)
8867 vect_loop = LOOP_VINFO_LOOP (loop_info);
8869 switch (code)
8871 case WIDEN_MULT_EXPR:
8872 /* The result of a vectorized widening operation usually requires
8873 two vectors (because the widened results do not fit into one vector).
8874 The generated vector results would normally be expected to be
8875 generated in the same order as in the original scalar computation,
8876 i.e. if 8 results are generated in each vector iteration, they are
8877 to be organized as follows:
8878 vect1: [res1,res2,res3,res4],
8879 vect2: [res5,res6,res7,res8].
8881 However, in the special case that the result of the widening
8882 operation is used in a reduction computation only, the order doesn't
8883 matter (because when vectorizing a reduction we change the order of
8884 the computation). Some targets can take advantage of this and
8885 generate more efficient code. For example, targets like Altivec,
8886 that support widen_mult using a sequence of {mult_even,mult_odd}
8887 generate the following vectors:
8888 vect1: [res1,res3,res5,res7],
8889 vect2: [res2,res4,res6,res8].
8891 When vectorizing outer-loops, we execute the inner-loop sequentially
8892 (each vectorized inner-loop iteration contributes to VF outer-loop
8893 iterations in parallel). We therefore don't allow changing the
8894 order of the computation in the inner-loop during outer-loop
8895 vectorization. */
8896 /* TODO: Another case in which order doesn't *really* matter is when we
8897 widen and then contract again, e.g. (short)((int)x * y >> 8).
8898 Normally, pack_trunc performs an even/odd permute, whereas the
8899 repack from an even/odd expansion would be an interleave, which
8900 would be significantly simpler for e.g. AVX2. */
8901 /* In any case, in order to avoid duplicating the code below, recurse
8902 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8903 are properly set up for the caller. If we fail, we'll continue with
8904 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8905 if (vect_loop
8906 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8907 && !nested_in_vect_loop_p (vect_loop, stmt)
8908 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8909 stmt, vectype_out, vectype_in,
8910 code1, code2, multi_step_cvt,
8911 interm_types))
8913 /* Elements in a vector with vect_used_by_reduction property cannot
8914 be reordered if the use chain with this property does not have the
8915 same operation. One such example is s += a * b, where elements
8916 in a and b cannot be reordered. Here we check if the vector defined
8917 by STMT is only directly used in the reduction statement. */
8918 tree lhs = gimple_assign_lhs (stmt);
8919 use_operand_p dummy;
8920 gimple *use_stmt;
8921 stmt_vec_info use_stmt_info = NULL;
8922 if (single_imm_use (lhs, &dummy, &use_stmt)
8923 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8924 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8925 return true;
8927 c1 = VEC_WIDEN_MULT_LO_EXPR;
8928 c2 = VEC_WIDEN_MULT_HI_EXPR;
8929 break;
8931 case DOT_PROD_EXPR:
8932 c1 = DOT_PROD_EXPR;
8933 c2 = DOT_PROD_EXPR;
8934 break;
8936 case SAD_EXPR:
8937 c1 = SAD_EXPR;
8938 c2 = SAD_EXPR;
8939 break;
8941 case VEC_WIDEN_MULT_EVEN_EXPR:
8942 /* Support the recursion induced just above. */
8943 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8944 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8945 break;
8947 case WIDEN_LSHIFT_EXPR:
8948 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8949 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8950 break;
8952 CASE_CONVERT:
8953 c1 = VEC_UNPACK_LO_EXPR;
8954 c2 = VEC_UNPACK_HI_EXPR;
8955 break;
8957 case FLOAT_EXPR:
8958 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8959 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8960 break;
8962 case FIX_TRUNC_EXPR:
8963 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8964 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8965 computing the operation. */
8966 return false;
8968 default:
8969 gcc_unreachable ();
8972 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8973 std::swap (c1, c2);
8975 if (code == FIX_TRUNC_EXPR)
8977 /* The signedness is determined from the output operand. */
8978 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8979 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8981 else
8983 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8984 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8987 if (!optab1 || !optab2)
8988 return false;
8990 vec_mode = TYPE_MODE (vectype);
8991 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8992 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8993 return false;
8995 *code1 = c1;
8996 *code2 = c2;
8998 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8999 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9000 /* For scalar masks we may have different boolean
9001 vector types having the same QImode. Thus we
9002 add an additional check for the number of elements. */
9003 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9004 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9005 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9007 /* Check if it's a multi-step conversion that can be done using intermediate
9008 types. */
9010 prev_type = vectype;
9011 prev_mode = vec_mode;
9013 if (!CONVERT_EXPR_CODE_P (code))
9014 return false;
9016 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9017 intermediate steps in the promotion sequence. We try
9018 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9019 not. */
9020 interm_types->create (MAX_INTERM_CVT_STEPS);
9021 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9023 intermediate_mode = insn_data[icode1].operand[0].mode;
9024 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9026 intermediate_type
9027 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9028 current_vector_size);
9029 if (intermediate_mode != TYPE_MODE (intermediate_type))
9030 return false;
9032 else
9033 intermediate_type
9034 = lang_hooks.types.type_for_mode (intermediate_mode,
9035 TYPE_UNSIGNED (prev_type));
9037 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9038 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9040 if (!optab3 || !optab4
9041 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9042 || insn_data[icode1].operand[0].mode != intermediate_mode
9043 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9044 || insn_data[icode2].operand[0].mode != intermediate_mode
9045 || ((icode1 = optab_handler (optab3, intermediate_mode))
9046 == CODE_FOR_nothing)
9047 || ((icode2 = optab_handler (optab4, intermediate_mode))
9048 == CODE_FOR_nothing))
9049 break;
9051 interm_types->quick_push (intermediate_type);
9052 (*multi_step_cvt)++;
9054 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9055 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9056 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9057 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9058 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9060 prev_type = intermediate_type;
9061 prev_mode = intermediate_mode;
9064 interm_types->release ();
9065 return false;
9069 /* Function supportable_narrowing_operation
9071 Check whether an operation represented by the code CODE is a
9072 narrowing operation that is supported by the target platform in
9073 vector form (i.e., when operating on arguments of type VECTYPE_IN
9074 and producing a result of type VECTYPE_OUT).
9076 Narrowing operations we currently support are NOP (CONVERT) and
9077 FIX_TRUNC. This function checks if these operations are supported by
9078 the target platform directly via vector tree-codes.
9080 Output:
9081 - CODE1 is the code of a vector operation to be used when
9082 vectorizing the operation, if available.
9083 - MULTI_STEP_CVT determines the number of required intermediate steps in
9084 case of multi-step conversion (like int->short->char - in that case
9085 MULTI_STEP_CVT will be 1).
9086 - INTERM_TYPES contains the intermediate type required to perform the
9087 narrowing operation (short in the above example). */
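/* Illustrative example only (target dependent): converting a vector of ints
   to chars is typically done int->short->char.  In that case the outputs
   would be

       *code1           = VEC_PACK_TRUNC_EXPR
       *multi_step_cvt  = 1
       *interm_types    = { vector of short }

   and each step packs two input vectors into one vector of elements of half
   the width.  */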
9089 bool
9090 supportable_narrowing_operation (enum tree_code code,
9091 tree vectype_out, tree vectype_in,
9092 enum tree_code *code1, int *multi_step_cvt,
9093 vec<tree> *interm_types)
9095 machine_mode vec_mode;
9096 enum insn_code icode1;
9097 optab optab1, interm_optab;
9098 tree vectype = vectype_in;
9099 tree narrow_vectype = vectype_out;
9100 enum tree_code c1;
9101 tree intermediate_type, prev_type;
9102 machine_mode intermediate_mode, prev_mode;
9103 int i;
9104 bool uns;
9106 *multi_step_cvt = 0;
9107 switch (code)
9109 CASE_CONVERT:
9110 c1 = VEC_PACK_TRUNC_EXPR;
9111 break;
9113 case FIX_TRUNC_EXPR:
9114 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9115 break;
9117 case FLOAT_EXPR:
9118 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9119 tree code and optabs used for computing the operation. */
9120 return false;
9122 default:
9123 gcc_unreachable ();
9126 if (code == FIX_TRUNC_EXPR)
9127 /* The signedness is determined from the output operand. */
9128 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9129 else
9130 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9132 if (!optab1)
9133 return false;
9135 vec_mode = TYPE_MODE (vectype);
9136 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9137 return false;
9139 *code1 = c1;
9141 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9142 /* For scalar masks we may have different boolean
9143 vector types having the same QImode. Thus we
9144 add an additional check for the number of elements. */
9145 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9146 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9147 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9149 /* Check if it's a multi-step conversion that can be done using intermediate
9150 types. */
9151 prev_mode = vec_mode;
9152 prev_type = vectype;
9153 if (code == FIX_TRUNC_EXPR)
9154 uns = TYPE_UNSIGNED (vectype_out);
9155 else
9156 uns = TYPE_UNSIGNED (vectype);
9158 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9159 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9160 costly than signed. */
9161 if (code == FIX_TRUNC_EXPR && uns)
9163 enum insn_code icode2;
9165 intermediate_type
9166 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9167 interm_optab
9168 = optab_for_tree_code (c1, intermediate_type, optab_default);
9169 if (interm_optab != unknown_optab
9170 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9171 && insn_data[icode1].operand[0].mode
9172 == insn_data[icode2].operand[0].mode)
9174 uns = false;
9175 optab1 = interm_optab;
9176 icode1 = icode2;
9180 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9181 intermediate steps in the narrowing sequence. We try
9182 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9183 interm_types->create (MAX_INTERM_CVT_STEPS);
9184 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9186 intermediate_mode = insn_data[icode1].operand[0].mode;
9187 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9189 intermediate_type
9190 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9191 current_vector_size);
9192 if (intermediate_mode != TYPE_MODE (intermediate_type))
9193 return false;
9195 else
9196 intermediate_type
9197 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9198 interm_optab
9199 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9200 optab_default);
9201 if (!interm_optab
9202 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9203 || insn_data[icode1].operand[0].mode != intermediate_mode
9204 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9205 == CODE_FOR_nothing))
9206 break;
9208 interm_types->quick_push (intermediate_type);
9209 (*multi_step_cvt)++;
9211 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9212 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9213 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9214 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9216 prev_mode = intermediate_mode;
9217 prev_type = intermediate_type;
9218 optab1 = interm_optab;
9221 interm_types->release ();
9222 return false;