gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "target.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "input.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "basic-block.h"
47 #include "gimple-pretty-print.h"
48 #include "tree-ssa-alias.h"
49 #include "internal-fn.h"
50 #include "tree-eh.h"
51 #include "gimple-expr.h"
52 #include "is-a.h"
53 #include "gimple.h"
54 #include "gimplify.h"
55 #include "gimple-iterator.h"
56 #include "gimplify-me.h"
57 #include "gimple-ssa.h"
58 #include "tree-cfg.h"
59 #include "tree-phinodes.h"
60 #include "ssa-iterators.h"
61 #include "stringpool.h"
62 #include "tree-ssanames.h"
63 #include "tree-ssa-loop-manip.h"
64 #include "cfgloop.h"
65 #include "tree-ssa-loop.h"
66 #include "tree-scalar-evolution.h"
67 #include "expr.h"
68 #include "recog.h" /* FIXME: for insn_data */
69 #include "insn-codes.h"
70 #include "optabs.h"
71 #include "diagnostic-core.h"
72 #include "tree-vectorizer.h"
73 #include "dumpfile.h"
74 #include "hash-map.h"
75 #include "plugin-api.h"
76 #include "ipa-ref.h"
77 #include "cgraph.h"
78 #include "builtins.h"
80 /* For lang_hooks.types.type_for_mode. */
81 #include "langhooks.h"
83 /* Return the vectorized type for the given statement. */
85 tree
86 stmt_vectype (struct _stmt_vec_info *stmt_info)
88 return STMT_VINFO_VECTYPE (stmt_info);
91 /* Return TRUE iff the given statement is in an inner loop relative to
92 the loop being vectorized. */
93 bool
94 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
96 gimple stmt = STMT_VINFO_STMT (stmt_info);
97 basic_block bb = gimple_bb (stmt);
98 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
99 struct loop* loop;
101 if (!loop_vinfo)
102 return false;
104 loop = LOOP_VINFO_LOOP (loop_vinfo);
106 return (bb->loop_father == loop->inner);
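/* Illustrative example: when vectorizing an outer loop, this predicate is
   true for stmts located in the contained inner loop and false for stmts
   of the outer loop itself.  */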
109 /* Record the cost of a statement, either by directly informing the
110 target model or by saving it in a vector for later processing.
111 Return a preliminary estimate of the statement's cost. */
113 unsigned
114 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
115 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
116 int misalign, enum vect_cost_model_location where)
118 if (body_cost_vec)
120 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
121 add_stmt_info_to_vec (body_cost_vec, count, kind,
122 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
123 misalign);
124 return (unsigned)
125 (builtin_vectorization_cost (kind, vectype, misalign) * count);
128 else
130 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
131 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
132 void *target_cost_data;
134 if (loop_vinfo)
135 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
136 else
137 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
139 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
140 misalign, where);
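/* Usage sketch (illustrative): callers that want the cost kept for later
   processing pass a cost vector, e.g.
     record_stmt_cost (&body_cost_vec, ncopies, vector_stmt, stmt_info,
                       0, vect_body);
   passing a NULL vector instead hands the cost directly to the target via
   add_stmt_cost on the loop's (or basic block's) target cost data.  */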
144 /* Return a variable of type ELEM_TYPE[NELEMS]. */
146 static tree
147 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
149 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
150 "vect_array");
153 /* ARRAY is an array of vectors created by create_vector_array.
154 Return an SSA_NAME for the vector in index N. The reference
155 is part of the vectorization of STMT and the vector is associated
156 with scalar destination SCALAR_DEST. */
158 static tree
159 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
160 tree array, unsigned HOST_WIDE_INT n)
162 tree vect_type, vect, vect_name, array_ref;
163 gimple new_stmt;
165 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
166 vect_type = TREE_TYPE (TREE_TYPE (array));
167 vect = vect_create_destination_var (scalar_dest, vect_type);
168 array_ref = build4 (ARRAY_REF, vect_type, array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
172 new_stmt = gimple_build_assign (vect, array_ref);
173 vect_name = make_ssa_name (vect, new_stmt);
174 gimple_assign_set_lhs (new_stmt, vect_name);
175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
177 return vect_name;
180 /* ARRAY is an array of vectors created by create_vector_array.
181 Emit code to store SSA_NAME VECT in index N of the array.
182 The store is part of the vectorization of STMT. */
184 static void
185 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
186 tree array, unsigned HOST_WIDE_INT n)
188 tree array_ref;
189 gimple new_stmt;
191 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
192 build_int_cst (size_type_node, n),
193 NULL_TREE, NULL_TREE);
195 new_stmt = gimple_build_assign (array_ref, vect);
196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
199 /* PTR is a pointer to an array of type TYPE. Return a representation
200 of *PTR. The memory reference replaces those in FIRST_DR
201 (and its group). */
203 static tree
204 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
206 tree mem_ref, alias_ptr_type;
208 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
209 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
210 /* Arrays have the same alignment as their type. */
211 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
212 return mem_ref;
215 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
217 /* Function vect_mark_relevant.
219 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
221 static void
222 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
223 enum vect_relevant relevant, bool live_p,
224 bool used_in_pattern)
226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
227 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
229 gimple pattern_stmt;
231 if (dump_enabled_p ())
232 dump_printf_loc (MSG_NOTE, vect_location,
233 "mark relevant %d, live %d.\n", relevant, live_p);
235 /* If this stmt is an original stmt in a pattern, we might need to mark its
236 related pattern stmt instead of the original stmt. However, such stmts
 237      may have their own uses that are not in any pattern; in such cases the
238 stmt itself should be marked. */
239 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
241 bool found = false;
242 if (!used_in_pattern)
244 imm_use_iterator imm_iter;
245 use_operand_p use_p;
246 gimple use_stmt;
247 tree lhs;
248 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
249 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
251 if (is_gimple_assign (stmt))
252 lhs = gimple_assign_lhs (stmt);
253 else
254 lhs = gimple_call_lhs (stmt);
 256          /* This use is not part of the pattern.  If LHS has other uses that are
257 pattern uses, we should mark the stmt itself, and not the pattern
258 stmt. */
259 if (lhs && TREE_CODE (lhs) == SSA_NAME)
260 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
262 if (is_gimple_debug (USE_STMT (use_p)))
263 continue;
264 use_stmt = USE_STMT (use_p);
266 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
267 continue;
269 if (vinfo_for_stmt (use_stmt)
270 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
272 found = true;
273 break;
278 if (!found)
280 /* This is the last stmt in a sequence that was detected as a
281 pattern that can potentially be vectorized. Don't mark the stmt
282 as relevant/live because it's not going to be vectorized.
283 Instead mark the pattern-stmt that replaces it. */
285 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
287 if (dump_enabled_p ())
288 dump_printf_loc (MSG_NOTE, vect_location,
289 "last stmt in pattern. don't mark"
290 " relevant/live.\n");
291 stmt_info = vinfo_for_stmt (pattern_stmt);
292 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
293 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
294 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
295 stmt = pattern_stmt;
299 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
300 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
301 STMT_VINFO_RELEVANT (stmt_info) = relevant;
303 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
304 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
306 if (dump_enabled_p ())
307 dump_printf_loc (MSG_NOTE, vect_location,
308 "already marked relevant/live.\n");
309 return;
312 worklist->safe_push (stmt);
316 /* Function vect_stmt_relevant_p.
318 Return true if STMT in loop that is represented by LOOP_VINFO is
319 "relevant for vectorization".
321 A stmt is considered "relevant for vectorization" if:
322 - it has uses outside the loop.
323 - it has vdefs (it alters memory).
 324    - it is a control stmt in the loop (except for the exit condition).
326 CHECKME: what other side effects would the vectorizer allow? */
328 static bool
329 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
330 enum vect_relevant *relevant, bool *live_p)
332 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
333 ssa_op_iter op_iter;
334 imm_use_iterator imm_iter;
335 use_operand_p use_p;
336 def_operand_p def_p;
338 *relevant = vect_unused_in_scope;
339 *live_p = false;
341 /* cond stmt other than loop exit cond. */
342 if (is_ctrl_stmt (stmt)
343 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
344 != loop_exit_ctrl_vec_info_type)
345 *relevant = vect_used_in_scope;
347 /* changing memory. */
348 if (gimple_code (stmt) != GIMPLE_PHI)
349 if (gimple_vdef (stmt)
350 && !gimple_clobber_p (stmt))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE, vect_location,
354 "vec_stmt_relevant_p: stmt has vdefs.\n");
355 *relevant = vect_used_in_scope;
358 /* uses outside the loop. */
359 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
361 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
363 basic_block bb = gimple_bb (USE_STMT (use_p));
364 if (!flow_bb_inside_loop_p (loop, bb))
366 if (dump_enabled_p ())
367 dump_printf_loc (MSG_NOTE, vect_location,
368 "vec_stmt_relevant_p: used out of loop.\n");
370 if (is_gimple_debug (USE_STMT (use_p)))
371 continue;
373 /* We expect all such uses to be in the loop exit phis
374 (because of loop closed form) */
375 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
376 gcc_assert (bb == single_exit (loop)->dest);
378 *live_p = true;
383 return (*live_p || *relevant);
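/* Illustrative example:

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   relevant: the store alters memory (has a vdef)
         s = b[i];          live only if s is used after the loop, via the
       }                    loop exit phi

   the definition of s is marked live when its only uses are outside the
   loop; the store is marked relevant regardless.  */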
387 /* Function exist_non_indexing_operands_for_use_p
389 USE is one of the uses attached to STMT. Check if USE is
390 used in STMT for anything other than indexing an array. */
392 static bool
393 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
395 tree operand;
396 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
398 /* USE corresponds to some operand in STMT. If there is no data
399 reference in STMT, then any operand that corresponds to USE
400 is not indexing an array. */
401 if (!STMT_VINFO_DATA_REF (stmt_info))
402 return true;
 404   /* STMT has a data_ref. FORNOW this means that it is one of
405 the following forms:
406 -1- ARRAY_REF = var
407 -2- var = ARRAY_REF
408 (This should have been verified in analyze_data_refs).
410 'var' in the second case corresponds to a def, not a use,
411 so USE cannot correspond to any operands that are not used
412 for array indexing.
414 Therefore, all we need to check is if STMT falls into the
415 first case, and whether var corresponds to USE. */
417 if (!gimple_assign_copy_p (stmt))
419 if (is_gimple_call (stmt)
420 && gimple_call_internal_p (stmt))
421 switch (gimple_call_internal_fn (stmt))
423 case IFN_MASK_STORE:
424 operand = gimple_call_arg (stmt, 3);
425 if (operand == use)
426 return true;
427 /* FALLTHRU */
428 case IFN_MASK_LOAD:
429 operand = gimple_call_arg (stmt, 2);
430 if (operand == use)
431 return true;
432 break;
433 default:
434 break;
436 return false;
439 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
440 return false;
441 operand = gimple_assign_rhs1 (stmt);
442 if (TREE_CODE (operand) != SSA_NAME)
443 return false;
445 if (operand == use)
446 return true;
448 return false;
453 Function process_use.
455 Inputs:
456 - a USE in STMT in a loop represented by LOOP_VINFO
457 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
458 that defined USE. This is done by calling mark_relevant and passing it
459 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
460 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
461 be performed.
463 Outputs:
464 Generally, LIVE_P and RELEVANT are used to define the liveness and
465 relevance info of the DEF_STMT of this USE:
466 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
467 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
468 Exceptions:
469 - case 1: If USE is used only for address computations (e.g. array indexing),
470 which does not need to be directly vectorized, then the liveness/relevance
471 of the respective DEF_STMT is left unchanged.
472 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 473      skip DEF_STMT because it has already been processed.
474 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
475 be modified accordingly.
477 Return true if everything is as expected. Return false otherwise. */
479 static bool
480 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
481 enum vect_relevant relevant, vec<gimple> *worklist,
482 bool force)
484 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
485 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
486 stmt_vec_info dstmt_vinfo;
487 basic_block bb, def_bb;
488 tree def;
489 gimple def_stmt;
490 enum vect_def_type dt;
492 /* case 1: we are only interested in uses that need to be vectorized. Uses
493 that are used for address computation are not considered relevant. */
494 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
495 return true;
497 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
501 "not vectorized: unsupported use in stmt.\n");
502 return false;
505 if (!def_stmt || gimple_nop_p (def_stmt))
506 return true;
508 def_bb = gimple_bb (def_stmt);
509 if (!flow_bb_inside_loop_p (loop, def_bb))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
513 return true;
516 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
517 DEF_STMT must have already been processed, because this should be the
518 only way that STMT, which is a reduction-phi, was put in the worklist,
519 as there should be no other uses for DEF_STMT in the loop. So we just
520 check that everything is as expected, and we are done. */
521 dstmt_vinfo = vinfo_for_stmt (def_stmt);
522 bb = gimple_bb (stmt);
523 if (gimple_code (stmt) == GIMPLE_PHI
524 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
525 && gimple_code (def_stmt) != GIMPLE_PHI
526 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
527 && bb->loop_father == def_bb->loop_father)
529 if (dump_enabled_p ())
530 dump_printf_loc (MSG_NOTE, vect_location,
531 "reduc-stmt defining reduc-phi in the same nest.\n");
532 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
533 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
534 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
535 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
536 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
537 return true;
540 /* case 3a: outer-loop stmt defining an inner-loop stmt:
541 outer-loop-header-bb:
542 d = def_stmt
543 inner-loop:
544 stmt # use (d)
545 outer-loop-tail-bb:
546 ... */
547 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
549 if (dump_enabled_p ())
550 dump_printf_loc (MSG_NOTE, vect_location,
551 "outer-loop def-stmt defining inner-loop stmt.\n");
553 switch (relevant)
555 case vect_unused_in_scope:
556 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
557 vect_used_in_scope : vect_unused_in_scope;
558 break;
560 case vect_used_in_outer_by_reduction:
561 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
562 relevant = vect_used_by_reduction;
563 break;
565 case vect_used_in_outer:
566 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
567 relevant = vect_used_in_scope;
568 break;
570 case vect_used_in_scope:
571 break;
573 default:
574 gcc_unreachable ();
578 /* case 3b: inner-loop stmt defining an outer-loop stmt:
579 outer-loop-header-bb:
581 inner-loop:
582 d = def_stmt
583 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
584 stmt # use (d) */
585 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_NOTE, vect_location,
589 "inner-loop def-stmt defining outer-loop stmt.\n");
591 switch (relevant)
593 case vect_unused_in_scope:
594 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
595 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
596 vect_used_in_outer_by_reduction : vect_unused_in_scope;
597 break;
599 case vect_used_by_reduction:
600 relevant = vect_used_in_outer_by_reduction;
601 break;
603 case vect_used_in_scope:
604 relevant = vect_used_in_outer;
605 break;
607 default:
608 gcc_unreachable ();
612 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
613 is_pattern_stmt_p (stmt_vinfo));
614 return true;
618 /* Function vect_mark_stmts_to_be_vectorized.
620 Not all stmts in the loop need to be vectorized. For example:
622 for i...
623 for j...
624 1. T0 = i + j
625 2. T1 = a[T0]
627 3. j = j + 1
 629    Stmts 1 and 3 do not need to be vectorized, because loop control and
630 addressing of vectorized data-refs are handled differently.
632 This pass detects such stmts. */
634 bool
635 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
637 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
638 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
639 unsigned int nbbs = loop->num_nodes;
640 gimple_stmt_iterator si;
641 gimple stmt;
642 unsigned int i;
643 stmt_vec_info stmt_vinfo;
644 basic_block bb;
645 gimple phi;
646 bool live_p;
647 enum vect_relevant relevant, tmp_relevant;
648 enum vect_def_type def_type;
650 if (dump_enabled_p ())
651 dump_printf_loc (MSG_NOTE, vect_location,
652 "=== vect_mark_stmts_to_be_vectorized ===\n");
654 auto_vec<gimple, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 phi = gsi_stmt (si);
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
667 dump_printf (MSG_NOTE, "\n");
670 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
671 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
673 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
675 stmt = gsi_stmt (si);
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
680 dump_printf (MSG_NOTE, "\n");
683 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
684 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
688 /* 2. Process_worklist */
689 while (worklist.length () > 0)
691 use_operand_p use_p;
692 ssa_op_iter iter;
694 stmt = worklist.pop ();
695 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
699 dump_printf (MSG_NOTE, "\n");
702 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
703 (DEF_STMT) as relevant/irrelevant and live/dead according to the
704 liveness and relevance properties of STMT. */
705 stmt_vinfo = vinfo_for_stmt (stmt);
706 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
707 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
709 /* Generally, the liveness and relevance properties of STMT are
710 propagated as is to the DEF_STMTs of its USEs:
711 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
712 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
714 One exception is when STMT has been identified as defining a reduction
715 variable; in this case we set the liveness/relevance as follows:
716 live_p = false
717 relevant = vect_used_by_reduction
718 This is because we distinguish between two kinds of relevant stmts -
719 those that are used by a reduction computation, and those that are
720 (also) used by a regular computation. This allows us later on to
721 identify stmts that are used solely by a reduction, and therefore the
722 order of the results that they produce does not have to be kept. */
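/* Illustrative example: for a reduction such as 'sum += a[i]', the load of
   a[i] feeds only the reduction computation, so it is marked
   vect_used_by_reduction; stmts used solely by a reduction need not
   preserve the order of the results they produce.  */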
724 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
725 tmp_relevant = relevant;
726 switch (def_type)
728 case vect_reduction_def:
729 switch (tmp_relevant)
731 case vect_unused_in_scope:
732 relevant = vect_used_by_reduction;
733 break;
735 case vect_used_by_reduction:
736 if (gimple_code (stmt) == GIMPLE_PHI)
737 break;
738 /* fall through */
740 default:
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743 "unsupported use of reduction.\n");
744 return false;
747 live_p = false;
748 break;
750 case vect_nested_cycle:
751 if (tmp_relevant != vect_unused_in_scope
752 && tmp_relevant != vect_used_in_outer_by_reduction
753 && tmp_relevant != vect_used_in_outer)
755 if (dump_enabled_p ())
756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
757 "unsupported use of nested cycle.\n");
759 return false;
762 live_p = false;
763 break;
765 case vect_double_reduction_def:
766 if (tmp_relevant != vect_unused_in_scope
767 && tmp_relevant != vect_used_by_reduction)
769 if (dump_enabled_p ())
770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
771 "unsupported use of double reduction.\n");
773 return false;
776 live_p = false;
777 break;
779 default:
780 break;
783 if (is_pattern_stmt_p (stmt_vinfo))
785 /* Pattern statements are not inserted into the code, so
786 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
787 have to scan the RHS or function arguments instead. */
788 if (is_gimple_assign (stmt))
790 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
791 tree op = gimple_assign_rhs1 (stmt);
793 i = 1;
794 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
796 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
797 live_p, relevant, &worklist, false)
798 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
799 live_p, relevant, &worklist, false))
800 return false;
801 i = 2;
803 for (; i < gimple_num_ops (stmt); i++)
805 op = gimple_op (stmt, i);
806 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
807 &worklist, false))
808 return false;
811 else if (is_gimple_call (stmt))
813 for (i = 0; i < gimple_call_num_args (stmt); i++)
815 tree arg = gimple_call_arg (stmt, i);
816 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
817 &worklist, false))
818 return false;
822 else
823 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
825 tree op = USE_FROM_PTR (use_p);
826 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
827 &worklist, false))
828 return false;
831 if (STMT_VINFO_GATHER_P (stmt_vinfo))
833 tree off;
834 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
835 gcc_assert (decl);
836 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
837 &worklist, true))
838 return false;
840 } /* while worklist */
842 return true;
846 /* Function vect_model_simple_cost.
848 Models cost for simple operations, i.e. those that only emit ncopies of a
849 single op. Right now, this does not account for multiple insns that could
850 be generated for the single vector op. We will handle that shortly. */
852 void
853 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
854 enum vect_def_type *dt,
855 stmt_vector_for_cost *prologue_cost_vec,
856 stmt_vector_for_cost *body_cost_vec)
858 int i;
859 int inside_cost = 0, prologue_cost = 0;
861 /* The SLP costs were already calculated during SLP tree build. */
862 if (PURE_SLP_STMT (stmt_info))
863 return;
865 /* FORNOW: Assuming maximum 2 args per stmts. */
866 for (i = 0; i < 2; i++)
867 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
868 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
869 stmt_info, 0, vect_prologue);
871 /* Pass the inside-of-loop statements to the target-specific cost model. */
872 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
873 stmt_info, 0, vect_body);
875 if (dump_enabled_p ())
876 dump_printf_loc (MSG_NOTE, vect_location,
877 "vect_model_simple_cost: inside_cost = %d, "
878 "prologue_cost = %d .\n", inside_cost, prologue_cost);
882 /* Model cost for type demotion and promotion operations. PWR is normally
883 zero for single-step promotions and demotions. It will be one if
884 two-step promotion/demotion is required, and so on. Each additional
885 step doubles the number of instructions required. */
887 static void
888 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
889 enum vect_def_type *dt, int pwr)
891 int i, tmp;
892 int inside_cost = 0, prologue_cost = 0;
893 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
894 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
895 void *target_cost_data;
897 /* The SLP costs were already calculated during SLP tree build. */
898 if (PURE_SLP_STMT (stmt_info))
899 return;
901 if (loop_vinfo)
902 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
903 else
904 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
906 for (i = 0; i < pwr + 1; i++)
908 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
909 (i + 1) : i;
910 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
911 vec_promote_demote, stmt_info, 0,
912 vect_body);
915 /* FORNOW: Assuming maximum 2 args per stmts. */
916 for (i = 0; i < 2; i++)
917 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
918 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
919 stmt_info, 0, vect_prologue);
921 if (dump_enabled_p ())
922 dump_printf_loc (MSG_NOTE, vect_location,
923 "vect_model_promotion_demotion_cost: inside_cost = %d, "
924 "prologue_cost = %d .\n", inside_cost, prologue_cost);
927 /* Function vect_cost_group_size
929 For grouped load or store, return the group_size only if it is the first
930 load or store of a group, else return 1. This ensures that group size is
931 only returned once per group. */
933 static int
934 vect_cost_group_size (stmt_vec_info stmt_info)
936 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938 if (first_stmt == STMT_VINFO_STMT (stmt_info))
939 return GROUP_SIZE (stmt_info);
941 return 1;
945 /* Function vect_model_store_cost
947 Models cost for stores. In the case of grouped accesses, one access
948 has the overhead of the grouped access attributed to it. */
950 void
951 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
952 bool store_lanes_p, enum vect_def_type dt,
953 slp_tree slp_node,
954 stmt_vector_for_cost *prologue_cost_vec,
955 stmt_vector_for_cost *body_cost_vec)
957 int group_size;
958 unsigned int inside_cost = 0, prologue_cost = 0;
959 struct data_reference *first_dr;
960 gimple first_stmt;
962 /* The SLP costs were already calculated during SLP tree build. */
963 if (PURE_SLP_STMT (stmt_info))
964 return;
966 if (dt == vect_constant_def || dt == vect_external_def)
967 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
968 stmt_info, 0, vect_prologue);
970 /* Grouped access? */
971 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
973 if (slp_node)
975 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
976 group_size = 1;
978 else
980 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
981 group_size = vect_cost_group_size (stmt_info);
984 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
986 /* Not a grouped access. */
987 else
989 group_size = 1;
990 first_dr = STMT_VINFO_DATA_REF (stmt_info);
993 /* We assume that the cost of a single store-lanes instruction is
994 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
995 access is instead being provided by a permute-and-store operation,
996 include the cost of the permutes. */
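/* Worked example (illustrative): with group_size == 4 and ncopies == 1,
   nstmts below is 1 * ceil_log2 (4) * 4 = 8 interleave/shuffle stmts.  */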
997 if (!store_lanes_p && group_size > 1)
 999       /* Uses high and low interleave or shuffle operations for each
1000 needed permute. */
1001 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1002 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1003 stmt_info, 0, vect_body);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1008 group_size);
1011 /* Costs of the stores. */
1012 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_NOTE, vect_location,
1016 "vect_model_store_cost: inside_cost = %d, "
1017 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1021 /* Calculate cost of DR's memory access. */
1022 void
1023 vect_get_store_cost (struct data_reference *dr, int ncopies,
1024 unsigned int *inside_cost,
1025 stmt_vector_for_cost *body_cost_vec)
1027 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1028 gimple stmt = DR_STMT (dr);
1029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1031 switch (alignment_support_scheme)
1033 case dr_aligned:
1035 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1036 vector_store, stmt_info, 0,
1037 vect_body);
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_NOTE, vect_location,
1041 "vect_model_store_cost: aligned.\n");
1042 break;
1045 case dr_unaligned_supported:
1047 /* Here, we assign an additional cost for the unaligned store. */
1048 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1049 unaligned_store, stmt_info,
1050 DR_MISALIGNMENT (dr), vect_body);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: unaligned supported by "
1054 "hardware.\n");
1055 break;
1058 case dr_unaligned_unsupported:
1060 *inside_cost = VECT_MAX_COST;
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1064 "vect_model_store_cost: unsupported access.\n");
1065 break;
1068 default:
1069 gcc_unreachable ();
1074 /* Function vect_model_load_cost
1076 Models cost for loads. In the case of grouped accesses, the last access
1077 has the overhead of the grouped access attributed to it. Since unaligned
1078 accesses are supported for loads, we also account for the costs of the
1079 access scheme chosen. */
1081 void
1082 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1083 bool load_lanes_p, slp_tree slp_node,
1084 stmt_vector_for_cost *prologue_cost_vec,
1085 stmt_vector_for_cost *body_cost_vec)
1087 int group_size;
1088 gimple first_stmt;
1089 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1090 unsigned int inside_cost = 0, prologue_cost = 0;
1092 /* The SLP costs were already calculated during SLP tree build. */
1093 if (PURE_SLP_STMT (stmt_info))
1094 return;
1096 /* Grouped accesses? */
1097 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1098 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1100 group_size = vect_cost_group_size (stmt_info);
1101 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1103 /* Not a grouped access. */
1104 else
1106 group_size = 1;
1107 first_dr = dr;
1110 /* We assume that the cost of a single load-lanes instruction is
1111 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1112 access is instead being provided by a load-and-permute operation,
1113 include the cost of the permutes. */
1114 if (!load_lanes_p && group_size > 1)
1116       /* Uses even and odd extract operations or shuffle operations
1117 for each needed permute. */
1118 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1119 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1120 stmt_info, 0, vect_body);
1122 if (dump_enabled_p ())
1123 dump_printf_loc (MSG_NOTE, vect_location,
1124 "vect_model_load_cost: strided group_size = %d .\n",
1125 group_size);
1128 /* The loads themselves. */
1129 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1131 /* N scalar loads plus gathering them into a vector. */
1132 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1133 inside_cost += record_stmt_cost (body_cost_vec,
1134 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1135 scalar_load, stmt_info, 0, vect_body);
1136 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1137 stmt_info, 0, vect_body);
1139 else
1140 vect_get_load_cost (first_dr, ncopies,
1141 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1142 || group_size > 1 || slp_node),
1143 &inside_cost, &prologue_cost,
1144 prologue_cost_vec, body_cost_vec, true);
1146 if (dump_enabled_p ())
1147 dump_printf_loc (MSG_NOTE, vect_location,
1148 "vect_model_load_cost: inside_cost = %d, "
1149 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1153 /* Calculate cost of DR's memory access. */
1154 void
1155 vect_get_load_cost (struct data_reference *dr, int ncopies,
1156 bool add_realign_cost, unsigned int *inside_cost,
1157 unsigned int *prologue_cost,
1158 stmt_vector_for_cost *prologue_cost_vec,
1159 stmt_vector_for_cost *body_cost_vec,
1160 bool record_prologue_costs)
1162 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1163 gimple stmt = DR_STMT (dr);
1164 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1166 switch (alignment_support_scheme)
1168 case dr_aligned:
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: aligned.\n");
1177 break;
1179 case dr_unaligned_supported:
1181 /* Here, we assign an additional cost for the unaligned load. */
1182 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1183 unaligned_load, stmt_info,
1184 DR_MISALIGNMENT (dr), vect_body);
1186 if (dump_enabled_p ())
1187 dump_printf_loc (MSG_NOTE, vect_location,
1188 "vect_model_load_cost: unaligned supported by "
1189 "hardware.\n");
1191 break;
1193 case dr_explicit_realign:
1195 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1196 vector_load, stmt_info, 0, vect_body);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1198 vec_perm, stmt_info, 0, vect_body);
1200 /* FIXME: If the misalignment remains fixed across the iterations of
1201 the containing loop, the following cost should be added to the
1202 prologue costs. */
1203 if (targetm.vectorize.builtin_mask_for_load)
1204 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1205 stmt_info, 0, vect_body);
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_NOTE, vect_location,
1209 "vect_model_load_cost: explicit realign\n");
1211 break;
1213 case dr_explicit_realign_optimized:
1215 if (dump_enabled_p ())
1216 dump_printf_loc (MSG_NOTE, vect_location,
1217 "vect_model_load_cost: unaligned software "
1218 "pipelined.\n");
1220 /* Unaligned software pipeline has a load of an address, an initial
1221 load, and possibly a mask operation to "prime" the loop. However,
1222 if this is an access in a group of loads, which provide grouped
1223 access, then the above cost should only be considered for one
1224 access in the group. Inside the loop, there is a load op
1225 and a realignment op. */
1227 if (add_realign_cost && record_prologue_costs)
1229 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1230 vector_stmt, stmt_info,
1231 0, vect_prologue);
1232 if (targetm.vectorize.builtin_mask_for_load)
1233 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1234 vector_stmt, stmt_info,
1235 0, vect_prologue);
1238 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1239 stmt_info, 0, vect_body);
1240 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1241 stmt_info, 0, vect_body);
1243 if (dump_enabled_p ())
1244 dump_printf_loc (MSG_NOTE, vect_location,
1245 "vect_model_load_cost: explicit realign optimized"
1246 "\n");
1248 break;
1251 case dr_unaligned_unsupported:
1253 *inside_cost = VECT_MAX_COST;
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1257 "vect_model_load_cost: unsupported access.\n");
1258 break;
1261 default:
1262 gcc_unreachable ();
1266 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1267 the loop preheader for the vectorized stmt STMT. */
1269 static void
1270 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1272 if (gsi)
1273 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1274 else
1276 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1279 if (loop_vinfo)
1281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1282 basic_block new_bb;
1283 edge pe;
1285 if (nested_in_vect_loop_p (loop, stmt))
1286 loop = loop->inner;
1288 pe = loop_preheader_edge (loop);
1289 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1290 gcc_assert (!new_bb);
1292 else
1294 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1295 basic_block bb;
1296 gimple_stmt_iterator gsi_bb_start;
1298 gcc_assert (bb_vinfo);
1299 bb = BB_VINFO_BB (bb_vinfo);
1300 gsi_bb_start = gsi_after_labels (bb);
1301 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1305 if (dump_enabled_p ())
1307 dump_printf_loc (MSG_NOTE, vect_location,
1308 "created new init_stmt: ");
1309 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1310 dump_printf (MSG_NOTE, "\n");
1314 /* Function vect_init_vector.
1316 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1317 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1318    a vector type, a vector with all elements equal to VAL is created first.
1319    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1320 initialization at the loop preheader.
1321 Return the DEF of INIT_STMT.
1322 It will be used in the vectorization of STMT. */
1324 tree
1325 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1327 tree new_var;
1328 gimple init_stmt;
1329 tree vec_oprnd;
1330 tree new_temp;
1332 if (TREE_CODE (type) == VECTOR_TYPE
1333 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1335 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1337 if (CONSTANT_CLASS_P (val))
1338 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1339 else
1341 new_temp = make_ssa_name (TREE_TYPE (type));
1342 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1343 vect_init_vector_1 (stmt, init_stmt, gsi);
1344 val = new_temp;
1347 val = build_vector_from_val (type, val);
1350 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1351 init_stmt = gimple_build_assign (new_var, val);
1352 new_temp = make_ssa_name (new_var, init_stmt);
1353 gimple_assign_set_lhs (init_stmt, new_temp);
1354 vect_init_vector_1 (stmt, init_stmt, gsi);
1355 vec_oprnd = gimple_assign_lhs (init_stmt);
1356 return vec_oprnd;
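/* Illustrative example: for a V4SI vector type and the scalar constant 5,
   the code above builds a new "cst_" variable initialized to
   { 5, 5, 5, 5 }, emits the init stmt at GSI (or in the loop preheader
   when GSI is NULL) and returns the resulting SSA name.  */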
1360 /* Function vect_get_vec_def_for_operand.
1362 OP is an operand in STMT. This function returns a (vector) def that will be
1363 used in the vectorized stmt for STMT.
1365 In the case that OP is an SSA_NAME which is defined in the loop, then
1366 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1368 In case OP is an invariant or constant, a new stmt that creates a vector def
1369 needs to be introduced. */
1371 tree
1372 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1374 tree vec_oprnd;
1375 gimple vec_stmt;
1376 gimple def_stmt;
1377 stmt_vec_info def_stmt_info = NULL;
1378 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1379 unsigned int nunits;
1380 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1381 tree def;
1382 enum vect_def_type dt;
1383 bool is_simple_use;
1384 tree vector_type;
1386 if (dump_enabled_p ())
1388 dump_printf_loc (MSG_NOTE, vect_location,
1389 "vect_get_vec_def_for_operand: ");
1390 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1391 dump_printf (MSG_NOTE, "\n");
1394 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1395 &def_stmt, &def, &dt);
1396 gcc_assert (is_simple_use);
1397 if (dump_enabled_p ())
1399 int loc_printed = 0;
1400 if (def)
1402 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1403 loc_printed = 1;
1404 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1405 dump_printf (MSG_NOTE, "\n");
1407 if (def_stmt)
1409 if (loc_printed)
1410 dump_printf (MSG_NOTE, " def_stmt = ");
1411 else
1412 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1414 dump_printf (MSG_NOTE, "\n");
1418 switch (dt)
1420 /* Case 1: operand is a constant. */
1421 case vect_constant_def:
1423 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1424 gcc_assert (vector_type);
1425 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1427 if (scalar_def)
1428 *scalar_def = op;
1430 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1431 if (dump_enabled_p ())
1432 dump_printf_loc (MSG_NOTE, vect_location,
1433 "Create vector_cst. nunits = %d\n", nunits);
1435 return vect_init_vector (stmt, op, vector_type, NULL);
1438 /* Case 2: operand is defined outside the loop - loop invariant. */
1439 case vect_external_def:
1441 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1442 gcc_assert (vector_type);
1444 if (scalar_def)
1445 *scalar_def = def;
1447 /* Create 'vec_inv = {inv,inv,..,inv}' */
1448 if (dump_enabled_p ())
1449 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1451 return vect_init_vector (stmt, def, vector_type, NULL);
1454 /* Case 3: operand is defined inside the loop. */
1455 case vect_internal_def:
1457 if (scalar_def)
1458 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1460 /* Get the def from the vectorized stmt. */
1461 def_stmt_info = vinfo_for_stmt (def_stmt);
1463 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1464 /* Get vectorized pattern statement. */
1465 if (!vec_stmt
1466 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1467 && !STMT_VINFO_RELEVANT (def_stmt_info))
1468 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1469 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1470 gcc_assert (vec_stmt);
1471 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1472 vec_oprnd = PHI_RESULT (vec_stmt);
1473 else if (is_gimple_call (vec_stmt))
1474 vec_oprnd = gimple_call_lhs (vec_stmt);
1475 else
1476 vec_oprnd = gimple_assign_lhs (vec_stmt);
1477 return vec_oprnd;
1480 /* Case 4: operand is defined by a loop header phi - reduction */
1481 case vect_reduction_def:
1482 case vect_double_reduction_def:
1483 case vect_nested_cycle:
1485 struct loop *loop;
1487 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1488 loop = (gimple_bb (def_stmt))->loop_father;
1490 /* Get the def before the loop */
1491 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1492 return get_initial_def_for_reduction (stmt, op, scalar_def);
1495 /* Case 5: operand is defined by loop-header phi - induction. */
1496 case vect_induction_def:
1498 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1500 /* Get the def from the vectorized stmt. */
1501 def_stmt_info = vinfo_for_stmt (def_stmt);
1502 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1503 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1504 vec_oprnd = PHI_RESULT (vec_stmt);
1505 else
1506 vec_oprnd = gimple_get_lhs (vec_stmt);
1507 return vec_oprnd;
1510 default:
1511 gcc_unreachable ();
1516 /* Function vect_get_vec_def_for_stmt_copy
1518 Return a vector-def for an operand. This function is used when the
1519 vectorized stmt to be created (by the caller to this function) is a "copy"
1520 created in case the vectorized result cannot fit in one vector, and several
1521 copies of the vector-stmt are required. In this case the vector-def is
1522 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1523 of the stmt that defines VEC_OPRND.
1524 DT is the type of the vector def VEC_OPRND.
1526 Context:
1527 In case the vectorization factor (VF) is bigger than the number
1528 of elements that can fit in a vectype (nunits), we have to generate
1529 more than one vector stmt to vectorize the scalar stmt. This situation
1530 arises when there are multiple data-types operated upon in the loop; the
1531 smallest data-type determines the VF, and as a result, when vectorizing
1532 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1533 vector stmt (each computing a vector of 'nunits' results, and together
1534 computing 'VF' results in each iteration). This function is called when
1535 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1536 which VF=16 and nunits=4, so the number of copies required is 4):
1538 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1540 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1541 VS1.1: vx.1 = memref1 VS1.2
1542 VS1.2: vx.2 = memref2 VS1.3
1543 VS1.3: vx.3 = memref3
1545 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1546 VSnew.1: vz1 = vx.1 + ... VSnew.2
1547 VSnew.2: vz2 = vx.2 + ... VSnew.3
1548 VSnew.3: vz3 = vx.3 + ...
1550 The vectorization of S1 is explained in vectorizable_load.
1551 The vectorization of S2:
1552 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1553 the function 'vect_get_vec_def_for_operand' is called to
1554 get the relevant vector-def for each operand of S2. For operand x it
1555 returns the vector-def 'vx.0'.
1557 To create the remaining copies of the vector-stmt (VSnew.j), this
1558 function is called to get the relevant vector-def for each operand. It is
1559 obtained from the respective VS1.j stmt, which is recorded in the
1560 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1562 For example, to obtain the vector-def 'vx.1' in order to create the
1563 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1564 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1565 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1566 and return its def ('vx.1').
1567 Overall, to create the above sequence this function will be called 3 times:
1568 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1569 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1570 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1572 tree
1573 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1575 gimple vec_stmt_for_operand;
1576 stmt_vec_info def_stmt_info;
1578 /* Do nothing; can reuse same def. */
1579 if (dt == vect_external_def || dt == vect_constant_def )
1580 return vec_oprnd;
1582 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1583 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1584 gcc_assert (def_stmt_info);
1585 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1586 gcc_assert (vec_stmt_for_operand);
1587 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1588 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1589 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1590 else
1591 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1592 return vec_oprnd;
1596 /* Get vectorized definitions for the operands to create a copy of an original
1597 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1599 static void
1600 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1601 vec<tree> *vec_oprnds0,
1602 vec<tree> *vec_oprnds1)
1604 tree vec_oprnd = vec_oprnds0->pop ();
1606 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1607 vec_oprnds0->quick_push (vec_oprnd);
1609 if (vec_oprnds1 && vec_oprnds1->length ())
1611 vec_oprnd = vec_oprnds1->pop ();
1612 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1613 vec_oprnds1->quick_push (vec_oprnd);
1618 /* Get vectorized definitions for OP0 and OP1.
1619 REDUC_INDEX is the index of reduction operand in case of reduction,
1620 and -1 otherwise. */
1622 void
1623 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1624 vec<tree> *vec_oprnds0,
1625 vec<tree> *vec_oprnds1,
1626 slp_tree slp_node, int reduc_index)
1628 if (slp_node)
1630 int nops = (op1 == NULL_TREE) ? 1 : 2;
1631 auto_vec<tree> ops (nops);
1632 auto_vec<vec<tree> > vec_defs (nops);
1634 ops.quick_push (op0);
1635 if (op1)
1636 ops.quick_push (op1);
1638 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1640 *vec_oprnds0 = vec_defs[0];
1641 if (op1)
1642 *vec_oprnds1 = vec_defs[1];
1644 else
1646 tree vec_oprnd;
1648 vec_oprnds0->create (1);
1649 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1650 vec_oprnds0->quick_push (vec_oprnd);
1652 if (op1)
1654 vec_oprnds1->create (1);
1655 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1656 vec_oprnds1->quick_push (vec_oprnd);
1662 /* Function vect_finish_stmt_generation.
1664 Insert a new stmt. */
1666 void
1667 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1668 gimple_stmt_iterator *gsi)
1670 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1671 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1674 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1676 if (!gsi_end_p (*gsi)
1677 && gimple_has_mem_ops (vec_stmt))
1679 gimple at_stmt = gsi_stmt (*gsi);
1680 tree vuse = gimple_vuse (at_stmt);
1681 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1683 tree vdef = gimple_vdef (at_stmt);
1684 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1685 /* If we have an SSA vuse and insert a store, update virtual
1686 SSA form to avoid triggering the renamer. Do so only
1687 if we can easily see all uses - which is what almost always
1688 happens with the way vectorized stmts are inserted. */
1689 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1690 && ((is_gimple_assign (vec_stmt)
1691 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1692 || (is_gimple_call (vec_stmt)
1693 && !(gimple_call_flags (vec_stmt)
1694 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1696 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1697 gimple_set_vdef (vec_stmt, new_vdef);
1698 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1702 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1704 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1705 bb_vinfo));
1707 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1710 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1711 dump_printf (MSG_NOTE, "\n");
1714 gimple_set_location (vec_stmt, gimple_location (stmt));
1716 /* While EH edges will generally prevent vectorization, stmt might
1717 e.g. be in a must-not-throw region. Ensure newly created stmts
1718 that could throw are part of the same region. */
1719 int lp_nr = lookup_stmt_eh_lp (stmt);
1720 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1721 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1724 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1725 a function declaration if the target has a vectorized version
1726 of the function, or NULL_TREE if the function cannot be vectorized. */
1728 tree
1729 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1731 tree fndecl = gimple_call_fndecl (call);
1733 /* We only handle functions that do not read or clobber memory -- i.e.
1734 const or novops ones. */
1735 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1736 return NULL_TREE;
1738 if (!fndecl
1739 || TREE_CODE (fndecl) != FUNCTION_DECL
1740 || !DECL_BUILT_IN (fndecl))
1741 return NULL_TREE;
1743 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1744 vectype_in);
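/* Illustrative example: a call to a const math builtin such as sqrt may be
   mapped by targetm.vectorize.builtin_vectorized_function to a vectorized
   variant on targets that provide one; calls that read or clobber memory,
   or whose callee is not a builtin FUNCTION_DECL, yield NULL_TREE above.  */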
1748 static tree permute_vec_elements (tree, tree, tree, gimple,
1749 gimple_stmt_iterator *);
1752 /* Function vectorizable_mask_load_store.
1754 Check if STMT performs a conditional load or store that can be vectorized.
1755 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1756 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1757 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1759 static bool
1760 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1761 gimple *vec_stmt, slp_tree slp_node)
1763 tree vec_dest = NULL;
1764 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1765 stmt_vec_info prev_stmt_info;
1766 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1767 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1768 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1769 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1770 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1771 tree elem_type;
1772 gimple new_stmt;
1773 tree dummy;
1774 tree dataref_ptr = NULL_TREE;
1775 gimple ptr_incr;
1776 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1777 int ncopies;
1778 int i, j;
1779 bool inv_p;
1780 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1781 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1782 int gather_scale = 1;
1783 enum vect_def_type gather_dt = vect_unknown_def_type;
1784 bool is_store;
1785 tree mask;
1786 gimple def_stmt;
1787 tree def;
1788 enum vect_def_type dt;
1790 if (slp_node != NULL)
1791 return false;
1793 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1794 gcc_assert (ncopies >= 1);
1796 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1797 mask = gimple_call_arg (stmt, 2);
1798 if (TYPE_PRECISION (TREE_TYPE (mask))
1799 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1800 return false;
1802 /* FORNOW. This restriction should be relaxed. */
1803 if (nested_in_vect_loop && ncopies > 1)
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "multiple types in nested loop.");
1808 return false;
1811 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1812 return false;
1814 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1815 return false;
1817 if (!STMT_VINFO_DATA_REF (stmt_info))
1818 return false;
1820 elem_type = TREE_TYPE (vectype);
1822 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1823 return false;
1825 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1826 return false;
1828 if (STMT_VINFO_GATHER_P (stmt_info))
1830 gimple def_stmt;
1831 tree def;
1832 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1833 &gather_off, &gather_scale);
1834 gcc_assert (gather_decl);
1835 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1836 &def_stmt, &def, &gather_dt,
1837 &gather_off_vectype))
1839 if (dump_enabled_p ())
1840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1841 "gather index use not simple.");
1842 return false;
1845 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1846 tree masktype
1847 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1848 if (TREE_CODE (masktype) == INTEGER_TYPE)
1850 if (dump_enabled_p ())
1851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1852 "masked gather with integer mask not supported.");
1853 return false;
1856 else if (tree_int_cst_compare (nested_in_vect_loop
1857 ? STMT_VINFO_DR_STEP (stmt_info)
1858 : DR_STEP (dr), size_zero_node) <= 0)
1859 return false;
1860 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1861 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1862 return false;
1864 if (TREE_CODE (mask) != SSA_NAME)
1865 return false;
1867 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1868 &def_stmt, &def, &dt))
1869 return false;
1871 if (is_store)
1873 tree rhs = gimple_call_arg (stmt, 3);
1874 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1875 &def_stmt, &def, &dt))
1876 return false;
1879 if (!vec_stmt) /* transformation not required. */
1881 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1882 if (is_store)
1883 vect_model_store_cost (stmt_info, ncopies, false, dt,
1884 NULL, NULL, NULL);
1885 else
1886 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1887 return true;
1890 /** Transform. **/
1892 if (STMT_VINFO_GATHER_P (stmt_info))
1894 tree vec_oprnd0 = NULL_TREE, op;
1895 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1896 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1897 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1898 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1899 tree mask_perm_mask = NULL_TREE;
1900 edge pe = loop_preheader_edge (loop);
1901 gimple_seq seq;
1902 basic_block new_bb;
1903 enum { NARROW, NONE, WIDEN } modifier;
1904 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1906 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1907 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1908 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1909 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1910 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1911 scaletype = TREE_VALUE (arglist);
1912 gcc_checking_assert (types_compatible_p (srctype, rettype)
1913 && types_compatible_p (srctype, masktype));
1915 if (nunits == gather_off_nunits)
1916 modifier = NONE;
1917 else if (nunits == gather_off_nunits / 2)
1919 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1920 modifier = WIDEN;
1922 for (i = 0; i < gather_off_nunits; ++i)
1923 sel[i] = i | nunits;
1925 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1927 else if (nunits == gather_off_nunits * 2)
1929 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1930 modifier = NARROW;
1932 for (i = 0; i < nunits; ++i)
1933 sel[i] = i < gather_off_nunits
1934 ? i : i + nunits - gather_off_nunits;
1936 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1937 ncopies *= 2;
1938 for (i = 0; i < nunits; ++i)
1939 sel[i] = i | gather_off_nunits;
1940 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1942 else
1943 gcc_unreachable ();
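/* Illustrative note (not part of the original source): WIDEN means the
   gather offset vector carries twice as many elements as one data vector,
   so odd-numbered copies reuse it after permuting its high half into
   position; NARROW means the opposite ratio, so two gather calls are
   emitted per data vector and their results are packed together below.  */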
1945 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1947 ptr = fold_convert (ptrtype, gather_base);
1948 if (!is_gimple_min_invariant (ptr))
1950 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1951 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1952 gcc_assert (!new_bb);
1955 scale = build_int_cst (scaletype, gather_scale);
1957 prev_stmt_info = NULL;
1958 for (j = 0; j < ncopies; ++j)
1960 if (modifier == WIDEN && (j & 1))
1961 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1962 perm_mask, stmt, gsi);
1963 else if (j == 0)
1964 op = vec_oprnd0
1965 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1966 else
1967 op = vec_oprnd0
1968 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1970 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1972 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1973 == TYPE_VECTOR_SUBPARTS (idxtype));
1974 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1975 var = make_ssa_name (var);
1976 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1977 new_stmt
1978 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1980 op = var;
1983 if (mask_perm_mask && (j & 1))
1984 mask_op = permute_vec_elements (mask_op, mask_op,
1985 mask_perm_mask, stmt, gsi);
1986 else
1988 if (j == 0)
1989 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1990 else
1992 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1993 &def_stmt, &def, &dt);
1994 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1997 mask_op = vec_mask;
1998 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2000 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2001 == TYPE_VECTOR_SUBPARTS (masktype));
2002 var = vect_get_new_vect_var (masktype, vect_simple_var,
2003 NULL);
2004 var = make_ssa_name (var);
2005 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2006 new_stmt
2007 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2009 mask_op = var;
2013 new_stmt
2014 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2015 scale);
2017 if (!useless_type_conversion_p (vectype, rettype))
2019 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2020 == TYPE_VECTOR_SUBPARTS (rettype));
2021 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2022 op = make_ssa_name (var, new_stmt);
2023 gimple_call_set_lhs (new_stmt, op);
2024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2025 var = make_ssa_name (vec_dest);
2026 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2027 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2029 else
2031 var = make_ssa_name (vec_dest, new_stmt);
2032 gimple_call_set_lhs (new_stmt, var);
2035 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2037 if (modifier == NARROW)
2039 if ((j & 1) == 0)
2041 prev_res = var;
2042 continue;
2044 var = permute_vec_elements (prev_res, var,
2045 perm_mask, stmt, gsi);
2046 new_stmt = SSA_NAME_DEF_STMT (var);
2049 if (prev_stmt_info == NULL)
2050 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2051 else
2052 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2053 prev_stmt_info = vinfo_for_stmt (new_stmt);
2056 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2057 from the IL. */
2058 tree lhs = gimple_call_lhs (stmt);
2059 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2060 set_vinfo_for_stmt (new_stmt, stmt_info);
2061 set_vinfo_for_stmt (stmt, NULL);
2062 STMT_VINFO_STMT (stmt_info) = new_stmt;
2063 gsi_replace (gsi, new_stmt, true);
2064 return true;
2066 else if (is_store)
2068 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2069 prev_stmt_info = NULL;
2070 for (i = 0; i < ncopies; i++)
2072 unsigned align, misalign;
2074 if (i == 0)
2076 tree rhs = gimple_call_arg (stmt, 3);
2077 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2078 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2079 /* We should have caught mismatched types earlier. */
2080 gcc_assert (useless_type_conversion_p (vectype,
2081 TREE_TYPE (vec_rhs)));
2082 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2083 NULL_TREE, &dummy, gsi,
2084 &ptr_incr, false, &inv_p);
2085 gcc_assert (!inv_p);
2087 else
2089 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2090 &def, &dt);
2091 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2092 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2093 &def, &dt);
2094 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2095 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2096 TYPE_SIZE_UNIT (vectype));
2099 align = TYPE_ALIGN_UNIT (vectype);
2100 if (aligned_access_p (dr))
2101 misalign = 0;
2102 else if (DR_MISALIGNMENT (dr) == -1)
2104 align = TYPE_ALIGN_UNIT (elem_type);
2105 misalign = 0;
2107 else
2108 misalign = DR_MISALIGNMENT (dr);
2109 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2110 misalign);
2111 new_stmt
2112 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2113 gimple_call_arg (stmt, 1),
2114 vec_mask, vec_rhs);
2115 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2116 if (i == 0)
2117 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2118 else
2119 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2120 prev_stmt_info = vinfo_for_stmt (new_stmt);
2123 else
2125 tree vec_mask = NULL_TREE;
2126 prev_stmt_info = NULL;
2127 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2128 for (i = 0; i < ncopies; i++)
2130 unsigned align, misalign;
2132 if (i == 0)
2134 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2135 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2136 NULL_TREE, &dummy, gsi,
2137 &ptr_incr, false, &inv_p);
2138 gcc_assert (!inv_p);
2140 else
2142 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2143 &def, &dt);
2144 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2145 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2146 TYPE_SIZE_UNIT (vectype));
2149 align = TYPE_ALIGN_UNIT (vectype);
2150 if (aligned_access_p (dr))
2151 misalign = 0;
2152 else if (DR_MISALIGNMENT (dr) == -1)
2154 align = TYPE_ALIGN_UNIT (elem_type);
2155 misalign = 0;
2157 else
2158 misalign = DR_MISALIGNMENT (dr);
2159 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2160 misalign);
2161 new_stmt
2162 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2163 gimple_call_arg (stmt, 1),
2164 vec_mask);
2165 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2166 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2167 if (i == 0)
2168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2169 else
2170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2171 prev_stmt_info = vinfo_for_stmt (new_stmt);
2175 if (!is_store)
2177 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2178 from the IL. */
2179 tree lhs = gimple_call_lhs (stmt);
2180 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2181 set_vinfo_for_stmt (new_stmt, stmt_info);
2182 set_vinfo_for_stmt (stmt, NULL);
2183 STMT_VINFO_STMT (stmt_info) = new_stmt;
2184 gsi_replace (gsi, new_stmt, true);
2187 return true;
2191 /* Function vectorizable_call.
2193 Check if GS performs a function call that can be vectorized.
2194 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2195 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2196 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
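/* Illustrative note (not part of the original source): a typical candidate is

     for (int i = 0; i < n; i++)
       a[i] = sqrtf (b[i]);

   which is vectorized only when vectorizable_function finds a target
   builtin operating on whole vectors (or, for IFN_GOMP_SIMD_LANE, when the
   call can be replaced by a { 0, 1, ..., vf-1 } constant vector).  */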
2198 static bool
2199 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2200 slp_tree slp_node)
2202 gcall *stmt;
2203 tree vec_dest;
2204 tree scalar_dest;
2205 tree op, type;
2206 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2207 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2208 tree vectype_out, vectype_in;
2209 int nunits_in;
2210 int nunits_out;
2211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2212 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2213 tree fndecl, new_temp, def, rhs_type;
2214 gimple def_stmt;
2215 enum vect_def_type dt[3]
2216 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2217 gimple new_stmt = NULL;
2218 int ncopies, j;
2219 vec<tree> vargs = vNULL;
2220 enum { NARROW, NONE, WIDEN } modifier;
2221 size_t i, nargs;
2222 tree lhs;
2224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2225 return false;
2227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2228 return false;
2230 /* Is GS a vectorizable call? */
2231 stmt = dyn_cast <gcall *> (gs);
2232 if (!stmt)
2233 return false;
2235 if (gimple_call_internal_p (stmt)
2236 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2237 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2238 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2239 slp_node);
2241 if (gimple_call_lhs (stmt) == NULL_TREE
2242 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2243 return false;
2245 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2249 /* Process function arguments. */
2250 rhs_type = NULL_TREE;
2251 vectype_in = NULL_TREE;
2252 nargs = gimple_call_num_args (stmt);
2254 /* Bail out if the function has more than three arguments; we do not have
2255 interesting builtin functions to vectorize with more than two arguments
2256 except for fma. A call with no arguments is not handled either. */
2257 if (nargs == 0 || nargs > 3)
2258 return false;
2260 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2261 if (gimple_call_internal_p (stmt)
2262 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2264 nargs = 0;
2265 rhs_type = unsigned_type_node;
2268 for (i = 0; i < nargs; i++)
2270 tree opvectype;
2272 op = gimple_call_arg (stmt, i);
2274 /* We can only handle calls with arguments of the same type. */
2275 if (rhs_type
2276 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2280 "argument types differ.\n");
2281 return false;
2283 if (!rhs_type)
2284 rhs_type = TREE_TYPE (op);
2286 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2287 &def_stmt, &def, &dt[i], &opvectype))
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2291 "use not simple.\n");
2292 return false;
2295 if (!vectype_in)
2296 vectype_in = opvectype;
2297 else if (opvectype
2298 && opvectype != vectype_in)
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 "argument vector types differ.\n");
2303 return false;
2306 /* If all arguments are external or constant defs use a vector type with
2307 the same size as the output vector type. */
2308 if (!vectype_in)
2309 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2310 if (vec_stmt)
2311 gcc_assert (vectype_in);
2312 if (!vectype_in)
2314 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "no vectype for scalar type ");
2318 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2319 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2322 return false;
2325 /* FORNOW */
2326 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2328 if (nunits_in == nunits_out / 2)
2329 modifier = NARROW;
2330 else if (nunits_out == nunits_in)
2331 modifier = NONE;
2332 else if (nunits_out == nunits_in / 2)
2333 modifier = WIDEN;
2334 else
2335 return false;
2337 /* For now, we only vectorize functions if a target specific builtin
2338 is available. TODO -- in some cases, it might be profitable to
2339 insert the calls for pieces of the vector, in order to be able
2340 to vectorize other operations in the loop. */
2341 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2342 if (fndecl == NULL_TREE)
2344 if (gimple_call_internal_p (stmt)
2345 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2346 && !slp_node
2347 && loop_vinfo
2348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2349 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2350 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2351 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2353 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2354 { 0, 1, 2, ... vf - 1 } vector. */
2355 gcc_assert (nargs == 0);
2357 else
2359 if (dump_enabled_p ())
2360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2361 "function is not vectorizable.\n");
2362 return false;
2366 gcc_assert (!gimple_vuse (stmt));
2368 if (slp_node || PURE_SLP_STMT (stmt_info))
2369 ncopies = 1;
2370 else if (modifier == NARROW)
2371 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2372 else
2373 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2375 /* Sanity check: make sure that at least one copy of the vectorized stmt
2376 needs to be generated. */
2377 gcc_assert (ncopies >= 1);
2379 if (!vec_stmt) /* transformation not required. */
2381 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2384 "\n");
2385 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2386 return true;
2389 /** Transform. **/
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2394 /* Handle def. */
2395 scalar_dest = gimple_call_lhs (stmt);
2396 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2398 prev_stmt_info = NULL;
2399 switch (modifier)
2401 case NONE:
2402 for (j = 0; j < ncopies; ++j)
2404 /* Build argument list for the vectorized call. */
2405 if (j == 0)
2406 vargs.create (nargs);
2407 else
2408 vargs.truncate (0);
2410 if (slp_node)
2412 auto_vec<vec<tree> > vec_defs (nargs);
2413 vec<tree> vec_oprnds0;
2415 for (i = 0; i < nargs; i++)
2416 vargs.quick_push (gimple_call_arg (stmt, i));
2417 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2418 vec_oprnds0 = vec_defs[0];
2420 /* Arguments are ready. Create the new vector stmt. */
2421 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2423 size_t k;
2424 for (k = 0; k < nargs; k++)
2426 vec<tree> vec_oprndsk = vec_defs[k];
2427 vargs[k] = vec_oprndsk[i];
2429 new_stmt = gimple_build_call_vec (fndecl, vargs);
2430 new_temp = make_ssa_name (vec_dest, new_stmt);
2431 gimple_call_set_lhs (new_stmt, new_temp);
2432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2433 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2436 for (i = 0; i < nargs; i++)
2438 vec<tree> vec_oprndsi = vec_defs[i];
2439 vec_oprndsi.release ();
2441 continue;
2444 for (i = 0; i < nargs; i++)
2446 op = gimple_call_arg (stmt, i);
2447 if (j == 0)
2448 vec_oprnd0
2449 = vect_get_vec_def_for_operand (op, stmt, NULL);
2450 else
2452 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2453 vec_oprnd0
2454 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2457 vargs.quick_push (vec_oprnd0);
2460 if (gimple_call_internal_p (stmt)
2461 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2463 tree *v = XALLOCAVEC (tree, nunits_out);
2464 int k;
2465 for (k = 0; k < nunits_out; ++k)
2466 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2467 tree cst = build_vector (vectype_out, v);
2468 tree new_var
2469 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2470 gimple init_stmt = gimple_build_assign (new_var, cst);
2471 new_temp = make_ssa_name (new_var, init_stmt);
2472 gimple_assign_set_lhs (init_stmt, new_temp);
2473 vect_init_vector_1 (stmt, init_stmt, NULL);
2474 new_temp = make_ssa_name (vec_dest);
2475 new_stmt = gimple_build_assign (new_temp,
2476 gimple_assign_lhs (init_stmt));
2478 else
2480 new_stmt = gimple_build_call_vec (fndecl, vargs);
2481 new_temp = make_ssa_name (vec_dest, new_stmt);
2482 gimple_call_set_lhs (new_stmt, new_temp);
2484 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2486 if (j == 0)
2487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2488 else
2489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2491 prev_stmt_info = vinfo_for_stmt (new_stmt);
2494 break;
2496 case NARROW:
2497 for (j = 0; j < ncopies; ++j)
2499 /* Build argument list for the vectorized call. */
2500 if (j == 0)
2501 vargs.create (nargs * 2);
2502 else
2503 vargs.truncate (0);
2505 if (slp_node)
2507 auto_vec<vec<tree> > vec_defs (nargs);
2508 vec<tree> vec_oprnds0;
2510 for (i = 0; i < nargs; i++)
2511 vargs.quick_push (gimple_call_arg (stmt, i));
2512 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2513 vec_oprnds0 = vec_defs[0];
2515 /* Arguments are ready. Create the new vector stmt. */
2516 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2518 size_t k;
2519 vargs.truncate (0);
2520 for (k = 0; k < nargs; k++)
2522 vec<tree> vec_oprndsk = vec_defs[k];
2523 vargs.quick_push (vec_oprndsk[i]);
2524 vargs.quick_push (vec_oprndsk[i + 1]);
2526 new_stmt = gimple_build_call_vec (fndecl, vargs);
2527 new_temp = make_ssa_name (vec_dest, new_stmt);
2528 gimple_call_set_lhs (new_stmt, new_temp);
2529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2530 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2533 for (i = 0; i < nargs; i++)
2535 vec<tree> vec_oprndsi = vec_defs[i];
2536 vec_oprndsi.release ();
2538 continue;
2541 for (i = 0; i < nargs; i++)
2543 op = gimple_call_arg (stmt, i);
2544 if (j == 0)
2546 vec_oprnd0
2547 = vect_get_vec_def_for_operand (op, stmt, NULL);
2548 vec_oprnd1
2549 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2551 else
2553 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2554 vec_oprnd0
2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2556 vec_oprnd1
2557 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2560 vargs.quick_push (vec_oprnd0);
2561 vargs.quick_push (vec_oprnd1);
2564 new_stmt = gimple_build_call_vec (fndecl, vargs);
2565 new_temp = make_ssa_name (vec_dest, new_stmt);
2566 gimple_call_set_lhs (new_stmt, new_temp);
2567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2569 if (j == 0)
2570 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2571 else
2572 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2574 prev_stmt_info = vinfo_for_stmt (new_stmt);
2577 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2579 break;
2581 case WIDEN:
2582 /* No current target implements this case. */
2583 return false;
2586 vargs.release ();
2588 /* The call in STMT might prevent it from being removed in dce.
2589 However, we cannot remove it here, because of the way the ssa name
2590 it defines is mapped to the new definition. So just replace the
2591 rhs of the statement with something harmless. */
2593 if (slp_node)
2594 return true;
2596 type = TREE_TYPE (scalar_dest);
2597 if (is_pattern_stmt_p (stmt_info))
2598 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2599 else
2600 lhs = gimple_call_lhs (stmt);
2601 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2602 set_vinfo_for_stmt (new_stmt, stmt_info);
2603 set_vinfo_for_stmt (stmt, NULL);
2604 STMT_VINFO_STMT (stmt_info) = new_stmt;
2605 gsi_replace (gsi, new_stmt, false);
2607 return true;
2611 struct simd_call_arg_info
2613 tree vectype;
2614 tree op;
2615 enum vect_def_type dt;
2616 HOST_WIDE_INT linear_step;
2617 unsigned int align;
2620 /* Function vectorizable_simd_clone_call.
2622 Check if STMT performs a function call that can be vectorized
2623 by calling a simd clone of the function.
2624 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2625 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2626 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
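/* Illustrative note (not part of the original source): this handles calls to
   functions with SIMD clones, e.g.

     #pragma omp declare simd
     int f (int x);

   called from a vectorizable loop; rather than mapping the call to a target
   builtin, the scalar call is replaced by a call to the most suitable clone,
   chosen below by a badness score over simdlen, inbranch and argument kinds.  */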
2628 static bool
2629 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2630 gimple *vec_stmt, slp_tree slp_node)
2632 tree vec_dest;
2633 tree scalar_dest;
2634 tree op, type;
2635 tree vec_oprnd0 = NULL_TREE;
2636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2637 tree vectype;
2638 unsigned int nunits;
2639 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2640 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2641 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2642 tree fndecl, new_temp, def;
2643 gimple def_stmt;
2644 gimple new_stmt = NULL;
2645 int ncopies, j;
2646 vec<simd_call_arg_info> arginfo = vNULL;
2647 vec<tree> vargs = vNULL;
2648 size_t i, nargs;
2649 tree lhs, rtype, ratype;
2650 vec<constructor_elt, va_gc> *ret_ctor_elts;
2652 /* Is STMT a vectorizable call? */
2653 if (!is_gimple_call (stmt))
2654 return false;
2656 fndecl = gimple_call_fndecl (stmt);
2657 if (fndecl == NULL_TREE)
2658 return false;
2660 struct cgraph_node *node = cgraph_node::get (fndecl);
2661 if (node == NULL || node->simd_clones == NULL)
2662 return false;
2664 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2665 return false;
2667 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2668 return false;
2670 if (gimple_call_lhs (stmt)
2671 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2672 return false;
2674 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2676 vectype = STMT_VINFO_VECTYPE (stmt_info);
2678 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2679 return false;
2681 /* FORNOW */
2682 if (slp_node || PURE_SLP_STMT (stmt_info))
2683 return false;
2685 /* Process function arguments. */
2686 nargs = gimple_call_num_args (stmt);
2688 /* Bail out if the function has zero arguments. */
2689 if (nargs == 0)
2690 return false;
2692 arginfo.create (nargs);
2694 for (i = 0; i < nargs; i++)
2696 simd_call_arg_info thisarginfo;
2697 affine_iv iv;
2699 thisarginfo.linear_step = 0;
2700 thisarginfo.align = 0;
2701 thisarginfo.op = NULL_TREE;
2703 op = gimple_call_arg (stmt, i);
2704 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2705 &def_stmt, &def, &thisarginfo.dt,
2706 &thisarginfo.vectype)
2707 || thisarginfo.dt == vect_uninitialized_def)
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2711 "use not simple.\n");
2712 arginfo.release ();
2713 return false;
2716 if (thisarginfo.dt == vect_constant_def
2717 || thisarginfo.dt == vect_external_def)
2718 gcc_assert (thisarginfo.vectype == NULL_TREE);
2719 else
2720 gcc_assert (thisarginfo.vectype != NULL_TREE);
2722 /* For linear arguments, the analyze phase should have saved
2723 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2724 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2725 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2727 gcc_assert (vec_stmt);
2728 thisarginfo.linear_step
2729 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2730 thisarginfo.op
2731 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2732 /* If loop has been peeled for alignment, we need to adjust it. */
2733 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2734 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2735 if (n1 != n2)
2737 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2738 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2739 tree opt = TREE_TYPE (thisarginfo.op);
2740 bias = fold_convert (TREE_TYPE (step), bias);
2741 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2742 thisarginfo.op
2743 = fold_build2 (POINTER_TYPE_P (opt)
2744 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2745 thisarginfo.op, bias);
2748 else if (!vec_stmt
2749 && thisarginfo.dt != vect_constant_def
2750 && thisarginfo.dt != vect_external_def
2751 && loop_vinfo
2752 && TREE_CODE (op) == SSA_NAME
2753 && simple_iv (loop, loop_containing_stmt (stmt), op,
2754 &iv, false)
2755 && tree_fits_shwi_p (iv.step))
2757 thisarginfo.linear_step = tree_to_shwi (iv.step);
2758 thisarginfo.op = iv.base;
2760 else if ((thisarginfo.dt == vect_constant_def
2761 || thisarginfo.dt == vect_external_def)
2762 && POINTER_TYPE_P (TREE_TYPE (op)))
2763 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2765 arginfo.quick_push (thisarginfo);
2768 unsigned int badness = 0;
2769 struct cgraph_node *bestn = NULL;
2770 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2771 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2772 else
2773 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2774 n = n->simdclone->next_clone)
2776 unsigned int this_badness = 0;
2777 if (n->simdclone->simdlen
2778 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2779 || n->simdclone->nargs != nargs)
2780 continue;
2781 if (n->simdclone->simdlen
2782 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2783 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2784 - exact_log2 (n->simdclone->simdlen)) * 1024;
2785 if (n->simdclone->inbranch)
2786 this_badness += 2048;
2787 int target_badness = targetm.simd_clone.usable (n);
2788 if (target_badness < 0)
2789 continue;
2790 this_badness += target_badness * 512;
2791 /* FORNOW: Have to add code to add the mask argument. */
2792 if (n->simdclone->inbranch)
2793 continue;
2794 for (i = 0; i < nargs; i++)
2796 switch (n->simdclone->args[i].arg_type)
2798 case SIMD_CLONE_ARG_TYPE_VECTOR:
2799 if (!useless_type_conversion_p
2800 (n->simdclone->args[i].orig_type,
2801 TREE_TYPE (gimple_call_arg (stmt, i))))
2802 i = -1;
2803 else if (arginfo[i].dt == vect_constant_def
2804 || arginfo[i].dt == vect_external_def
2805 || arginfo[i].linear_step)
2806 this_badness += 64;
2807 break;
2808 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2809 if (arginfo[i].dt != vect_constant_def
2810 && arginfo[i].dt != vect_external_def)
2811 i = -1;
2812 break;
2813 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2814 if (arginfo[i].dt == vect_constant_def
2815 || arginfo[i].dt == vect_external_def
2816 || (arginfo[i].linear_step
2817 != n->simdclone->args[i].linear_step))
2818 i = -1;
2819 break;
2820 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2821 /* FORNOW */
2822 i = -1;
2823 break;
2824 case SIMD_CLONE_ARG_TYPE_MASK:
2825 gcc_unreachable ();
2827 if (i == (size_t) -1)
2828 break;
2829 if (n->simdclone->args[i].alignment > arginfo[i].align)
2831 i = -1;
2832 break;
2834 if (arginfo[i].align)
2835 this_badness += (exact_log2 (arginfo[i].align)
2836 - exact_log2 (n->simdclone->args[i].alignment));
2838 if (i == (size_t) -1)
2839 continue;
2840 if (bestn == NULL || this_badness < badness)
2842 bestn = n;
2843 badness = this_badness;
2847 if (bestn == NULL)
2849 arginfo.release ();
2850 return false;
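/* Illustrative note (not part of the original source): in the selection
   above, with a vectorization factor of 8 a clone of simdlen 4 is penalized
   by (log2 8 - log2 4) * 1024, an inbranch clone by 2048, a target-
   dispreferred clone by 512 per badness unit, and argument-kind mismatches
   by 64 each, so the cheapest usable clone becomes bestn.  */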
2853 for (i = 0; i < nargs; i++)
2854 if ((arginfo[i].dt == vect_constant_def
2855 || arginfo[i].dt == vect_external_def)
2856 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2858 arginfo[i].vectype
2859 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2860 i)));
2861 if (arginfo[i].vectype == NULL
2862 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2863 > bestn->simdclone->simdlen))
2865 arginfo.release ();
2866 return false;
2870 fndecl = bestn->decl;
2871 nunits = bestn->simdclone->simdlen;
2872 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2874 /* If the function isn't const, only allow it in simd loops where the user
2875 has asserted that at least nunits consecutive iterations can be
2876 performed using SIMD instructions. */
2877 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2878 && gimple_vuse (stmt))
2880 arginfo.release ();
2881 return false;
2884 /* Sanity check: make sure that at least one copy of the vectorized stmt
2885 needs to be generated. */
2886 gcc_assert (ncopies >= 1);
2888 if (!vec_stmt) /* transformation not required. */
2890 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2891 for (i = 0; i < nargs; i++)
2892 if (bestn->simdclone->args[i].arg_type
2893 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2896 + 1);
2897 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2898 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2899 ? size_type_node : TREE_TYPE (arginfo[i].op);
2900 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2901 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2903 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_NOTE, vect_location,
2906 "=== vectorizable_simd_clone_call ===\n");
2907 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2908 arginfo.release ();
2909 return true;
2912 /** Transform. **/
2914 if (dump_enabled_p ())
2915 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2917 /* Handle def. */
2918 scalar_dest = gimple_call_lhs (stmt);
2919 vec_dest = NULL_TREE;
2920 rtype = NULL_TREE;
2921 ratype = NULL_TREE;
2922 if (scalar_dest)
2924 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2925 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2926 if (TREE_CODE (rtype) == ARRAY_TYPE)
2928 ratype = rtype;
2929 rtype = TREE_TYPE (ratype);
2933 prev_stmt_info = NULL;
2934 for (j = 0; j < ncopies; ++j)
2936 /* Build argument list for the vectorized call. */
2937 if (j == 0)
2938 vargs.create (nargs);
2939 else
2940 vargs.truncate (0);
2942 for (i = 0; i < nargs; i++)
2944 unsigned int k, l, m, o;
2945 tree atype;
2946 op = gimple_call_arg (stmt, i);
2947 switch (bestn->simdclone->args[i].arg_type)
2949 case SIMD_CLONE_ARG_TYPE_VECTOR:
2950 atype = bestn->simdclone->args[i].vector_type;
2951 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2952 for (m = j * o; m < (j + 1) * o; m++)
2954 if (TYPE_VECTOR_SUBPARTS (atype)
2955 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2957 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2958 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2959 / TYPE_VECTOR_SUBPARTS (atype));
2960 gcc_assert ((k & (k - 1)) == 0);
2961 if (m == 0)
2962 vec_oprnd0
2963 = vect_get_vec_def_for_operand (op, stmt, NULL);
2964 else
2966 vec_oprnd0 = arginfo[i].op;
2967 if ((m & (k - 1)) == 0)
2968 vec_oprnd0
2969 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2970 vec_oprnd0);
2972 arginfo[i].op = vec_oprnd0;
2973 vec_oprnd0
2974 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2975 size_int (prec),
2976 bitsize_int ((m & (k - 1)) * prec));
2977 new_stmt
2978 = gimple_build_assign (make_ssa_name (atype),
2979 vec_oprnd0);
2980 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2981 vargs.safe_push (gimple_assign_lhs (new_stmt));
2983 else
2985 k = (TYPE_VECTOR_SUBPARTS (atype)
2986 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2987 gcc_assert ((k & (k - 1)) == 0);
2988 vec<constructor_elt, va_gc> *ctor_elts;
2989 if (k != 1)
2990 vec_alloc (ctor_elts, k);
2991 else
2992 ctor_elts = NULL;
2993 for (l = 0; l < k; l++)
2995 if (m == 0 && l == 0)
2996 vec_oprnd0
2997 = vect_get_vec_def_for_operand (op, stmt, NULL);
2998 else
2999 vec_oprnd0
3000 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3001 arginfo[i].op);
3002 arginfo[i].op = vec_oprnd0;
3003 if (k == 1)
3004 break;
3005 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3006 vec_oprnd0);
3008 if (k == 1)
3009 vargs.safe_push (vec_oprnd0);
3010 else
3012 vec_oprnd0 = build_constructor (atype, ctor_elts);
3013 new_stmt
3014 = gimple_build_assign (make_ssa_name (atype),
3015 vec_oprnd0);
3016 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3017 vargs.safe_push (gimple_assign_lhs (new_stmt));
3021 break;
3022 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3023 vargs.safe_push (op);
3024 break;
3025 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3026 if (j == 0)
3028 gimple_seq stmts;
3029 arginfo[i].op
3030 = force_gimple_operand (arginfo[i].op, &stmts, true,
3031 NULL_TREE);
3032 if (stmts != NULL)
3034 basic_block new_bb;
3035 edge pe = loop_preheader_edge (loop);
3036 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3037 gcc_assert (!new_bb);
3039 tree phi_res = copy_ssa_name (op);
3040 gphi *new_phi = create_phi_node (phi_res, loop->header);
3041 set_vinfo_for_stmt (new_phi,
3042 new_stmt_vec_info (new_phi, loop_vinfo,
3043 NULL));
3044 add_phi_arg (new_phi, arginfo[i].op,
3045 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3046 enum tree_code code
3047 = POINTER_TYPE_P (TREE_TYPE (op))
3048 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3049 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3050 ? sizetype : TREE_TYPE (op);
3051 widest_int cst
3052 = wi::mul (bestn->simdclone->args[i].linear_step,
3053 ncopies * nunits);
3054 tree tcst = wide_int_to_tree (type, cst);
3055 tree phi_arg = copy_ssa_name (op);
3056 new_stmt
3057 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3058 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3059 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3060 set_vinfo_for_stmt (new_stmt,
3061 new_stmt_vec_info (new_stmt, loop_vinfo,
3062 NULL));
3063 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3064 UNKNOWN_LOCATION);
3065 arginfo[i].op = phi_res;
3066 vargs.safe_push (phi_res);
3068 else
3070 enum tree_code code
3071 = POINTER_TYPE_P (TREE_TYPE (op))
3072 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3073 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3074 ? sizetype : TREE_TYPE (op);
3075 widest_int cst
3076 = wi::mul (bestn->simdclone->args[i].linear_step,
3077 j * nunits);
3078 tree tcst = wide_int_to_tree (type, cst);
3079 new_temp = make_ssa_name (TREE_TYPE (op));
3080 new_stmt = gimple_build_assign (new_temp, code,
3081 arginfo[i].op, tcst);
3082 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3083 vargs.safe_push (new_temp);
3085 break;
3086 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3087 default:
3088 gcc_unreachable ();
3092 new_stmt = gimple_build_call_vec (fndecl, vargs);
3093 if (vec_dest)
3095 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3096 if (ratype)
3097 new_temp = create_tmp_var (ratype);
3098 else if (TYPE_VECTOR_SUBPARTS (vectype)
3099 == TYPE_VECTOR_SUBPARTS (rtype))
3100 new_temp = make_ssa_name (vec_dest, new_stmt);
3101 else
3102 new_temp = make_ssa_name (rtype, new_stmt);
3103 gimple_call_set_lhs (new_stmt, new_temp);
3105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3107 if (vec_dest)
3109 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3111 unsigned int k, l;
3112 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3113 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3114 gcc_assert ((k & (k - 1)) == 0);
3115 for (l = 0; l < k; l++)
3117 tree t;
3118 if (ratype)
3120 t = build_fold_addr_expr (new_temp);
3121 t = build2 (MEM_REF, vectype, t,
3122 build_int_cst (TREE_TYPE (t),
3123 l * prec / BITS_PER_UNIT));
3125 else
3126 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3127 size_int (prec), bitsize_int (l * prec));
3128 new_stmt
3129 = gimple_build_assign (make_ssa_name (vectype), t);
3130 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3131 if (j == 0 && l == 0)
3132 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3133 else
3134 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3136 prev_stmt_info = vinfo_for_stmt (new_stmt);
3139 if (ratype)
3141 tree clobber = build_constructor (ratype, NULL);
3142 TREE_THIS_VOLATILE (clobber) = 1;
3143 new_stmt = gimple_build_assign (new_temp, clobber);
3144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3146 continue;
3148 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3150 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3151 / TYPE_VECTOR_SUBPARTS (rtype));
3152 gcc_assert ((k & (k - 1)) == 0);
3153 if ((j & (k - 1)) == 0)
3154 vec_alloc (ret_ctor_elts, k);
3155 if (ratype)
3157 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3158 for (m = 0; m < o; m++)
3160 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3161 size_int (m), NULL_TREE, NULL_TREE);
3162 new_stmt
3163 = gimple_build_assign (make_ssa_name (rtype), tem);
3164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3165 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3166 gimple_assign_lhs (new_stmt));
3168 tree clobber = build_constructor (ratype, NULL);
3169 TREE_THIS_VOLATILE (clobber) = 1;
3170 new_stmt = gimple_build_assign (new_temp, clobber);
3171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3173 else
3174 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3175 if ((j & (k - 1)) != k - 1)
3176 continue;
3177 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3178 new_stmt
3179 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3182 if ((unsigned) j == k - 1)
3183 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3184 else
3185 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3187 prev_stmt_info = vinfo_for_stmt (new_stmt);
3188 continue;
3190 else if (ratype)
3192 tree t = build_fold_addr_expr (new_temp);
3193 t = build2 (MEM_REF, vectype, t,
3194 build_int_cst (TREE_TYPE (t), 0));
3195 new_stmt
3196 = gimple_build_assign (make_ssa_name (vec_dest), t);
3197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3198 tree clobber = build_constructor (ratype, NULL);
3199 TREE_THIS_VOLATILE (clobber) = 1;
3200 vect_finish_stmt_generation (stmt,
3201 gimple_build_assign (new_temp,
3202 clobber), gsi);
3206 if (j == 0)
3207 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3208 else
3209 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3211 prev_stmt_info = vinfo_for_stmt (new_stmt);
3214 vargs.release ();
3216 /* The call in STMT might prevent it from being removed in dce.
3217 However, we cannot remove it here, because of the way the ssa name
3218 it defines is mapped to the new definition. So just replace the
3219 rhs of the statement with something harmless. */
3221 if (slp_node)
3222 return true;
3224 if (scalar_dest)
3226 type = TREE_TYPE (scalar_dest);
3227 if (is_pattern_stmt_p (stmt_info))
3228 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3229 else
3230 lhs = gimple_call_lhs (stmt);
3231 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3233 else
3234 new_stmt = gimple_build_nop ();
3235 set_vinfo_for_stmt (new_stmt, stmt_info);
3236 set_vinfo_for_stmt (stmt, NULL);
3237 STMT_VINFO_STMT (stmt_info) = new_stmt;
3238 gsi_replace (gsi, new_stmt, true);
3239 unlink_stmt_vdef (stmt);
3241 return true;
3245 /* Function vect_gen_widened_results_half
3247 Create a vector stmt whose code is CODE, whose number of operands is given
3248 by OP_TYPE, whose result variable is VEC_DEST, and whose arguments are
3249 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3250 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3251 needs to be created (DECL is a function-decl of a target-builtin).
3252 STMT is the original scalar stmt that we are vectorizing. */
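/* Illustrative note (not part of the original source): a widened result does
   not fit in a single vector, so the caller invokes this helper twice, once
   with the "low" and once with the "high" code or builtin decl (e.g.
   VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR), each call producing one
   half of the widened elements.  */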
3254 static gimple
3255 vect_gen_widened_results_half (enum tree_code code,
3256 tree decl,
3257 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3258 tree vec_dest, gimple_stmt_iterator *gsi,
3259 gimple stmt)
3261 gimple new_stmt;
3262 tree new_temp;
3264 /* Generate half of the widened result: */
3265 if (code == CALL_EXPR)
3267 /* Target specific support */
3268 if (op_type == binary_op)
3269 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3270 else
3271 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3272 new_temp = make_ssa_name (vec_dest, new_stmt);
3273 gimple_call_set_lhs (new_stmt, new_temp);
3275 else
3277 /* Generic support */
3278 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3279 if (op_type != binary_op)
3280 vec_oprnd1 = NULL;
3281 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3282 new_temp = make_ssa_name (vec_dest, new_stmt);
3283 gimple_assign_set_lhs (new_stmt, new_temp);
3285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3287 return new_stmt;
3291 /* Get vectorized definitions for loop-based vectorization. For the first
3292 operand we call vect_get_vec_def_for_operand() (with OPRND containing the
3293 scalar operand), and for the rest we get a copy with
3294 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3295 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3296 The vectors are collected into VEC_OPRNDS. */
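/* Illustrative note (not part of the original source): each invocation pushes
   a pair of vector defs (the def for the operand and one stmt-copy of it),
   which is exactly the pair consumed per step by
   vect_create_vectorized_demotion_stmts below.  */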
3298 static void
3299 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3300 vec<tree> *vec_oprnds, int multi_step_cvt)
3302 tree vec_oprnd;
3304 /* Get first vector operand. */
3305 /* All the vector operands except the very first one (which is the scalar
3306 operand) are stmt copies. */
3307 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3308 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3309 else
3310 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3312 vec_oprnds->quick_push (vec_oprnd);
3314 /* Get second vector operand. */
3315 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3316 vec_oprnds->quick_push (vec_oprnd);
3318 *oprnd = vec_oprnd;
3320 /* For conversion in multiple steps, continue to get operands
3321 recursively. */
3322 if (multi_step_cvt)
3323 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3327 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3328 For multi-step conversions store the resulting vectors and call the function
3329 recursively. */
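/* Illustrative note (not part of the original source): each step packs two
   input vectors into one narrower-element vector, e.g. two V4SI operands
   into one V8HI with VEC_PACK_TRUNC_EXPR; for a multi-step demotion such as
   SImode -> QImode the intermediate results are packed again in the
   recursive call.  */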
3331 static void
3332 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3333 int multi_step_cvt, gimple stmt,
3334 vec<tree> vec_dsts,
3335 gimple_stmt_iterator *gsi,
3336 slp_tree slp_node, enum tree_code code,
3337 stmt_vec_info *prev_stmt_info)
3339 unsigned int i;
3340 tree vop0, vop1, new_tmp, vec_dest;
3341 gimple new_stmt;
3342 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3344 vec_dest = vec_dsts.pop ();
3346 for (i = 0; i < vec_oprnds->length (); i += 2)
3348 /* Create demotion operation. */
3349 vop0 = (*vec_oprnds)[i];
3350 vop1 = (*vec_oprnds)[i + 1];
3351 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3352 new_tmp = make_ssa_name (vec_dest, new_stmt);
3353 gimple_assign_set_lhs (new_stmt, new_tmp);
3354 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3356 if (multi_step_cvt)
3357 /* Store the resulting vector for next recursive call. */
3358 (*vec_oprnds)[i/2] = new_tmp;
3359 else
3361 /* This is the last step of the conversion sequence. Store the
3362 vectors in SLP_NODE or in vector info of the scalar statement
3363 (or in STMT_VINFO_RELATED_STMT chain). */
3364 if (slp_node)
3365 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3366 else
3368 if (!*prev_stmt_info)
3369 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3370 else
3371 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3373 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3378 /* For multi-step demotion operations we first generate demotion operations
3379 from the source type to the intermediate types, and then combine the
3380 results (stored in VEC_OPRNDS) in demotion operation to the destination
3381 type. */
3382 if (multi_step_cvt)
3384 /* At each level of recursion we have half of the operands we had at the
3385 previous level. */
3386 vec_oprnds->truncate ((i+1)/2);
3387 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3388 stmt, vec_dsts, gsi, slp_node,
3389 VEC_PACK_TRUNC_EXPR,
3390 prev_stmt_info);
3393 vec_dsts.quick_push (vec_dest);
3397 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3398 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3399 the resulting vectors and call the function recursively. */
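/* Illustrative note (not part of the original source): promotion is the dual
   of the demotion above, e.g. one V8HI operand unpacks into two V4SI results
   via the lo/hi halves generated by vect_gen_widened_results_half; the
   doubled result list replaces *VEC_OPRNDS0 so that a further promotion step
   can consume it.  */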
3401 static void
3402 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3403 vec<tree> *vec_oprnds1,
3404 gimple stmt, tree vec_dest,
3405 gimple_stmt_iterator *gsi,
3406 enum tree_code code1,
3407 enum tree_code code2, tree decl1,
3408 tree decl2, int op_type)
3410 int i;
3411 tree vop0, vop1, new_tmp1, new_tmp2;
3412 gimple new_stmt1, new_stmt2;
3413 vec<tree> vec_tmp = vNULL;
3415 vec_tmp.create (vec_oprnds0->length () * 2);
3416 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3418 if (op_type == binary_op)
3419 vop1 = (*vec_oprnds1)[i];
3420 else
3421 vop1 = NULL_TREE;
3423 /* Generate the two halves of promotion operation. */
3424 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3425 op_type, vec_dest, gsi, stmt);
3426 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3427 op_type, vec_dest, gsi, stmt);
3428 if (is_gimple_call (new_stmt1))
3430 new_tmp1 = gimple_call_lhs (new_stmt1);
3431 new_tmp2 = gimple_call_lhs (new_stmt2);
3433 else
3435 new_tmp1 = gimple_assign_lhs (new_stmt1);
3436 new_tmp2 = gimple_assign_lhs (new_stmt2);
3439 /* Store the results for the next step. */
3440 vec_tmp.quick_push (new_tmp1);
3441 vec_tmp.quick_push (new_tmp2);
3444 vec_oprnds0->release ();
3445 *vec_oprnds0 = vec_tmp;
3449 /* Check if STMT performs a conversion operation that can be vectorized.
3450 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3451 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3452 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
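/* Illustrative note (not part of the original source): a typical WIDEN case is

     for (int i = 0; i < n; i++)
       d[i] = (double) f[i];

   where each V4SF operand is converted into two V2DF results; the NARROW
   direction packs two inputs into one output, and some conversions (e.g.
   unsigned char -> float) go through an intermediate integer type built
   below with build_nonstandard_integer_type.  */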
3454 static bool
3455 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3456 gimple *vec_stmt, slp_tree slp_node)
3458 tree vec_dest;
3459 tree scalar_dest;
3460 tree op0, op1 = NULL_TREE;
3461 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3462 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3463 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3464 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3465 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3466 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3467 tree new_temp;
3468 tree def;
3469 gimple def_stmt;
3470 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3471 gimple new_stmt = NULL;
3472 stmt_vec_info prev_stmt_info;
3473 int nunits_in;
3474 int nunits_out;
3475 tree vectype_out, vectype_in;
3476 int ncopies, i, j;
3477 tree lhs_type, rhs_type;
3478 enum { NARROW, NONE, WIDEN } modifier;
3479 vec<tree> vec_oprnds0 = vNULL;
3480 vec<tree> vec_oprnds1 = vNULL;
3481 tree vop0;
3482 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3483 int multi_step_cvt = 0;
3484 vec<tree> vec_dsts = vNULL;
3485 vec<tree> interm_types = vNULL;
3486 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3487 int op_type;
3488 machine_mode rhs_mode;
3489 unsigned short fltsz;
3491 /* Is STMT a vectorizable conversion? */
3493 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3494 return false;
3496 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3497 return false;
3499 if (!is_gimple_assign (stmt))
3500 return false;
3502 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3503 return false;
3505 code = gimple_assign_rhs_code (stmt);
3506 if (!CONVERT_EXPR_CODE_P (code)
3507 && code != FIX_TRUNC_EXPR
3508 && code != FLOAT_EXPR
3509 && code != WIDEN_MULT_EXPR
3510 && code != WIDEN_LSHIFT_EXPR)
3511 return false;
3513 op_type = TREE_CODE_LENGTH (code);
3515 /* Check types of lhs and rhs. */
3516 scalar_dest = gimple_assign_lhs (stmt);
3517 lhs_type = TREE_TYPE (scalar_dest);
3518 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3520 op0 = gimple_assign_rhs1 (stmt);
3521 rhs_type = TREE_TYPE (op0);
3523 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3524 && !((INTEGRAL_TYPE_P (lhs_type)
3525 && INTEGRAL_TYPE_P (rhs_type))
3526 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3527 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3528 return false;
3530 if ((INTEGRAL_TYPE_P (lhs_type)
3531 && (TYPE_PRECISION (lhs_type)
3532 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3533 || (INTEGRAL_TYPE_P (rhs_type)
3534 && (TYPE_PRECISION (rhs_type)
3535 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3537 if (dump_enabled_p ())
3538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3539 "type conversion to/from bit-precision unsupported."
3540 "\n");
3541 return false;
3544 /* Check the operands of the operation. */
3545 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3546 &def_stmt, &def, &dt[0], &vectype_in))
3548 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3550 "use not simple.\n");
3551 return false;
3553 if (op_type == binary_op)
3555 bool ok;
3557 op1 = gimple_assign_rhs2 (stmt);
3558 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3559 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3560 OP1. */
3561 if (CONSTANT_CLASS_P (op0))
3562 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3563 &def_stmt, &def, &dt[1], &vectype_in);
3564 else
3565 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3566 &def, &dt[1]);
3568 if (!ok)
3570 if (dump_enabled_p ())
3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3572 "use not simple.\n");
3573 return false;
3577 /* If op0 is an external or constant defs use a vector type of
3578 the same size as the output vector type. */
3579 if (!vectype_in)
3580 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3581 if (vec_stmt)
3582 gcc_assert (vectype_in);
3583 if (!vectype_in)
3585 if (dump_enabled_p ())
3587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3588 "no vectype for scalar type ");
3589 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3590 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3593 return false;
3596 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3597 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3598 if (nunits_in < nunits_out)
3599 modifier = NARROW;
3600 else if (nunits_out == nunits_in)
3601 modifier = NONE;
3602 else
3603 modifier = WIDEN;
3605 /* Multiple types in SLP are handled by creating the appropriate number of
3606 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3607 case of SLP. */
3608 if (slp_node || PURE_SLP_STMT (stmt_info))
3609 ncopies = 1;
3610 else if (modifier == NARROW)
3611 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3612 else
3613 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3615 /* Sanity check: make sure that at least one copy of the vectorized stmt
3616 needs to be generated. */
3617 gcc_assert (ncopies >= 1);
3619 /* Supportable by target? */
3620 switch (modifier)
3622 case NONE:
3623 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3624 return false;
3625 if (supportable_convert_operation (code, vectype_out, vectype_in,
3626 &decl1, &code1))
3627 break;
3628 /* FALLTHRU */
3629 unsupported:
3630 if (dump_enabled_p ())
3631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3632 "conversion not supported by target.\n");
3633 return false;
3635 case WIDEN:
3636 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3637 &code1, &code2, &multi_step_cvt,
3638 &interm_types))
3640 /* Binary widening operation can only be supported directly by the
3641 architecture. */
3642 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3643 break;
3646 if (code != FLOAT_EXPR
3647 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3648 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3649 goto unsupported;
3651 rhs_mode = TYPE_MODE (rhs_type);
3652 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3653 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3654 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3655 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3657 cvt_type
3658 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3659 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3660 if (cvt_type == NULL_TREE)
3661 goto unsupported;
3663 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3665 if (!supportable_convert_operation (code, vectype_out,
3666 cvt_type, &decl1, &codecvt1))
3667 goto unsupported;
3669 else if (!supportable_widening_operation (code, stmt, vectype_out,
3670 cvt_type, &codecvt1,
3671 &codecvt2, &multi_step_cvt,
3672 &interm_types))
3673 continue;
3674 else
3675 gcc_assert (multi_step_cvt == 0);
3677 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3678 vectype_in, &code1, &code2,
3679 &multi_step_cvt, &interm_types))
3680 break;
3683 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3684 goto unsupported;
3686 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3687 codecvt2 = ERROR_MARK;
3688 else
3690 multi_step_cvt++;
3691 interm_types.safe_push (cvt_type);
3692 cvt_type = NULL_TREE;
3694 break;
3696 case NARROW:
3697 gcc_assert (op_type == unary_op);
3698 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3699 &code1, &multi_step_cvt,
3700 &interm_types))
3701 break;
3703 if (code != FIX_TRUNC_EXPR
3704 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3705 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3706 goto unsupported;
3708 rhs_mode = TYPE_MODE (rhs_type);
3709 cvt_type
3710 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3711 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3712 if (cvt_type == NULL_TREE)
3713 goto unsupported;
3714 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3715 &decl1, &codecvt1))
3716 goto unsupported;
3717 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3718 &code1, &multi_step_cvt,
3719 &interm_types))
3720 break;
3721 goto unsupported;
3723 default:
3724 gcc_unreachable ();
3727 if (!vec_stmt) /* transformation not required. */
3729 if (dump_enabled_p ())
3730 dump_printf_loc (MSG_NOTE, vect_location,
3731 "=== vectorizable_conversion ===\n");
3732 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3734 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3735 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3737 else if (modifier == NARROW)
3739 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3740 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3742 else
3744 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3745 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3747 interm_types.release ();
3748 return true;
3751 /** Transform. **/
3752 if (dump_enabled_p ())
3753 dump_printf_loc (MSG_NOTE, vect_location,
3754 "transform conversion. ncopies = %d.\n", ncopies);
3756 if (op_type == binary_op)
3758 if (CONSTANT_CLASS_P (op0))
3759 op0 = fold_convert (TREE_TYPE (op1), op0);
3760 else if (CONSTANT_CLASS_P (op1))
3761 op1 = fold_convert (TREE_TYPE (op0), op1);
3764 /* In case of multi-step conversion, we first generate conversion operations
3765 to the intermediate types, and then from those types to the final one.
3766 We create vector destinations for the intermediate type (TYPES) received
3767 from supportable_*_operation, and store them in the correct order
3768 for future use in vect_create_vectorized_*_stmts (). */
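/* For example, a single scalar conversion from char to float may be
   vectorized as char -> short -> int widenings followed by an int -> float
   conversion, with a separate vector destination created for each step.  */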
3769 vec_dsts.create (multi_step_cvt + 1);
3770 vec_dest = vect_create_destination_var (scalar_dest,
3771 (cvt_type && modifier == WIDEN)
3772 ? cvt_type : vectype_out);
3773 vec_dsts.quick_push (vec_dest);
3775 if (multi_step_cvt)
3777 for (i = interm_types.length () - 1;
3778 interm_types.iterate (i, &intermediate_type); i--)
3780 vec_dest = vect_create_destination_var (scalar_dest,
3781 intermediate_type);
3782 vec_dsts.quick_push (vec_dest);
3786 if (cvt_type)
3787 vec_dest = vect_create_destination_var (scalar_dest,
3788 modifier == WIDEN
3789 ? vectype_out : cvt_type);
3791 if (!slp_node)
3793 if (modifier == WIDEN)
3795 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3796 if (op_type == binary_op)
3797 vec_oprnds1.create (1);
3799 else if (modifier == NARROW)
3800 vec_oprnds0.create (
3801 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3803 else if (code == WIDEN_LSHIFT_EXPR)
3804 vec_oprnds1.create (slp_node->vec_stmts_size);
3806 last_oprnd = op0;
3807 prev_stmt_info = NULL;
3808 switch (modifier)
3810 case NONE:
3811 for (j = 0; j < ncopies; j++)
3813 if (j == 0)
3814 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3815 -1);
3816 else
3817 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3819 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3821 /* Arguments are ready, create the new vector stmt. */
3822 if (code1 == CALL_EXPR)
3824 new_stmt = gimple_build_call (decl1, 1, vop0);
3825 new_temp = make_ssa_name (vec_dest, new_stmt);
3826 gimple_call_set_lhs (new_stmt, new_temp);
3828 else
3830 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3831 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3832 new_temp = make_ssa_name (vec_dest, new_stmt);
3833 gimple_assign_set_lhs (new_stmt, new_temp);
3836 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3837 if (slp_node)
3838 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3841 if (j == 0)
3842 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3843 else
3844 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3845 prev_stmt_info = vinfo_for_stmt (new_stmt);
3847 break;
3849 case WIDEN:
3850 /* In case the vectorization factor (VF) is bigger than the number
3851 of elements that we can fit in a vectype (nunits), we have to
3852 generate more than one vector stmt - i.e - we need to "unroll"
3853 the vector stmt by a factor VF/nunits. */
3854 for (j = 0; j < ncopies; j++)
3856 /* Handle uses. */
3857 if (j == 0)
3859 if (slp_node)
3861 if (code == WIDEN_LSHIFT_EXPR)
3863 unsigned int k;
3865 vec_oprnd1 = op1;
3866 /* Store vec_oprnd1 for every vector stmt to be created
3867 for SLP_NODE. We check during the analysis that all
3868 the shift arguments are the same. */
3869 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3870 vec_oprnds1.quick_push (vec_oprnd1);
3872 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3873 slp_node, -1);
3875 else
3876 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3877 &vec_oprnds1, slp_node, -1);
3879 else
3881 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3882 vec_oprnds0.quick_push (vec_oprnd0);
3883 if (op_type == binary_op)
3885 if (code == WIDEN_LSHIFT_EXPR)
3886 vec_oprnd1 = op1;
3887 else
3888 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3889 NULL);
3890 vec_oprnds1.quick_push (vec_oprnd1);
3894 else
3896 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3897 vec_oprnds0.truncate (0);
3898 vec_oprnds0.quick_push (vec_oprnd0);
3899 if (op_type == binary_op)
3901 if (code == WIDEN_LSHIFT_EXPR)
3902 vec_oprnd1 = op1;
3903 else
3904 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3905 vec_oprnd1);
3906 vec_oprnds1.truncate (0);
3907 vec_oprnds1.quick_push (vec_oprnd1);
3911 /* Arguments are ready. Create the new vector stmts. */
3912 for (i = multi_step_cvt; i >= 0; i--)
3914 tree this_dest = vec_dsts[i];
3915 enum tree_code c1 = code1, c2 = code2;
3916 if (i == 0 && codecvt2 != ERROR_MARK)
3918 c1 = codecvt1;
3919 c2 = codecvt2;
3921 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3922 &vec_oprnds1,
3923 stmt, this_dest, gsi,
3924 c1, c2, decl1, decl2,
3925 op_type);
3928 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3930 if (cvt_type)
3932 if (codecvt1 == CALL_EXPR)
3934 new_stmt = gimple_build_call (decl1, 1, vop0);
3935 new_temp = make_ssa_name (vec_dest, new_stmt);
3936 gimple_call_set_lhs (new_stmt, new_temp);
3938 else
3940 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3941 new_temp = make_ssa_name (vec_dest);
3942 new_stmt = gimple_build_assign (new_temp, codecvt1,
3943 vop0);
3946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3948 else
3949 new_stmt = SSA_NAME_DEF_STMT (vop0);
3951 if (slp_node)
3952 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3953 else
3955 if (!prev_stmt_info)
3956 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3957 else
3958 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3959 prev_stmt_info = vinfo_for_stmt (new_stmt);
3964 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3965 break;
3967 case NARROW:
3968 /* In case the vectorization factor (VF) is bigger than the number
3969 of elements that we can fit in a vectype (nunits), we have to
3970 generate more than one vector stmt - i.e - we need to "unroll"
3971 the vector stmt by a factor VF/nunits. */
3972 for (j = 0; j < ncopies; j++)
3974 /* Handle uses. */
3975 if (slp_node)
3976 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3977 slp_node, -1);
3978 else
3980 vec_oprnds0.truncate (0);
3981 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3982 vect_pow2 (multi_step_cvt) - 1);
3985 /* Arguments are ready. Create the new vector stmts. */
3986 if (cvt_type)
3987 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3989 if (codecvt1 == CALL_EXPR)
3991 new_stmt = gimple_build_call (decl1, 1, vop0);
3992 new_temp = make_ssa_name (vec_dest, new_stmt);
3993 gimple_call_set_lhs (new_stmt, new_temp);
3995 else
3997 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3998 new_temp = make_ssa_name (vec_dest);
3999 new_stmt = gimple_build_assign (new_temp, codecvt1,
4000 vop0);
4003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4004 vec_oprnds0[i] = new_temp;
4007 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4008 stmt, vec_dsts, gsi,
4009 slp_node, code1,
4010 &prev_stmt_info);
4013 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4014 break;
4017 vec_oprnds0.release ();
4018 vec_oprnds1.release ();
4019 vec_dsts.release ();
4020 interm_types.release ();
4022 return true;
4026 /* Function vectorizable_assignment.
4028 Check if STMT performs an assignment (copy) that can be vectorized.
4029 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4030 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4031 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4033 static bool
4034 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4035 gimple *vec_stmt, slp_tree slp_node)
4037 tree vec_dest;
4038 tree scalar_dest;
4039 tree op;
4040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4041 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4043 tree new_temp;
4044 tree def;
4045 gimple def_stmt;
4046 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4047 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4048 int ncopies;
4049 int i, j;
4050 vec<tree> vec_oprnds = vNULL;
4051 tree vop;
4052 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4053 gimple new_stmt = NULL;
4054 stmt_vec_info prev_stmt_info = NULL;
4055 enum tree_code code;
4056 tree vectype_in;
4058 /* Multiple types in SLP are handled by creating the appropriate number of
4059 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4060 case of SLP. */
4061 if (slp_node || PURE_SLP_STMT (stmt_info))
4062 ncopies = 1;
4063 else
4064 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4066 gcc_assert (ncopies >= 1);
4068 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4069 return false;
4071 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4072 return false;
4074 /* Is vectorizable assignment? */
4075 if (!is_gimple_assign (stmt))
4076 return false;
4078 scalar_dest = gimple_assign_lhs (stmt);
4079 if (TREE_CODE (scalar_dest) != SSA_NAME)
4080 return false;
4082 code = gimple_assign_rhs_code (stmt);
4083 if (gimple_assign_single_p (stmt)
4084 || code == PAREN_EXPR
4085 || CONVERT_EXPR_CODE_P (code))
4086 op = gimple_assign_rhs1 (stmt);
4087 else
4088 return false;
4090 if (code == VIEW_CONVERT_EXPR)
4091 op = TREE_OPERAND (op, 0);
4093 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4094 &def_stmt, &def, &dt[0], &vectype_in))
4096 if (dump_enabled_p ())
4097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4098 "use not simple.\n");
4099 return false;
4102 /* We can handle NOP_EXPR conversions that do not change the number
4103 of elements or the vector size. */
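/* For instance, a conversion between int and unsigned int vectors changes
   neither the element count nor the vector size, and is emitted below as a
   plain copy through a VIEW_CONVERT_EXPR.  */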
4104 if ((CONVERT_EXPR_CODE_P (code)
4105 || code == VIEW_CONVERT_EXPR)
4106 && (!vectype_in
4107 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4108 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4109 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4110 return false;
4112 /* We do not handle bit-precision changes. */
4113 if ((CONVERT_EXPR_CODE_P (code)
4114 || code == VIEW_CONVERT_EXPR)
4115 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4116 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4117 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4118 || ((TYPE_PRECISION (TREE_TYPE (op))
4119 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4120 /* But a conversion that does not change the bit-pattern is ok. */
4121 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4122 > TYPE_PRECISION (TREE_TYPE (op)))
4123 && TYPE_UNSIGNED (TREE_TYPE (op))))
4125 if (dump_enabled_p ())
4126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4127 "type conversion to/from bit-precision "
4128 "unsupported.\n");
4129 return false;
4132 if (!vec_stmt) /* transformation not required. */
4134 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4135 if (dump_enabled_p ())
4136 dump_printf_loc (MSG_NOTE, vect_location,
4137 "=== vectorizable_assignment ===\n");
4138 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4139 return true;
4142 /** Transform. **/
4143 if (dump_enabled_p ())
4144 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4146 /* Handle def. */
4147 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4149 /* Handle use. */
4150 for (j = 0; j < ncopies; j++)
4152 /* Handle uses. */
4153 if (j == 0)
4154 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4155 else
4156 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4158 /* Arguments are ready. Create the new vector stmt. */
4159 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4161 if (CONVERT_EXPR_CODE_P (code)
4162 || code == VIEW_CONVERT_EXPR)
4163 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4164 new_stmt = gimple_build_assign (vec_dest, vop);
4165 new_temp = make_ssa_name (vec_dest, new_stmt);
4166 gimple_assign_set_lhs (new_stmt, new_temp);
4167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4168 if (slp_node)
4169 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4172 if (slp_node)
4173 continue;
4175 if (j == 0)
4176 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4177 else
4178 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4180 prev_stmt_info = vinfo_for_stmt (new_stmt);
4183 vec_oprnds.release ();
4184 return true;
4188 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4189 either as shift by a scalar or by a vector. */
4191 bool
4192 vect_supportable_shift (enum tree_code code, tree scalar_type)
4195 machine_mode vec_mode;
4196 optab optab;
4197 int icode;
4198 tree vectype;
4200 vectype = get_vectype_for_scalar_type (scalar_type);
4201 if (!vectype)
4202 return false;
4204 optab = optab_for_tree_code (code, vectype, optab_scalar);
4205 if (!optab
4206 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4208 optab = optab_for_tree_code (code, vectype, optab_vector);
4209 if (!optab
4210 || (optab_handler (optab, TYPE_MODE (vectype))
4211 == CODE_FOR_nothing))
4212 return false;
4215 vec_mode = TYPE_MODE (vectype);
4216 icode = (int) optab_handler (optab, vec_mode);
4217 if (icode == CODE_FOR_nothing)
4218 return false;
4220 return true;
4224 /* Function vectorizable_shift.
4226 Check if STMT performs a shift operation that can be vectorized.
4227 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4228 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4231 static bool
4232 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4233 gimple *vec_stmt, slp_tree slp_node)
4235 tree vec_dest;
4236 tree scalar_dest;
4237 tree op0, op1 = NULL;
4238 tree vec_oprnd1 = NULL_TREE;
4239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4240 tree vectype;
4241 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4242 enum tree_code code;
4243 machine_mode vec_mode;
4244 tree new_temp;
4245 optab optab;
4246 int icode;
4247 machine_mode optab_op2_mode;
4248 tree def;
4249 gimple def_stmt;
4250 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4251 gimple new_stmt = NULL;
4252 stmt_vec_info prev_stmt_info;
4253 int nunits_in;
4254 int nunits_out;
4255 tree vectype_out;
4256 tree op1_vectype;
4257 int ncopies;
4258 int j, i;
4259 vec<tree> vec_oprnds0 = vNULL;
4260 vec<tree> vec_oprnds1 = vNULL;
4261 tree vop0, vop1;
4262 unsigned int k;
4263 bool scalar_shift_arg = true;
4264 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4265 int vf;
4267 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4268 return false;
4270 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4271 return false;
4273 /* Is STMT a vectorizable binary/unary operation? */
4274 if (!is_gimple_assign (stmt))
4275 return false;
4277 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4278 return false;
4280 code = gimple_assign_rhs_code (stmt);
4282 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4283 || code == RROTATE_EXPR))
4284 return false;
4286 scalar_dest = gimple_assign_lhs (stmt);
4287 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4288 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4289 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4291 if (dump_enabled_p ())
4292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4293 "bit-precision shifts not supported.\n");
4294 return false;
4297 op0 = gimple_assign_rhs1 (stmt);
4298 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4299 &def_stmt, &def, &dt[0], &vectype))
4301 if (dump_enabled_p ())
4302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4303 "use not simple.\n");
4304 return false;
4306 /* If op0 is an external or constant def use a vector type with
4307 the same size as the output vector type. */
4308 if (!vectype)
4309 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4310 if (vec_stmt)
4311 gcc_assert (vectype);
4312 if (!vectype)
4314 if (dump_enabled_p ())
4315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4316 "no vectype for scalar type\n");
4317 return false;
4320 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4321 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4322 if (nunits_out != nunits_in)
4323 return false;
4325 op1 = gimple_assign_rhs2 (stmt);
4326 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4327 &def, &dt[1], &op1_vectype))
4329 if (dump_enabled_p ())
4330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4331 "use not simple.\n");
4332 return false;
4335 if (loop_vinfo)
4336 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4337 else
4338 vf = 1;
4340 /* Multiple types in SLP are handled by creating the appropriate number of
4341 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4342 case of SLP. */
4343 if (slp_node || PURE_SLP_STMT (stmt_info))
4344 ncopies = 1;
4345 else
4346 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4348 gcc_assert (ncopies >= 1);
4350 /* Determine whether the shift amount is a vector or a scalar. If the
4351 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4353 if (dt[1] == vect_internal_def && !slp_node)
4354 scalar_shift_arg = false;
4355 else if (dt[1] == vect_constant_def
4356 || dt[1] == vect_external_def
4357 || dt[1] == vect_internal_def)
4359 /* In SLP we need to check whether the shift count is the same for
4360 all the statements; in loops, if it is a constant or invariant, it
4361 is always a scalar shift. */
4362 if (slp_node)
4364 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4365 gimple slpstmt;
4367 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4368 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4369 scalar_shift_arg = false;
4372 else
4374 if (dump_enabled_p ())
4375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4376 "operand mode requires invariant argument.\n");
4377 return false;
4380 /* Vector shifted by vector. */
4381 if (!scalar_shift_arg)
4383 optab = optab_for_tree_code (code, vectype, optab_vector);
4384 if (dump_enabled_p ())
4385 dump_printf_loc (MSG_NOTE, vect_location,
4386 "vector/vector shift/rotate found.\n");
4388 if (!op1_vectype)
4389 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4390 if (op1_vectype == NULL_TREE
4391 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4393 if (dump_enabled_p ())
4394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4395 "unusable type for last operand in"
4396 " vector/vector shift/rotate.\n");
4397 return false;
4400 /* See if the machine has a vector shifted by scalar insn and if not
4401 then see if it has a vector shifted by vector insn. */
4402 else
4404 optab = optab_for_tree_code (code, vectype, optab_scalar);
4405 if (optab
4406 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4408 if (dump_enabled_p ())
4409 dump_printf_loc (MSG_NOTE, vect_location,
4410 "vector/scalar shift/rotate found.\n");
4412 else
4414 optab = optab_for_tree_code (code, vectype, optab_vector);
4415 if (optab
4416 && (optab_handler (optab, TYPE_MODE (vectype))
4417 != CODE_FOR_nothing))
4419 scalar_shift_arg = false;
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_NOTE, vect_location,
4423 "vector/vector shift/rotate found.\n");
4425 /* Unlike the other binary operators, shifts/rotates have
4426 the rhs being int, instead of the same type as the lhs,
4427 so make sure the scalar is the right type if we are
4428 dealing with vectors of long long/long/short/char. */
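/* E.g. the shift count for a vector of shorts may appear as a 32-bit int
   constant in the IL; it is folded to the element type here so that the
   vector/vector shift sees matching operand types.  */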
4429 if (dt[1] == vect_constant_def)
4430 op1 = fold_convert (TREE_TYPE (vectype), op1);
4431 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4432 TREE_TYPE (op1)))
4434 if (slp_node
4435 && TYPE_MODE (TREE_TYPE (vectype))
4436 != TYPE_MODE (TREE_TYPE (op1)))
4438 if (dump_enabled_p ())
4439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4440 "unusable type for last operand in"
4441 " vector/vector shift/rotate.\n");
4442 return false;
4444 if (vec_stmt && !slp_node)
4446 op1 = fold_convert (TREE_TYPE (vectype), op1);
4447 op1 = vect_init_vector (stmt, op1,
4448 TREE_TYPE (vectype), NULL);
4455 /* Supportable by target? */
4456 if (!optab)
4458 if (dump_enabled_p ())
4459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4460 "no optab.\n");
4461 return false;
4463 vec_mode = TYPE_MODE (vectype);
4464 icode = (int) optab_handler (optab, vec_mode);
4465 if (icode == CODE_FOR_nothing)
4467 if (dump_enabled_p ())
4468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4469 "op not supported by target.\n");
4470 /* Check only during analysis. */
4471 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4472 || (vf < vect_min_worthwhile_factor (code)
4473 && !vec_stmt))
4474 return false;
4475 if (dump_enabled_p ())
4476 dump_printf_loc (MSG_NOTE, vect_location,
4477 "proceeding using word mode.\n");
4480 /* Worthwhile without SIMD support? Check only during analysis. */
4481 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4482 && vf < vect_min_worthwhile_factor (code)
4483 && !vec_stmt)
4485 if (dump_enabled_p ())
4486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4487 "not worthwhile without SIMD support.\n");
4488 return false;
4491 if (!vec_stmt) /* transformation not required. */
4493 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4494 if (dump_enabled_p ())
4495 dump_printf_loc (MSG_NOTE, vect_location,
4496 "=== vectorizable_shift ===\n");
4497 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4498 return true;
4501 /** Transform. **/
4503 if (dump_enabled_p ())
4504 dump_printf_loc (MSG_NOTE, vect_location,
4505 "transform binary/unary operation.\n");
4507 /* Handle def. */
4508 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4510 prev_stmt_info = NULL;
4511 for (j = 0; j < ncopies; j++)
4513 /* Handle uses. */
4514 if (j == 0)
4516 if (scalar_shift_arg)
4518 /* Vector shl and shr insn patterns can be defined with scalar
4519 operand 2 (shift operand). In this case, use constant or loop
4520 invariant op1 directly, without extending it to vector mode
4521 first. */
4522 optab_op2_mode = insn_data[icode].operand[2].mode;
4523 if (!VECTOR_MODE_P (optab_op2_mode))
4525 if (dump_enabled_p ())
4526 dump_printf_loc (MSG_NOTE, vect_location,
4527 "operand 1 using scalar mode.\n");
4528 vec_oprnd1 = op1;
4529 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4530 vec_oprnds1.quick_push (vec_oprnd1);
4531 if (slp_node)
4533 /* Store vec_oprnd1 for every vector stmt to be created
4534 for SLP_NODE. We check during the analysis that all
4535 the shift arguments are the same.
4536 TODO: Allow different constants for different vector
4537 stmts generated for an SLP instance. */
4538 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4539 vec_oprnds1.quick_push (vec_oprnd1);
4544 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4545 (a special case for certain kinds of vector shifts); otherwise,
4546 operand 1 should be of a vector type (the usual case). */
4547 if (vec_oprnd1)
4548 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4549 slp_node, -1);
4550 else
4551 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4552 slp_node, -1);
4554 else
4555 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4557 /* Arguments are ready. Create the new vector stmt. */
4558 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4560 vop1 = vec_oprnds1[i];
4561 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4562 new_temp = make_ssa_name (vec_dest, new_stmt);
4563 gimple_assign_set_lhs (new_stmt, new_temp);
4564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4565 if (slp_node)
4566 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4569 if (slp_node)
4570 continue;
4572 if (j == 0)
4573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4574 else
4575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4576 prev_stmt_info = vinfo_for_stmt (new_stmt);
4579 vec_oprnds0.release ();
4580 vec_oprnds1.release ();
4582 return true;
4586 /* Function vectorizable_operation.
4588 Check if STMT performs a binary, unary or ternary operation that can
4589 be vectorized.
4590 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4591 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4592 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4594 static bool
4595 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4596 gimple *vec_stmt, slp_tree slp_node)
4598 tree vec_dest;
4599 tree scalar_dest;
4600 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4601 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4602 tree vectype;
4603 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4604 enum tree_code code;
4605 machine_mode vec_mode;
4606 tree new_temp;
4607 int op_type;
4608 optab optab;
4609 int icode;
4610 tree def;
4611 gimple def_stmt;
4612 enum vect_def_type dt[3]
4613 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4614 gimple new_stmt = NULL;
4615 stmt_vec_info prev_stmt_info;
4616 int nunits_in;
4617 int nunits_out;
4618 tree vectype_out;
4619 int ncopies;
4620 int j, i;
4621 vec<tree> vec_oprnds0 = vNULL;
4622 vec<tree> vec_oprnds1 = vNULL;
4623 vec<tree> vec_oprnds2 = vNULL;
4624 tree vop0, vop1, vop2;
4625 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4626 int vf;
4628 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4629 return false;
4631 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4632 return false;
4634 /* Is STMT a vectorizable binary/unary operation? */
4635 if (!is_gimple_assign (stmt))
4636 return false;
4638 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4639 return false;
4641 code = gimple_assign_rhs_code (stmt);
4643 /* For pointer addition, we should use the normal plus for
4644 the vector addition. */
4645 if (code == POINTER_PLUS_EXPR)
4646 code = PLUS_EXPR;
4648 /* Support only unary, binary or ternary operations. */
4649 op_type = TREE_CODE_LENGTH (code);
4650 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4652 if (dump_enabled_p ())
4653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4654 "num. args = %d (not unary/binary/ternary op).\n",
4655 op_type);
4656 return false;
4659 scalar_dest = gimple_assign_lhs (stmt);
4660 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4662 /* Most operations cannot handle bit-precision types without extra
4663 truncations. */
4664 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4665 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4666 /* Exceptions are bitwise binary operations. */
4667 && code != BIT_IOR_EXPR
4668 && code != BIT_XOR_EXPR
4669 && code != BIT_AND_EXPR)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "bit-precision arithmetic not supported.\n");
4674 return false;
4677 op0 = gimple_assign_rhs1 (stmt);
4678 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4679 &def_stmt, &def, &dt[0], &vectype))
4681 if (dump_enabled_p ())
4682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4683 "use not simple.\n");
4684 return false;
4686 /* If op0 is an external or constant def use a vector type with
4687 the same size as the output vector type. */
4688 if (!vectype)
4689 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4690 if (vec_stmt)
4691 gcc_assert (vectype);
4692 if (!vectype)
4694 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "no vectype for scalar type ");
4698 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4699 TREE_TYPE (op0));
4700 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4703 return false;
4706 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4707 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4708 if (nunits_out != nunits_in)
4709 return false;
4711 if (op_type == binary_op || op_type == ternary_op)
4713 op1 = gimple_assign_rhs2 (stmt);
4714 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4715 &def, &dt[1]))
4717 if (dump_enabled_p ())
4718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4719 "use not simple.\n");
4720 return false;
4723 if (op_type == ternary_op)
4725 op2 = gimple_assign_rhs3 (stmt);
4726 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4727 &def, &dt[2]))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4731 "use not simple.\n");
4732 return false;
4736 if (loop_vinfo)
4737 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4738 else
4739 vf = 1;
4741 /* Multiple types in SLP are handled by creating the appropriate number of
4742 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4743 case of SLP. */
4744 if (slp_node || PURE_SLP_STMT (stmt_info))
4745 ncopies = 1;
4746 else
4747 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4749 gcc_assert (ncopies >= 1);
4751 /* Shifts are handled in vectorizable_shift (). */
4752 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4753 || code == RROTATE_EXPR)
4754 return false;
4756 /* Supportable by target? */
4758 vec_mode = TYPE_MODE (vectype);
4759 if (code == MULT_HIGHPART_EXPR)
4761 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4762 icode = LAST_INSN_CODE;
4763 else
4764 icode = CODE_FOR_nothing;
4766 else
4768 optab = optab_for_tree_code (code, vectype, optab_default);
4769 if (!optab)
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4773 "no optab.\n");
4774 return false;
4776 icode = (int) optab_handler (optab, vec_mode);
4779 if (icode == CODE_FOR_nothing)
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4783 "op not supported by target.\n");
4784 /* Check only during analysis. */
4785 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4786 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4787 return false;
4788 if (dump_enabled_p ())
4789 dump_printf_loc (MSG_NOTE, vect_location,
4790 "proceeding using word mode.\n");
4793 /* Worthwhile without SIMD support? Check only during analysis. */
4794 if (!VECTOR_MODE_P (vec_mode)
4795 && !vec_stmt
4796 && vf < vect_min_worthwhile_factor (code))
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4800 "not worthwhile without SIMD support.\n");
4801 return false;
4804 if (!vec_stmt) /* transformation not required. */
4806 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4807 if (dump_enabled_p ())
4808 dump_printf_loc (MSG_NOTE, vect_location,
4809 "=== vectorizable_operation ===\n");
4810 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4811 return true;
4814 /** Transform. **/
4816 if (dump_enabled_p ())
4817 dump_printf_loc (MSG_NOTE, vect_location,
4818 "transform binary/unary operation.\n");
4820 /* Handle def. */
4821 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4823 /* In case the vectorization factor (VF) is bigger than the number
4824 of elements that we can fit in a vectype (nunits), we have to generate
4825 more than one vector stmt - i.e - we need to "unroll" the
4826 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4827 from one copy of the vector stmt to the next, in the field
4828 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4829 stages to find the correct vector defs to be used when vectorizing
4830 stmts that use the defs of the current stmt. The example below
4831 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4832 we need to create 4 vectorized stmts):
4834 before vectorization:
4835 RELATED_STMT VEC_STMT
4836 S1: x = memref - -
4837 S2: z = x + 1 - -
4839 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4840 there):
4841 RELATED_STMT VEC_STMT
4842 VS1_0: vx0 = memref0 VS1_1 -
4843 VS1_1: vx1 = memref1 VS1_2 -
4844 VS1_2: vx2 = memref2 VS1_3 -
4845 VS1_3: vx3 = memref3 - -
4846 S1: x = load - VS1_0
4847 S2: z = x + 1 - -
4849 step 2: vectorize stmt S2 (done here):
4850 To vectorize stmt S2 we first need to find the relevant vector
4851 def for the first operand 'x'. This is, as usual, obtained from
4852 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4853 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4854 relevant vector def 'vx0'. Having found 'vx0' we can generate
4855 the vector stmt VS2_0, and as usual, record it in the
4856 STMT_VINFO_VEC_STMT of stmt S2.
4857 When creating the second copy (VS2_1), we obtain the relevant vector
4858 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4859 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4860 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4861 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4862 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4863 chain of stmts and pointers:
4864 RELATED_STMT VEC_STMT
4865 VS1_0: vx0 = memref0 VS1_1 -
4866 VS1_1: vx1 = memref1 VS1_2 -
4867 VS1_2: vx2 = memref2 VS1_3 -
4868 VS1_3: vx3 = memref3 - -
4869 S1: x = load - VS1_0
4870 VS2_0: vz0 = vx0 + v1 VS2_1 -
4871 VS2_1: vz1 = vx1 + v1 VS2_2 -
4872 VS2_2: vz2 = vx2 + v1 VS2_3 -
4873 VS2_3: vz3 = vx3 + v1 - -
4874 S2: z = x + 1 - VS2_0 */
4876 prev_stmt_info = NULL;
4877 for (j = 0; j < ncopies; j++)
4879 /* Handle uses. */
4880 if (j == 0)
4882 if (op_type == binary_op || op_type == ternary_op)
4883 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4884 slp_node, -1);
4885 else
4886 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4887 slp_node, -1);
4888 if (op_type == ternary_op)
4890 vec_oprnds2.create (1);
4891 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4892 stmt,
4893 NULL));
4896 else
4898 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4899 if (op_type == ternary_op)
4901 tree vec_oprnd = vec_oprnds2.pop ();
4902 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4903 vec_oprnd));
4907 /* Arguments are ready. Create the new vector stmt. */
4908 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4910 vop1 = ((op_type == binary_op || op_type == ternary_op)
4911 ? vec_oprnds1[i] : NULL_TREE);
4912 vop2 = ((op_type == ternary_op)
4913 ? vec_oprnds2[i] : NULL_TREE);
4914 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4915 new_temp = make_ssa_name (vec_dest, new_stmt);
4916 gimple_assign_set_lhs (new_stmt, new_temp);
4917 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4918 if (slp_node)
4919 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4922 if (slp_node)
4923 continue;
4925 if (j == 0)
4926 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4927 else
4928 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4929 prev_stmt_info = vinfo_for_stmt (new_stmt);
4932 vec_oprnds0.release ();
4933 vec_oprnds1.release ();
4934 vec_oprnds2.release ();
4936 return true;
4939 /* A helper function that forces data reference DR's base declaration to the
4940 alignment of STMT_INFO's vector type when it was recorded as misaligned. */
4942 static void
4943 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4945 if (!dr->aux)
4946 return;
4948 if (((dataref_aux *)dr->aux)->base_misaligned)
4950 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4951 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4953 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4954 DECL_USER_ALIGN (base_decl) = 1;
4955 ((dataref_aux *)dr->aux)->base_misaligned = false;
4960 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4961 reversal of the vector elements. If that is impossible to do,
4962 returns NULL. */
4964 static tree
4965 perm_mask_for_reverse (tree vectype)
4967 int i, nunits;
4968 unsigned char *sel;
4970 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4971 sel = XALLOCAVEC (unsigned char, nunits);
4973 for (i = 0; i < nunits; ++i)
4974 sel[i] = nunits - 1 - i;
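/* E.g. for a four-element vector the selector is {3, 2, 1, 0}, i.e. the
   element order is completely reversed.  */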
4976 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4977 return NULL_TREE;
4978 return vect_gen_perm_mask_checked (vectype, sel);
4981 /* Function vectorizable_store.
4983 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4984 can be vectorized.
4985 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4986 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4987 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4989 static bool
4990 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4991 slp_tree slp_node)
4993 tree scalar_dest;
4994 tree data_ref;
4995 tree op;
4996 tree vec_oprnd = NULL_TREE;
4997 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4998 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4999 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5000 tree elem_type;
5001 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5002 struct loop *loop = NULL;
5003 machine_mode vec_mode;
5004 tree dummy;
5005 enum dr_alignment_support alignment_support_scheme;
5006 tree def;
5007 gimple def_stmt;
5008 enum vect_def_type dt;
5009 stmt_vec_info prev_stmt_info = NULL;
5010 tree dataref_ptr = NULL_TREE;
5011 tree dataref_offset = NULL_TREE;
5012 gimple ptr_incr = NULL;
5013 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5014 int ncopies;
5015 int j;
5016 gimple next_stmt, first_stmt = NULL;
5017 bool grouped_store = false;
5018 bool store_lanes_p = false;
5019 unsigned int group_size, i;
5020 vec<tree> dr_chain = vNULL;
5021 vec<tree> oprnds = vNULL;
5022 vec<tree> result_chain = vNULL;
5023 bool inv_p;
5024 bool negative = false;
5025 tree offset = NULL_TREE;
5026 vec<tree> vec_oprnds = vNULL;
5027 bool slp = (slp_node != NULL);
5028 unsigned int vec_num;
5029 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5030 tree aggr_type;
5032 if (loop_vinfo)
5033 loop = LOOP_VINFO_LOOP (loop_vinfo);
5035 /* Multiple types in SLP are handled by creating the appropriate number of
5036 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5037 case of SLP. */
5038 if (slp || PURE_SLP_STMT (stmt_info))
5039 ncopies = 1;
5040 else
5041 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5043 gcc_assert (ncopies >= 1);
5045 /* FORNOW. This restriction should be relaxed. */
5046 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5048 if (dump_enabled_p ())
5049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5050 "multiple types in nested loop.\n");
5051 return false;
5054 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5055 return false;
5057 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5058 return false;
5060 /* Is vectorizable store? */
5062 if (!is_gimple_assign (stmt))
5063 return false;
5065 scalar_dest = gimple_assign_lhs (stmt);
5066 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5067 && is_pattern_stmt_p (stmt_info))
5068 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5069 if (TREE_CODE (scalar_dest) != ARRAY_REF
5070 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5071 && TREE_CODE (scalar_dest) != INDIRECT_REF
5072 && TREE_CODE (scalar_dest) != COMPONENT_REF
5073 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5074 && TREE_CODE (scalar_dest) != REALPART_EXPR
5075 && TREE_CODE (scalar_dest) != MEM_REF)
5076 return false;
5078 gcc_assert (gimple_assign_single_p (stmt));
5079 op = gimple_assign_rhs1 (stmt);
5080 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5081 &def, &dt))
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5085 "use not simple.\n");
5086 return false;
5089 elem_type = TREE_TYPE (vectype);
5090 vec_mode = TYPE_MODE (vectype);
5092 /* FORNOW. In some cases can vectorize even if data-type not supported
5093 (e.g. - array initialization with 0). */
5094 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5095 return false;
5097 if (!STMT_VINFO_DATA_REF (stmt_info))
5098 return false;
5100 negative =
5101 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5102 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5103 size_zero_node) < 0;
5104 if (negative && ncopies > 1)
5106 if (dump_enabled_p ())
5107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5108 "multiple types with negative step.\n");
5109 return false;
5112 if (negative)
5114 gcc_assert (!grouped_store);
5115 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5116 if (alignment_support_scheme != dr_aligned
5117 && alignment_support_scheme != dr_unaligned_supported)
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5121 "negative step but alignment required.\n");
5122 return false;
5124 if (dt != vect_constant_def
5125 && dt != vect_external_def
5126 && !perm_mask_for_reverse (vectype))
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5130 "negative step and reversing not supported.\n");
5131 return false;
5135 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5137 grouped_store = true;
5138 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5139 if (!slp && !PURE_SLP_STMT (stmt_info))
5141 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5142 if (vect_store_lanes_supported (vectype, group_size))
5143 store_lanes_p = true;
5144 else if (!vect_grouped_store_supported (vectype, group_size))
5145 return false;
5148 if (first_stmt == stmt)
5150 /* STMT is the leader of the group. Check the operands of all the
5151 stmts of the group. */
5152 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5153 while (next_stmt)
5155 gcc_assert (gimple_assign_single_p (next_stmt));
5156 op = gimple_assign_rhs1 (next_stmt);
5157 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5158 &def_stmt, &def, &dt))
5160 if (dump_enabled_p ())
5161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5162 "use not simple.\n");
5163 return false;
5165 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5170 if (!vec_stmt) /* transformation not required. */
5172 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5173 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5174 NULL, NULL, NULL);
5175 return true;
5178 /** Transform. **/
5180 ensure_base_align (stmt_info, dr);
5182 if (grouped_store)
5184 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5185 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5187 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5189 /* FORNOW */
5190 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5192 /* We vectorize all the stmts of the interleaving group when we
5193 reach the last stmt in the group. */
5194 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5195 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5196 && !slp)
5198 *vec_stmt = NULL;
5199 return true;
5202 if (slp)
5204 grouped_store = false;
5205 /* VEC_NUM is the number of vect stmts to be created for this
5206 group. */
5207 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5208 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5209 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5210 op = gimple_assign_rhs1 (first_stmt);
5212 else
5213 /* VEC_NUM is the number of vect stmts to be created for this
5214 group. */
5215 vec_num = group_size;
5217 else
5219 first_stmt = stmt;
5220 first_dr = dr;
5221 group_size = vec_num = 1;
5224 if (dump_enabled_p ())
5225 dump_printf_loc (MSG_NOTE, vect_location,
5226 "transform store. ncopies = %d\n", ncopies);
5228 dr_chain.create (group_size);
5229 oprnds.create (group_size);
5231 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5232 gcc_assert (alignment_support_scheme);
5233 /* Targets with store-lane instructions must not require explicit
5234 realignment. */
5235 gcc_assert (!store_lanes_p
5236 || alignment_support_scheme == dr_aligned
5237 || alignment_support_scheme == dr_unaligned_supported);
5239 if (negative)
5240 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5242 if (store_lanes_p)
5243 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5244 else
5245 aggr_type = vectype;
5247 /* In case the vectorization factor (VF) is bigger than the number
5248 of elements that we can fit in a vectype (nunits), we have to generate
5249 more than one vector stmt - i.e - we need to "unroll" the
5250 vector stmt by a factor VF/nunits. For more details see documentation in
5251 vect_get_vec_def_for_copy_stmt. */
5253 /* In case of interleaving (non-unit grouped access):
5255 S1: &base + 2 = x2
5256 S2: &base = x0
5257 S3: &base + 1 = x1
5258 S4: &base + 3 = x3
5260 We create vectorized stores starting from base address (the access of the
5261 first stmt in the chain (S2 in the above example), when the last store stmt
5262 of the chain (S4) is reached:
5264 VS1: &base = vx2
5265 VS2: &base + vec_size*1 = vx0
5266 VS3: &base + vec_size*2 = vx1
5267 VS4: &base + vec_size*3 = vx3
5269 Then permutation statements are generated:
5271 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5272 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5275 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5276 (the order of the data-refs in the output of vect_permute_store_chain
5277 corresponds to the order of scalar stmts in the interleaving chain - see
5278 the documentation of vect_permute_store_chain()).
5280 In case of both multiple types and interleaving, above vector stores and
5281 permutation stmts are created for every copy. The result vector stmts are
5282 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5283 STMT_VINFO_RELATED_STMT for the next copies.
5286 prev_stmt_info = NULL;
5287 for (j = 0; j < ncopies; j++)
5289 gimple new_stmt;
5291 if (j == 0)
5293 if (slp)
5295 /* Get vectorized arguments for SLP_NODE. */
5296 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5297 NULL, slp_node, -1);
5299 vec_oprnd = vec_oprnds[0];
5301 else
5303 /* For interleaved stores we collect vectorized defs for all the
5304 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5305 used as an input to vect_permute_store_chain(), and OPRNDS as
5306 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5308 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5309 OPRNDS are of size 1. */
5310 next_stmt = first_stmt;
5311 for (i = 0; i < group_size; i++)
5313 /* Since gaps are not supported for interleaved stores,
5314 GROUP_SIZE is the exact number of stmts in the chain.
5315 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5316 there is no interleaving, GROUP_SIZE is 1, and only one
5317 iteration of the loop will be executed. */
5318 gcc_assert (next_stmt
5319 && gimple_assign_single_p (next_stmt));
5320 op = gimple_assign_rhs1 (next_stmt);
5322 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5323 NULL);
5324 dr_chain.quick_push (vec_oprnd);
5325 oprnds.quick_push (vec_oprnd);
5326 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5330 /* We should have caught mismatched types earlier. */
5331 gcc_assert (useless_type_conversion_p (vectype,
5332 TREE_TYPE (vec_oprnd)));
5333 bool simd_lane_access_p
5334 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5335 if (simd_lane_access_p
5336 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5337 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5338 && integer_zerop (DR_OFFSET (first_dr))
5339 && integer_zerop (DR_INIT (first_dr))
5340 && alias_sets_conflict_p (get_alias_set (aggr_type),
5341 get_alias_set (DR_REF (first_dr))))
5343 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5344 dataref_offset = build_int_cst (reference_alias_ptr_type
5345 (DR_REF (first_dr)), 0);
5346 inv_p = false;
5348 else
5349 dataref_ptr
5350 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5351 simd_lane_access_p ? loop : NULL,
5352 offset, &dummy, gsi, &ptr_incr,
5353 simd_lane_access_p, &inv_p);
5354 gcc_assert (bb_vinfo || !inv_p);
5356 else
5358 /* For interleaved stores we created vectorized defs for all the
5359 defs stored in OPRNDS in the previous iteration (previous copy).
5360 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5361 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5362 next copy.
5363 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5364 OPRNDS are of size 1. */
5365 for (i = 0; i < group_size; i++)
5367 op = oprnds[i];
5368 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5369 &def, &dt);
5370 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5371 dr_chain[i] = vec_oprnd;
5372 oprnds[i] = vec_oprnd;
5374 if (dataref_offset)
5375 dataref_offset
5376 = int_const_binop (PLUS_EXPR, dataref_offset,
5377 TYPE_SIZE_UNIT (aggr_type));
5378 else
5379 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5380 TYPE_SIZE_UNIT (aggr_type));
5383 if (store_lanes_p)
5385 tree vec_array;
5387 /* Combine all the vectors into an array. */
5388 vec_array = create_vector_array (vectype, vec_num);
5389 for (i = 0; i < vec_num; i++)
5391 vec_oprnd = dr_chain[i];
5392 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5395 /* Emit:
5396 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5397 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5398 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5399 gimple_call_set_lhs (new_stmt, data_ref);
5400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5402 else
5404 new_stmt = NULL;
5405 if (grouped_store)
5407 if (j == 0)
5408 result_chain.create (group_size);
5409 /* Permute. */
5410 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5411 &result_chain);
5414 next_stmt = first_stmt;
5415 for (i = 0; i < vec_num; i++)
5417 unsigned align, misalign;
5419 if (i > 0)
5420 /* Bump the vector pointer. */
5421 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5422 stmt, NULL_TREE);
5424 if (slp)
5425 vec_oprnd = vec_oprnds[i];
5426 else if (grouped_store)
5427 /* For grouped stores vectorized defs are interleaved in
5428 vect_permute_store_chain(). */
5429 vec_oprnd = result_chain[i];
5431 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5432 dataref_offset
5433 ? dataref_offset
5434 : build_int_cst (reference_alias_ptr_type
5435 (DR_REF (first_dr)), 0));
5436 align = TYPE_ALIGN_UNIT (vectype);
5437 if (aligned_access_p (first_dr))
5438 misalign = 0;
5439 else if (DR_MISALIGNMENT (first_dr) == -1)
5441 TREE_TYPE (data_ref)
5442 = build_aligned_type (TREE_TYPE (data_ref),
5443 TYPE_ALIGN (elem_type));
5444 align = TYPE_ALIGN_UNIT (elem_type);
5445 misalign = 0;
5447 else
5449 TREE_TYPE (data_ref)
5450 = build_aligned_type (TREE_TYPE (data_ref),
5451 TYPE_ALIGN (elem_type));
5452 misalign = DR_MISALIGNMENT (first_dr);
5454 if (dataref_offset == NULL_TREE)
5455 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5456 misalign);
5458 if (negative
5459 && dt != vect_constant_def
5460 && dt != vect_external_def)
5462 tree perm_mask = perm_mask_for_reverse (vectype);
5463 tree perm_dest
5464 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5465 vectype);
5466 tree new_temp = make_ssa_name (perm_dest);
5468 /* Generate the permute statement. */
5469 gimple perm_stmt
5470 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5471 vec_oprnd, perm_mask);
5472 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5474 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5475 vec_oprnd = new_temp;
5478 /* Arguments are ready. Create the new vector stmt. */
5479 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5480 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5482 if (slp)
5483 continue;
5485 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5486 if (!next_stmt)
5487 break;
5490 if (!slp)
5492 if (j == 0)
5493 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5494 else
5495 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5496 prev_stmt_info = vinfo_for_stmt (new_stmt);
5500 dr_chain.release ();
5501 oprnds.release ();
5502 result_chain.release ();
5503 vec_oprnds.release ();
5505 return true;
5508 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5509 VECTOR_CST mask. No checks are made that the target platform supports the
5510 mask, so callers may wish to test can_vec_perm_p separately, or use
5511 vect_gen_perm_mask_checked. */
5513 tree
5514 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5516 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5517 int i, nunits;
5519 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5521 mask_elt_type = lang_hooks.types.type_for_mode
5522 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5523 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5525 mask_elts = XALLOCAVEC (tree, nunits);
5526 for (i = nunits - 1; i >= 0; i--)
5527 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
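/* E.g. for a four-element VECTYPE and SEL = {1, 0, 3, 2} this yields the
   VECTOR_CST { 1, 0, 3, 2 } in the matching integer vector type, ready to
   be used as the third operand of a VEC_PERM_EXPR.  */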
5528 mask_vec = build_vector (mask_type, mask_elts);
5530 return mask_vec;
5533 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5534 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5536 tree
5537 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5539 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5540 return vect_gen_perm_mask_any (vectype, sel);
5543 /* Given vector variables X and Y that were generated for the scalar
5544 STMT, generate instructions to permute the vector elements of X and Y
5545 using permutation mask MASK_VEC, insert them at *GSI and return the
5546 permuted vector variable. */
5548 static tree
5549 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5550 gimple_stmt_iterator *gsi)
5552 tree vectype = TREE_TYPE (x);
5553 tree perm_dest, data_ref;
5554 gimple perm_stmt;
5556 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5557 data_ref = make_ssa_name (perm_dest);
5559 /* Generate the permute statement. */
5560 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5561 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5563 return data_ref;
5566 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5567 inserting them on the loop's preheader edge. Returns true if we
5568 were successful in doing so (and thus STMT can be moved then),
5569 otherwise returns false. */
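/* For example, if STMT uses _2 where  _2 = y_3 + 1  is defined inside LOOP
   but y_3 is not, the definition of _2 is moved onto the preheader edge so
   that STMT itself can then be hoisted.  */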
5571 static bool
5572 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5574 ssa_op_iter i;
5575 tree op;
5576 bool any = false;
5578 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5580 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5581 if (!gimple_nop_p (def_stmt)
5582 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5584 /* Make sure we don't need to recurse. While we could do
5585 so in simple cases, when there are more complex use webs
5586 we don't have an easy way to preserve stmt order to fulfil
5587 dependencies within them. */
5588 tree op2;
5589 ssa_op_iter i2;
5590 if (gimple_code (def_stmt) == GIMPLE_PHI)
5591 return false;
5592 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5594 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5595 if (!gimple_nop_p (def_stmt2)
5596 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5597 return false;
5599 any = true;
5603 if (!any)
5604 return true;
5606 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5608 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5609 if (!gimple_nop_p (def_stmt)
5610 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5612 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5613 gsi_remove (&gsi, false);
5614 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5618 return true;
5621 /* vectorizable_load.
5623 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5624 can be vectorized.
5625 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5626 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5627 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5629 static bool
5630 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5631 slp_tree slp_node, slp_instance slp_node_instance)
5633 tree scalar_dest;
5634 tree vec_dest = NULL;
5635 tree data_ref = NULL;
5636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5637 stmt_vec_info prev_stmt_info;
5638 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5639 struct loop *loop = NULL;
5640 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5641 bool nested_in_vect_loop = false;
5642 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5643 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5644 tree elem_type;
5645 tree new_temp;
5646 machine_mode mode;
5647 gimple new_stmt = NULL;
5648 tree dummy;
5649 enum dr_alignment_support alignment_support_scheme;
5650 tree dataref_ptr = NULL_TREE;
5651 tree dataref_offset = NULL_TREE;
5652 gimple ptr_incr = NULL;
5653 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5654 int ncopies;
5655 int i, j, group_size, group_gap;
5656 tree msq = NULL_TREE, lsq;
5657 tree offset = NULL_TREE;
5658 tree byte_offset = NULL_TREE;
5659 tree realignment_token = NULL_TREE;
5660 gphi *phi = NULL;
5661 vec<tree> dr_chain = vNULL;
5662 bool grouped_load = false;
5663 bool load_lanes_p = false;
5664 gimple first_stmt;
5665 bool inv_p;
5666 bool negative = false;
5667 bool compute_in_loop = false;
5668 struct loop *at_loop;
5669 int vec_num;
5670 bool slp = (slp_node != NULL);
5671 bool slp_perm = false;
5672 enum tree_code code;
5673 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5674 int vf;
5675 tree aggr_type;
5676 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5677 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5678 int gather_scale = 1;
5679 enum vect_def_type gather_dt = vect_unknown_def_type;
5681 if (loop_vinfo)
5683 loop = LOOP_VINFO_LOOP (loop_vinfo);
5684 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5685 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5687 else
5688 vf = 1;
5690 /* Multiple types in SLP are handled by creating the appropriate number of
5691 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5692 case of SLP. */
5693 if (slp || PURE_SLP_STMT (stmt_info))
5694 ncopies = 1;
5695 else
5696 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5698 gcc_assert (ncopies >= 1);
5700 /* FORNOW. This restriction should be relaxed. */
5701 if (nested_in_vect_loop && ncopies > 1)
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5705 "multiple types in nested loop.\n");
5706 return false;
5709 /* Invalidate assumptions made by dependence analysis when vectorization
5710 on the unrolled body effectively re-orders stmts. */
5711 if (ncopies > 1
5712 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5713 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5714 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718 "cannot perform implicit CSE when unrolling "
5719 "with negative dependence distance\n");
5720 return false;
5723 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5724 return false;
5726 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5727 return false;
5729 /* Is vectorizable load? */
5730 if (!is_gimple_assign (stmt))
5731 return false;
5733 scalar_dest = gimple_assign_lhs (stmt);
5734 if (TREE_CODE (scalar_dest) != SSA_NAME)
5735 return false;
5737 code = gimple_assign_rhs_code (stmt);
5738 if (code != ARRAY_REF
5739 && code != BIT_FIELD_REF
5740 && code != INDIRECT_REF
5741 && code != COMPONENT_REF
5742 && code != IMAGPART_EXPR
5743 && code != REALPART_EXPR
5744 && code != MEM_REF
5745 && TREE_CODE_CLASS (code) != tcc_declaration)
5746 return false;
5748 if (!STMT_VINFO_DATA_REF (stmt_info))
5749 return false;
5751 elem_type = TREE_TYPE (vectype);
5752 mode = TYPE_MODE (vectype);
5754 /* FORNOW. In some cases can vectorize even if data-type not supported
5755 (e.g. - data copies). */
5756 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5758 if (dump_enabled_p ())
5759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5760 "Aligned load, but unsupported type.\n");
5761 return false;
5764 /* Check if the load is a part of an interleaving chain. */
5765 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5767 grouped_load = true;
5768 /* FORNOW */
5769 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5771 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5772 if (!slp && !PURE_SLP_STMT (stmt_info))
5774 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5775 if (vect_load_lanes_supported (vectype, group_size))
5776 load_lanes_p = true;
5777 else if (!vect_grouped_load_supported (vectype, group_size))
5778 return false;
5781 /* Invalidate assumptions made by dependence analysis when vectorization
5782 on the unrolled body effectively re-orders stmts. */
5783 if (!PURE_SLP_STMT (stmt_info)
5784 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5785 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5786 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5788 if (dump_enabled_p ())
5789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5790 "cannot perform implicit CSE when performing "
5791 "group loads with negative dependence distance\n");
5792 return false;
5795 /* Similarly when the stmt is a load that is both part of a SLP
5796 instance and a loop vectorized stmt via the same-dr mechanism
5797 we have to give up. */
5798 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5799 && (STMT_SLP_TYPE (stmt_info)
5800 != STMT_SLP_TYPE (vinfo_for_stmt
5801 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5805 "conflicting SLP types for CSEd load\n");
5806 return false;
5811 if (STMT_VINFO_GATHER_P (stmt_info))
5813 gimple def_stmt;
5814 tree def;
5815 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5816 &gather_off, &gather_scale);
5817 gcc_assert (gather_decl);
5818 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5819 &def_stmt, &def, &gather_dt,
5820 &gather_off_vectype))
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824 "gather index use not simple.\n");
5825 return false;
5828 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5830 else
5832 negative = tree_int_cst_compare (nested_in_vect_loop
5833 ? STMT_VINFO_DR_STEP (stmt_info)
5834 : DR_STEP (dr),
5835 size_zero_node) < 0;
5836 if (negative && ncopies > 1)
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5840 "multiple types with negative step.\n");
5841 return false;
5844 if (negative)
5846 if (grouped_load)
5848 if (dump_enabled_p ())
5849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5850 "negative step for group load not supported"
5851 "\n");
5852 return false;
5854 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5855 if (alignment_support_scheme != dr_aligned
5856 && alignment_support_scheme != dr_unaligned_supported)
5858 if (dump_enabled_p ())
5859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5860 "negative step but alignment required.\n");
5861 return false;
5863 if (!perm_mask_for_reverse (vectype))
5865 if (dump_enabled_p ())
5866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5867 "negative step and reversing not supported."
5868 "\n");
5869 return false;
5874 if (!vec_stmt) /* transformation not required. */
5876 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5877 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5878 return true;
5881 if (dump_enabled_p ())
5882 dump_printf_loc (MSG_NOTE, vect_location,
5883 "transform load. ncopies = %d\n", ncopies);
5885 /** Transform. **/
5887 ensure_base_align (stmt_info, dr);
5889 if (STMT_VINFO_GATHER_P (stmt_info))
5891 tree vec_oprnd0 = NULL_TREE, op;
5892 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5893 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5894 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5895 edge pe = loop_preheader_edge (loop);
5896 gimple_seq seq;
5897 basic_block new_bb;
5898 enum { NARROW, NONE, WIDEN } modifier;
5899 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
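      /* The offset vector and the data vector may have a different number
	 of elements; MODIFIER records how the two are reconciled.  NONE:
	 they match.  WIDEN: one offset vector supplies the offsets for two
	 data vectors, and odd copies first permute the high half of the
	 offsets into the low positions.  NARROW: two gather results are
	 combined into one data vector, so NCOPIES is doubled below.  */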
5901 if (nunits == gather_off_nunits)
5902 modifier = NONE;
5903 else if (nunits == gather_off_nunits / 2)
5905 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5906 modifier = WIDEN;
5908 for (i = 0; i < gather_off_nunits; ++i)
5909 sel[i] = i | nunits;
5911 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5913 else if (nunits == gather_off_nunits * 2)
5915 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5916 modifier = NARROW;
5918 for (i = 0; i < nunits; ++i)
5919 sel[i] = i < gather_off_nunits
5920 ? i : i + nunits - gather_off_nunits;
5922 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5923 ncopies *= 2;
5925 else
5926 gcc_unreachable ();
5928 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5929 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5930 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5931 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5932 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5933 scaletype = TREE_VALUE (arglist);
5934 gcc_checking_assert (types_compatible_p (srctype, rettype));
5936 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5938 ptr = fold_convert (ptrtype, gather_base);
5939 if (!is_gimple_min_invariant (ptr))
5941 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5942 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5943 gcc_assert (!new_bb);
5946 /* Currently we support only unconditional gather loads,
5947 so mask should be all ones. */
5948 if (TREE_CODE (masktype) == INTEGER_TYPE)
5949 mask = build_int_cst (masktype, -1);
5950 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5952 mask = build_int_cst (TREE_TYPE (masktype), -1);
5953 mask = build_vector_from_val (masktype, mask);
5954 mask = vect_init_vector (stmt, mask, masktype, NULL);
5956 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5958 REAL_VALUE_TYPE r;
5959 long tmp[6];
5960 for (j = 0; j < 6; ++j)
5961 tmp[j] = -1;
5962 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5963 mask = build_real (TREE_TYPE (masktype), r);
5964 mask = build_vector_from_val (masktype, mask);
5965 mask = vect_init_vector (stmt, mask, masktype, NULL);
5967 else
5968 gcc_unreachable ();
5970 scale = build_int_cst (scaletype, gather_scale);
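      /* MERGE supplies the result values for masked-off lanes of the
	 gather.  Since the mask built above is all ones it should never be
	 observed, so an all-zeros vector of the builtin's return type is
	 good enough.  */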
5972 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5973 merge = build_int_cst (TREE_TYPE (rettype), 0);
5974 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5976 REAL_VALUE_TYPE r;
5977 long tmp[6];
5978 for (j = 0; j < 6; ++j)
5979 tmp[j] = 0;
5980 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5981 merge = build_real (TREE_TYPE (rettype), r);
5983 else
5984 gcc_unreachable ();
5985 merge = build_vector_from_val (rettype, merge);
5986 merge = vect_init_vector (stmt, merge, rettype, NULL);
5988 prev_stmt_info = NULL;
5989 for (j = 0; j < ncopies; ++j)
5991 if (modifier == WIDEN && (j & 1))
5992 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5993 perm_mask, stmt, gsi);
5994 else if (j == 0)
5995 op = vec_oprnd0
5996 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5997 else
5998 op = vec_oprnd0
5999 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
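	  /* The builtin may expect the offsets in a different vector type
	     with the same number of elements (e.g. differing in signedness);
	     reinterpret them with a VIEW_CONVERT_EXPR in that case.  */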
6001 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6003 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6004 == TYPE_VECTOR_SUBPARTS (idxtype));
6005 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6006 var = make_ssa_name (var);
6007 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6008 new_stmt
6009 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6011 op = var;
6014 new_stmt
6015 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6017 if (!useless_type_conversion_p (vectype, rettype))
6019 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6020 == TYPE_VECTOR_SUBPARTS (rettype));
6021 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6022 op = make_ssa_name (var, new_stmt);
6023 gimple_call_set_lhs (new_stmt, op);
6024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6025 var = make_ssa_name (vec_dest);
6026 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6027 new_stmt
6028 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6030 else
6032 var = make_ssa_name (vec_dest, new_stmt);
6033 gimple_call_set_lhs (new_stmt, var);
6036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
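	  /* In the NARROW case two consecutive gather results form one
	     vector of VECTYPE: stash the even result and combine it with
	     the following odd one using PERM_MASK.  */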
6038 if (modifier == NARROW)
6040 if ((j & 1) == 0)
6042 prev_res = var;
6043 continue;
6045 var = permute_vec_elements (prev_res, var,
6046 perm_mask, stmt, gsi);
6047 new_stmt = SSA_NAME_DEF_STMT (var);
6050 if (prev_stmt_info == NULL)
6051 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6052 else
6053 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6054 prev_stmt_info = vinfo_for_stmt (new_stmt);
6056 return true;
6058 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6060 gimple_stmt_iterator incr_gsi;
6061 bool insert_after;
6062 gimple incr;
6063 tree offvar;
6064 tree ivstep;
6065 tree running_off;
6066 vec<constructor_elt, va_gc> *v = NULL;
6067 gimple_seq stmts = NULL;
6068 tree stride_base, stride_step, alias_off;
6070 gcc_assert (!nested_in_vect_loop);
6072 stride_base
6073 = fold_build_pointer_plus
6074 (unshare_expr (DR_BASE_ADDRESS (dr)),
6075 size_binop (PLUS_EXPR,
6076 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6077 convert_to_ptrofftype (DR_INIT (dr))));
6078 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6080 /* For a load with loop-invariant (but other than power-of-2)
6081 stride (i.e. not a grouped access) like so:
6083 for (i = 0; i < n; i += stride)
6084 ... = array[i];
6086 we generate a new induction variable and new accesses to
6087 form a new vector (or vectors, depending on ncopies):
6089 for (j = 0; ; j += VF*stride)
6090 tmp1 = array[j];
6091 tmp2 = array[j + stride];
6093 vectemp = {tmp1, tmp2, ...}
6096 ivstep = stride_step;
6097 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6098 build_int_cst (TREE_TYPE (ivstep), vf));
6100 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6102 create_iv (stride_base, ivstep, NULL,
6103 loop, &incr_gsi, insert_after,
6104 &offvar, NULL);
6105 incr = gsi_stmt (incr_gsi);
6106 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6108 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6109 if (stmts)
6110 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6112 prev_stmt_info = NULL;
6113 running_off = offvar;
6114 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
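      /* Assemble each vector copy by emitting NUNITS scalar loads through
	 the running pointer, bumping it by the stride after every element,
	 and collecting the loaded values in a CONSTRUCTOR.  */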
6115 for (j = 0; j < ncopies; j++)
6117 tree vec_inv;
6119 vec_alloc (v, nunits);
6120 for (i = 0; i < nunits; i++)
6122 tree newref, newoff;
6123 gimple incr;
6124 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6125 running_off, alias_off);
6127 newref = force_gimple_operand_gsi (gsi, newref, true,
6128 NULL_TREE, true,
6129 GSI_SAME_STMT);
6130 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6131 newoff = copy_ssa_name (running_off);
6132 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6133 running_off, stride_step);
6134 vect_finish_stmt_generation (stmt, incr, gsi);
6136 running_off = newoff;
6139 vec_inv = build_constructor (vectype, v);
6140 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6141 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6143 if (j == 0)
6144 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6145 else
6146 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6147 prev_stmt_info = vinfo_for_stmt (new_stmt);
6149 return true;
6152 if (grouped_load)
6154 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6155 if (slp
6156 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6157 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6158 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6160 /* Check if the chain of loads is already vectorized. */
6161 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6162 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6163 ??? But we can only do so if there is exactly one
6164 as we have no way to get at the rest. Leave the CSE
6165 opportunity alone.
6166 ??? With the group load eventually participating
6167 in multiple different permutations (having multiple
6168 slp nodes which refer to the same group) the CSE
6169 is even wrong code. See PR56270. */
6170 && !slp)
6172 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6173 return true;
6175 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6176 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6178 /* VEC_NUM is the number of vect stmts to be created for this group. */
6179 if (slp)
6181 grouped_load = false;
6182 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6183 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6184 slp_perm = true;
6185 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6187 else
6189 vec_num = group_size;
6190 group_gap = 0;
6193 else
6195 first_stmt = stmt;
6196 first_dr = dr;
6197 group_size = vec_num = 1;
6198 group_gap = 0;
6201 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6202 gcc_assert (alignment_support_scheme);
6203 /* Targets with load-lane instructions must not require explicit
6204 realignment. */
6205 gcc_assert (!load_lanes_p
6206 || alignment_support_scheme == dr_aligned
6207 || alignment_support_scheme == dr_unaligned_supported);
6209 /* In case the vectorization factor (VF) is bigger than the number
6210 of elements that we can fit in a vectype (nunits), we have to generate
6211 more than one vector stmt - i.e - we need to "unroll" the
6212 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6213 from one copy of the vector stmt to the next, in the field
6214 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6215 stages to find the correct vector defs to be used when vectorizing
6216 stmts that use the defs of the current stmt. The example below
6217 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6218 need to create 4 vectorized stmts):
6220 before vectorization:
6221 RELATED_STMT VEC_STMT
6222 S1: x = memref - -
6223 S2: z = x + 1 - -
6225 step 1: vectorize stmt S1:
6226 We first create the vector stmt VS1_0, and, as usual, record a
6227 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6228 Next, we create the vector stmt VS1_1, and record a pointer to
6229 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6230 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6231 stmts and pointers:
6232 RELATED_STMT VEC_STMT
6233 VS1_0: vx0 = memref0 VS1_1 -
6234 VS1_1: vx1 = memref1 VS1_2 -
6235 VS1_2: vx2 = memref2 VS1_3 -
6236 VS1_3: vx3 = memref3 - -
6237 S1: x = load - VS1_0
6238 S2: z = x + 1 - -
6240    See the documentation of vect_get_vec_def_for_stmt_copy for how the
6241    information we recorded in the RELATED_STMT field is used to vectorize
6242 stmt S2. */
6244 /* In case of interleaving (non-unit grouped access):
6246 S1: x2 = &base + 2
6247 S2: x0 = &base
6248 S3: x1 = &base + 1
6249 S4: x3 = &base + 3
6251 Vectorized loads are created in the order of memory accesses
6252 starting from the access of the first stmt of the chain:
6254 VS1: vx0 = &base
6255 VS2: vx1 = &base + vec_size*1
6256 VS3: vx3 = &base + vec_size*2
6257 VS4: vx4 = &base + vec_size*3
6259 Then permutation statements are generated:
6261 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6262 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6265 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6266 (the order of the data-refs in the output of vect_permute_load_chain
6267 corresponds to the order of scalar stmts in the interleaving chain - see
6268 the documentation of vect_permute_load_chain()).
6269 The generation of permutation stmts and recording them in
6270 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6272 In case of both multiple types and interleaving, the vector loads and
6273 permutation stmts above are created for every copy. The result vector
6274 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6275 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6277 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6278 on a target that supports unaligned accesses (dr_unaligned_supported)
6279 we generate the following code:
6280 p = initial_addr;
6281 indx = 0;
6282 loop {
6283 p = p + indx * vectype_size;
6284 vec_dest = *(p);
6285 indx = indx + 1;
6288 Otherwise, the data reference is potentially unaligned on a target that
6289 does not support unaligned accesses (dr_explicit_realign_optimized) -
6290 then generate the following code, in which the data in each iteration is
6291 obtained by two vector loads, one from the previous iteration, and one
6292 from the current iteration:
6293 p1 = initial_addr;
6294 msq_init = *(floor(p1))
6295 p2 = initial_addr + VS - 1;
6296 realignment_token = call target_builtin;
6297 indx = 0;
6298 loop {
6299 p2 = p2 + indx * vectype_size
6300 lsq = *(floor(p2))
6301 vec_dest = realign_load (msq, lsq, realignment_token)
6302 indx = indx + 1;
6303 msq = lsq;
6304 } */
6306 /* If the misalignment remains the same throughout the execution of the
6307 loop, we can create the init_addr and permutation mask at the loop
6308 preheader. Otherwise, it needs to be created inside the loop.
6309 This can only occur when vectorizing memory accesses in the inner-loop
6310 nested within an outer-loop that is being vectorized. */
6312 if (nested_in_vect_loop
6313 && (TREE_INT_CST_LOW (DR_STEP (dr))
6314 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6316 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6317 compute_in_loop = true;
6320 if ((alignment_support_scheme == dr_explicit_realign_optimized
6321 || alignment_support_scheme == dr_explicit_realign)
6322 && !compute_in_loop)
6324 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6325 alignment_support_scheme, NULL_TREE,
6326 &at_loop);
6327 if (alignment_support_scheme == dr_explicit_realign_optimized)
6329 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6330 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6331 size_one_node);
6334 else
6335 at_loop = loop;
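  /* For a reverse (negative step) access, bias the data-ref pointer so that
     each vector load covers the NUNITS elements the corresponding scalar
     iterations would read; the loaded vector is put back into iteration
     order by the reversing permute emitted further down.  */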
6337 if (negative)
6338 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6340 if (load_lanes_p)
6341 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6342 else
6343 aggr_type = vectype;
6345 prev_stmt_info = NULL;
6346 for (j = 0; j < ncopies; j++)
6348 /* 1. Create the vector or array pointer update chain. */
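      /* For the first copy (j == 0) either reuse the invariant base address
	 directly (possible for SIMD lane accesses to the compiler-generated
	 simd arrays) or create a data-ref pointer IV; later copies simply
	 bump the pointer or the constant offset.  */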
6349 if (j == 0)
6351 bool simd_lane_access_p
6352 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6353 if (simd_lane_access_p
6354 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6355 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6356 && integer_zerop (DR_OFFSET (first_dr))
6357 && integer_zerop (DR_INIT (first_dr))
6358 && alias_sets_conflict_p (get_alias_set (aggr_type),
6359 get_alias_set (DR_REF (first_dr)))
6360 && (alignment_support_scheme == dr_aligned
6361 || alignment_support_scheme == dr_unaligned_supported))
6363 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6364 dataref_offset = build_int_cst (reference_alias_ptr_type
6365 (DR_REF (first_dr)), 0);
6366 inv_p = false;
6368 else
6369 dataref_ptr
6370 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6371 offset, &dummy, gsi, &ptr_incr,
6372 simd_lane_access_p, &inv_p,
6373 byte_offset);
6375 else if (dataref_offset)
6376 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6377 TYPE_SIZE_UNIT (aggr_type));
6378 else
6379 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6380 TYPE_SIZE_UNIT (aggr_type));
6382 if (grouped_load || slp_perm)
6383 dr_chain.create (vec_num);
6385 if (load_lanes_p)
6387 tree vec_array;
6389 vec_array = create_vector_array (vectype, vec_num);
6391 /* Emit:
6392 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6393 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6394 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6395 gimple_call_set_lhs (new_stmt, vec_array);
6396 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6398 /* Extract each vector into an SSA_NAME. */
6399 for (i = 0; i < vec_num; i++)
6401 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6402 vec_array, i);
6403 dr_chain.quick_push (new_temp);
6406 /* Record the mapping between SSA_NAMEs and statements. */
6407 vect_record_grouped_load_vectors (stmt, dr_chain);
6409 else
6411 for (i = 0; i < vec_num; i++)
6413 if (i > 0)
6414 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6415 stmt, NULL_TREE);
6417 /* 2. Create the vector-load in the loop. */
6418 switch (alignment_support_scheme)
6420 case dr_aligned:
6421 case dr_unaligned_supported:
6423 unsigned int align, misalign;
6425 data_ref
6426 = build2 (MEM_REF, vectype, dataref_ptr,
6427 dataref_offset
6428 ? dataref_offset
6429 : build_int_cst (reference_alias_ptr_type
6430 (DR_REF (first_dr)), 0));
6431 align = TYPE_ALIGN_UNIT (vectype);
6432 if (alignment_support_scheme == dr_aligned)
6434 gcc_assert (aligned_access_p (first_dr));
6435 misalign = 0;
6437 else if (DR_MISALIGNMENT (first_dr) == -1)
6439 TREE_TYPE (data_ref)
6440 = build_aligned_type (TREE_TYPE (data_ref),
6441 TYPE_ALIGN (elem_type));
6442 align = TYPE_ALIGN_UNIT (elem_type);
6443 misalign = 0;
6445 else
6447 TREE_TYPE (data_ref)
6448 = build_aligned_type (TREE_TYPE (data_ref),
6449 TYPE_ALIGN (elem_type));
6450 misalign = DR_MISALIGNMENT (first_dr);
6452 if (dataref_offset == NULL_TREE)
6453 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6454 align, misalign);
6455 break;
6457 case dr_explicit_realign:
6459 tree ptr, bump;
6460 tree vs_minus_1;
6462 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6464 if (compute_in_loop)
6465 msq = vect_setup_realignment (first_stmt, gsi,
6466 &realignment_token,
6467 dr_explicit_realign,
6468 dataref_ptr, NULL);
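	      /* Mask off the low address bits to obtain floor (p), the
		 aligned address used for the first (MSQ) vector load.  */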
6470 ptr = copy_ssa_name (dataref_ptr);
6471 new_stmt = gimple_build_assign
6472 (ptr, BIT_AND_EXPR, dataref_ptr,
6473 build_int_cst
6474 (TREE_TYPE (dataref_ptr),
6475 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6476 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6477 data_ref
6478 = build2 (MEM_REF, vectype, ptr,
6479 build_int_cst (reference_alias_ptr_type
6480 (DR_REF (first_dr)), 0));
6481 vec_dest = vect_create_destination_var (scalar_dest,
6482 vectype);
6483 new_stmt = gimple_build_assign (vec_dest, data_ref);
6484 new_temp = make_ssa_name (vec_dest, new_stmt);
6485 gimple_assign_set_lhs (new_stmt, new_temp);
6486 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6487 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6489 msq = new_temp;
6491 bump = size_binop (MULT_EXPR, vs_minus_1,
6492 TYPE_SIZE_UNIT (elem_type));
6493 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6494 new_stmt = gimple_build_assign
6495 (NULL_TREE, BIT_AND_EXPR, ptr,
6496 build_int_cst
6497 (TREE_TYPE (ptr),
6498 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6499 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6500 gimple_assign_set_lhs (new_stmt, ptr);
6501 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6502 data_ref
6503 = build2 (MEM_REF, vectype, ptr,
6504 build_int_cst (reference_alias_ptr_type
6505 (DR_REF (first_dr)), 0));
6506 break;
6508 case dr_explicit_realign_optimized:
6509 new_temp = copy_ssa_name (dataref_ptr);
6510 new_stmt = gimple_build_assign
6511 (new_temp, BIT_AND_EXPR, dataref_ptr,
6512 build_int_cst
6513 (TREE_TYPE (dataref_ptr),
6514 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6516 data_ref
6517 = build2 (MEM_REF, vectype, new_temp,
6518 build_int_cst (reference_alias_ptr_type
6519 (DR_REF (first_dr)), 0));
6520 break;
6521 default:
6522 gcc_unreachable ();
6524 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6525 new_stmt = gimple_build_assign (vec_dest, data_ref);
6526 new_temp = make_ssa_name (vec_dest, new_stmt);
6527 gimple_assign_set_lhs (new_stmt, new_temp);
6528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6530 /* 3. Handle explicit realignment if necessary/supported.
6531 Create in loop:
6532 vec_dest = realign_load (msq, lsq, realignment_token) */
6533 if (alignment_support_scheme == dr_explicit_realign_optimized
6534 || alignment_support_scheme == dr_explicit_realign)
6536 lsq = gimple_assign_lhs (new_stmt);
6537 if (!realignment_token)
6538 realignment_token = dataref_ptr;
6539 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6540 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6541 msq, lsq, realignment_token);
6542 new_temp = make_ssa_name (vec_dest, new_stmt);
6543 gimple_assign_set_lhs (new_stmt, new_temp);
6544 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6546 if (alignment_support_scheme == dr_explicit_realign_optimized)
6548 gcc_assert (phi);
6549 if (i == vec_num - 1 && j == ncopies - 1)
6550 add_phi_arg (phi, lsq,
6551 loop_latch_edge (containing_loop),
6552 UNKNOWN_LOCATION);
6553 msq = lsq;
6557 /* 4. Handle invariant-load. */
6558 if (inv_p && !bb_vinfo)
6560 gcc_assert (!grouped_load);
6561 /* If we have versioned for aliasing or the loop doesn't
6562 have any data dependencies that would preclude this,
6563 then we are sure this is a loop invariant load and
6564 thus we can insert it on the preheader edge. */
6565 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6566 && !nested_in_vect_loop
6567 && hoist_defs_of_uses (stmt, loop))
6569 if (dump_enabled_p ())
6571 dump_printf_loc (MSG_NOTE, vect_location,
6572 "hoisting out of the vectorized "
6573 "loop: ");
6574 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6575 dump_printf (MSG_NOTE, "\n");
6577 tree tem = copy_ssa_name (scalar_dest);
6578 gsi_insert_on_edge_immediate
6579 (loop_preheader_edge (loop),
6580 gimple_build_assign (tem,
6581 unshare_expr
6582 (gimple_assign_rhs1 (stmt))));
6583 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6585 else
6587 gimple_stmt_iterator gsi2 = *gsi;
6588 gsi_next (&gsi2);
6589 new_temp = vect_init_vector (stmt, scalar_dest,
6590 vectype, &gsi2);
6592 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6593 set_vinfo_for_stmt (new_stmt,
6594 new_stmt_vec_info (new_stmt, loop_vinfo,
6595 bb_vinfo));
6598 if (negative)
6600 tree perm_mask = perm_mask_for_reverse (vectype);
6601 new_temp = permute_vec_elements (new_temp, new_temp,
6602 perm_mask, stmt, gsi);
6603 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6606 /* Collect vector loads and later create their permutation in
6607 vect_transform_grouped_load (). */
6608 if (grouped_load || slp_perm)
6609 dr_chain.quick_push (new_temp);
6611 /* Store vector loads in the corresponding SLP_NODE. */
6612 if (slp && !slp_perm)
6613 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6615 /* Bump the vector pointer to account for a gap. */
6616 if (slp && group_gap != 0)
6618 tree bump = size_binop (MULT_EXPR,
6619 TYPE_SIZE_UNIT (elem_type),
6620 size_int (group_gap));
6621 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6622 stmt, bump);
6626 if (slp && !slp_perm)
6627 continue;
6629 if (slp_perm)
6631 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6632 slp_node_instance, false))
6634 dr_chain.release ();
6635 return false;
6638 else
6640 if (grouped_load)
6642 if (!load_lanes_p)
6643 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6644 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6646 else
6648 if (j == 0)
6649 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6650 else
6651 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6652 prev_stmt_info = vinfo_for_stmt (new_stmt);
6655 dr_chain.release ();
6658 return true;
6661 /* Function vect_is_simple_cond.
6663 Input:
6664 LOOP - the loop that is being vectorized.
6665 COND - Condition that is checked for simple use.
6667 Output:
6668 *COMP_VECTYPE - the vector type for the comparison.
6670 Returns whether a COND can be vectorized. Checks whether
6671    condition operands are supportable using vect_is_simple_use. */
6673 static bool
6674 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6675 bb_vec_info bb_vinfo, tree *comp_vectype)
6677 tree lhs, rhs;
6678 tree def;
6679 enum vect_def_type dt;
6680 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6682 if (!COMPARISON_CLASS_P (cond))
6683 return false;
6685 lhs = TREE_OPERAND (cond, 0);
6686 rhs = TREE_OPERAND (cond, 1);
6688 if (TREE_CODE (lhs) == SSA_NAME)
6690 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6691 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6692 &lhs_def_stmt, &def, &dt, &vectype1))
6693 return false;
6695 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6696 && TREE_CODE (lhs) != FIXED_CST)
6697 return false;
6699 if (TREE_CODE (rhs) == SSA_NAME)
6701 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6702 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6703 &rhs_def_stmt, &def, &dt, &vectype2))
6704 return false;
6706 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6707 && TREE_CODE (rhs) != FIXED_CST)
6708 return false;
6710 *comp_vectype = vectype1 ? vectype1 : vectype2;
6711 return true;
6714 /* vectorizable_condition.
6716 Check if STMT is conditional modify expression that can be vectorized.
6717 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6718 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6719 at GSI.
6721    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6722    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6723    the else clause if it is 2).
6725 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6727 bool
6728 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6729 gimple *vec_stmt, tree reduc_def, int reduc_index,
6730 slp_tree slp_node)
6732 tree scalar_dest = NULL_TREE;
6733 tree vec_dest = NULL_TREE;
6734 tree cond_expr, then_clause, else_clause;
6735 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6736 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6737 tree comp_vectype = NULL_TREE;
6738 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6739 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6740 tree vec_compare, vec_cond_expr;
6741 tree new_temp;
6742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6743 tree def;
6744 enum vect_def_type dt, dts[4];
6745 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6746 int ncopies;
6747 enum tree_code code;
6748 stmt_vec_info prev_stmt_info = NULL;
6749 int i, j;
6750 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6751 vec<tree> vec_oprnds0 = vNULL;
6752 vec<tree> vec_oprnds1 = vNULL;
6753 vec<tree> vec_oprnds2 = vNULL;
6754 vec<tree> vec_oprnds3 = vNULL;
6755 tree vec_cmp_type;
6757 if (slp_node || PURE_SLP_STMT (stmt_info))
6758 ncopies = 1;
6759 else
6760 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6762 gcc_assert (ncopies >= 1);
6763 if (reduc_index && ncopies > 1)
6764 return false; /* FORNOW */
6766 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6767 return false;
6769 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6770 return false;
6772 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6773 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6774 && reduc_def))
6775 return false;
6777 /* FORNOW: not yet supported. */
6778 if (STMT_VINFO_LIVE_P (stmt_info))
6780 if (dump_enabled_p ())
6781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6782 "value used after loop.\n");
6783 return false;
6786 /* Is vectorizable conditional operation? */
6787 if (!is_gimple_assign (stmt))
6788 return false;
6790 code = gimple_assign_rhs_code (stmt);
6792 if (code != COND_EXPR)
6793 return false;
6795 cond_expr = gimple_assign_rhs1 (stmt);
6796 then_clause = gimple_assign_rhs2 (stmt);
6797 else_clause = gimple_assign_rhs3 (stmt);
6799 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6800 &comp_vectype)
6801 || !comp_vectype)
6802 return false;
6804 if (TREE_CODE (then_clause) == SSA_NAME)
6806 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6807 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6808 &then_def_stmt, &def, &dt))
6809 return false;
6811 else if (TREE_CODE (then_clause) != INTEGER_CST
6812 && TREE_CODE (then_clause) != REAL_CST
6813 && TREE_CODE (then_clause) != FIXED_CST)
6814 return false;
6816 if (TREE_CODE (else_clause) == SSA_NAME)
6818 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6819 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6820 &else_def_stmt, &def, &dt))
6821 return false;
6823 else if (TREE_CODE (else_clause) != INTEGER_CST
6824 && TREE_CODE (else_clause) != REAL_CST
6825 && TREE_CODE (else_clause) != FIXED_CST)
6826 return false;
6828 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6829   /* The result of a vector comparison should be of signed integer type. */
6830 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6831 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6832 if (vec_cmp_type == NULL_TREE)
6833 return false;
6835 if (!vec_stmt)
6837 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6838 return expand_vec_cond_expr_p (vectype, comp_vectype);
6841 /* Transform. */
6843 if (!slp_node)
6845 vec_oprnds0.create (1);
6846 vec_oprnds1.create (1);
6847 vec_oprnds2.create (1);
6848 vec_oprnds3.create (1);
6851 /* Handle def. */
6852 scalar_dest = gimple_assign_lhs (stmt);
6853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6855 /* Handle cond expr. */
6856 for (j = 0; j < ncopies; j++)
6858 gassign *new_stmt = NULL;
6859 if (j == 0)
6861 if (slp_node)
6863 auto_vec<tree, 4> ops;
6864 auto_vec<vec<tree>, 4> vec_defs;
6866 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6867 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6868 ops.safe_push (then_clause);
6869 ops.safe_push (else_clause);
6870 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
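	      /* The defs come back in the same order as OPS, so pop them
		 off back to front.  */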
6871 vec_oprnds3 = vec_defs.pop ();
6872 vec_oprnds2 = vec_defs.pop ();
6873 vec_oprnds1 = vec_defs.pop ();
6874 vec_oprnds0 = vec_defs.pop ();
6876 ops.release ();
6877 vec_defs.release ();
6879 else
6881 gimple gtemp;
6882 vec_cond_lhs =
6883 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6884 stmt, NULL);
6885 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6886 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6888 vec_cond_rhs =
6889 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6890 stmt, NULL);
6891 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6892 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6893 if (reduc_index == 1)
6894 vec_then_clause = reduc_def;
6895 else
6897 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6898 stmt, NULL);
6899 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6900 NULL, &gtemp, &def, &dts[2]);
6902 if (reduc_index == 2)
6903 vec_else_clause = reduc_def;
6904 else
6906 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6907 stmt, NULL);
6908 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6909 NULL, &gtemp, &def, &dts[3]);
6913 else
6915 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6916 vec_oprnds0.pop ());
6917 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6918 vec_oprnds1.pop ());
6919 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6920 vec_oprnds2.pop ());
6921 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6922 vec_oprnds3.pop ());
6925 if (!slp_node)
6927 vec_oprnds0.quick_push (vec_cond_lhs);
6928 vec_oprnds1.quick_push (vec_cond_rhs);
6929 vec_oprnds2.quick_push (vec_then_clause);
6930 vec_oprnds3.quick_push (vec_else_clause);
6933 /* Arguments are ready. Create the new vector stmt. */
6934 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6936 vec_cond_rhs = vec_oprnds1[i];
6937 vec_then_clause = vec_oprnds2[i];
6938 vec_else_clause = vec_oprnds3[i];
6940 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6941 vec_cond_lhs, vec_cond_rhs);
6942 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6943 vec_compare, vec_then_clause, vec_else_clause);
6945 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6946 new_temp = make_ssa_name (vec_dest, new_stmt);
6947 gimple_assign_set_lhs (new_stmt, new_temp);
6948 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6949 if (slp_node)
6950 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6953 if (slp_node)
6954 continue;
6956 if (j == 0)
6957 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6958 else
6959 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6961 prev_stmt_info = vinfo_for_stmt (new_stmt);
6964 vec_oprnds0.release ();
6965 vec_oprnds1.release ();
6966 vec_oprnds2.release ();
6967 vec_oprnds3.release ();
6969 return true;
6973 /* Make sure the statement is vectorizable. */
6975 bool
6976 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6978 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6979 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6980 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6981 bool ok;
6982 tree scalar_type, vectype;
6983 gimple pattern_stmt;
6984 gimple_seq pattern_def_seq;
6986 if (dump_enabled_p ())
6988 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6989 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6990 dump_printf (MSG_NOTE, "\n");
6993 if (gimple_has_volatile_ops (stmt))
6995 if (dump_enabled_p ())
6996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6997 "not vectorized: stmt has volatile operands\n");
6999 return false;
7002 /* Skip stmts that do not need to be vectorized. In loops this is expected
7003 to include:
7004 - the COND_EXPR which is the loop exit condition
7005 - any LABEL_EXPRs in the loop
7006 - computations that are used only for array indexing or loop control.
7007 In basic blocks we only analyze statements that are a part of some SLP
7008      instance; therefore, all the statements are relevant.
7010 Pattern statement needs to be analyzed instead of the original statement
7011 if the original statement is not relevant. Otherwise, we analyze both
7012 statements. In basic blocks we are called from some SLP instance
7013      traversal; in that case don't analyze pattern stmts instead, as the
7014      pattern stmts will already be part of the SLP instance. */
7016 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7017 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7018 && !STMT_VINFO_LIVE_P (stmt_info))
7020 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7021 && pattern_stmt
7022 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7023 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7025 /* Analyze PATTERN_STMT instead of the original stmt. */
7026 stmt = pattern_stmt;
7027 stmt_info = vinfo_for_stmt (pattern_stmt);
7028 if (dump_enabled_p ())
7030 dump_printf_loc (MSG_NOTE, vect_location,
7031 "==> examining pattern statement: ");
7032 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7033 dump_printf (MSG_NOTE, "\n");
7036 else
7038 if (dump_enabled_p ())
7039 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7041 return true;
7044 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7045 && node == NULL
7046 && pattern_stmt
7047 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7048 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7050 /* Analyze PATTERN_STMT too. */
7051 if (dump_enabled_p ())
7053 dump_printf_loc (MSG_NOTE, vect_location,
7054 "==> examining pattern statement: ");
7055 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7056 dump_printf (MSG_NOTE, "\n");
7059 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7060 return false;
7063 if (is_pattern_stmt_p (stmt_info)
7064 && node == NULL
7065 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7067 gimple_stmt_iterator si;
7069 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7071 gimple pattern_def_stmt = gsi_stmt (si);
7072 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7073 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7075 /* Analyze def stmt of STMT if it's a pattern stmt. */
7076 if (dump_enabled_p ())
7078 dump_printf_loc (MSG_NOTE, vect_location,
7079 "==> examining pattern def statement: ");
7080 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7081 dump_printf (MSG_NOTE, "\n");
7084 if (!vect_analyze_stmt (pattern_def_stmt,
7085 need_to_vectorize, node))
7086 return false;
7091 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7093 case vect_internal_def:
7094 break;
7096 case vect_reduction_def:
7097 case vect_nested_cycle:
7098 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7099 || relevance == vect_used_in_outer_by_reduction
7100 || relevance == vect_unused_in_scope));
7101 break;
7103 case vect_induction_def:
7104 case vect_constant_def:
7105 case vect_external_def:
7106 case vect_unknown_def_type:
7107 default:
7108 gcc_unreachable ();
7111 if (bb_vinfo)
7113 gcc_assert (PURE_SLP_STMT (stmt_info));
7115 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7116 if (dump_enabled_p ())
7118 dump_printf_loc (MSG_NOTE, vect_location,
7119 "get vectype for scalar type: ");
7120 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7121 dump_printf (MSG_NOTE, "\n");
7124 vectype = get_vectype_for_scalar_type (scalar_type);
7125 if (!vectype)
7127 if (dump_enabled_p ())
7129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7130 "not SLPed: unsupported data-type ");
7131 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7132 scalar_type);
7133 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7135 return false;
7138 if (dump_enabled_p ())
7140 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7141 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7142 dump_printf (MSG_NOTE, "\n");
7145 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7148 if (STMT_VINFO_RELEVANT_P (stmt_info))
7150 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7151 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7152 || (is_gimple_call (stmt)
7153 && gimple_call_lhs (stmt) == NULL_TREE));
7154 *need_to_vectorize = true;
7157 ok = true;
7158 if (!bb_vinfo
7159 && (STMT_VINFO_RELEVANT_P (stmt_info)
7160 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7161 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7162 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7163 || vectorizable_shift (stmt, NULL, NULL, NULL)
7164 || vectorizable_operation (stmt, NULL, NULL, NULL)
7165 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7166 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7167 || vectorizable_call (stmt, NULL, NULL, NULL)
7168 || vectorizable_store (stmt, NULL, NULL, NULL)
7169 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7170 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7171 else
7173 if (bb_vinfo)
7174 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7175 || vectorizable_conversion (stmt, NULL, NULL, node)
7176 || vectorizable_shift (stmt, NULL, NULL, node)
7177 || vectorizable_operation (stmt, NULL, NULL, node)
7178 || vectorizable_assignment (stmt, NULL, NULL, node)
7179 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7180 || vectorizable_call (stmt, NULL, NULL, node)
7181 || vectorizable_store (stmt, NULL, NULL, node)
7182 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7185 if (!ok)
7187 if (dump_enabled_p ())
7189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7190 "not vectorized: relevant stmt not ");
7191 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7192 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7193 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7196 return false;
7199 if (bb_vinfo)
7200 return true;
7202 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7203 need extra handling, except for vectorizable reductions. */
7204 if (STMT_VINFO_LIVE_P (stmt_info)
7205 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7206 ok = vectorizable_live_operation (stmt, NULL, NULL);
7208 if (!ok)
7210 if (dump_enabled_p ())
7212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7213 "not vectorized: live stmt not ");
7214 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7215 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7216 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7219 return false;
7222 return true;
7226 /* Function vect_transform_stmt.
7228 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7230 bool
7231 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7232 bool *grouped_store, slp_tree slp_node,
7233 slp_instance slp_node_instance)
7235 bool is_store = false;
7236 gimple vec_stmt = NULL;
7237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7238 bool done;
7240 switch (STMT_VINFO_TYPE (stmt_info))
7242 case type_demotion_vec_info_type:
7243 case type_promotion_vec_info_type:
7244 case type_conversion_vec_info_type:
7245 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7246 gcc_assert (done);
7247 break;
7249 case induc_vec_info_type:
7250 gcc_assert (!slp_node);
7251 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7252 gcc_assert (done);
7253 break;
7255 case shift_vec_info_type:
7256 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7257 gcc_assert (done);
7258 break;
7260 case op_vec_info_type:
7261 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7262 gcc_assert (done);
7263 break;
7265 case assignment_vec_info_type:
7266 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7267 gcc_assert (done);
7268 break;
7270 case load_vec_info_type:
7271 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7272 slp_node_instance);
7273 gcc_assert (done);
7274 break;
7276 case store_vec_info_type:
7277 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7278 gcc_assert (done);
7279 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7281 /* In case of interleaving, the whole chain is vectorized when the
7282 last store in the chain is reached. Store stmts before the last
7283 	   one are skipped, and their stmt_vec_info shouldn't be freed
7284 meanwhile. */
7285 *grouped_store = true;
7286 if (STMT_VINFO_VEC_STMT (stmt_info))
7287 is_store = true;
7289 else
7290 is_store = true;
7291 break;
7293 case condition_vec_info_type:
7294 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7295 gcc_assert (done);
7296 break;
7298 case call_vec_info_type:
7299 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7300 stmt = gsi_stmt (*gsi);
7301 if (is_gimple_call (stmt)
7302 && gimple_call_internal_p (stmt)
7303 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7304 is_store = true;
7305 break;
7307 case call_simd_clone_vec_info_type:
7308 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7309 stmt = gsi_stmt (*gsi);
7310 break;
7312 case reduc_vec_info_type:
7313 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7314 gcc_assert (done);
7315 break;
7317 default:
7318 if (!STMT_VINFO_LIVE_P (stmt_info))
7320 if (dump_enabled_p ())
7321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7322 "stmt not supported.\n");
7323 gcc_unreachable ();
7327 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7328 is being vectorized, but outside the immediately enclosing loop. */
7329 if (vec_stmt
7330 && STMT_VINFO_LOOP_VINFO (stmt_info)
7331 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7332 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7333 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7334 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7335 || STMT_VINFO_RELEVANT (stmt_info) ==
7336 vect_used_in_outer_by_reduction))
7338 struct loop *innerloop = LOOP_VINFO_LOOP (
7339 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7340 imm_use_iterator imm_iter;
7341 use_operand_p use_p;
7342 tree scalar_dest;
7343 gimple exit_phi;
7345 if (dump_enabled_p ())
7346 dump_printf_loc (MSG_NOTE, vect_location,
7347 "Record the vdef for outer-loop vectorization.\n");
7349       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7350 (to be used when vectorizing outer-loop stmts that use the DEF of
7351 STMT). */
7352 if (gimple_code (stmt) == GIMPLE_PHI)
7353 scalar_dest = PHI_RESULT (stmt);
7354 else
7355 scalar_dest = gimple_assign_lhs (stmt);
7357 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7359 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7361 exit_phi = USE_STMT (use_p);
7362 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7367 /* Handle stmts whose DEF is used outside the loop-nest that is
7368 being vectorized. */
7369 if (STMT_VINFO_LIVE_P (stmt_info)
7370 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7372 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7373 gcc_assert (done);
7376 if (vec_stmt)
7377 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7379 return is_store;
7383 /* Remove a group of stores (for SLP or interleaving), free their
7384 stmt_vec_info. */
7386 void
7387 vect_remove_stores (gimple first_stmt)
7389 gimple next = first_stmt;
7390 gimple tmp;
7391 gimple_stmt_iterator next_si;
7393 while (next)
7395 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7397 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7398 if (is_pattern_stmt_p (stmt_info))
7399 next = STMT_VINFO_RELATED_STMT (stmt_info);
7400 /* Free the attached stmt_vec_info and remove the stmt. */
7401 next_si = gsi_for_stmt (next);
7402 unlink_stmt_vdef (next);
7403 gsi_remove (&next_si, true);
7404 release_defs (next);
7405 free_stmt_vec_info (next);
7406 next = tmp;
7411 /* Function new_stmt_vec_info.
7413 Create and initialize a new stmt_vec_info struct for STMT. */
7415 stmt_vec_info
7416 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7417 bb_vec_info bb_vinfo)
7419 stmt_vec_info res;
7420 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7422 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7423 STMT_VINFO_STMT (res) = stmt;
7424 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7425 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7426 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7427 STMT_VINFO_LIVE_P (res) = false;
7428 STMT_VINFO_VECTYPE (res) = NULL;
7429 STMT_VINFO_VEC_STMT (res) = NULL;
7430 STMT_VINFO_VECTORIZABLE (res) = true;
7431 STMT_VINFO_IN_PATTERN_P (res) = false;
7432 STMT_VINFO_RELATED_STMT (res) = NULL;
7433 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7434 STMT_VINFO_DATA_REF (res) = NULL;
7436 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7437 STMT_VINFO_DR_OFFSET (res) = NULL;
7438 STMT_VINFO_DR_INIT (res) = NULL;
7439 STMT_VINFO_DR_STEP (res) = NULL;
7440 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7442 if (gimple_code (stmt) == GIMPLE_PHI
7443 && is_loop_header_bb_p (gimple_bb (stmt)))
7444 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7445 else
7446 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7448 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7449 STMT_SLP_TYPE (res) = loop_vect;
7450 GROUP_FIRST_ELEMENT (res) = NULL;
7451 GROUP_NEXT_ELEMENT (res) = NULL;
7452 GROUP_SIZE (res) = 0;
7453 GROUP_STORE_COUNT (res) = 0;
7454 GROUP_GAP (res) = 0;
7455 GROUP_SAME_DR_STMT (res) = NULL;
7457 return res;
7461 /* Create a hash table for stmt_vec_info. */
7463 void
7464 init_stmt_vec_info_vec (void)
7466 gcc_assert (!stmt_vec_info_vec.exists ());
7467 stmt_vec_info_vec.create (50);
7471 /* Free hash table for stmt_vec_info. */
7473 void
7474 free_stmt_vec_info_vec (void)
7476 unsigned int i;
7477 vec_void_p info;
7478 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7479 if (info != NULL)
7480 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7481 gcc_assert (stmt_vec_info_vec.exists ());
7482 stmt_vec_info_vec.release ();
7486 /* Free stmt vectorization related info. */
7488 void
7489 free_stmt_vec_info (gimple stmt)
7491 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7493 if (!stmt_info)
7494 return;
7496 /* Check if this statement has a related "pattern stmt"
7497 (introduced by the vectorizer during the pattern recognition
7498 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7499 too. */
7500 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7502 stmt_vec_info patt_info
7503 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7504 if (patt_info)
7506 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7507 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7508 gimple_set_bb (patt_stmt, NULL);
7509 tree lhs = gimple_get_lhs (patt_stmt);
7510 if (TREE_CODE (lhs) == SSA_NAME)
7511 release_ssa_name (lhs);
7512 if (seq)
7514 gimple_stmt_iterator si;
7515 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7517 gimple seq_stmt = gsi_stmt (si);
7518 gimple_set_bb (seq_stmt, NULL);
7519 	      lhs = gimple_get_lhs (seq_stmt);
7520 if (TREE_CODE (lhs) == SSA_NAME)
7521 release_ssa_name (lhs);
7522 free_stmt_vec_info (seq_stmt);
7525 free_stmt_vec_info (patt_stmt);
7529 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7530 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7531 set_vinfo_for_stmt (stmt, NULL);
7532 free (stmt_info);
7536 /* Function get_vectype_for_scalar_type_and_size.
7538 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7539 by the target. */
7541 static tree
7542 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7544 machine_mode inner_mode = TYPE_MODE (scalar_type);
7545 machine_mode simd_mode;
7546 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7547 int nunits;
7548 tree vectype;
7550 if (nbytes == 0)
7551 return NULL_TREE;
7553 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7554 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7555 return NULL_TREE;
7557 /* For vector types of elements whose mode precision doesn't
7558 match their type's precision we use an element type of mode
7559 precision. The vectorization routines will have to make sure
7560 they support the proper result truncation/extension.
7561 We also make sure to build vector types with INTEGER_TYPE
7562 component type only. */
7563 if (INTEGRAL_TYPE_P (scalar_type)
7564 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7565 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7566 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7567 TYPE_UNSIGNED (scalar_type));
7569 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7570 When the component mode passes the above test simply use a type
7571 corresponding to that mode. The theory is that any use that
7572 would cause problems with this will disable vectorization anyway. */
7573 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7574 && !INTEGRAL_TYPE_P (scalar_type))
7575 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7577 /* We can't build a vector type of elements with alignment bigger than
7578 their size. */
7579 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7580 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7581 TYPE_UNSIGNED (scalar_type));
7583 /* If we fell back to using the mode, fail if there was
7584 no scalar type for it. */
7585 if (scalar_type == NULL_TREE)
7586 return NULL_TREE;
7588 /* If no size was supplied use the mode the target prefers. Otherwise
7589 lookup a vector mode of the specified size. */
7590 if (size == 0)
7591 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7592 else
7593 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7594 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7595 if (nunits <= 1)
7596 return NULL_TREE;
7598 vectype = build_vector_type (scalar_type, nunits);
7600 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7601 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7602 return NULL_TREE;
7604 return vectype;
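/* Illustrative sketch, not part of the original file: on a typical target
   where 'int' has a 4-byte mode, asking for a 16-byte vector yields
   nunits = 16 / 4 = 4, i.e. a four-element integer vector (V4SImode-like);
   passing SIZE == 0 lets targetm.vectorize.preferred_simd_mode choose the
   width instead.  The helper name below is hypothetical.  */

static tree
example_int_vectype_16_bytes (void)
{
  /* Request a 16-byte vector of 'int'; this may return NULL_TREE if the
     target has no such vector mode.  */
  return get_vectype_for_scalar_type_and_size (integer_type_node, 16);
}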
7607 unsigned int current_vector_size;
7609 /* Function get_vectype_for_scalar_type.
7611 Returns the vector type corresponding to SCALAR_TYPE as supported
7612 by the target. */
7614 tree
7615 get_vectype_for_scalar_type (tree scalar_type)
7617 tree vectype;
7618 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7619 current_vector_size);
7620 if (vectype
7621 && current_vector_size == 0)
7622 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7623 return vectype;
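/* Illustrative sketch, not part of the original file: the usual calling
   pattern - derive the vector type from a statement's scalar result type
   and give up when the target has no suitable vector mode.  The helper
   name is hypothetical.  */

static bool
example_set_vectype (gimple stmt, stmt_vec_info stmt_info)
{
  tree lhs = gimple_get_lhs (stmt);
  if (!lhs)
    return false;
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
  if (!vectype)
    return false;
  STMT_VINFO_VECTYPE (stmt_info) = vectype;
  return true;
}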
7626 /* Function get_same_sized_vectype
7628 Returns a vector type corresponding to SCALAR_TYPE with the same size
7629 as VECTOR_TYPE, if supported by the target. */
7631 tree
7632 get_same_sized_vectype (tree scalar_type, tree vector_type)
7634 return get_vectype_for_scalar_type_and_size
7635 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7638 /* Function vect_is_simple_use.
7640 Input:
7641 LOOP_VINFO - the vect info of the loop that is being vectorized.
7642 BB_VINFO - the vect info of the basic block that is being vectorized.
7643 OPERAND - operand of STMT in the loop or bb.
7644 DEF_STMT, DEF - the defining stmt and the defined value in case OPERAND is an SSA_NAME.
7646 Returns whether a stmt with OPERAND can be vectorized.
7647 For loops, supportable operands are constants, loop invariants, and operands
7648 that are defined by the current iteration of the loop. Unsupportable
7649 operands are those that are defined by a previous iteration of the loop (as
7650 is the case in reduction/induction computations).
7651 For basic blocks, supportable operands are constants and bb invariants.
7652 For now, operands defined outside the basic block are not supported. */
7654 bool
7655 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7656 bb_vec_info bb_vinfo, gimple *def_stmt,
7657 tree *def, enum vect_def_type *dt)
7659 basic_block bb;
7660 stmt_vec_info stmt_vinfo;
7661 struct loop *loop = NULL;
7663 if (loop_vinfo)
7664 loop = LOOP_VINFO_LOOP (loop_vinfo);
7666 *def_stmt = NULL;
7667 *def = NULL_TREE;
7669 if (dump_enabled_p ())
7671 dump_printf_loc (MSG_NOTE, vect_location,
7672 "vect_is_simple_use: operand ");
7673 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7674 dump_printf (MSG_NOTE, "\n");
7677 if (CONSTANT_CLASS_P (operand))
7679 *dt = vect_constant_def;
7680 return true;
7683 if (is_gimple_min_invariant (operand))
7685 *def = operand;
7686 *dt = vect_external_def;
7687 return true;
7690 if (TREE_CODE (operand) == PAREN_EXPR)
7692 if (dump_enabled_p ())
7693 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7694 operand = TREE_OPERAND (operand, 0);
7697 if (TREE_CODE (operand) != SSA_NAME)
7699 if (dump_enabled_p ())
7700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7701 "not ssa-name.\n");
7702 return false;
7705 *def_stmt = SSA_NAME_DEF_STMT (operand);
7706 if (*def_stmt == NULL)
7708 if (dump_enabled_p ())
7709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7710 "no def_stmt.\n");
7711 return false;
7714 if (dump_enabled_p ())
7716 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7717 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7718 dump_printf (MSG_NOTE, "\n");
7721 /* An empty (nop) def stmt is expected only for a function argument;
7722 otherwise we expect a PHI node or a GIMPLE_ASSIGN. */
7723 if (gimple_nop_p (*def_stmt))
7725 *def = operand;
7726 *dt = vect_external_def;
7727 return true;
7730 bb = gimple_bb (*def_stmt);
7732 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7733 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7734 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7735 *dt = vect_external_def;
7736 else
7738 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7739 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7742 if (*dt == vect_unknown_def_type
7743 || (stmt
7744 && *dt == vect_double_reduction_def
7745 && gimple_code (stmt) != GIMPLE_PHI))
7747 if (dump_enabled_p ())
7748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7749 "Unsupported pattern.\n");
7750 return false;
7753 if (dump_enabled_p ())
7754 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7756 switch (gimple_code (*def_stmt))
7758 case GIMPLE_PHI:
7759 *def = gimple_phi_result (*def_stmt);
7760 break;
7762 case GIMPLE_ASSIGN:
7763 *def = gimple_assign_lhs (*def_stmt);
7764 break;
7766 case GIMPLE_CALL:
7767 *def = gimple_call_lhs (*def_stmt);
7768 if (*def != NULL)
7769 break;
7770 /* FALLTHRU */
7771 default:
7772 if (dump_enabled_p ())
7773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7774 "unsupported defining stmt:\n");
7775 return false;
7778 return true;
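/* Illustrative sketch, not part of the original file: how a caller might
   classify every operand of a statement before trying to vectorize it,
   rejecting the statement as soon as one operand has an unsupported
   definition.  The helper name is hypothetical.  */

static bool
example_all_operands_simple_p (gimple stmt, loop_vec_info loop_vinfo,
                               bb_vec_info bb_vinfo)
{
  unsigned i;
  /* Operand 0 of an assignment is the lhs; only the uses matter here.  */
  for (i = 1; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      gimple def_stmt;
      tree def;
      enum vect_def_type dt;
      if (op
          && !vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo,
                                  &def_stmt, &def, &dt))
        return false;
    }
  return true;
}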
7781 /* Function vect_is_simple_use_1.
7783 Same as vect_is_simple_use but also determines the vector operand
7784 type of OPERAND and stores it to *VECTYPE. If the definition of
7785 OPERAND is vect_uninitialized_def, vect_constant_def or
7786 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7787 is responsible for computing the best suited vector type for the
7788 scalar operand. */
7790 bool
7791 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7792 bb_vec_info bb_vinfo, gimple *def_stmt,
7793 tree *def, enum vect_def_type *dt, tree *vectype)
7795 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7796 def, dt))
7797 return false;
7799 /* Now get a vector type if the def is internal, otherwise supply
7800 NULL_TREE and leave it up to the caller to figure out a proper
7801 type for the use stmt. */
7802 if (*dt == vect_internal_def
7803 || *dt == vect_induction_def
7804 || *dt == vect_reduction_def
7805 || *dt == vect_double_reduction_def
7806 || *dt == vect_nested_cycle)
7808 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7810 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7811 && !STMT_VINFO_RELEVANT (stmt_info)
7812 && !STMT_VINFO_LIVE_P (stmt_info))
7813 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7815 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7816 gcc_assert (*vectype != NULL_TREE);
7818 else if (*dt == vect_uninitialized_def
7819 || *dt == vect_constant_def
7820 || *dt == vect_external_def)
7821 *vectype = NULL_TREE;
7822 else
7823 gcc_unreachable ();
7825 return true;
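/* Illustrative sketch, not part of the original file: typical use of the
   variant above for a binary operation.  When the definition is constant
   or external the returned vectype is NULL_TREE, so the caller falls back
   to the statement's own vector type.  The helper name is hypothetical.  */

static bool
example_rhs1_vectype (gimple stmt, loop_vec_info loop_vinfo,
                      tree out_vectype, tree *vectype_in)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  if (!vect_is_simple_use_1 (gimple_assign_rhs1 (stmt), stmt, loop_vinfo,
                             NULL, &def_stmt, &def, &dt, vectype_in))
    return false;
  if (*vectype_in == NULL_TREE)
    *vectype_in = out_vectype;
  return true;
}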
7829 /* Function supportable_widening_operation
7831 Check whether an operation represented by the code CODE is a
7832 widening operation that is supported by the target platform in
7833 vector form (i.e., when operating on arguments of type VECTYPE_IN
7834 producing a result of type VECTYPE_OUT).
7836 Widening operations we currently support are NOP (CONVERT), FLOAT,
7837 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
7838 are supported by the target platform either directly (via vector
7839 tree-codes), or via target builtins.
7841 Output:
7842 - CODE1 and CODE2 are codes of vector operations to be used when
7843 vectorizing the operation, if available.
7844 - MULTI_STEP_CVT determines the number of required intermediate steps in
7845 case of multi-step conversion (like char->short->int - in that case
7846 MULTI_STEP_CVT will be 1).
7847 - INTERM_TYPES contains the intermediate type required to perform the
7848 widening operation (short in the above example). */
7850 bool
7851 supportable_widening_operation (enum tree_code code, gimple stmt,
7852 tree vectype_out, tree vectype_in,
7853 enum tree_code *code1, enum tree_code *code2,
7854 int *multi_step_cvt,
7855 vec<tree> *interm_types)
7857 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7858 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7859 struct loop *vect_loop = NULL;
7860 machine_mode vec_mode;
7861 enum insn_code icode1, icode2;
7862 optab optab1, optab2;
7863 tree vectype = vectype_in;
7864 tree wide_vectype = vectype_out;
7865 enum tree_code c1, c2;
7866 int i;
7867 tree prev_type, intermediate_type;
7868 machine_mode intermediate_mode, prev_mode;
7869 optab optab3, optab4;
7871 *multi_step_cvt = 0;
7872 if (loop_info)
7873 vect_loop = LOOP_VINFO_LOOP (loop_info);
7875 switch (code)
7877 case WIDEN_MULT_EXPR:
7878 /* The result of a vectorized widening operation usually requires
7879 two vectors (because the widened results do not fit into one vector).
7880 The generated vector results would normally be expected to appear
7881 in the same order as in the original scalar computation,
7882 i.e. if 8 results are generated in each vector iteration, they are
7883 to be organized as follows:
7884 vect1: [res1,res2,res3,res4],
7885 vect2: [res5,res6,res7,res8].
7887 However, in the special case that the result of the widening
7888 operation is used in a reduction computation only, the order doesn't
7889 matter (because when vectorizing a reduction we change the order of
7890 the computation). Some targets can take advantage of this and
7891 generate more efficient code. For example, targets like Altivec,
7892 that support widen_mult using a sequence of {mult_even,mult_odd}
7893 generate the following vectors:
7894 vect1: [res1,res3,res5,res7],
7895 vect2: [res2,res4,res6,res8].
7897 When vectorizing outer-loops, we execute the inner-loop sequentially
7898 (each vectorized inner-loop iteration contributes to VF outer-loop
7899 iterations in parallel). We therefore don't allow changing the
7900 order of the computation in the inner-loop during outer-loop
7901 vectorization. */
7902 /* TODO: Another case in which order doesn't *really* matter is when we
7903 widen and then contract again, e.g. (short)((int)x * y >> 8).
7904 Normally, pack_trunc performs an even/odd permute, whereas the
7905 repack from an even/odd expansion would be an interleave, which
7906 would be significantly simpler for e.g. AVX2. */
7907 /* In any case, in order to avoid duplicating the code below, recurse
7908 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7909 are properly set up for the caller. If we fail, we'll continue with
7910 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7911 if (vect_loop
7912 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7913 && !nested_in_vect_loop_p (vect_loop, stmt)
7914 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7915 stmt, vectype_out, vectype_in,
7916 code1, code2, multi_step_cvt,
7917 interm_types))
7919 /* Elements in a vector with vect_used_by_reduction property cannot
7920 be reordered if the use chain with this property does not have the
7921 same operation. One such example is s += a * b, where elements
7922 in a and b cannot be reordered. Here we check if the vector defined
7923 by STMT is only directly used in the reduction statement. */
7924 tree lhs = gimple_assign_lhs (stmt);
7925 use_operand_p dummy;
7926 gimple use_stmt;
7927 stmt_vec_info use_stmt_info = NULL;
7928 if (single_imm_use (lhs, &dummy, &use_stmt)
7929 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7930 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7931 return true;
7933 c1 = VEC_WIDEN_MULT_LO_EXPR;
7934 c2 = VEC_WIDEN_MULT_HI_EXPR;
7935 break;
7937 case VEC_WIDEN_MULT_EVEN_EXPR:
7938 /* Support the recursion induced just above. */
7939 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7940 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7941 break;
7943 case WIDEN_LSHIFT_EXPR:
7944 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7945 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7946 break;
7948 CASE_CONVERT:
7949 c1 = VEC_UNPACK_LO_EXPR;
7950 c2 = VEC_UNPACK_HI_EXPR;
7951 break;
7953 case FLOAT_EXPR:
7954 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7955 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7956 break;
7958 case FIX_TRUNC_EXPR:
7959 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7960 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7961 computing the operation. */
7962 return false;
7964 default:
7965 gcc_unreachable ();
7968 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7970 enum tree_code ctmp = c1;
7971 c1 = c2;
7972 c2 = ctmp;
7975 if (code == FIX_TRUNC_EXPR)
7977 /* The signedness is determined from the output operand. */
7978 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7979 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7981 else
7983 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7984 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7987 if (!optab1 || !optab2)
7988 return false;
7990 vec_mode = TYPE_MODE (vectype);
7991 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7992 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7993 return false;
7995 *code1 = c1;
7996 *code2 = c2;
7998 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7999 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8000 return true;
8002 /* Check if it's a multi-step conversion that can be done using intermediate
8003 types. */
8005 prev_type = vectype;
8006 prev_mode = vec_mode;
8008 if (!CONVERT_EXPR_CODE_P (code))
8009 return false;
8011 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8012 intermediate steps in the promotion sequence. We try
8013 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8014 not. */
8015 interm_types->create (MAX_INTERM_CVT_STEPS);
8016 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8018 intermediate_mode = insn_data[icode1].operand[0].mode;
8019 intermediate_type
8020 = lang_hooks.types.type_for_mode (intermediate_mode,
8021 TYPE_UNSIGNED (prev_type));
8022 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8023 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8025 if (!optab3 || !optab4
8026 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8027 || insn_data[icode1].operand[0].mode != intermediate_mode
8028 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8029 || insn_data[icode2].operand[0].mode != intermediate_mode
8030 || ((icode1 = optab_handler (optab3, intermediate_mode))
8031 == CODE_FOR_nothing)
8032 || ((icode2 = optab_handler (optab4, intermediate_mode))
8033 == CODE_FOR_nothing))
8034 break;
8036 interm_types->quick_push (intermediate_type);
8037 (*multi_step_cvt)++;
8039 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8040 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8041 return true;
8043 prev_type = intermediate_type;
8044 prev_mode = intermediate_mode;
8047 interm_types->release ();
8048 return false;
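/* Illustrative sketch, not part of the original file: querying the routine
   above for a widening conversion such as char->int.  On a target whose
   unpack instructions widen one step at a time, MULTI_STEP_CVT comes back
   as 1 and INTERM_TYPES holds the intermediate (short) vector type.  The
   helper name is hypothetical.  */

static bool
example_widening_conversion_ok_p (gimple stmt, tree vectype_out,
                                  tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                            vectype_in, &code1, &code2,
                                            &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}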
8052 /* Function supportable_narrowing_operation
8054 Check whether an operation represented by the code CODE is a
8055 narrowing operation that is supported by the target platform in
8056 vector form (i.e., when operating on arguments of type VECTYPE_IN
8057 and producing a result of type VECTYPE_OUT).
8059 Narrowing operations we currently support are NOP (CONVERT) and
8060 FIX_TRUNC. This function checks if these operations are supported by
8061 the target platform directly via vector tree-codes.
8063 Output:
8064 - CODE1 is the code of a vector operation to be used when
8065 vectorizing the operation, if available.
8066 - MULTI_STEP_CVT determines the number of required intermediate steps in
8067 case of multi-step conversion (like int->short->char - in that case
8068 MULTI_STEP_CVT will be 1).
8069 - INTERM_TYPES contains the intermediate type required to perform the
8070 narrowing operation (short in the above example). */
8072 bool
8073 supportable_narrowing_operation (enum tree_code code,
8074 tree vectype_out, tree vectype_in,
8075 enum tree_code *code1, int *multi_step_cvt,
8076 vec<tree> *interm_types)
8078 machine_mode vec_mode;
8079 enum insn_code icode1;
8080 optab optab1, interm_optab;
8081 tree vectype = vectype_in;
8082 tree narrow_vectype = vectype_out;
8083 enum tree_code c1;
8084 tree intermediate_type;
8085 machine_mode intermediate_mode, prev_mode;
8086 int i;
8087 bool uns;
8089 *multi_step_cvt = 0;
8090 switch (code)
8092 CASE_CONVERT:
8093 c1 = VEC_PACK_TRUNC_EXPR;
8094 break;
8096 case FIX_TRUNC_EXPR:
8097 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8098 break;
8100 case FLOAT_EXPR:
8101 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8102 tree code and optabs used for computing the operation. */
8103 return false;
8105 default:
8106 gcc_unreachable ();
8109 if (code == FIX_TRUNC_EXPR)
8110 /* The signedness is determined from the output operand. */
8111 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8112 else
8113 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8115 if (!optab1)
8116 return false;
8118 vec_mode = TYPE_MODE (vectype);
8119 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8120 return false;
8122 *code1 = c1;
8124 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8125 return true;
8127 /* Check if it's a multi-step conversion that can be done using intermediate
8128 types. */
8129 prev_mode = vec_mode;
8130 if (code == FIX_TRUNC_EXPR)
8131 uns = TYPE_UNSIGNED (vectype_out);
8132 else
8133 uns = TYPE_UNSIGNED (vectype);
8135 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8136 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8137 costly than signed. */
8138 if (code == FIX_TRUNC_EXPR && uns)
8140 enum insn_code icode2;
8142 intermediate_type
8143 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8144 interm_optab
8145 = optab_for_tree_code (c1, intermediate_type, optab_default);
8146 if (interm_optab != unknown_optab
8147 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8148 && insn_data[icode1].operand[0].mode
8149 == insn_data[icode2].operand[0].mode)
8151 uns = false;
8152 optab1 = interm_optab;
8153 icode1 = icode2;
8157 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8158 intermediate steps in the narrowing sequence. We try
8159 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8160 interm_types->create (MAX_INTERM_CVT_STEPS);
8161 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8163 intermediate_mode = insn_data[icode1].operand[0].mode;
8164 intermediate_type
8165 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8166 interm_optab
8167 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8168 optab_default);
8169 if (!interm_optab
8170 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8171 || insn_data[icode1].operand[0].mode != intermediate_mode
8172 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8173 == CODE_FOR_nothing))
8174 break;
8176 interm_types->quick_push (intermediate_type);
8177 (*multi_step_cvt)++;
8179 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8180 return true;
8182 prev_mode = intermediate_mode;
8183 optab1 = interm_optab;
8186 interm_types->release ();
8187 return false;
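/* Illustrative sketch, not part of the original file: the narrowing
   counterpart of the query above.  For an int->char conversion on a target
   whose pack instructions narrow one step at a time, CODE1 is
   VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and the single intermediate type
   is the short vector type.  The helper name is hypothetical.  */

static bool
example_narrowing_conversion_ok_p (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_narrowing_operation (NOP_EXPR, vectype_out,
                                             vectype_in, &code1,
                                             &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}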