gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "target.h"
33 #include "predict.h"
34 #include "hard-reg-set.h"
35 #include "function.h"
36 #include "dominance.h"
37 #include "cfg.h"
38 #include "basic-block.h"
39 #include "gimple-pretty-print.h"
40 #include "tree-ssa-alias.h"
41 #include "internal-fn.h"
42 #include "tree-eh.h"
43 #include "gimple-expr.h"
44 #include "gimple.h"
45 #include "gimplify.h"
46 #include "gimple-iterator.h"
47 #include "gimplify-me.h"
48 #include "gimple-ssa.h"
49 #include "tree-cfg.h"
50 #include "tree-phinodes.h"
51 #include "ssa-iterators.h"
52 #include "stringpool.h"
53 #include "tree-ssanames.h"
54 #include "tree-ssa-loop-manip.h"
55 #include "cfgloop.h"
56 #include "tree-ssa-loop.h"
57 #include "tree-scalar-evolution.h"
58 #include "rtl.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "expmed.h"
62 #include "dojump.h"
63 #include "explow.h"
64 #include "calls.h"
65 #include "emit-rtl.h"
66 #include "varasm.h"
67 #include "stmt.h"
68 #include "expr.h"
69 #include "recog.h" /* FIXME: for insn_data */
70 #include "insn-codes.h"
71 #include "optabs.h"
72 #include "diagnostic-core.h"
73 #include "tree-vectorizer.h"
74 #include "plugin-api.h"
75 #include "ipa-ref.h"
76 #include "cgraph.h"
77 #include "builtins.h"
79 /* For lang_hooks.types.type_for_mode. */
80 #include "langhooks.h"
82 /* Return the vectorized type for the given statement. */
84 tree
85 stmt_vectype (struct _stmt_vec_info *stmt_info)
87 return STMT_VINFO_VECTYPE (stmt_info);
90 /* Return TRUE iff the given statement is in an inner loop relative to
91 the loop being vectorized. */
92 bool
93 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
95 gimple stmt = STMT_VINFO_STMT (stmt_info);
96 basic_block bb = gimple_bb (stmt);
97 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
98 struct loop* loop;
100 if (!loop_vinfo)
101 return false;
103 loop = LOOP_VINFO_LOOP (loop_vinfo);
105 return (bb->loop_father == loop->inner);
108 /* Record the cost of a statement, either by directly informing the
109 target model or by saving it in a vector for later processing.
110 Return a preliminary estimate of the statement's cost. */
112 unsigned
113 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
114 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
115 int misalign, enum vect_cost_model_location where)
117 if (body_cost_vec)
119 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
120 add_stmt_info_to_vec (body_cost_vec, count, kind,
121 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
122 misalign);
123 return (unsigned)
124 (builtin_vectorization_cost (kind, vectype, misalign) * count);
127 else
129 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
130 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
131 void *target_cost_data;
133 if (loop_vinfo)
134 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
135 else
136 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
138 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
139 misalign, where);
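/* Illustrative note (added for clarity, not part of the original sources):
   a typical call from the cost-model code further below is

     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                        stmt_info, 0, vect_prologue);

   which queues one scalar_to_vec stmt in PROLOGUE_COST_VEC for the later
   add_stmt_cost pass and returns the target's preliminary estimate
   (builtin_vectorization_cost * count) for that stmt.  */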
143 /* Return a variable of type ELEM_TYPE[NELEMS]. */
145 static tree
146 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
148 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
149 "vect_array");
152 /* ARRAY is an array of vectors created by create_vector_array.
153 Return an SSA_NAME for the vector in index N. The reference
154 is part of the vectorization of STMT and the vector is associated
155 with scalar destination SCALAR_DEST. */
157 static tree
158 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
159 tree array, unsigned HOST_WIDE_INT n)
161 tree vect_type, vect, vect_name, array_ref;
162 gimple new_stmt;
164 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
165 vect_type = TREE_TYPE (TREE_TYPE (array));
166 vect = vect_create_destination_var (scalar_dest, vect_type);
167 array_ref = build4 (ARRAY_REF, vect_type, array,
168 build_int_cst (size_type_node, n),
169 NULL_TREE, NULL_TREE);
171 new_stmt = gimple_build_assign (vect, array_ref);
172 vect_name = make_ssa_name (vect, new_stmt);
173 gimple_assign_set_lhs (new_stmt, vect_name);
174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
176 return vect_name;
179 /* ARRAY is an array of vectors created by create_vector_array.
180 Emit code to store SSA_NAME VECT in index N of the array.
181 The store is part of the vectorization of STMT. */
183 static void
184 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
185 tree array, unsigned HOST_WIDE_INT n)
187 tree array_ref;
188 gimple new_stmt;
190 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
191 build_int_cst (size_type_node, n),
192 NULL_TREE, NULL_TREE);
194 new_stmt = gimple_build_assign (array_ref, vect);
195 vect_finish_stmt_generation (stmt, new_stmt, gsi);
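/* For illustration (SSA names are made up): with a V4SF element type and
   N == 1, read_vector_array emits GIMPLE along the lines of

     vect_x_3 = vect_array[1];

   and write_vector_array emits

     vect_array[1] = vect_y_5;

   where vect_array is the temporary created by create_vector_array.  */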
198 /* PTR is a pointer to an array of type TYPE. Return a representation
199 of *PTR. The memory reference replaces those in FIRST_DR
200 (and its group). */
202 static tree
203 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
205 tree mem_ref, alias_ptr_type;
207 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
208 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
209 /* Arrays have the same alignment as their type. */
210 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
211 return mem_ref;
214 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
216 /* Function vect_mark_relevant.
218 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
220 static void
221 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
222 enum vect_relevant relevant, bool live_p,
223 bool used_in_pattern)
225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
226 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228 gimple pattern_stmt;
230 if (dump_enabled_p ())
231 dump_printf_loc (MSG_NOTE, vect_location,
232 "mark relevant %d, live %d.\n", relevant, live_p);
234 /* If this stmt is an original stmt in a pattern, we might need to mark its
235 related pattern stmt instead of the original stmt. However, such stmts
236 may have their own uses that are not in any pattern, in such cases the
237 stmt itself should be marked. */
238 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
240 bool found = false;
241 if (!used_in_pattern)
243 imm_use_iterator imm_iter;
244 use_operand_p use_p;
245 gimple use_stmt;
246 tree lhs;
247 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
248 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
250 if (is_gimple_assign (stmt))
251 lhs = gimple_assign_lhs (stmt);
252 else
253 lhs = gimple_call_lhs (stmt);
255 /* This use is an out-of-pattern use; if LHS has other uses that
256 are pattern uses, we should mark the stmt itself, and not the
257 pattern stmt. */
258 if (lhs && TREE_CODE (lhs) == SSA_NAME)
259 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
261 if (is_gimple_debug (USE_STMT (use_p)))
262 continue;
263 use_stmt = USE_STMT (use_p);
265 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
266 continue;
268 if (vinfo_for_stmt (use_stmt)
269 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
271 found = true;
272 break;
277 if (!found)
279 /* This is the last stmt in a sequence that was detected as a
280 pattern that can potentially be vectorized. Don't mark the stmt
281 as relevant/live because it's not going to be vectorized.
282 Instead mark the pattern-stmt that replaces it. */
284 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
286 if (dump_enabled_p ())
287 dump_printf_loc (MSG_NOTE, vect_location,
288 "last stmt in pattern. don't mark"
289 " relevant/live.\n");
290 stmt_info = vinfo_for_stmt (pattern_stmt);
291 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
292 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
293 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
294 stmt = pattern_stmt;
298 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
299 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
300 STMT_VINFO_RELEVANT (stmt_info) = relevant;
302 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
303 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
305 if (dump_enabled_p ())
306 dump_printf_loc (MSG_NOTE, vect_location,
307 "already marked relevant/live.\n");
308 return;
311 worklist->safe_push (stmt);
315 /* Function vect_stmt_relevant_p.
317 Return true if STMT in loop that is represented by LOOP_VINFO is
318 "relevant for vectorization".
320 A stmt is considered "relevant for vectorization" if:
321 - it has uses outside the loop.
322 - it has vdefs (it alters memory).
323 - it is a control stmt in the loop (except for the exit condition).
325 CHECKME: what other side effects would the vectorizer allow? */
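/* Illustrative sketch only: in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     <-- relevant: it writes memory (has a vdef)
         s = s + b[i];        <-- live if s is used after the loop
       }

   the store makes the first stmt relevant, and a use of s outside the
   loop makes the second stmt "live".  */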
327 static bool
328 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
329 enum vect_relevant *relevant, bool *live_p)
331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
332 ssa_op_iter op_iter;
333 imm_use_iterator imm_iter;
334 use_operand_p use_p;
335 def_operand_p def_p;
337 *relevant = vect_unused_in_scope;
338 *live_p = false;
340 /* cond stmt other than loop exit cond. */
341 if (is_ctrl_stmt (stmt)
342 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
343 != loop_exit_ctrl_vec_info_type)
344 *relevant = vect_used_in_scope;
346 /* changing memory. */
347 if (gimple_code (stmt) != GIMPLE_PHI)
348 if (gimple_vdef (stmt)
349 && !gimple_clobber_p (stmt))
351 if (dump_enabled_p ())
352 dump_printf_loc (MSG_NOTE, vect_location,
353 "vec_stmt_relevant_p: stmt has vdefs.\n");
354 *relevant = vect_used_in_scope;
357 /* uses outside the loop. */
358 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
360 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
362 basic_block bb = gimple_bb (USE_STMT (use_p));
363 if (!flow_bb_inside_loop_p (loop, bb))
365 if (dump_enabled_p ())
366 dump_printf_loc (MSG_NOTE, vect_location,
367 "vec_stmt_relevant_p: used out of loop.\n");
369 if (is_gimple_debug (USE_STMT (use_p)))
370 continue;
372 /* We expect all such uses to be in the loop exit phis
373 (because of loop closed form) */
374 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
375 gcc_assert (bb == single_exit (loop)->dest);
377 *live_p = true;
382 return (*live_p || *relevant);
386 /* Function exist_non_indexing_operands_for_use_p
388 USE is one of the uses attached to STMT. Check if USE is
389 used in STMT for anything other than indexing an array. */
391 static bool
392 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
394 tree operand;
395 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
397 /* USE corresponds to some operand in STMT. If there is no data
398 reference in STMT, then any operand that corresponds to USE
399 is not indexing an array. */
400 if (!STMT_VINFO_DATA_REF (stmt_info))
401 return true;
403 /* STMT has a data_ref. FORNOW this means that it is of one of
404 the following forms:
405 -1- ARRAY_REF = var
406 -2- var = ARRAY_REF
407 (This should have been verified in analyze_data_refs).
409 'var' in the second case corresponds to a def, not a use,
410 so USE cannot correspond to any operands that are not used
411 for array indexing.
413 Therefore, all we need to check is if STMT falls into the
414 first case, and whether var corresponds to USE. */
416 if (!gimple_assign_copy_p (stmt))
418 if (is_gimple_call (stmt)
419 && gimple_call_internal_p (stmt))
420 switch (gimple_call_internal_fn (stmt))
422 case IFN_MASK_STORE:
423 operand = gimple_call_arg (stmt, 3);
424 if (operand == use)
425 return true;
426 /* FALLTHRU */
427 case IFN_MASK_LOAD:
428 operand = gimple_call_arg (stmt, 2);
429 if (operand == use)
430 return true;
431 break;
432 default:
433 break;
435 return false;
438 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
439 return false;
440 operand = gimple_assign_rhs1 (stmt);
441 if (TREE_CODE (operand) != SSA_NAME)
442 return false;
444 if (operand == use)
445 return true;
447 return false;
452 Function process_use.
454 Inputs:
455 - a USE in STMT in a loop represented by LOOP_VINFO
456 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
457 that defined USE. This is done by calling mark_relevant and passing it
458 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
459 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
460 be performed.
462 Outputs:
463 Generally, LIVE_P and RELEVANT are used to define the liveness and
464 relevance info of the DEF_STMT of this USE:
465 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
466 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
467 Exceptions:
468 - case 1: If USE is used only for address computations (e.g. array indexing),
469 which does not need to be directly vectorized, then the liveness/relevance
470 of the respective DEF_STMT is left unchanged.
471 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
472 skip DEF_STMT because it has already been processed.
473 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
474 be modified accordingly.
476 Return true if everything is as expected. Return false otherwise. */
478 static bool
479 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
480 enum vect_relevant relevant, vec<gimple> *worklist,
481 bool force)
483 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
484 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
485 stmt_vec_info dstmt_vinfo;
486 basic_block bb, def_bb;
487 tree def;
488 gimple def_stmt;
489 enum vect_def_type dt;
491 /* case 1: we are only interested in uses that need to be vectorized. Uses
492 that are used for address computation are not considered relevant. */
493 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
494 return true;
496 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
498 if (dump_enabled_p ())
499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
500 "not vectorized: unsupported use in stmt.\n");
501 return false;
504 if (!def_stmt || gimple_nop_p (def_stmt))
505 return true;
507 def_bb = gimple_bb (def_stmt);
508 if (!flow_bb_inside_loop_p (loop, def_bb))
510 if (dump_enabled_p ())
511 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
512 return true;
515 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
516 DEF_STMT must have already been processed, because this should be the
517 only way that STMT, which is a reduction-phi, was put in the worklist,
518 as there should be no other uses for DEF_STMT in the loop. So we just
519 check that everything is as expected, and we are done. */
520 dstmt_vinfo = vinfo_for_stmt (def_stmt);
521 bb = gimple_bb (stmt);
522 if (gimple_code (stmt) == GIMPLE_PHI
523 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
524 && gimple_code (def_stmt) != GIMPLE_PHI
525 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
526 && bb->loop_father == def_bb->loop_father)
528 if (dump_enabled_p ())
529 dump_printf_loc (MSG_NOTE, vect_location,
530 "reduc-stmt defining reduc-phi in the same nest.\n");
531 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
532 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
533 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
534 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
535 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
536 return true;
539 /* case 3a: outer-loop stmt defining an inner-loop stmt:
540 outer-loop-header-bb:
541 d = def_stmt
542 inner-loop:
543 stmt # use (d)
544 outer-loop-tail-bb:
545 ... */
546 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
548 if (dump_enabled_p ())
549 dump_printf_loc (MSG_NOTE, vect_location,
550 "outer-loop def-stmt defining inner-loop stmt.\n");
552 switch (relevant)
554 case vect_unused_in_scope:
555 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
556 vect_used_in_scope : vect_unused_in_scope;
557 break;
559 case vect_used_in_outer_by_reduction:
560 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
561 relevant = vect_used_by_reduction;
562 break;
564 case vect_used_in_outer:
565 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
566 relevant = vect_used_in_scope;
567 break;
569 case vect_used_in_scope:
570 break;
572 default:
573 gcc_unreachable ();
577 /* case 3b: inner-loop stmt defining an outer-loop stmt:
578 outer-loop-header-bb:
580 inner-loop:
581 d = def_stmt
582 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
583 stmt # use (d) */
584 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
586 if (dump_enabled_p ())
587 dump_printf_loc (MSG_NOTE, vect_location,
588 "inner-loop def-stmt defining outer-loop stmt.\n");
590 switch (relevant)
592 case vect_unused_in_scope:
593 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
594 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
595 vect_used_in_outer_by_reduction : vect_unused_in_scope;
596 break;
598 case vect_used_by_reduction:
599 relevant = vect_used_in_outer_by_reduction;
600 break;
602 case vect_used_in_scope:
603 relevant = vect_used_in_outer;
604 break;
606 default:
607 gcc_unreachable ();
611 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
612 is_pattern_stmt_p (stmt_vinfo));
613 return true;
617 /* Function vect_mark_stmts_to_be_vectorized.
619 Not all stmts in the loop need to be vectorized. For example:
621 for i...
622 for j...
623 1. T0 = i + j
624 2. T1 = a[T0]
626 3. j = j + 1
628 Stmts 1 and 3 do not need to be vectorized, because loop control and
629 addressing of vectorized data-refs are handled differently.
631 This pass detects such stmts. */
633 bool
634 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
636 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
637 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
638 unsigned int nbbs = loop->num_nodes;
639 gimple_stmt_iterator si;
640 gimple stmt;
641 unsigned int i;
642 stmt_vec_info stmt_vinfo;
643 basic_block bb;
644 gimple phi;
645 bool live_p;
646 enum vect_relevant relevant, tmp_relevant;
647 enum vect_def_type def_type;
649 if (dump_enabled_p ())
650 dump_printf_loc (MSG_NOTE, vect_location,
651 "=== vect_mark_stmts_to_be_vectorized ===\n");
653 auto_vec<gimple, 64> worklist;
655 /* 1. Init worklist. */
656 for (i = 0; i < nbbs; i++)
658 bb = bbs[i];
659 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
661 phi = gsi_stmt (si);
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
668 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
669 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
671 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
673 stmt = gsi_stmt (si);
674 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
677 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
680 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
681 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
685 /* 2. Process_worklist */
686 while (worklist.length () > 0)
688 use_operand_p use_p;
689 ssa_op_iter iter;
691 stmt = worklist.pop ();
692 if (dump_enabled_p ())
694 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
695 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
698 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
699 (DEF_STMT) as relevant/irrelevant and live/dead according to the
700 liveness and relevance properties of STMT. */
701 stmt_vinfo = vinfo_for_stmt (stmt);
702 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
703 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
705 /* Generally, the liveness and relevance properties of STMT are
706 propagated as is to the DEF_STMTs of its USEs:
707 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
708 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
710 One exception is when STMT has been identified as defining a reduction
711 variable; in this case we set the liveness/relevance as follows:
712 live_p = false
713 relevant = vect_used_by_reduction
714 This is because we distinguish between two kinds of relevant stmts -
715 those that are used by a reduction computation, and those that are
716 (also) used by a regular computation. This allows us later on to
717 identify stmts that are used solely by a reduction, and therefore the
718 order of the results that they produce does not have to be kept. */
720 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
721 tmp_relevant = relevant;
722 switch (def_type)
724 case vect_reduction_def:
725 switch (tmp_relevant)
727 case vect_unused_in_scope:
728 relevant = vect_used_by_reduction;
729 break;
731 case vect_used_by_reduction:
732 if (gimple_code (stmt) == GIMPLE_PHI)
733 break;
734 /* fall through */
736 default:
737 if (dump_enabled_p ())
738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
739 "unsupported use of reduction.\n");
740 return false;
743 live_p = false;
744 break;
746 case vect_nested_cycle:
747 if (tmp_relevant != vect_unused_in_scope
748 && tmp_relevant != vect_used_in_outer_by_reduction
749 && tmp_relevant != vect_used_in_outer)
751 if (dump_enabled_p ())
752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
753 "unsupported use of nested cycle.\n");
755 return false;
758 live_p = false;
759 break;
761 case vect_double_reduction_def:
762 if (tmp_relevant != vect_unused_in_scope
763 && tmp_relevant != vect_used_by_reduction)
765 if (dump_enabled_p ())
766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
767 "unsupported use of double reduction.\n");
769 return false;
772 live_p = false;
773 break;
775 default:
776 break;
779 if (is_pattern_stmt_p (stmt_vinfo))
781 /* Pattern statements are not inserted into the code, so
782 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
783 have to scan the RHS or function arguments instead. */
784 if (is_gimple_assign (stmt))
786 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
787 tree op = gimple_assign_rhs1 (stmt);
789 i = 1;
790 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
792 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
793 live_p, relevant, &worklist, false)
794 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
795 live_p, relevant, &worklist, false))
796 return false;
797 i = 2;
799 for (; i < gimple_num_ops (stmt); i++)
801 op = gimple_op (stmt, i);
802 if (TREE_CODE (op) == SSA_NAME
803 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
804 &worklist, false))
805 return false;
808 else if (is_gimple_call (stmt))
810 for (i = 0; i < gimple_call_num_args (stmt); i++)
812 tree arg = gimple_call_arg (stmt, i);
813 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
814 &worklist, false))
815 return false;
819 else
820 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
822 tree op = USE_FROM_PTR (use_p);
823 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
824 &worklist, false))
825 return false;
828 if (STMT_VINFO_GATHER_P (stmt_vinfo))
830 tree off;
831 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
832 gcc_assert (decl);
833 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
834 &worklist, true))
835 return false;
837 } /* while worklist */
839 return true;
843 /* Function vect_model_simple_cost.
845 Models cost for simple operations, i.e. those that only emit ncopies of a
846 single op. Right now, this does not account for multiple insns that could
847 be generated for the single vector op. We will handle that shortly. */
849 void
850 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
851 enum vect_def_type *dt,
852 stmt_vector_for_cost *prologue_cost_vec,
853 stmt_vector_for_cost *body_cost_vec)
855 int i;
856 int inside_cost = 0, prologue_cost = 0;
858 /* The SLP costs were already calculated during SLP tree build. */
859 if (PURE_SLP_STMT (stmt_info))
860 return;
862 /* FORNOW: Assuming maximum 2 args per stmt. */
863 for (i = 0; i < 2; i++)
864 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
865 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
866 stmt_info, 0, vect_prologue);
868 /* Pass the inside-of-loop statements to the target-specific cost model. */
869 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
870 stmt_info, 0, vect_body);
872 if (dump_enabled_p ())
873 dump_printf_loc (MSG_NOTE, vect_location,
874 "vect_model_simple_cost: inside_cost = %d, "
875 "prologue_cost = %d .\n", inside_cost, prologue_cost);
879 /* Model cost for type demotion and promotion operations. PWR is normally
880 zero for single-step promotions and demotions. It will be one if
881 two-step promotion/demotion is required, and so on. Each additional
882 step doubles the number of instructions required. */
884 static void
885 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
886 enum vect_def_type *dt, int pwr)
888 int i, tmp;
889 int inside_cost = 0, prologue_cost = 0;
890 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
891 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
892 void *target_cost_data;
894 /* The SLP costs were already calculated during SLP tree build. */
895 if (PURE_SLP_STMT (stmt_info))
896 return;
898 if (loop_vinfo)
899 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
900 else
901 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
903 for (i = 0; i < pwr + 1; i++)
905 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
906 (i + 1) : i;
907 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
908 vec_promote_demote, stmt_info, 0,
909 vect_body);
912 /* FORNOW: Assuming maximum 2 args per stmt. */
913 for (i = 0; i < 2; i++)
914 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
915 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
916 stmt_info, 0, vect_prologue);
918 if (dump_enabled_p ())
919 dump_printf_loc (MSG_NOTE, vect_location,
920 "vect_model_promotion_demotion_cost: inside_cost = %d, "
921 "prologue_cost = %d .\n", inside_cost, prologue_cost);
924 /* Function vect_cost_group_size
926 For grouped load or store, return the group_size only if it is the first
927 load or store of a group, else return 1. This ensures that group size is
928 only returned once per group. */
930 static int
931 vect_cost_group_size (stmt_vec_info stmt_info)
933 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
935 if (first_stmt == STMT_VINFO_STMT (stmt_info))
936 return GROUP_SIZE (stmt_info);
938 return 1;
942 /* Function vect_model_store_cost
944 Models cost for stores. In the case of grouped accesses, one access
945 has the overhead of the grouped access attributed to it. */
947 void
948 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
949 bool store_lanes_p, enum vect_def_type dt,
950 slp_tree slp_node,
951 stmt_vector_for_cost *prologue_cost_vec,
952 stmt_vector_for_cost *body_cost_vec)
954 int group_size;
955 unsigned int inside_cost = 0, prologue_cost = 0;
956 struct data_reference *first_dr;
957 gimple first_stmt;
959 if (dt == vect_constant_def || dt == vect_external_def)
960 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
961 stmt_info, 0, vect_prologue);
963 /* Grouped access? */
964 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
966 if (slp_node)
968 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
969 group_size = 1;
971 else
973 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
974 group_size = vect_cost_group_size (stmt_info);
977 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
979 /* Not a grouped access. */
980 else
982 group_size = 1;
983 first_dr = STMT_VINFO_DATA_REF (stmt_info);
986 /* We assume that the cost of a single store-lanes instruction is
987 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
988 access is instead being provided by a permute-and-store operation,
989 include the cost of the permutes. */
990 if (!store_lanes_p && group_size > 1
991 && !STMT_VINFO_STRIDED_P (stmt_info))
993 /* Uses high and low interleave or shuffle operations for each
994 needed permute. */
995 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
996 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
997 stmt_info, 0, vect_body);
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE, vect_location,
1001 "vect_model_store_cost: strided group_size = %d .\n",
1002 group_size);
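/* E.g. (illustrative): for ncopies == 1 and group_size == 4 the formula
   above gives 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts to interleave the
   four vectors before the stores.  */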
1005 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1006 /* Costs of the stores. */
1007 if (STMT_VINFO_STRIDED_P (stmt_info)
1008 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1010 /* N scalar stores plus extracting the elements. */
1011 inside_cost += record_stmt_cost (body_cost_vec,
1012 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1013 scalar_store, stmt_info, 0, vect_body);
1015 else
1016 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1018 if (STMT_VINFO_STRIDED_P (stmt_info))
1019 inside_cost += record_stmt_cost (body_cost_vec,
1020 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1021 vec_to_scalar, stmt_info, 0, vect_body);
1023 if (dump_enabled_p ())
1024 dump_printf_loc (MSG_NOTE, vect_location,
1025 "vect_model_store_cost: inside_cost = %d, "
1026 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1030 /* Calculate cost of DR's memory access. */
1031 void
1032 vect_get_store_cost (struct data_reference *dr, int ncopies,
1033 unsigned int *inside_cost,
1034 stmt_vector_for_cost *body_cost_vec)
1036 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1037 gimple stmt = DR_STMT (dr);
1038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1040 switch (alignment_support_scheme)
1042 case dr_aligned:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1045 vector_store, stmt_info, 0,
1046 vect_body);
1048 if (dump_enabled_p ())
1049 dump_printf_loc (MSG_NOTE, vect_location,
1050 "vect_model_store_cost: aligned.\n");
1051 break;
1054 case dr_unaligned_supported:
1056 /* Here, we assign an additional cost for the unaligned store. */
1057 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1058 unaligned_store, stmt_info,
1059 DR_MISALIGNMENT (dr), vect_body);
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: unaligned supported by "
1063 "hardware.\n");
1064 break;
1067 case dr_unaligned_unsupported:
1069 *inside_cost = VECT_MAX_COST;
1071 if (dump_enabled_p ())
1072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1073 "vect_model_store_cost: unsupported access.\n");
1074 break;
1077 default:
1078 gcc_unreachable ();
1083 /* Function vect_model_load_cost
1085 Models cost for loads. In the case of grouped accesses, the last access
1086 has the overhead of the grouped access attributed to it. Since unaligned
1087 accesses are supported for loads, we also account for the costs of the
1088 access scheme chosen. */
1090 void
1091 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1092 bool load_lanes_p, slp_tree slp_node,
1093 stmt_vector_for_cost *prologue_cost_vec,
1094 stmt_vector_for_cost *body_cost_vec)
1096 int group_size;
1097 gimple first_stmt;
1098 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1099 unsigned int inside_cost = 0, prologue_cost = 0;
1101 /* Grouped accesses? */
1102 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1103 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1105 group_size = vect_cost_group_size (stmt_info);
1106 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1108 /* Not a grouped access. */
1109 else
1111 group_size = 1;
1112 first_dr = dr;
1115 /* We assume that the cost of a single load-lanes instruction is
1116 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1117 access is instead being provided by a load-and-permute operation,
1118 include the cost of the permutes. */
1119 if (!load_lanes_p && group_size > 1
1120 && !STMT_VINFO_STRIDED_P (stmt_info))
1122 /* Uses even and odd extract operations or shuffle operations
1123 for each needed permute. */
1124 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1125 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1126 stmt_info, 0, vect_body);
1128 if (dump_enabled_p ())
1129 dump_printf_loc (MSG_NOTE, vect_location,
1130 "vect_model_load_cost: strided group_size = %d .\n",
1131 group_size);
1134 /* The loads themselves. */
1135 if (STMT_VINFO_STRIDED_P (stmt_info)
1136 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1138 /* N scalar loads plus gathering them into a vector. */
1139 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1140 inside_cost += record_stmt_cost (body_cost_vec,
1141 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1142 scalar_load, stmt_info, 0, vect_body);
1144 else
1145 vect_get_load_cost (first_dr, ncopies,
1146 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1147 || group_size > 1 || slp_node),
1148 &inside_cost, &prologue_cost,
1149 prologue_cost_vec, body_cost_vec, true);
1150 if (STMT_VINFO_STRIDED_P (stmt_info))
1151 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1152 stmt_info, 0, vect_body);
1154 if (dump_enabled_p ())
1155 dump_printf_loc (MSG_NOTE, vect_location,
1156 "vect_model_load_cost: inside_cost = %d, "
1157 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1161 /* Calculate cost of DR's memory access. */
1162 void
1163 vect_get_load_cost (struct data_reference *dr, int ncopies,
1164 bool add_realign_cost, unsigned int *inside_cost,
1165 unsigned int *prologue_cost,
1166 stmt_vector_for_cost *prologue_cost_vec,
1167 stmt_vector_for_cost *body_cost_vec,
1168 bool record_prologue_costs)
1170 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1171 gimple stmt = DR_STMT (dr);
1172 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174 switch (alignment_support_scheme)
1176 case dr_aligned:
1178 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1179 stmt_info, 0, vect_body);
1181 if (dump_enabled_p ())
1182 dump_printf_loc (MSG_NOTE, vect_location,
1183 "vect_model_load_cost: aligned.\n");
1185 break;
1187 case dr_unaligned_supported:
1189 /* Here, we assign an additional cost for the unaligned load. */
1190 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1191 unaligned_load, stmt_info,
1192 DR_MISALIGNMENT (dr), vect_body);
1194 if (dump_enabled_p ())
1195 dump_printf_loc (MSG_NOTE, vect_location,
1196 "vect_model_load_cost: unaligned supported by "
1197 "hardware.\n");
1199 break;
1201 case dr_explicit_realign:
1203 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1204 vector_load, stmt_info, 0, vect_body);
1205 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1206 vec_perm, stmt_info, 0, vect_body);
1208 /* FIXME: If the misalignment remains fixed across the iterations of
1209 the containing loop, the following cost should be added to the
1210 prologue costs. */
1211 if (targetm.vectorize.builtin_mask_for_load)
1212 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1213 stmt_info, 0, vect_body);
1215 if (dump_enabled_p ())
1216 dump_printf_loc (MSG_NOTE, vect_location,
1217 "vect_model_load_cost: explicit realign\n");
1219 break;
1221 case dr_explicit_realign_optimized:
1223 if (dump_enabled_p ())
1224 dump_printf_loc (MSG_NOTE, vect_location,
1225 "vect_model_load_cost: unaligned software "
1226 "pipelined.\n");
1228 /* Unaligned software pipeline has a load of an address, an initial
1229 load, and possibly a mask operation to "prime" the loop. However,
1230 if this is an access in a group of loads, which provide grouped
1231 access, then the above cost should only be considered for one
1232 access in the group. Inside the loop, there is a load op
1233 and a realignment op. */
1235 if (add_realign_cost && record_prologue_costs)
1237 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1238 vector_stmt, stmt_info,
1239 0, vect_prologue);
1240 if (targetm.vectorize.builtin_mask_for_load)
1241 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1242 vector_stmt, stmt_info,
1243 0, vect_prologue);
1246 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1247 stmt_info, 0, vect_body);
1248 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1249 stmt_info, 0, vect_body);
1251 if (dump_enabled_p ())
1252 dump_printf_loc (MSG_NOTE, vect_location,
1253 "vect_model_load_cost: explicit realign optimized"
1254 "\n");
1256 break;
1259 case dr_unaligned_unsupported:
1261 *inside_cost = VECT_MAX_COST;
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1265 "vect_model_load_cost: unsupported access.\n");
1266 break;
1269 default:
1270 gcc_unreachable ();
1274 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1275 the loop preheader for the vectorized stmt STMT. */
1277 static void
1278 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1280 if (gsi)
1281 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1282 else
1284 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1285 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1287 if (loop_vinfo)
1289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1290 basic_block new_bb;
1291 edge pe;
1293 if (nested_in_vect_loop_p (loop, stmt))
1294 loop = loop->inner;
1296 pe = loop_preheader_edge (loop);
1297 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1298 gcc_assert (!new_bb);
1300 else
1302 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1303 basic_block bb;
1304 gimple_stmt_iterator gsi_bb_start;
1306 gcc_assert (bb_vinfo);
1307 bb = BB_VINFO_BB (bb_vinfo);
1308 gsi_bb_start = gsi_after_labels (bb);
1309 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1313 if (dump_enabled_p ())
1315 dump_printf_loc (MSG_NOTE, vect_location,
1316 "created new init_stmt: ");
1317 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1321 /* Function vect_init_vector.
1323 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1324 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1325 a vector type, a vector with all elements equal to VAL is created first.
1326 Place the initialization at GSI if it is not NULL. Otherwise, place the
1327 initialization at the loop preheader.
1328 Return the DEF of INIT_STMT.
1329 It will be used in the vectorization of STMT. */
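/* Sketch of the output (SSA names are made up): for VAL == 5 and a V4SI
   TYPE, the loop preheader (or GSI) gets an init stmt such as

     cst__8 = { 5, 5, 5, 5 };

   and cst__8 is returned as the vector def.  */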
1331 tree
1332 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1334 tree new_var;
1335 gimple init_stmt;
1336 tree vec_oprnd;
1337 tree new_temp;
1339 if (TREE_CODE (type) == VECTOR_TYPE
1340 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1342 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1344 if (CONSTANT_CLASS_P (val))
1345 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1346 else
1348 new_temp = make_ssa_name (TREE_TYPE (type));
1349 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1350 vect_init_vector_1 (stmt, init_stmt, gsi);
1351 val = new_temp;
1354 val = build_vector_from_val (type, val);
1357 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1358 init_stmt = gimple_build_assign (new_var, val);
1359 new_temp = make_ssa_name (new_var, init_stmt);
1360 gimple_assign_set_lhs (init_stmt, new_temp);
1361 vect_init_vector_1 (stmt, init_stmt, gsi);
1362 vec_oprnd = gimple_assign_lhs (init_stmt);
1363 return vec_oprnd;
1367 /* Function vect_get_vec_def_for_operand.
1369 OP is an operand in STMT. This function returns a (vector) def that will be
1370 used in the vectorized stmt for STMT.
1372 In the case that OP is an SSA_NAME which is defined in the loop, then
1373 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1375 In case OP is an invariant or constant, a new stmt that creates a vector def
1376 needs to be introduced. */
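/* For example (sketch): if OP is the integer constant 3 and its vectype is
   V4SI, case 1 below returns the SSA name of a new { 3, 3, 3, 3 } built by
   vect_init_vector; if OP is an SSA name defined by a stmt inside the loop,
   case 3 simply returns the lhs of that stmt's STMT_VINFO_VEC_STMT.  */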
1378 tree
1379 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1381 tree vec_oprnd;
1382 gimple vec_stmt;
1383 gimple def_stmt;
1384 stmt_vec_info def_stmt_info = NULL;
1385 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1386 unsigned int nunits;
1387 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1388 tree def;
1389 enum vect_def_type dt;
1390 bool is_simple_use;
1391 tree vector_type;
1393 if (dump_enabled_p ())
1395 dump_printf_loc (MSG_NOTE, vect_location,
1396 "vect_get_vec_def_for_operand: ");
1397 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1398 dump_printf (MSG_NOTE, "\n");
1401 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1402 &def_stmt, &def, &dt);
1403 gcc_assert (is_simple_use);
1404 if (dump_enabled_p ())
1406 int loc_printed = 0;
1407 if (def)
1409 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1410 loc_printed = 1;
1411 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1412 dump_printf (MSG_NOTE, "\n");
1414 if (def_stmt)
1416 if (loc_printed)
1417 dump_printf (MSG_NOTE, " def_stmt = ");
1418 else
1419 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1420 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1424 switch (dt)
1426 /* Case 1: operand is a constant. */
1427 case vect_constant_def:
1429 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1430 gcc_assert (vector_type);
1431 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1433 if (scalar_def)
1434 *scalar_def = op;
1436 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1437 if (dump_enabled_p ())
1438 dump_printf_loc (MSG_NOTE, vect_location,
1439 "Create vector_cst. nunits = %d\n", nunits);
1441 return vect_init_vector (stmt, op, vector_type, NULL);
1444 /* Case 2: operand is defined outside the loop - loop invariant. */
1445 case vect_external_def:
1447 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1448 gcc_assert (vector_type);
1450 if (scalar_def)
1451 *scalar_def = def;
1453 /* Create 'vec_inv = {inv,inv,..,inv}' */
1454 if (dump_enabled_p ())
1455 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1457 return vect_init_vector (stmt, def, vector_type, NULL);
1460 /* Case 3: operand is defined inside the loop. */
1461 case vect_internal_def:
1463 if (scalar_def)
1464 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1466 /* Get the def from the vectorized stmt. */
1467 def_stmt_info = vinfo_for_stmt (def_stmt);
1469 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1470 /* Get vectorized pattern statement. */
1471 if (!vec_stmt
1472 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1473 && !STMT_VINFO_RELEVANT (def_stmt_info))
1474 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1475 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1476 gcc_assert (vec_stmt);
1477 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1478 vec_oprnd = PHI_RESULT (vec_stmt);
1479 else if (is_gimple_call (vec_stmt))
1480 vec_oprnd = gimple_call_lhs (vec_stmt);
1481 else
1482 vec_oprnd = gimple_assign_lhs (vec_stmt);
1483 return vec_oprnd;
1486 /* Case 4: operand is defined by a loop header phi - reduction */
1487 case vect_reduction_def:
1488 case vect_double_reduction_def:
1489 case vect_nested_cycle:
1491 struct loop *loop;
1493 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1494 loop = (gimple_bb (def_stmt))->loop_father;
1496 /* Get the def before the loop */
1497 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1498 return get_initial_def_for_reduction (stmt, op, scalar_def);
1501 /* Case 5: operand is defined by loop-header phi - induction. */
1502 case vect_induction_def:
1504 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1506 /* Get the def from the vectorized stmt. */
1507 def_stmt_info = vinfo_for_stmt (def_stmt);
1508 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1509 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1510 vec_oprnd = PHI_RESULT (vec_stmt);
1511 else
1512 vec_oprnd = gimple_get_lhs (vec_stmt);
1513 return vec_oprnd;
1516 default:
1517 gcc_unreachable ();
1522 /* Function vect_get_vec_def_for_stmt_copy
1524 Return a vector-def for an operand. This function is used when the
1525 vectorized stmt to be created (by the caller to this function) is a "copy"
1526 created in case the vectorized result cannot fit in one vector, and several
1527 copies of the vector-stmt are required. In this case the vector-def is
1528 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1529 of the stmt that defines VEC_OPRND.
1530 DT is the type of the vector def VEC_OPRND.
1532 Context:
1533 In case the vectorization factor (VF) is bigger than the number
1534 of elements that can fit in a vectype (nunits), we have to generate
1535 more than one vector stmt to vectorize the scalar stmt. This situation
1536 arises when there are multiple data-types operated upon in the loop; the
1537 smallest data-type determines the VF, and as a result, when vectorizing
1538 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1539 vector stmt (each computing a vector of 'nunits' results, and together
1540 computing 'VF' results in each iteration). This function is called when
1541 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1542 which VF=16 and nunits=4, so the number of copies required is 4):
1544 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1546 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1547 VS1.1: vx.1 = memref1 VS1.2
1548 VS1.2: vx.2 = memref2 VS1.3
1549 VS1.3: vx.3 = memref3
1551 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1552 VSnew.1: vz1 = vx.1 + ... VSnew.2
1553 VSnew.2: vz2 = vx.2 + ... VSnew.3
1554 VSnew.3: vz3 = vx.3 + ...
1556 The vectorization of S1 is explained in vectorizable_load.
1557 The vectorization of S2:
1558 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1559 the function 'vect_get_vec_def_for_operand' is called to
1560 get the relevant vector-def for each operand of S2. For operand x it
1561 returns the vector-def 'vx.0'.
1563 To create the remaining copies of the vector-stmt (VSnew.j), this
1564 function is called to get the relevant vector-def for each operand. It is
1565 obtained from the respective VS1.j stmt, which is recorded in the
1566 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1568 For example, to obtain the vector-def 'vx.1' in order to create the
1569 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1570 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1571 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1572 and return its def ('vx.1').
1573 Overall, to create the above sequence this function will be called 3 times:
1574 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1575 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1576 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1578 tree
1579 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1581 gimple vec_stmt_for_operand;
1582 stmt_vec_info def_stmt_info;
1584 /* Do nothing; can reuse same def. */
1585 if (dt == vect_external_def || dt == vect_constant_def )
1586 return vec_oprnd;
1588 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1589 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1590 gcc_assert (def_stmt_info);
1591 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1592 gcc_assert (vec_stmt_for_operand);
1593 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1594 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1595 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1596 else
1597 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1598 return vec_oprnd;
1602 /* Get vectorized definitions for the operands to create a copy of an original
1603 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1605 static void
1606 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1607 vec<tree> *vec_oprnds0,
1608 vec<tree> *vec_oprnds1)
1610 tree vec_oprnd = vec_oprnds0->pop ();
1612 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1613 vec_oprnds0->quick_push (vec_oprnd);
1615 if (vec_oprnds1 && vec_oprnds1->length ())
1617 vec_oprnd = vec_oprnds1->pop ();
1618 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1619 vec_oprnds1->quick_push (vec_oprnd);
1624 /* Get vectorized definitions for OP0 and OP1.
1625 REDUC_INDEX is the index of the reduction operand in case of reduction,
1626 and -1 otherwise. */
1628 void
1629 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1630 vec<tree> *vec_oprnds0,
1631 vec<tree> *vec_oprnds1,
1632 slp_tree slp_node, int reduc_index)
1634 if (slp_node)
1636 int nops = (op1 == NULL_TREE) ? 1 : 2;
1637 auto_vec<tree> ops (nops);
1638 auto_vec<vec<tree> > vec_defs (nops);
1640 ops.quick_push (op0);
1641 if (op1)
1642 ops.quick_push (op1);
1644 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1646 *vec_oprnds0 = vec_defs[0];
1647 if (op1)
1648 *vec_oprnds1 = vec_defs[1];
1650 else
1652 tree vec_oprnd;
1654 vec_oprnds0->create (1);
1655 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1656 vec_oprnds0->quick_push (vec_oprnd);
1658 if (op1)
1660 vec_oprnds1->create (1);
1661 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1662 vec_oprnds1->quick_push (vec_oprnd);
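/* Illustrative note: for a non-SLP stmt such as z = x + y this pushes one
   vector def for x into *VEC_OPRNDS0 and one for y into *VEC_OPRNDS1 (via
   vect_get_vec_def_for_operand); for an SLP node the defs for the whole
   group are obtained from vect_get_slp_defs instead.  */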
1668 /* Function vect_finish_stmt_generation.
1670 Insert the new stmt VEC_STMT before GSI, creating a stmt_vec_info for it
and giving it the location (and, if needed, the EH region) of the scalar
stmt STMT. */
1672 void
1673 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1674 gimple_stmt_iterator *gsi)
1676 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1677 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1678 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1680 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1682 if (!gsi_end_p (*gsi)
1683 && gimple_has_mem_ops (vec_stmt))
1685 gimple at_stmt = gsi_stmt (*gsi);
1686 tree vuse = gimple_vuse (at_stmt);
1687 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1689 tree vdef = gimple_vdef (at_stmt);
1690 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1691 /* If we have an SSA vuse and insert a store, update virtual
1692 SSA form to avoid triggering the renamer. Do so only
1693 if we can easily see all uses - which is what almost always
1694 happens with the way vectorized stmts are inserted. */
1695 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1696 && ((is_gimple_assign (vec_stmt)
1697 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1698 || (is_gimple_call (vec_stmt)
1699 && !(gimple_call_flags (vec_stmt)
1700 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1702 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1703 gimple_set_vdef (vec_stmt, new_vdef);
1704 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1708 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1710 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1711 bb_vinfo));
1713 if (dump_enabled_p ())
1715 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1716 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1719 gimple_set_location (vec_stmt, gimple_location (stmt));
1721 /* While EH edges will generally prevent vectorization, stmt might
1722 e.g. be in a must-not-throw region. Ensure newly created stmts
1723 that could throw are part of the same region. */
1724 int lp_nr = lookup_stmt_eh_lp (stmt);
1725 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1726 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1729 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1730 a function declaration if the target has a vectorized version
1731 of the function, or NULL_TREE if the function cannot be vectorized. */
1733 tree
1734 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1736 tree fndecl = gimple_call_fndecl (call);
1738 /* We only handle functions that do not read or clobber memory -- i.e.
1739 const or novops ones. */
1740 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1741 return NULL_TREE;
1743 if (!fndecl
1744 || TREE_CODE (fndecl) != FUNCTION_DECL
1745 || !DECL_BUILT_IN (fndecl))
1746 return NULL_TREE;
1748 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1749 vectype_in);
1753 static tree permute_vec_elements (tree, tree, tree, gimple,
1754 gimple_stmt_iterator *);
1757 /* Function vectorizable_mask_load_store.
1759 Check if STMT performs a conditional load or store that can be vectorized.
1760 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1761 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1762 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1764 static bool
1765 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1766 gimple *vec_stmt, slp_tree slp_node)
1768 tree vec_dest = NULL;
1769 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1770 stmt_vec_info prev_stmt_info;
1771 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1772 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1773 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1774 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1775 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1776 tree elem_type;
1777 gimple new_stmt;
1778 tree dummy;
1779 tree dataref_ptr = NULL_TREE;
1780 gimple ptr_incr;
1781 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1782 int ncopies;
1783 int i, j;
1784 bool inv_p;
1785 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1786 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1787 int gather_scale = 1;
1788 enum vect_def_type gather_dt = vect_unknown_def_type;
1789 bool is_store;
1790 tree mask;
1791 gimple def_stmt;
1792 tree def;
1793 enum vect_def_type dt;
1795 if (slp_node != NULL)
1796 return false;
1798 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1799 gcc_assert (ncopies >= 1);
1801 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1802 mask = gimple_call_arg (stmt, 2);
1803 if (TYPE_PRECISION (TREE_TYPE (mask))
1804 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1805 return false;
1807 /* FORNOW. This restriction should be relaxed. */
1808 if (nested_in_vect_loop && ncopies > 1)
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812 "multiple types in nested loop.");
1813 return false;
1816 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1817 return false;
1819 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1820 return false;
1822 if (!STMT_VINFO_DATA_REF (stmt_info))
1823 return false;
1825 elem_type = TREE_TYPE (vectype);
1827 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1828 return false;
1830 if (STMT_VINFO_STRIDED_P (stmt_info))
1831 return false;
1833 if (STMT_VINFO_GATHER_P (stmt_info))
1835 gimple def_stmt;
1836 tree def;
1837 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1838 &gather_off, &gather_scale);
1839 gcc_assert (gather_decl);
1840 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1841 &def_stmt, &def, &gather_dt,
1842 &gather_off_vectype))
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "gather index use not simple.");
1847 return false;
1850 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1851 tree masktype
1852 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1853 if (TREE_CODE (masktype) == INTEGER_TYPE)
1855 if (dump_enabled_p ())
1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1857 "masked gather with integer mask not supported.");
1858 return false;
1861 else if (tree_int_cst_compare (nested_in_vect_loop
1862 ? STMT_VINFO_DR_STEP (stmt_info)
1863 : DR_STEP (dr), size_zero_node) <= 0)
1864 return false;
1865 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1866 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1867 return false;
1869 if (TREE_CODE (mask) != SSA_NAME)
1870 return false;
1872 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1873 &def_stmt, &def, &dt))
1874 return false;
1876 if (is_store)
1878 tree rhs = gimple_call_arg (stmt, 3);
1879 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1880 &def_stmt, &def, &dt))
1881 return false;
1884 if (!vec_stmt) /* transformation not required. */
1886 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1887 if (is_store)
1888 vect_model_store_cost (stmt_info, ncopies, false, dt,
1889 NULL, NULL, NULL);
1890 else
1891 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1892 return true;
1895 /** Transform. **/
1897 if (STMT_VINFO_GATHER_P (stmt_info))
1899 tree vec_oprnd0 = NULL_TREE, op;
1900 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1901 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1902 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1903 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1904 tree mask_perm_mask = NULL_TREE;
1905 edge pe = loop_preheader_edge (loop);
1906 gimple_seq seq;
1907 basic_block new_bb;
1908 enum { NARROW, NONE, WIDEN } modifier;
1909 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1911 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1912 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1913 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1914 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916 scaletype = TREE_VALUE (arglist);
1917 gcc_checking_assert (types_compatible_p (srctype, rettype)
1918 && types_compatible_p (srctype, masktype));
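/* Decide how the gather offset vector relates to VECTYPE: with the same
   number of elements no adjustment is needed (NONE); with twice as many,
   every other copy reuses the previous offset vector permuted to its high
   half (WIDEN); with half as many, two gathers are emitted per copy and
   their results glued together with a permutation (NARROW), which is why
   NCOPIES is doubled in that case.  */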
1920 if (nunits == gather_off_nunits)
1921 modifier = NONE;
1922 else if (nunits == gather_off_nunits / 2)
1924 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1925 modifier = WIDEN;
1927 for (i = 0; i < gather_off_nunits; ++i)
1928 sel[i] = i | nunits;
1930 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1932 else if (nunits == gather_off_nunits * 2)
1934 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1935 modifier = NARROW;
1937 for (i = 0; i < nunits; ++i)
1938 sel[i] = i < gather_off_nunits
1939 ? i : i + nunits - gather_off_nunits;
1941 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1942 ncopies *= 2;
1943 for (i = 0; i < nunits; ++i)
1944 sel[i] = i | gather_off_nunits;
1945 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1947 else
1948 gcc_unreachable ();
1950 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1952 ptr = fold_convert (ptrtype, gather_base);
1953 if (!is_gimple_min_invariant (ptr))
1955 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1956 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1957 gcc_assert (!new_bb);
1960 scale = build_int_cst (scaletype, gather_scale);
1962 prev_stmt_info = NULL;
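/* Each iteration below emits one call to the gather builtin; the offset
   and mask operands are converted to the builtin's argument types with
   VIEW_CONVERT_EXPRs where needed, and for NARROW two consecutive results
   are merged into a single VECTYPE vector before being recorded as the
   vectorized statement.  */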
1963 for (j = 0; j < ncopies; ++j)
1965 if (modifier == WIDEN && (j & 1))
1966 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1967 perm_mask, stmt, gsi);
1968 else if (j == 0)
1969 op = vec_oprnd0
1970 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1971 else
1972 op = vec_oprnd0
1973 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1975 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1977 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1978 == TYPE_VECTOR_SUBPARTS (idxtype));
1979 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1980 var = make_ssa_name (var);
1981 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1982 new_stmt
1983 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1985 op = var;
1988 if (mask_perm_mask && (j & 1))
1989 mask_op = permute_vec_elements (mask_op, mask_op,
1990 mask_perm_mask, stmt, gsi);
1991 else
1993 if (j == 0)
1994 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1995 else
1997 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1998 &def_stmt, &def, &dt);
1999 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2002 mask_op = vec_mask;
2003 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2005 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2006 == TYPE_VECTOR_SUBPARTS (masktype));
2007 var = vect_get_new_vect_var (masktype, vect_simple_var,
2008 NULL);
2009 var = make_ssa_name (var);
2010 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2011 new_stmt
2012 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2013 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2014 mask_op = var;
2018 new_stmt
2019 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2020 scale);
2022 if (!useless_type_conversion_p (vectype, rettype))
2024 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2025 == TYPE_VECTOR_SUBPARTS (rettype));
2026 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2027 op = make_ssa_name (var, new_stmt);
2028 gimple_call_set_lhs (new_stmt, op);
2029 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2030 var = make_ssa_name (vec_dest);
2031 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2032 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2034 else
2036 var = make_ssa_name (vec_dest, new_stmt);
2037 gimple_call_set_lhs (new_stmt, var);
2040 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2042 if (modifier == NARROW)
2044 if ((j & 1) == 0)
2046 prev_res = var;
2047 continue;
2049 var = permute_vec_elements (prev_res, var,
2050 perm_mask, stmt, gsi);
2051 new_stmt = SSA_NAME_DEF_STMT (var);
2054 if (prev_stmt_info == NULL)
2055 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2056 else
2057 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2058 prev_stmt_info = vinfo_for_stmt (new_stmt);
2061 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2062 from the IL. */
2063 tree lhs = gimple_call_lhs (stmt);
2064 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2065 set_vinfo_for_stmt (new_stmt, stmt_info);
2066 set_vinfo_for_stmt (stmt, NULL);
2067 STMT_VINFO_STMT (stmt_info) = new_stmt;
2068 gsi_replace (gsi, new_stmt, true);
2069 return true;
2071 else if (is_store)
2073 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2074 prev_stmt_info = NULL;
2075 for (i = 0; i < ncopies; i++)
2077 unsigned align, misalign;
2079 if (i == 0)
2081 tree rhs = gimple_call_arg (stmt, 3);
2082 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2083 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2084 /* We should have caught mismatched types earlier. */
2085 gcc_assert (useless_type_conversion_p (vectype,
2086 TREE_TYPE (vec_rhs)));
2087 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2088 NULL_TREE, &dummy, gsi,
2089 &ptr_incr, false, &inv_p);
2090 gcc_assert (!inv_p);
2092 else
2094 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2095 &def, &dt);
2096 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2097 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2098 &def, &dt);
2099 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2100 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2101 TYPE_SIZE_UNIT (vectype));
2104 align = TYPE_ALIGN_UNIT (vectype);
2105 if (aligned_access_p (dr))
2106 misalign = 0;
2107 else if (DR_MISALIGNMENT (dr) == -1)
2109 align = TYPE_ALIGN_UNIT (elem_type);
2110 misalign = 0;
2112 else
2113 misalign = DR_MISALIGNMENT (dr);
2114 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2115 misalign);
2116 new_stmt
2117 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2118 gimple_call_arg (stmt, 1),
2119 vec_mask, vec_rhs);
2120 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2121 if (i == 0)
2122 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2123 else
2124 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2125 prev_stmt_info = vinfo_for_stmt (new_stmt);
2128 else
2130 tree vec_mask = NULL_TREE;
2131 prev_stmt_info = NULL;
2132 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2133 for (i = 0; i < ncopies; i++)
2135 unsigned align, misalign;
2137 if (i == 0)
2139 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2140 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2141 NULL_TREE, &dummy, gsi,
2142 &ptr_incr, false, &inv_p);
2143 gcc_assert (!inv_p);
2145 else
2147 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2148 &def, &dt);
2149 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2150 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2151 TYPE_SIZE_UNIT (vectype));
2154 align = TYPE_ALIGN_UNIT (vectype);
2155 if (aligned_access_p (dr))
2156 misalign = 0;
2157 else if (DR_MISALIGNMENT (dr) == -1)
2159 align = TYPE_ALIGN_UNIT (elem_type);
2160 misalign = 0;
2162 else
2163 misalign = DR_MISALIGNMENT (dr);
2164 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2165 misalign);
2166 new_stmt
2167 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2168 gimple_call_arg (stmt, 1),
2169 vec_mask);
2170 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2172 if (i == 0)
2173 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2174 else
2175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2176 prev_stmt_info = vinfo_for_stmt (new_stmt);
2180 if (!is_store)
2182 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2183 from the IL. */
2184 tree lhs = gimple_call_lhs (stmt);
2185 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2186 set_vinfo_for_stmt (new_stmt, stmt_info);
2187 set_vinfo_for_stmt (stmt, NULL);
2188 STMT_VINFO_STMT (stmt_info) = new_stmt;
2189 gsi_replace (gsi, new_stmt, true);
2192 return true;
2196 /* Function vectorizable_call.
2198 Check if GS performs a function call that can be vectorized.
2199 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2200 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2201 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
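/* As an illustrative sketch (the names are made up), a call such as

     y_1 = sqrtf (x_2);

   is vectorized by asking the target for a vector variant of the builtin
   (via vectorizable_function above) and emitting

     vy_3 = __builtin_target_sqrtps (vx_4);

   once per copy.  */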
2203 static bool
2204 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2205 slp_tree slp_node)
2207 gcall *stmt;
2208 tree vec_dest;
2209 tree scalar_dest;
2210 tree op, type;
2211 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2212 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2213 tree vectype_out, vectype_in;
2214 int nunits_in;
2215 int nunits_out;
2216 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2217 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2218 tree fndecl, new_temp, def, rhs_type;
2219 gimple def_stmt;
2220 enum vect_def_type dt[3]
2221 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2222 gimple new_stmt = NULL;
2223 int ncopies, j;
2224 vec<tree> vargs = vNULL;
2225 enum { NARROW, NONE, WIDEN } modifier;
2226 size_t i, nargs;
2227 tree lhs;
2229 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2230 return false;
2232 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2233 return false;
2235 /* Is GS a vectorizable call? */
2236 stmt = dyn_cast <gcall *> (gs);
2237 if (!stmt)
2238 return false;
2240 if (gimple_call_internal_p (stmt)
2241 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2242 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2243 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2244 slp_node);
2246 if (gimple_call_lhs (stmt) == NULL_TREE
2247 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2248 return false;
2250 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2252 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2254 /* Process function arguments. */
2255 rhs_type = NULL_TREE;
2256 vectype_in = NULL_TREE;
2257 nargs = gimple_call_num_args (stmt);
2259 /* Bail out if the function has more than three arguments; we do not have
2260 interesting builtin functions to vectorize with more than two arguments
2261 except for fma.  Having no arguments is also not good. */
2262 if (nargs == 0 || nargs > 3)
2263 return false;
2265 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2266 if (gimple_call_internal_p (stmt)
2267 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2269 nargs = 0;
2270 rhs_type = unsigned_type_node;
2273 for (i = 0; i < nargs; i++)
2275 tree opvectype;
2277 op = gimple_call_arg (stmt, i);
2279 /* We can only handle calls with arguments of the same type. */
2280 if (rhs_type
2281 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2285 "argument types differ.\n");
2286 return false;
2288 if (!rhs_type)
2289 rhs_type = TREE_TYPE (op);
2291 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2292 &def_stmt, &def, &dt[i], &opvectype))
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 "use not simple.\n");
2297 return false;
2300 if (!vectype_in)
2301 vectype_in = opvectype;
2302 else if (opvectype
2303 && opvectype != vectype_in)
2305 if (dump_enabled_p ())
2306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2307 "argument vector types differ.\n");
2308 return false;
2311 /* If all arguments are external or constant defs use a vector type with
2312 the same size as the output vector type. */
2313 if (!vectype_in)
2314 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2315 if (vec_stmt)
2316 gcc_assert (vectype_in);
2317 if (!vectype_in)
2319 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "no vectype for scalar type ");
2323 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2324 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2327 return false;
2330 /* FORNOW */
2331 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2332 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2333 if (nunits_in == nunits_out / 2)
2334 modifier = NARROW;
2335 else if (nunits_out == nunits_in)
2336 modifier = NONE;
2337 else if (nunits_out == nunits_in / 2)
2338 modifier = WIDEN;
2339 else
2340 return false;
2342 /* For now, we only vectorize functions if a target specific builtin
2343 is available. TODO -- in some cases, it might be profitable to
2344 insert the calls for pieces of the vector, in order to be able
2345 to vectorize other operations in the loop. */
2346 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2347 if (fndecl == NULL_TREE)
2349 if (gimple_call_internal_p (stmt)
2350 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2351 && !slp_node
2352 && loop_vinfo
2353 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2354 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2355 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2356 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2358 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2359 { 0, 1, 2, ... vf - 1 } vector. */
2360 gcc_assert (nargs == 0);
2362 else
2364 if (dump_enabled_p ())
2365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2366 "function is not vectorizable.\n");
2367 return false;
2371 gcc_assert (!gimple_vuse (stmt));
2373 if (slp_node || PURE_SLP_STMT (stmt_info))
2374 ncopies = 1;
2375 else if (modifier == NARROW)
2376 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2377 else
2378 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2380 /* Sanity check: make sure that at least one copy of the vectorized stmt
2381 needs to be generated. */
2382 gcc_assert (ncopies >= 1);
2384 if (!vec_stmt) /* transformation not required. */
2386 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2389 "\n");
2390 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2391 return true;
2394 /** Transform. **/
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2399 /* Handle def. */
2400 scalar_dest = gimple_call_lhs (stmt);
2401 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2403 prev_stmt_info = NULL;
2404 switch (modifier)
2406 case NONE:
2407 for (j = 0; j < ncopies; ++j)
2409 /* Build argument list for the vectorized call. */
2410 if (j == 0)
2411 vargs.create (nargs);
2412 else
2413 vargs.truncate (0);
2415 if (slp_node)
2417 auto_vec<vec<tree> > vec_defs (nargs);
2418 vec<tree> vec_oprnds0;
2420 for (i = 0; i < nargs; i++)
2421 vargs.quick_push (gimple_call_arg (stmt, i));
2422 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2423 vec_oprnds0 = vec_defs[0];
2425 /* Arguments are ready. Create the new vector stmt. */
2426 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2428 size_t k;
2429 for (k = 0; k < nargs; k++)
2431 vec<tree> vec_oprndsk = vec_defs[k];
2432 vargs[k] = vec_oprndsk[i];
2434 new_stmt = gimple_build_call_vec (fndecl, vargs);
2435 new_temp = make_ssa_name (vec_dest, new_stmt);
2436 gimple_call_set_lhs (new_stmt, new_temp);
2437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2438 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2441 for (i = 0; i < nargs; i++)
2443 vec<tree> vec_oprndsi = vec_defs[i];
2444 vec_oprndsi.release ();
2446 continue;
2449 for (i = 0; i < nargs; i++)
2451 op = gimple_call_arg (stmt, i);
2452 if (j == 0)
2453 vec_oprnd0
2454 = vect_get_vec_def_for_operand (op, stmt, NULL);
2455 else
2457 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2458 vec_oprnd0
2459 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2462 vargs.quick_push (vec_oprnd0);
2465 if (gimple_call_internal_p (stmt)
2466 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2468 tree *v = XALLOCAVEC (tree, nunits_out);
2469 int k;
2470 for (k = 0; k < nunits_out; ++k)
2471 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
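/* CST is the constant vector { j * nunits_out, j * nunits_out + 1, ...,
   j * nunits_out + nunits_out - 1 }, i.e. the lane numbers covered by
   this copy.  */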
2472 tree cst = build_vector (vectype_out, v);
2473 tree new_var
2474 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2475 gimple init_stmt = gimple_build_assign (new_var, cst);
2476 new_temp = make_ssa_name (new_var, init_stmt);
2477 gimple_assign_set_lhs (init_stmt, new_temp);
2478 vect_init_vector_1 (stmt, init_stmt, NULL);
2479 new_temp = make_ssa_name (vec_dest);
2480 new_stmt = gimple_build_assign (new_temp,
2481 gimple_assign_lhs (init_stmt));
2483 else
2485 new_stmt = gimple_build_call_vec (fndecl, vargs);
2486 new_temp = make_ssa_name (vec_dest, new_stmt);
2487 gimple_call_set_lhs (new_stmt, new_temp);
2489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2491 if (j == 0)
2492 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2493 else
2494 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2496 prev_stmt_info = vinfo_for_stmt (new_stmt);
2499 break;
2501 case NARROW:
2502 for (j = 0; j < ncopies; ++j)
2504 /* Build argument list for the vectorized call. */
2505 if (j == 0)
2506 vargs.create (nargs * 2);
2507 else
2508 vargs.truncate (0);
2510 if (slp_node)
2512 auto_vec<vec<tree> > vec_defs (nargs);
2513 vec<tree> vec_oprnds0;
2515 for (i = 0; i < nargs; i++)
2516 vargs.quick_push (gimple_call_arg (stmt, i));
2517 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2518 vec_oprnds0 = vec_defs[0];
2520 /* Arguments are ready. Create the new vector stmt. */
2521 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2523 size_t k;
2524 vargs.truncate (0);
2525 for (k = 0; k < nargs; k++)
2527 vec<tree> vec_oprndsk = vec_defs[k];
2528 vargs.quick_push (vec_oprndsk[i]);
2529 vargs.quick_push (vec_oprndsk[i + 1]);
2531 new_stmt = gimple_build_call_vec (fndecl, vargs);
2532 new_temp = make_ssa_name (vec_dest, new_stmt);
2533 gimple_call_set_lhs (new_stmt, new_temp);
2534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2535 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2538 for (i = 0; i < nargs; i++)
2540 vec<tree> vec_oprndsi = vec_defs[i];
2541 vec_oprndsi.release ();
2543 continue;
2546 for (i = 0; i < nargs; i++)
2548 op = gimple_call_arg (stmt, i);
2549 if (j == 0)
2551 vec_oprnd0
2552 = vect_get_vec_def_for_operand (op, stmt, NULL);
2553 vec_oprnd1
2554 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2556 else
2558 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2559 vec_oprnd0
2560 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2561 vec_oprnd1
2562 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2565 vargs.quick_push (vec_oprnd0);
2566 vargs.quick_push (vec_oprnd1);
2569 new_stmt = gimple_build_call_vec (fndecl, vargs);
2570 new_temp = make_ssa_name (vec_dest, new_stmt);
2571 gimple_call_set_lhs (new_stmt, new_temp);
2572 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2574 if (j == 0)
2575 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2576 else
2577 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2579 prev_stmt_info = vinfo_for_stmt (new_stmt);
2582 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2584 break;
2586 case WIDEN:
2587 /* No current target implements this case. */
2588 return false;
2591 vargs.release ();
2593 /* The call in STMT might prevent it from being removed in dce.
2594 We cannot remove it here, however, because the ssa name it defines
2595 is mapped to the new definition. So just replace the rhs of the
2596 statement with something harmless. */
2598 if (slp_node)
2599 return true;
2601 type = TREE_TYPE (scalar_dest);
2602 if (is_pattern_stmt_p (stmt_info))
2603 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2604 else
2605 lhs = gimple_call_lhs (stmt);
2606 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2607 set_vinfo_for_stmt (new_stmt, stmt_info);
2608 set_vinfo_for_stmt (stmt, NULL);
2609 STMT_VINFO_STMT (stmt_info) = new_stmt;
2610 gsi_replace (gsi, new_stmt, false);
2612 return true;
2616 struct simd_call_arg_info
2618 tree vectype;
2619 tree op;
2620 enum vect_def_type dt;
2621 HOST_WIDE_INT linear_step;
2622 unsigned int align;
2625 /* Function vectorizable_simd_clone_call.
2627 Check if STMT performs a function call that can be vectorized
2628 by calling a simd clone of the function.
2629 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2630 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2631 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
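/* As an illustrative sketch, for a function declared with
   "#pragma omp declare simd", e.g.

     float foo (float x);

   a call y = foo (x) in the loop can be replaced by a call to one of the
   simd clones recorded in the callee's cgraph node, which takes a whole
   vector of x values and returns a vector of results.  The clone with
   the lowest badness score is selected below.  */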
2633 static bool
2634 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2635 gimple *vec_stmt, slp_tree slp_node)
2637 tree vec_dest;
2638 tree scalar_dest;
2639 tree op, type;
2640 tree vec_oprnd0 = NULL_TREE;
2641 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2642 tree vectype;
2643 unsigned int nunits;
2644 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2645 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2646 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2647 tree fndecl, new_temp, def;
2648 gimple def_stmt;
2649 gimple new_stmt = NULL;
2650 int ncopies, j;
2651 vec<simd_call_arg_info> arginfo = vNULL;
2652 vec<tree> vargs = vNULL;
2653 size_t i, nargs;
2654 tree lhs, rtype, ratype;
2655 vec<constructor_elt, va_gc> *ret_ctor_elts;
2657 /* Is STMT a vectorizable call? */
2658 if (!is_gimple_call (stmt))
2659 return false;
2661 fndecl = gimple_call_fndecl (stmt);
2662 if (fndecl == NULL_TREE)
2663 return false;
2665 struct cgraph_node *node = cgraph_node::get (fndecl);
2666 if (node == NULL || node->simd_clones == NULL)
2667 return false;
2669 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2670 return false;
2672 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2673 return false;
2675 if (gimple_call_lhs (stmt)
2676 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2677 return false;
2679 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2681 vectype = STMT_VINFO_VECTYPE (stmt_info);
2683 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2684 return false;
2686 /* FORNOW */
2687 if (slp_node || PURE_SLP_STMT (stmt_info))
2688 return false;
2690 /* Process function arguments. */
2691 nargs = gimple_call_num_args (stmt);
2693 /* Bail out if the function has zero arguments. */
2694 if (nargs == 0)
2695 return false;
2697 arginfo.create (nargs);
2699 for (i = 0; i < nargs; i++)
2701 simd_call_arg_info thisarginfo;
2702 affine_iv iv;
2704 thisarginfo.linear_step = 0;
2705 thisarginfo.align = 0;
2706 thisarginfo.op = NULL_TREE;
2708 op = gimple_call_arg (stmt, i);
2709 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2710 &def_stmt, &def, &thisarginfo.dt,
2711 &thisarginfo.vectype)
2712 || thisarginfo.dt == vect_uninitialized_def)
2714 if (dump_enabled_p ())
2715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2716 "use not simple.\n");
2717 arginfo.release ();
2718 return false;
2721 if (thisarginfo.dt == vect_constant_def
2722 || thisarginfo.dt == vect_external_def)
2723 gcc_assert (thisarginfo.vectype == NULL_TREE);
2724 else
2725 gcc_assert (thisarginfo.vectype != NULL_TREE);
2727 /* For linear arguments, the analyze phase should have saved
2728 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2729 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2730 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2732 gcc_assert (vec_stmt);
2733 thisarginfo.linear_step
2734 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2735 thisarginfo.op
2736 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2737 /* If loop has been peeled for alignment, we need to adjust it. */
2738 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2739 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2740 if (n1 != n2)
2742 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2743 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2744 tree opt = TREE_TYPE (thisarginfo.op);
2745 bias = fold_convert (TREE_TYPE (step), bias);
2746 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2747 thisarginfo.op
2748 = fold_build2 (POINTER_TYPE_P (opt)
2749 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2750 thisarginfo.op, bias);
2753 else if (!vec_stmt
2754 && thisarginfo.dt != vect_constant_def
2755 && thisarginfo.dt != vect_external_def
2756 && loop_vinfo
2757 && TREE_CODE (op) == SSA_NAME
2758 && simple_iv (loop, loop_containing_stmt (stmt), op,
2759 &iv, false)
2760 && tree_fits_shwi_p (iv.step))
2762 thisarginfo.linear_step = tree_to_shwi (iv.step);
2763 thisarginfo.op = iv.base;
2765 else if ((thisarginfo.dt == vect_constant_def
2766 || thisarginfo.dt == vect_external_def)
2767 && POINTER_TYPE_P (TREE_TYPE (op)))
2768 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2770 arginfo.quick_push (thisarginfo);
2773 unsigned int badness = 0;
2774 struct cgraph_node *bestn = NULL;
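/* If the clone to use was not recorded during analysis, choose it now:
   clones whose simdlen exceeds the vectorization factor, whose argument
   kinds do not match the actual arguments, or which the target rejects
   are skipped (in-branch clones are skipped entirely for now); the rest
   are scored, with smaller simdlen, target preferences and under-aligned
   arguments adding badness, and the lowest score wins.  */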
2775 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2776 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2777 else
2778 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2779 n = n->simdclone->next_clone)
2781 unsigned int this_badness = 0;
2782 if (n->simdclone->simdlen
2783 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2784 || n->simdclone->nargs != nargs)
2785 continue;
2786 if (n->simdclone->simdlen
2787 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2788 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2789 - exact_log2 (n->simdclone->simdlen)) * 1024;
2790 if (n->simdclone->inbranch)
2791 this_badness += 2048;
2792 int target_badness = targetm.simd_clone.usable (n);
2793 if (target_badness < 0)
2794 continue;
2795 this_badness += target_badness * 512;
2796 /* FORNOW: Have to add code to add the mask argument. */
2797 if (n->simdclone->inbranch)
2798 continue;
2799 for (i = 0; i < nargs; i++)
2801 switch (n->simdclone->args[i].arg_type)
2803 case SIMD_CLONE_ARG_TYPE_VECTOR:
2804 if (!useless_type_conversion_p
2805 (n->simdclone->args[i].orig_type,
2806 TREE_TYPE (gimple_call_arg (stmt, i))))
2807 i = -1;
2808 else if (arginfo[i].dt == vect_constant_def
2809 || arginfo[i].dt == vect_external_def
2810 || arginfo[i].linear_step)
2811 this_badness += 64;
2812 break;
2813 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2814 if (arginfo[i].dt != vect_constant_def
2815 && arginfo[i].dt != vect_external_def)
2816 i = -1;
2817 break;
2818 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2819 if (arginfo[i].dt == vect_constant_def
2820 || arginfo[i].dt == vect_external_def
2821 || (arginfo[i].linear_step
2822 != n->simdclone->args[i].linear_step))
2823 i = -1;
2824 break;
2825 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2826 /* FORNOW */
2827 i = -1;
2828 break;
2829 case SIMD_CLONE_ARG_TYPE_MASK:
2830 gcc_unreachable ();
2832 if (i == (size_t) -1)
2833 break;
2834 if (n->simdclone->args[i].alignment > arginfo[i].align)
2836 i = -1;
2837 break;
2839 if (arginfo[i].align)
2840 this_badness += (exact_log2 (arginfo[i].align)
2841 - exact_log2 (n->simdclone->args[i].alignment));
2843 if (i == (size_t) -1)
2844 continue;
2845 if (bestn == NULL || this_badness < badness)
2847 bestn = n;
2848 badness = this_badness;
2852 if (bestn == NULL)
2854 arginfo.release ();
2855 return false;
2858 for (i = 0; i < nargs; i++)
2859 if ((arginfo[i].dt == vect_constant_def
2860 || arginfo[i].dt == vect_external_def)
2861 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2863 arginfo[i].vectype
2864 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2865 i)));
2866 if (arginfo[i].vectype == NULL
2867 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2868 > bestn->simdclone->simdlen))
2870 arginfo.release ();
2871 return false;
2875 fndecl = bestn->decl;
2876 nunits = bestn->simdclone->simdlen;
2877 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2879 /* If the function isn't const, only allow it in simd loops where the
2880 user has asserted that at least nunits consecutive iterations can be
2881 performed using SIMD instructions. */
2882 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2883 && gimple_vuse (stmt))
2885 arginfo.release ();
2886 return false;
2889 /* Sanity check: make sure that at least one copy of the vectorized stmt
2890 needs to be generated. */
2891 gcc_assert (ncopies >= 1);
2893 if (!vec_stmt) /* transformation not required. */
2895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2896 for (i = 0; i < nargs; i++)
2897 if (bestn->simdclone->args[i].arg_type
2898 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2900 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2901 + 1);
2902 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2903 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2904 ? size_type_node : TREE_TYPE (arginfo[i].op);
2905 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2906 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2908 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2909 if (dump_enabled_p ())
2910 dump_printf_loc (MSG_NOTE, vect_location,
2911 "=== vectorizable_simd_clone_call ===\n");
2912 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2913 arginfo.release ();
2914 return true;
2917 /** Transform. **/
2919 if (dump_enabled_p ())
2920 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2922 /* Handle def. */
2923 scalar_dest = gimple_call_lhs (stmt);
2924 vec_dest = NULL_TREE;
2925 rtype = NULL_TREE;
2926 ratype = NULL_TREE;
2927 if (scalar_dest)
2929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2930 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2931 if (TREE_CODE (rtype) == ARRAY_TYPE)
2933 ratype = rtype;
2934 rtype = TREE_TYPE (ratype);
2938 prev_stmt_info = NULL;
2939 for (j = 0; j < ncopies; ++j)
2941 /* Build argument list for the vectorized call. */
2942 if (j == 0)
2943 vargs.create (nargs);
2944 else
2945 vargs.truncate (0);
2947 for (i = 0; i < nargs; i++)
2949 unsigned int k, l, m, o;
2950 tree atype;
2951 op = gimple_call_arg (stmt, i);
2952 switch (bestn->simdclone->args[i].arg_type)
2954 case SIMD_CLONE_ARG_TYPE_VECTOR:
2955 atype = bestn->simdclone->args[i].vector_type;
2956 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2957 for (m = j * o; m < (j + 1) * o; m++)
2959 if (TYPE_VECTOR_SUBPARTS (atype)
2960 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2962 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2963 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2964 / TYPE_VECTOR_SUBPARTS (atype));
2965 gcc_assert ((k & (k - 1)) == 0);
2966 if (m == 0)
2967 vec_oprnd0
2968 = vect_get_vec_def_for_operand (op, stmt, NULL);
2969 else
2971 vec_oprnd0 = arginfo[i].op;
2972 if ((m & (k - 1)) == 0)
2973 vec_oprnd0
2974 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2975 vec_oprnd0);
2977 arginfo[i].op = vec_oprnd0;
2978 vec_oprnd0
2979 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2980 size_int (prec),
2981 bitsize_int ((m & (k - 1)) * prec));
2982 new_stmt
2983 = gimple_build_assign (make_ssa_name (atype),
2984 vec_oprnd0);
2985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2986 vargs.safe_push (gimple_assign_lhs (new_stmt));
2988 else
2990 k = (TYPE_VECTOR_SUBPARTS (atype)
2991 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2992 gcc_assert ((k & (k - 1)) == 0);
2993 vec<constructor_elt, va_gc> *ctor_elts;
2994 if (k != 1)
2995 vec_alloc (ctor_elts, k);
2996 else
2997 ctor_elts = NULL;
2998 for (l = 0; l < k; l++)
3000 if (m == 0 && l == 0)
3001 vec_oprnd0
3002 = vect_get_vec_def_for_operand (op, stmt, NULL);
3003 else
3004 vec_oprnd0
3005 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3006 arginfo[i].op);
3007 arginfo[i].op = vec_oprnd0;
3008 if (k == 1)
3009 break;
3010 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3011 vec_oprnd0);
3013 if (k == 1)
3014 vargs.safe_push (vec_oprnd0);
3015 else
3017 vec_oprnd0 = build_constructor (atype, ctor_elts);
3018 new_stmt
3019 = gimple_build_assign (make_ssa_name (atype),
3020 vec_oprnd0);
3021 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3022 vargs.safe_push (gimple_assign_lhs (new_stmt));
3026 break;
3027 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3028 vargs.safe_push (op);
3029 break;
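/* For a linear argument, the first copy (j == 0) builds a PHI in the
   loop header that starts at the argument's base value and is advanced
   by linear_step * nunits * ncopies on each iteration of the vectorized
   loop; later copies simply add j * nunits * linear_step to that PHI
   result.  */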
3030 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3031 if (j == 0)
3033 gimple_seq stmts;
3034 arginfo[i].op
3035 = force_gimple_operand (arginfo[i].op, &stmts, true,
3036 NULL_TREE);
3037 if (stmts != NULL)
3039 basic_block new_bb;
3040 edge pe = loop_preheader_edge (loop);
3041 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3042 gcc_assert (!new_bb);
3044 tree phi_res = copy_ssa_name (op);
3045 gphi *new_phi = create_phi_node (phi_res, loop->header);
3046 set_vinfo_for_stmt (new_phi,
3047 new_stmt_vec_info (new_phi, loop_vinfo,
3048 NULL));
3049 add_phi_arg (new_phi, arginfo[i].op,
3050 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3051 enum tree_code code
3052 = POINTER_TYPE_P (TREE_TYPE (op))
3053 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3054 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3055 ? sizetype : TREE_TYPE (op);
3056 widest_int cst
3057 = wi::mul (bestn->simdclone->args[i].linear_step,
3058 ncopies * nunits);
3059 tree tcst = wide_int_to_tree (type, cst);
3060 tree phi_arg = copy_ssa_name (op);
3061 new_stmt
3062 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3063 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3064 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3065 set_vinfo_for_stmt (new_stmt,
3066 new_stmt_vec_info (new_stmt, loop_vinfo,
3067 NULL));
3068 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3069 UNKNOWN_LOCATION);
3070 arginfo[i].op = phi_res;
3071 vargs.safe_push (phi_res);
3073 else
3075 enum tree_code code
3076 = POINTER_TYPE_P (TREE_TYPE (op))
3077 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3078 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3079 ? sizetype : TREE_TYPE (op);
3080 widest_int cst
3081 = wi::mul (bestn->simdclone->args[i].linear_step,
3082 j * nunits);
3083 tree tcst = wide_int_to_tree (type, cst);
3084 new_temp = make_ssa_name (TREE_TYPE (op));
3085 new_stmt = gimple_build_assign (new_temp, code,
3086 arginfo[i].op, tcst);
3087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3088 vargs.safe_push (new_temp);
3090 break;
3091 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3092 default:
3093 gcc_unreachable ();
3097 new_stmt = gimple_build_call_vec (fndecl, vargs);
3098 if (vec_dest)
3100 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3101 if (ratype)
3102 new_temp = create_tmp_var (ratype);
3103 else if (TYPE_VECTOR_SUBPARTS (vectype)
3104 == TYPE_VECTOR_SUBPARTS (rtype))
3105 new_temp = make_ssa_name (vec_dest, new_stmt);
3106 else
3107 new_temp = make_ssa_name (rtype, new_stmt);
3108 gimple_call_set_lhs (new_stmt, new_temp);
3110 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3112 if (vec_dest)
3114 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3116 unsigned int k, l;
3117 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3118 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3119 gcc_assert ((k & (k - 1)) == 0);
3120 for (l = 0; l < k; l++)
3122 tree t;
3123 if (ratype)
3125 t = build_fold_addr_expr (new_temp);
3126 t = build2 (MEM_REF, vectype, t,
3127 build_int_cst (TREE_TYPE (t),
3128 l * prec / BITS_PER_UNIT));
3130 else
3131 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3132 size_int (prec), bitsize_int (l * prec));
3133 new_stmt
3134 = gimple_build_assign (make_ssa_name (vectype), t);
3135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3136 if (j == 0 && l == 0)
3137 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3138 else
3139 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3141 prev_stmt_info = vinfo_for_stmt (new_stmt);
3144 if (ratype)
3146 tree clobber = build_constructor (ratype, NULL);
3147 TREE_THIS_VOLATILE (clobber) = 1;
3148 new_stmt = gimple_build_assign (new_temp, clobber);
3149 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3151 continue;
3153 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3155 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3156 / TYPE_VECTOR_SUBPARTS (rtype));
3157 gcc_assert ((k & (k - 1)) == 0);
3158 if ((j & (k - 1)) == 0)
3159 vec_alloc (ret_ctor_elts, k);
3160 if (ratype)
3162 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3163 for (m = 0; m < o; m++)
3165 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3166 size_int (m), NULL_TREE, NULL_TREE);
3167 new_stmt
3168 = gimple_build_assign (make_ssa_name (rtype), tem);
3169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3170 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3171 gimple_assign_lhs (new_stmt));
3173 tree clobber = build_constructor (ratype, NULL);
3174 TREE_THIS_VOLATILE (clobber) = 1;
3175 new_stmt = gimple_build_assign (new_temp, clobber);
3176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3178 else
3179 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3180 if ((j & (k - 1)) != k - 1)
3181 continue;
3182 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3183 new_stmt
3184 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3187 if ((unsigned) j == k - 1)
3188 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3189 else
3190 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3192 prev_stmt_info = vinfo_for_stmt (new_stmt);
3193 continue;
3195 else if (ratype)
3197 tree t = build_fold_addr_expr (new_temp);
3198 t = build2 (MEM_REF, vectype, t,
3199 build_int_cst (TREE_TYPE (t), 0));
3200 new_stmt
3201 = gimple_build_assign (make_ssa_name (vec_dest), t);
3202 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3203 tree clobber = build_constructor (ratype, NULL);
3204 TREE_THIS_VOLATILE (clobber) = 1;
3205 vect_finish_stmt_generation (stmt,
3206 gimple_build_assign (new_temp,
3207 clobber), gsi);
3211 if (j == 0)
3212 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3213 else
3214 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3216 prev_stmt_info = vinfo_for_stmt (new_stmt);
3219 vargs.release ();
3221 /* The call in STMT might prevent it from being removed in dce.
3222 We cannot remove it here, however, because the ssa name it defines
3223 is mapped to the new definition. So just replace the rhs of the
3224 statement with something harmless. */
3226 if (slp_node)
3227 return true;
3229 if (scalar_dest)
3231 type = TREE_TYPE (scalar_dest);
3232 if (is_pattern_stmt_p (stmt_info))
3233 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3234 else
3235 lhs = gimple_call_lhs (stmt);
3236 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3238 else
3239 new_stmt = gimple_build_nop ();
3240 set_vinfo_for_stmt (new_stmt, stmt_info);
3241 set_vinfo_for_stmt (stmt, NULL);
3242 STMT_VINFO_STMT (stmt_info) = new_stmt;
3243 gsi_replace (gsi, new_stmt, true);
3244 unlink_stmt_vdef (stmt);
3246 return true;
3250 /* Function vect_gen_widened_results_half
3252 Create a vector stmt whose code, number of operands, and result
3253 variable are CODE, OP_TYPE, and VEC_DEST respectively, and whose
3254 arguments are VEC_OPRND0 and VEC_OPRND1. The new stmt is inserted at GSI.
3255 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3256 needs to be created (DECL is a function-decl of a target-builtin).
3257 STMT is the original scalar stmt that we are vectorizing. */
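/* As a sketch, a widening operation over a vector of 2*N narrow elements
   is emitted as two such "half" statements, one using the LO code or
   builtin and one using the HI code or builtin, each producing N wide
   elements; the caller pairs the two results.  */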
3259 static gimple
3260 vect_gen_widened_results_half (enum tree_code code,
3261 tree decl,
3262 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3263 tree vec_dest, gimple_stmt_iterator *gsi,
3264 gimple stmt)
3266 gimple new_stmt;
3267 tree new_temp;
3269 /* Generate half of the widened result: */
3270 if (code == CALL_EXPR)
3272 /* Target specific support */
3273 if (op_type == binary_op)
3274 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3275 else
3276 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3277 new_temp = make_ssa_name (vec_dest, new_stmt);
3278 gimple_call_set_lhs (new_stmt, new_temp);
3280 else
3282 /* Generic support */
3283 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3284 if (op_type != binary_op)
3285 vec_oprnd1 = NULL;
3286 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3287 new_temp = make_ssa_name (vec_dest, new_stmt);
3288 gimple_assign_set_lhs (new_stmt, new_temp);
3290 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3292 return new_stmt;
3296 /* Get vectorized definitions for loop-based vectorization. For the first
3297 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3298 scalar operand), and for the rest we get a copy with
3299 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3300 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3301 The vectors are collected into VEC_OPRNDS. */
3303 static void
3304 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3305 vec<tree> *vec_oprnds, int multi_step_cvt)
3307 tree vec_oprnd;
3309 /* Get first vector operand. */
3310 /* All the vector operands except the very first one (which is the scalar
3311 operand) are stmt copies. */
3312 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3313 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3314 else
3315 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3317 vec_oprnds->quick_push (vec_oprnd);
3319 /* Get second vector operand. */
3320 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3321 vec_oprnds->quick_push (vec_oprnd);
3323 *oprnd = vec_oprnd;
3325 /* For conversion in multiple steps, continue to get operands
3326 recursively. */
3327 if (multi_step_cvt)
3328 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3332 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3333 For multi-step conversions store the resulting vectors and call the function
3334 recursively. */
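/* At each step, pairs of input vectors are packed into a single narrower
   vector (the recursive steps use VEC_PACK_TRUNC_EXPR), so every level of
   recursion halves the number of vectors in VEC_OPRNDS.  */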
3336 static void
3337 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3338 int multi_step_cvt, gimple stmt,
3339 vec<tree> vec_dsts,
3340 gimple_stmt_iterator *gsi,
3341 slp_tree slp_node, enum tree_code code,
3342 stmt_vec_info *prev_stmt_info)
3344 unsigned int i;
3345 tree vop0, vop1, new_tmp, vec_dest;
3346 gimple new_stmt;
3347 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3349 vec_dest = vec_dsts.pop ();
3351 for (i = 0; i < vec_oprnds->length (); i += 2)
3353 /* Create demotion operation. */
3354 vop0 = (*vec_oprnds)[i];
3355 vop1 = (*vec_oprnds)[i + 1];
3356 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3357 new_tmp = make_ssa_name (vec_dest, new_stmt);
3358 gimple_assign_set_lhs (new_stmt, new_tmp);
3359 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361 if (multi_step_cvt)
3362 /* Store the resulting vector for next recursive call. */
3363 (*vec_oprnds)[i/2] = new_tmp;
3364 else
3366 /* This is the last step of the conversion sequence. Store the
3367 vectors in SLP_NODE or in vector info of the scalar statement
3368 (or in STMT_VINFO_RELATED_STMT chain). */
3369 if (slp_node)
3370 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3371 else
3373 if (!*prev_stmt_info)
3374 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3375 else
3376 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3378 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3383 /* For multi-step demotion operations we first generate demotion operations
3384 from the source type to the intermediate types, and then combine the
3385 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3386 type. */
3387 if (multi_step_cvt)
3389 /* At each level of recursion we have half of the operands we had at the
3390 previous level. */
3391 vec_oprnds->truncate ((i+1)/2);
3392 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3393 stmt, vec_dsts, gsi, slp_node,
3394 VEC_PACK_TRUNC_EXPR,
3395 prev_stmt_info);
3398 vec_dsts.quick_push (vec_dest);
3402 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3403 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3404 the resulting vectors and call the function recursively. */
3406 static void
3407 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3408 vec<tree> *vec_oprnds1,
3409 gimple stmt, tree vec_dest,
3410 gimple_stmt_iterator *gsi,
3411 enum tree_code code1,
3412 enum tree_code code2, tree decl1,
3413 tree decl2, int op_type)
3415 int i;
3416 tree vop0, vop1, new_tmp1, new_tmp2;
3417 gimple new_stmt1, new_stmt2;
3418 vec<tree> vec_tmp = vNULL;
3420 vec_tmp.create (vec_oprnds0->length () * 2);
3421 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3423 if (op_type == binary_op)
3424 vop1 = (*vec_oprnds1)[i];
3425 else
3426 vop1 = NULL_TREE;
3428 /* Generate the two halves of promotion operation. */
3429 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3430 op_type, vec_dest, gsi, stmt);
3431 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3432 op_type, vec_dest, gsi, stmt);
3433 if (is_gimple_call (new_stmt1))
3435 new_tmp1 = gimple_call_lhs (new_stmt1);
3436 new_tmp2 = gimple_call_lhs (new_stmt2);
3438 else
3440 new_tmp1 = gimple_assign_lhs (new_stmt1);
3441 new_tmp2 = gimple_assign_lhs (new_stmt2);
3444 /* Store the results for the next step. */
3445 vec_tmp.quick_push (new_tmp1);
3446 vec_tmp.quick_push (new_tmp2);
3449 vec_oprnds0->release ();
3450 *vec_oprnds0 = vec_tmp;
3454 /* Check if STMT performs a conversion operation, that can be vectorized.
3455 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3456 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3457 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
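/* As an illustrative sketch, with 128-bit vectors a conversion such as
   (double) i from a 32-bit int needs the WIDEN modifier: one vector of
   four ints feeds two vectors of two doubles, possibly via an intermediate
   widening of the integer elements (cvt_type) when the target has no
   single-step conversion.  The opposite direction uses NARROW.  */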
3459 static bool
3460 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3461 gimple *vec_stmt, slp_tree slp_node)
3463 tree vec_dest;
3464 tree scalar_dest;
3465 tree op0, op1 = NULL_TREE;
3466 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3467 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3468 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3469 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3470 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3471 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3472 tree new_temp;
3473 tree def;
3474 gimple def_stmt;
3475 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3476 gimple new_stmt = NULL;
3477 stmt_vec_info prev_stmt_info;
3478 int nunits_in;
3479 int nunits_out;
3480 tree vectype_out, vectype_in;
3481 int ncopies, i, j;
3482 tree lhs_type, rhs_type;
3483 enum { NARROW, NONE, WIDEN } modifier;
3484 vec<tree> vec_oprnds0 = vNULL;
3485 vec<tree> vec_oprnds1 = vNULL;
3486 tree vop0;
3487 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3488 int multi_step_cvt = 0;
3489 vec<tree> vec_dsts = vNULL;
3490 vec<tree> interm_types = vNULL;
3491 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3492 int op_type;
3493 machine_mode rhs_mode;
3494 unsigned short fltsz;
3496 /* Is STMT a vectorizable conversion? */
3498 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3499 return false;
3501 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3502 return false;
3504 if (!is_gimple_assign (stmt))
3505 return false;
3507 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3508 return false;
3510 code = gimple_assign_rhs_code (stmt);
3511 if (!CONVERT_EXPR_CODE_P (code)
3512 && code != FIX_TRUNC_EXPR
3513 && code != FLOAT_EXPR
3514 && code != WIDEN_MULT_EXPR
3515 && code != WIDEN_LSHIFT_EXPR)
3516 return false;
3518 op_type = TREE_CODE_LENGTH (code);
3520 /* Check types of lhs and rhs. */
3521 scalar_dest = gimple_assign_lhs (stmt);
3522 lhs_type = TREE_TYPE (scalar_dest);
3523 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3525 op0 = gimple_assign_rhs1 (stmt);
3526 rhs_type = TREE_TYPE (op0);
3528 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3529 && !((INTEGRAL_TYPE_P (lhs_type)
3530 && INTEGRAL_TYPE_P (rhs_type))
3531 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3532 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3533 return false;
3535 if ((INTEGRAL_TYPE_P (lhs_type)
3536 && (TYPE_PRECISION (lhs_type)
3537 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3538 || (INTEGRAL_TYPE_P (rhs_type)
3539 && (TYPE_PRECISION (rhs_type)
3540 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3542 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3544 "type conversion to/from bit-precision unsupported."
3545 "\n");
3546 return false;
3549 /* Check the operands of the operation. */
3550 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3551 &def_stmt, &def, &dt[0], &vectype_in))
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3555 "use not simple.\n");
3556 return false;
3558 if (op_type == binary_op)
3560 bool ok;
3562 op1 = gimple_assign_rhs2 (stmt);
3563 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3564 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3565 OP1. */
3566 if (CONSTANT_CLASS_P (op0))
3567 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3568 &def_stmt, &def, &dt[1], &vectype_in);
3569 else
3570 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3571 &def, &dt[1]);
3573 if (!ok)
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3577 "use not simple.\n");
3578 return false;
3582 /* If op0 is an external or constant defs use a vector type of
3583 the same size as the output vector type. */
3584 if (!vectype_in)
3585 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3586 if (vec_stmt)
3587 gcc_assert (vectype_in);
3588 if (!vectype_in)
3590 if (dump_enabled_p ())
3592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3593 "no vectype for scalar type ");
3594 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3595 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3598 return false;
3601 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3602 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3603 if (nunits_in < nunits_out)
3604 modifier = NARROW;
3605 else if (nunits_out == nunits_in)
3606 modifier = NONE;
3607 else
3608 modifier = WIDEN;
3610 /* Multiple types in SLP are handled by creating the appropriate number of
3611 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3612 case of SLP. */
3613 if (slp_node || PURE_SLP_STMT (stmt_info))
3614 ncopies = 1;
3615 else if (modifier == NARROW)
3616 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3617 else
3618 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3620 /* Sanity check: make sure that at least one copy of the vectorized stmt
3621 needs to be generated. */
3622 gcc_assert (ncopies >= 1);
3624 /* Supportable by target? */
3625 switch (modifier)
3627 case NONE:
3628 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3629 return false;
3630 if (supportable_convert_operation (code, vectype_out, vectype_in,
3631 &decl1, &code1))
3632 break;
3633 /* FALLTHRU */
3634 unsupported:
3635 if (dump_enabled_p ())
3636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3637 "conversion not supported by target.\n");
3638 return false;
3640 case WIDEN:
3641 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3642 &code1, &code2, &multi_step_cvt,
3643 &interm_types))
3645 /* Binary widening operation can only be supported directly by the
3646 architecture. */
3647 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3648 break;
3651 if (code != FLOAT_EXPR
3652 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3653 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3654 goto unsupported;
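/* No direct widening support.  For an integer-to-float conversion where
   the float is wider than the integer, try successively wider integer
   modes as an intermediate type: widen the input to cvt_type first and
   then convert cvt_type to the float type, as long as the target
   supports both halves.  */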
3656 rhs_mode = TYPE_MODE (rhs_type);
3657 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3658 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3659 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3660 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3662 cvt_type
3663 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3664 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3665 if (cvt_type == NULL_TREE)
3666 goto unsupported;
3668 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3670 if (!supportable_convert_operation (code, vectype_out,
3671 cvt_type, &decl1, &codecvt1))
3672 goto unsupported;
3674 else if (!supportable_widening_operation (code, stmt, vectype_out,
3675 cvt_type, &codecvt1,
3676 &codecvt2, &multi_step_cvt,
3677 &interm_types))
3678 continue;
3679 else
3680 gcc_assert (multi_step_cvt == 0);
3682 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3683 vectype_in, &code1, &code2,
3684 &multi_step_cvt, &interm_types))
3685 break;
3688 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3689 goto unsupported;
3691 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3692 codecvt2 = ERROR_MARK;
3693 else
3695 multi_step_cvt++;
3696 interm_types.safe_push (cvt_type);
3697 cvt_type = NULL_TREE;
3699 break;
3701 case NARROW:
3702 gcc_assert (op_type == unary_op);
3703 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3704 &code1, &multi_step_cvt,
3705 &interm_types))
3706 break;
3708 if (code != FIX_TRUNC_EXPR
3709 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3710 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3711 goto unsupported;
3713 rhs_mode = TYPE_MODE (rhs_type);
3714 cvt_type
3715 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3716 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3717 if (cvt_type == NULL_TREE)
3718 goto unsupported;
3719 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3720 &decl1, &codecvt1))
3721 goto unsupported;
3722 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3723 &code1, &multi_step_cvt,
3724 &interm_types))
3725 break;
3726 goto unsupported;
3728 default:
3729 gcc_unreachable ();
3732 if (!vec_stmt) /* transformation not required. */
3734 if (dump_enabled_p ())
3735 dump_printf_loc (MSG_NOTE, vect_location,
3736 "=== vectorizable_conversion ===\n");
3737 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3739 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3740 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3742 else if (modifier == NARROW)
3744 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3745 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3747 else
3749 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3750 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3752 interm_types.release ();
3753 return true;
3756 /** Transform. **/
3757 if (dump_enabled_p ())
3758 dump_printf_loc (MSG_NOTE, vect_location,
3759 "transform conversion. ncopies = %d.\n", ncopies);
3761 if (op_type == binary_op)
3763 if (CONSTANT_CLASS_P (op0))
3764 op0 = fold_convert (TREE_TYPE (op1), op0);
3765 else if (CONSTANT_CLASS_P (op1))
3766 op1 = fold_convert (TREE_TYPE (op0), op1);
3769 /* In case of multi-step conversion, we first generate conversion operations
3770 to the intermediate types, and then from those types to the final one.
3771 We create vector destinations for the intermediate types (TYPES) received
3772 from supportable_*_operation, and store them in the correct order
3773 for future use in vect_create_vectorized_*_stmts (). */
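/* For instance, a widening conversion from a vector of chars to a vector
   of ints is typically done char -> short -> int; the intermediate short
   vector type is what comes back in INTERM_TYPES and gets its own
   destination variable pushed below after the final one.  */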
3774 vec_dsts.create (multi_step_cvt + 1);
3775 vec_dest = vect_create_destination_var (scalar_dest,
3776 (cvt_type && modifier == WIDEN)
3777 ? cvt_type : vectype_out);
3778 vec_dsts.quick_push (vec_dest);
3780 if (multi_step_cvt)
3782 for (i = interm_types.length () - 1;
3783 interm_types.iterate (i, &intermediate_type); i--)
3785 vec_dest = vect_create_destination_var (scalar_dest,
3786 intermediate_type);
3787 vec_dsts.quick_push (vec_dest);
3791 if (cvt_type)
3792 vec_dest = vect_create_destination_var (scalar_dest,
3793 modifier == WIDEN
3794 ? vectype_out : cvt_type);
3796 if (!slp_node)
3798 if (modifier == WIDEN)
3800 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3801 if (op_type == binary_op)
3802 vec_oprnds1.create (1);
3804 else if (modifier == NARROW)
3805 vec_oprnds0.create (
3806 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3808 else if (code == WIDEN_LSHIFT_EXPR)
3809 vec_oprnds1.create (slp_node->vec_stmts_size);
3811 last_oprnd = op0;
3812 prev_stmt_info = NULL;
3813 switch (modifier)
3815 case NONE:
3816 for (j = 0; j < ncopies; j++)
3818 if (j == 0)
3819 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3820 -1);
3821 else
3822 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3824 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3826 /* Arguments are ready, create the new vector stmt. */
3827 if (code1 == CALL_EXPR)
3829 new_stmt = gimple_build_call (decl1, 1, vop0);
3830 new_temp = make_ssa_name (vec_dest, new_stmt);
3831 gimple_call_set_lhs (new_stmt, new_temp);
3833 else
3835 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3836 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3837 new_temp = make_ssa_name (vec_dest, new_stmt);
3838 gimple_assign_set_lhs (new_stmt, new_temp);
3841 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3842 if (slp_node)
3843 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3846 if (j == 0)
3847 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3848 else
3849 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3850 prev_stmt_info = vinfo_for_stmt (new_stmt);
3852 break;
3854 case WIDEN:
3855 /* In case the vectorization factor (VF) is bigger than the number
3856 of elements that we can fit in a vectype (nunits), we have to
3857 generate more than one vector stmt, i.e., we need to "unroll"
3858 the vector stmt by a factor VF/nunits. */
3859 for (j = 0; j < ncopies; j++)
3861 /* Handle uses. */
3862 if (j == 0)
3864 if (slp_node)
3866 if (code == WIDEN_LSHIFT_EXPR)
3868 unsigned int k;
3870 vec_oprnd1 = op1;
3871 /* Store vec_oprnd1 for every vector stmt to be created
3872 for SLP_NODE. We check during the analysis that all
3873 the shift arguments are the same. */
3874 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3875 vec_oprnds1.quick_push (vec_oprnd1);
3877 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3878 slp_node, -1);
3880 else
3881 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3882 &vec_oprnds1, slp_node, -1);
3884 else
3886 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3887 vec_oprnds0.quick_push (vec_oprnd0);
3888 if (op_type == binary_op)
3890 if (code == WIDEN_LSHIFT_EXPR)
3891 vec_oprnd1 = op1;
3892 else
3893 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3894 NULL);
3895 vec_oprnds1.quick_push (vec_oprnd1);
3899 else
3901 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3902 vec_oprnds0.truncate (0);
3903 vec_oprnds0.quick_push (vec_oprnd0);
3904 if (op_type == binary_op)
3906 if (code == WIDEN_LSHIFT_EXPR)
3907 vec_oprnd1 = op1;
3908 else
3909 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3910 vec_oprnd1);
3911 vec_oprnds1.truncate (0);
3912 vec_oprnds1.quick_push (vec_oprnd1);
3916 /* Arguments are ready. Create the new vector stmts. */
3917 for (i = multi_step_cvt; i >= 0; i--)
3919 tree this_dest = vec_dsts[i];
3920 enum tree_code c1 = code1, c2 = code2;
3921 if (i == 0 && codecvt2 != ERROR_MARK)
3923 c1 = codecvt1;
3924 c2 = codecvt2;
3926 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3927 &vec_oprnds1,
3928 stmt, this_dest, gsi,
3929 c1, c2, decl1, decl2,
3930 op_type);
3933 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3935 if (cvt_type)
3937 if (codecvt1 == CALL_EXPR)
3939 new_stmt = gimple_build_call (decl1, 1, vop0);
3940 new_temp = make_ssa_name (vec_dest, new_stmt);
3941 gimple_call_set_lhs (new_stmt, new_temp);
3943 else
3945 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3946 new_temp = make_ssa_name (vec_dest);
3947 new_stmt = gimple_build_assign (new_temp, codecvt1,
3948 vop0);
3951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3953 else
3954 new_stmt = SSA_NAME_DEF_STMT (vop0);
3956 if (slp_node)
3957 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3958 else
3960 if (!prev_stmt_info)
3961 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3962 else
3963 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3964 prev_stmt_info = vinfo_for_stmt (new_stmt);
3969 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3970 break;
3972 case NARROW:
3973 /* In case the vectorization factor (VF) is bigger than the number
3974 of elements that we can fit in a vectype (nunits), we have to
3975 generate more than one vector stmt, i.e., we need to "unroll"
3976 the vector stmt by a factor VF/nunits. */
3977 for (j = 0; j < ncopies; j++)
3979 /* Handle uses. */
3980 if (slp_node)
3981 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3982 slp_node, -1);
3983 else
3985 vec_oprnds0.truncate (0);
3986 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3987 vect_pow2 (multi_step_cvt) - 1);
3990 /* Arguments are ready. Create the new vector stmts. */
3991 if (cvt_type)
3992 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3994 if (codecvt1 == CALL_EXPR)
3996 new_stmt = gimple_build_call (decl1, 1, vop0);
3997 new_temp = make_ssa_name (vec_dest, new_stmt);
3998 gimple_call_set_lhs (new_stmt, new_temp);
4000 else
4002 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4003 new_temp = make_ssa_name (vec_dest);
4004 new_stmt = gimple_build_assign (new_temp, codecvt1,
4005 vop0);
4008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4009 vec_oprnds0[i] = new_temp;
4012 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4013 stmt, vec_dsts, gsi,
4014 slp_node, code1,
4015 &prev_stmt_info);
4018 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4019 break;
4022 vec_oprnds0.release ();
4023 vec_oprnds1.release ();
4024 vec_dsts.release ();
4025 interm_types.release ();
4027 return true;
4031 /* Function vectorizable_assignment.
4033 Check if STMT performs an assignment (copy) that can be vectorized.
4034 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4035 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4036 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4038 static bool
4039 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4040 gimple *vec_stmt, slp_tree slp_node)
4042 tree vec_dest;
4043 tree scalar_dest;
4044 tree op;
4045 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4046 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4047 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4048 tree new_temp;
4049 tree def;
4050 gimple def_stmt;
4051 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4052 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4053 int ncopies;
4054 int i, j;
4055 vec<tree> vec_oprnds = vNULL;
4056 tree vop;
4057 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4058 gimple new_stmt = NULL;
4059 stmt_vec_info prev_stmt_info = NULL;
4060 enum tree_code code;
4061 tree vectype_in;
4063 /* Multiple types in SLP are handled by creating the appropriate number of
4064 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4065 case of SLP. */
4066 if (slp_node || PURE_SLP_STMT (stmt_info))
4067 ncopies = 1;
4068 else
4069 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4071 gcc_assert (ncopies >= 1);
4073 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4074 return false;
4076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4077 return false;
4079 /* Is vectorizable assignment? */
4080 if (!is_gimple_assign (stmt))
4081 return false;
4083 scalar_dest = gimple_assign_lhs (stmt);
4084 if (TREE_CODE (scalar_dest) != SSA_NAME)
4085 return false;
4087 code = gimple_assign_rhs_code (stmt);
4088 if (gimple_assign_single_p (stmt)
4089 || code == PAREN_EXPR
4090 || CONVERT_EXPR_CODE_P (code))
4091 op = gimple_assign_rhs1 (stmt);
4092 else
4093 return false;
4095 if (code == VIEW_CONVERT_EXPR)
4096 op = TREE_OPERAND (op, 0);
4098 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4099 &def_stmt, &def, &dt[0], &vectype_in))
4101 if (dump_enabled_p ())
4102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4103 "use not simple.\n");
4104 return false;
4107 /* We can handle NOP_EXPR conversions that do not change the number
4108 of elements or the vector size. */
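/* E.g. a conversion between a vector of ints and a vector of unsigned
   ints, or a VIEW_CONVERT_EXPR between a vector of floats and a vector of
   same-width integers.  */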
4109 if ((CONVERT_EXPR_CODE_P (code)
4110 || code == VIEW_CONVERT_EXPR)
4111 && (!vectype_in
4112 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4113 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4114 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4115 return false;
4117 /* We do not handle bit-precision changes. */
4118 if ((CONVERT_EXPR_CODE_P (code)
4119 || code == VIEW_CONVERT_EXPR)
4120 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4121 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4122 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4123 || ((TYPE_PRECISION (TREE_TYPE (op))
4124 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4125 /* But a conversion that does not change the bit-pattern is ok. */
4126 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4127 > TYPE_PRECISION (TREE_TYPE (op)))
4128 && TYPE_UNSIGNED (TREE_TYPE (op))))
4130 if (dump_enabled_p ())
4131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4132 "type conversion to/from bit-precision "
4133 "unsupported.\n");
4134 return false;
4137 if (!vec_stmt) /* transformation not required. */
4139 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4140 if (dump_enabled_p ())
4141 dump_printf_loc (MSG_NOTE, vect_location,
4142 "=== vectorizable_assignment ===\n");
4143 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4144 return true;
4147 /** Transform. **/
4148 if (dump_enabled_p ())
4149 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4151 /* Handle def. */
4152 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4154 /* Handle use. */
4155 for (j = 0; j < ncopies; j++)
4157 /* Handle uses. */
4158 if (j == 0)
4159 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4160 else
4161 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4163 /* Arguments are ready. create the new vector stmt. */
4164 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4166 if (CONVERT_EXPR_CODE_P (code)
4167 || code == VIEW_CONVERT_EXPR)
4168 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4169 new_stmt = gimple_build_assign (vec_dest, vop);
4170 new_temp = make_ssa_name (vec_dest, new_stmt);
4171 gimple_assign_set_lhs (new_stmt, new_temp);
4172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4173 if (slp_node)
4174 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4177 if (slp_node)
4178 continue;
4180 if (j == 0)
4181 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4182 else
4183 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4185 prev_stmt_info = vinfo_for_stmt (new_stmt);
4188 vec_oprnds.release ();
4189 return true;
4193 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4194 either as shift by a scalar or by a vector. */
4196 bool
4197 vect_supportable_shift (enum tree_code code, tree scalar_type)
4200 machine_mode vec_mode;
4201 optab optab;
4202 int icode;
4203 tree vectype;
4205 vectype = get_vectype_for_scalar_type (scalar_type);
4206 if (!vectype)
4207 return false;
4209 optab = optab_for_tree_code (code, vectype, optab_scalar);
4210 if (!optab
4211 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4213 optab = optab_for_tree_code (code, vectype, optab_vector);
4214 if (!optab
4215 || (optab_handler (optab, TYPE_MODE (vectype))
4216 == CODE_FOR_nothing))
4217 return false;
4220 vec_mode = TYPE_MODE (vectype);
4221 icode = (int) optab_handler (optab, vec_mode);
4222 if (icode == CODE_FOR_nothing)
4223 return false;
4225 return true;
4229 /* Function vectorizable_shift.
4231 Check if STMT performs a shift operation that can be vectorized.
4232 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4233 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4234 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4236 static bool
4237 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4238 gimple *vec_stmt, slp_tree slp_node)
4240 tree vec_dest;
4241 tree scalar_dest;
4242 tree op0, op1 = NULL;
4243 tree vec_oprnd1 = NULL_TREE;
4244 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4245 tree vectype;
4246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4247 enum tree_code code;
4248 machine_mode vec_mode;
4249 tree new_temp;
4250 optab optab;
4251 int icode;
4252 machine_mode optab_op2_mode;
4253 tree def;
4254 gimple def_stmt;
4255 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4256 gimple new_stmt = NULL;
4257 stmt_vec_info prev_stmt_info;
4258 int nunits_in;
4259 int nunits_out;
4260 tree vectype_out;
4261 tree op1_vectype;
4262 int ncopies;
4263 int j, i;
4264 vec<tree> vec_oprnds0 = vNULL;
4265 vec<tree> vec_oprnds1 = vNULL;
4266 tree vop0, vop1;
4267 unsigned int k;
4268 bool scalar_shift_arg = true;
4269 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4270 int vf;
4272 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4273 return false;
4275 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4276 return false;
4278 /* Is STMT a vectorizable binary/unary operation? */
4279 if (!is_gimple_assign (stmt))
4280 return false;
4282 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4283 return false;
4285 code = gimple_assign_rhs_code (stmt);
4287 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4288 || code == RROTATE_EXPR))
4289 return false;
4291 scalar_dest = gimple_assign_lhs (stmt);
4292 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4293 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4294 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4296 if (dump_enabled_p ())
4297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4298 "bit-precision shifts not supported.\n");
4299 return false;
4302 op0 = gimple_assign_rhs1 (stmt);
4303 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4304 &def_stmt, &def, &dt[0], &vectype))
4306 if (dump_enabled_p ())
4307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4308 "use not simple.\n");
4309 return false;
4311 /* If op0 is an external or constant def, use a vector type with
4312 the same size as the output vector type. */
4313 if (!vectype)
4314 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4315 if (vec_stmt)
4316 gcc_assert (vectype);
4317 if (!vectype)
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4321 "no vectype for scalar type\n");
4322 return false;
4325 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4326 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4327 if (nunits_out != nunits_in)
4328 return false;
4330 op1 = gimple_assign_rhs2 (stmt);
4331 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4332 &def, &dt[1], &op1_vectype))
4334 if (dump_enabled_p ())
4335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4336 "use not simple.\n");
4337 return false;
4340 if (loop_vinfo)
4341 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4342 else
4343 vf = 1;
4345 /* Multiple types in SLP are handled by creating the appropriate number of
4346 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4347 case of SLP. */
4348 if (slp_node || PURE_SLP_STMT (stmt_info))
4349 ncopies = 1;
4350 else
4351 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4353 gcc_assert (ncopies >= 1);
4355 /* Determine whether the shift amount is a vector or a scalar. If the
4356 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4358 if (dt[1] == vect_internal_def && !slp_node)
4359 scalar_shift_arg = false;
4360 else if (dt[1] == vect_constant_def
4361 || dt[1] == vect_external_def
4362 || dt[1] == vect_internal_def)
4364 /* In SLP, we need to check whether the shift count is the same in all
4365 the stmts; in loops, if it is a constant or invariant, it is
4366 always a scalar shift. */
4367 if (slp_node)
4369 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4370 gimple slpstmt;
4372 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4373 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4374 scalar_shift_arg = false;
4377 else
4379 if (dump_enabled_p ())
4380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4381 "operand mode requires invariant argument.\n");
4382 return false;
4385 /* Vector shifted by vector. */
4386 if (!scalar_shift_arg)
4388 optab = optab_for_tree_code (code, vectype, optab_vector);
4389 if (dump_enabled_p ())
4390 dump_printf_loc (MSG_NOTE, vect_location,
4391 "vector/vector shift/rotate found.\n");
4393 if (!op1_vectype)
4394 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4395 if (op1_vectype == NULL_TREE
4396 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4400 "unusable type for last operand in"
4401 " vector/vector shift/rotate.\n");
4402 return false;
4405 /* See if the machine has a vector-by-scalar shift insn and, if not,
4406 whether it has a vector-by-vector shift insn. */
4407 else
4409 optab = optab_for_tree_code (code, vectype, optab_scalar);
4410 if (optab
4411 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4413 if (dump_enabled_p ())
4414 dump_printf_loc (MSG_NOTE, vect_location,
4415 "vector/scalar shift/rotate found.\n");
4417 else
4419 optab = optab_for_tree_code (code, vectype, optab_vector);
4420 if (optab
4421 && (optab_handler (optab, TYPE_MODE (vectype))
4422 != CODE_FOR_nothing))
4424 scalar_shift_arg = false;
4426 if (dump_enabled_p ())
4427 dump_printf_loc (MSG_NOTE, vect_location,
4428 "vector/vector shift/rotate found.\n");
4430 /* Unlike the other binary operators, shifts/rotates have an int rhs
4431 rather than one of the same type as the lhs, so make sure the
4432 scalar has the right type if we are dealing with vectors of
4433 long long/long/short/char. */
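/* For instance, for a vector of shorts shifted by the int constant 3,
   OP1 is narrowed to (short) 3 below before it is used to build the
   vector of shift counts.  */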
4434 if (dt[1] == vect_constant_def)
4435 op1 = fold_convert (TREE_TYPE (vectype), op1);
4436 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4437 TREE_TYPE (op1)))
4439 if (slp_node
4440 && TYPE_MODE (TREE_TYPE (vectype))
4441 != TYPE_MODE (TREE_TYPE (op1)))
4443 if (dump_enabled_p ())
4444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4445 "unusable type for last operand in"
4446 " vector/vector shift/rotate.\n");
4447 return false;
4449 if (vec_stmt && !slp_node)
4451 op1 = fold_convert (TREE_TYPE (vectype), op1);
4452 op1 = vect_init_vector (stmt, op1,
4453 TREE_TYPE (vectype), NULL);
4460 /* Supportable by target? */
4461 if (!optab)
4463 if (dump_enabled_p ())
4464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4465 "no optab.\n");
4466 return false;
4468 vec_mode = TYPE_MODE (vectype);
4469 icode = (int) optab_handler (optab, vec_mode);
4470 if (icode == CODE_FOR_nothing)
4472 if (dump_enabled_p ())
4473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4474 "op not supported by target.\n");
4475 /* Check only during analysis. */
4476 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4477 || (vf < vect_min_worthwhile_factor (code)
4478 && !vec_stmt))
4479 return false;
4480 if (dump_enabled_p ())
4481 dump_printf_loc (MSG_NOTE, vect_location,
4482 "proceeding using word mode.\n");
4485 /* Worthwhile without SIMD support? Check only during analysis. */
4486 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4487 && vf < vect_min_worthwhile_factor (code)
4488 && !vec_stmt)
4490 if (dump_enabled_p ())
4491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4492 "not worthwhile without SIMD support.\n");
4493 return false;
4496 if (!vec_stmt) /* transformation not required. */
4498 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_NOTE, vect_location,
4501 "=== vectorizable_shift ===\n");
4502 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4503 return true;
4506 /** Transform. **/
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE, vect_location,
4510 "transform binary/unary operation.\n");
4512 /* Handle def. */
4513 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4515 prev_stmt_info = NULL;
4516 for (j = 0; j < ncopies; j++)
4518 /* Handle uses. */
4519 if (j == 0)
4521 if (scalar_shift_arg)
4523 /* Vector shl and shr insn patterns can be defined with scalar
4524 operand 2 (shift operand). In this case, use constant or loop
4525 invariant op1 directly, without extending it to vector mode
4526 first. */
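/* This is the case, e.g., on targets whose shift patterns shift every
   element of the vector by a single scalar or immediate amount, so no
   vector of shift counts has to be materialized.  */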
4527 optab_op2_mode = insn_data[icode].operand[2].mode;
4528 if (!VECTOR_MODE_P (optab_op2_mode))
4530 if (dump_enabled_p ())
4531 dump_printf_loc (MSG_NOTE, vect_location,
4532 "operand 1 using scalar mode.\n");
4533 vec_oprnd1 = op1;
4534 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4535 vec_oprnds1.quick_push (vec_oprnd1);
4536 if (slp_node)
4538 /* Store vec_oprnd1 for every vector stmt to be created
4539 for SLP_NODE. We check during the analysis that all
4540 the shift arguments are the same.
4541 TODO: Allow different constants for different vector
4542 stmts generated for an SLP instance. */
4543 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4544 vec_oprnds1.quick_push (vec_oprnd1);
4549 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4550 (a special case for certain kinds of vector shifts); otherwise,
4551 operand 1 should be of a vector type (the usual case). */
4552 if (vec_oprnd1)
4553 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4554 slp_node, -1);
4555 else
4556 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4557 slp_node, -1);
4559 else
4560 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4562 /* Arguments are ready. Create the new vector stmt. */
4563 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4565 vop1 = vec_oprnds1[i];
4566 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4567 new_temp = make_ssa_name (vec_dest, new_stmt);
4568 gimple_assign_set_lhs (new_stmt, new_temp);
4569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4570 if (slp_node)
4571 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4574 if (slp_node)
4575 continue;
4577 if (j == 0)
4578 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4579 else
4580 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4581 prev_stmt_info = vinfo_for_stmt (new_stmt);
4584 vec_oprnds0.release ();
4585 vec_oprnds1.release ();
4587 return true;
4591 /* Function vectorizable_operation.
4593 Check if STMT performs a binary, unary or ternary operation that can
4594 be vectorized.
4595 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4596 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4597 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4599 static bool
4600 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4601 gimple *vec_stmt, slp_tree slp_node)
4603 tree vec_dest;
4604 tree scalar_dest;
4605 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4606 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4607 tree vectype;
4608 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4609 enum tree_code code;
4610 machine_mode vec_mode;
4611 tree new_temp;
4612 int op_type;
4613 optab optab;
4614 int icode;
4615 tree def;
4616 gimple def_stmt;
4617 enum vect_def_type dt[3]
4618 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4619 gimple new_stmt = NULL;
4620 stmt_vec_info prev_stmt_info;
4621 int nunits_in;
4622 int nunits_out;
4623 tree vectype_out;
4624 int ncopies;
4625 int j, i;
4626 vec<tree> vec_oprnds0 = vNULL;
4627 vec<tree> vec_oprnds1 = vNULL;
4628 vec<tree> vec_oprnds2 = vNULL;
4629 tree vop0, vop1, vop2;
4630 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4631 int vf;
4633 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4634 return false;
4636 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4637 return false;
4639 /* Is STMT a vectorizable binary/unary operation? */
4640 if (!is_gimple_assign (stmt))
4641 return false;
4643 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4644 return false;
4646 code = gimple_assign_rhs_code (stmt);
4648 /* For pointer addition, we should use the normal plus for
4649 the vector addition. */
4650 if (code == POINTER_PLUS_EXPR)
4651 code = PLUS_EXPR;
4653 /* Support only unary, binary, and ternary operations. */
4654 op_type = TREE_CODE_LENGTH (code);
4655 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4657 if (dump_enabled_p ())
4658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4659 "num. args = %d (not unary/binary/ternary op).\n",
4660 op_type);
4661 return false;
4664 scalar_dest = gimple_assign_lhs (stmt);
4665 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4667 /* Most operations cannot handle bit-precision types without extra
4668 truncations. */
4669 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4670 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4671 /* Exceptions are the bitwise binary operations. */
4672 && code != BIT_IOR_EXPR
4673 && code != BIT_XOR_EXPR
4674 && code != BIT_AND_EXPR)
4676 if (dump_enabled_p ())
4677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4678 "bit-precision arithmetic not supported.\n");
4679 return false;
4682 op0 = gimple_assign_rhs1 (stmt);
4683 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4684 &def_stmt, &def, &dt[0], &vectype))
4686 if (dump_enabled_p ())
4687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4688 "use not simple.\n");
4689 return false;
4691 /* If op0 is an external or constant def, use a vector type with
4692 the same size as the output vector type. */
4693 if (!vectype)
4694 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4695 if (vec_stmt)
4696 gcc_assert (vectype);
4697 if (!vectype)
4699 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4702 "no vectype for scalar type ");
4703 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4704 TREE_TYPE (op0));
4705 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4708 return false;
4711 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4712 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4713 if (nunits_out != nunits_in)
4714 return false;
4716 if (op_type == binary_op || op_type == ternary_op)
4718 op1 = gimple_assign_rhs2 (stmt);
4719 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4720 &def, &dt[1]))
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4724 "use not simple.\n");
4725 return false;
4728 if (op_type == ternary_op)
4730 op2 = gimple_assign_rhs3 (stmt);
4731 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4732 &def, &dt[2]))
4734 if (dump_enabled_p ())
4735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4736 "use not simple.\n");
4737 return false;
4741 if (loop_vinfo)
4742 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4743 else
4744 vf = 1;
4746 /* Multiple types in SLP are handled by creating the appropriate number of
4747 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4748 case of SLP. */
4749 if (slp_node || PURE_SLP_STMT (stmt_info))
4750 ncopies = 1;
4751 else
4752 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4754 gcc_assert (ncopies >= 1);
4756 /* Shifts are handled in vectorizable_shift (). */
4757 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4758 || code == RROTATE_EXPR)
4759 return false;
4761 /* Supportable by target? */
4763 vec_mode = TYPE_MODE (vectype);
4764 if (code == MULT_HIGHPART_EXPR)
4766 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4767 icode = LAST_INSN_CODE;
4768 else
4769 icode = CODE_FOR_nothing;
4771 else
4773 optab = optab_for_tree_code (code, vectype, optab_default);
4774 if (!optab)
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4778 "no optab.\n");
4779 return false;
4781 icode = (int) optab_handler (optab, vec_mode);
4784 if (icode == CODE_FOR_nothing)
4786 if (dump_enabled_p ())
4787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4788 "op not supported by target.\n");
4789 /* Check only during analysis. */
4790 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4791 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4792 return false;
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_NOTE, vect_location,
4795 "proceeding using word mode.\n");
4798 /* Worthwhile without SIMD support? Check only during analysis. */
4799 if (!VECTOR_MODE_P (vec_mode)
4800 && !vec_stmt
4801 && vf < vect_min_worthwhile_factor (code))
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4805 "not worthwhile without SIMD support.\n");
4806 return false;
4809 if (!vec_stmt) /* transformation not required. */
4811 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_NOTE, vect_location,
4814 "=== vectorizable_operation ===\n");
4815 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4816 return true;
4819 /** Transform. **/
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_NOTE, vect_location,
4823 "transform binary/unary operation.\n");
4825 /* Handle def. */
4826 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4828 /* In case the vectorization factor (VF) is bigger than the number
4829 of elements that we can fit in a vectype (nunits), we have to generate
4830 more than one vector stmt, i.e., we need to "unroll" the
4831 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4832 from one copy of the vector stmt to the next, in the field
4833 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4834 stages to find the correct vector defs to be used when vectorizing
4835 stmts that use the defs of the current stmt. The example below
4836 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4837 we need to create 4 vectorized stmts):
4839 before vectorization:
4840 RELATED_STMT VEC_STMT
4841 S1: x = memref - -
4842 S2: z = x + 1 - -
4844 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4845 there):
4846 RELATED_STMT VEC_STMT
4847 VS1_0: vx0 = memref0 VS1_1 -
4848 VS1_1: vx1 = memref1 VS1_2 -
4849 VS1_2: vx2 = memref2 VS1_3 -
4850 VS1_3: vx3 = memref3 - -
4851 S1: x = load - VS1_0
4852 S2: z = x + 1 - -
4854 step2: vectorize stmt S2 (done here):
4855 To vectorize stmt S2 we first need to find the relevant vector
4856 def for the first operand 'x'. This is, as usual, obtained from
4857 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4858 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4859 relevant vector def 'vx0'. Having found 'vx0' we can generate
4860 the vector stmt VS2_0, and as usual, record it in the
4861 STMT_VINFO_VEC_STMT of stmt S2.
4862 When creating the second copy (VS2_1), we obtain the relevant vector
4863 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4864 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4865 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4866 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4867 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4868 chain of stmts and pointers:
4869 RELATED_STMT VEC_STMT
4870 VS1_0: vx0 = memref0 VS1_1 -
4871 VS1_1: vx1 = memref1 VS1_2 -
4872 VS1_2: vx2 = memref2 VS1_3 -
4873 VS1_3: vx3 = memref3 - -
4874 S1: x = load - VS1_0
4875 VS2_0: vz0 = vx0 + v1 VS2_1 -
4876 VS2_1: vz1 = vx1 + v1 VS2_2 -
4877 VS2_2: vz2 = vx2 + v1 VS2_3 -
4878 VS2_3: vz3 = vx3 + v1 - -
4879 S2: z = x + 1 - VS2_0 */
4881 prev_stmt_info = NULL;
4882 for (j = 0; j < ncopies; j++)
4884 /* Handle uses. */
4885 if (j == 0)
4887 if (op_type == binary_op || op_type == ternary_op)
4888 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4889 slp_node, -1);
4890 else
4891 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4892 slp_node, -1);
4893 if (op_type == ternary_op)
4895 vec_oprnds2.create (1);
4896 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4897 stmt,
4898 NULL));
4901 else
4903 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4904 if (op_type == ternary_op)
4906 tree vec_oprnd = vec_oprnds2.pop ();
4907 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4908 vec_oprnd));
4912 /* Arguments are ready. Create the new vector stmt. */
4913 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4915 vop1 = ((op_type == binary_op || op_type == ternary_op)
4916 ? vec_oprnds1[i] : NULL_TREE);
4917 vop2 = ((op_type == ternary_op)
4918 ? vec_oprnds2[i] : NULL_TREE);
4919 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4920 new_temp = make_ssa_name (vec_dest, new_stmt);
4921 gimple_assign_set_lhs (new_stmt, new_temp);
4922 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4923 if (slp_node)
4924 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4927 if (slp_node)
4928 continue;
4930 if (j == 0)
4931 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4932 else
4933 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4934 prev_stmt_info = vinfo_for_stmt (new_stmt);
4937 vec_oprnds0.release ();
4938 vec_oprnds1.release ();
4939 vec_oprnds2.release ();
4941 return true;
4944 /* A helper function to ensure data reference DR's base alignment
4945 for STMT_INFO. */
4947 static void
4948 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4950 if (!dr->aux)
4951 return;
4953 if (((dataref_aux *)dr->aux)->base_misaligned)
4955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4956 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4958 if (decl_in_symtab_p (base_decl))
4959 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4960 else
4962 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4963 DECL_USER_ALIGN (base_decl) = 1;
4965 ((dataref_aux *)dr->aux)->base_misaligned = false;
4970 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4971 reversal of the vector elements. If that is impossible to do,
4972 return NULL_TREE. */
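/* For example, for a four-element vector the selector built below is
   { 3, 2, 1, 0 }.  */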
4974 static tree
4975 perm_mask_for_reverse (tree vectype)
4977 int i, nunits;
4978 unsigned char *sel;
4980 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4981 sel = XALLOCAVEC (unsigned char, nunits);
4983 for (i = 0; i < nunits; ++i)
4984 sel[i] = nunits - 1 - i;
4986 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4987 return NULL_TREE;
4988 return vect_gen_perm_mask_checked (vectype, sel);
4991 /* Function vectorizable_store.
4993 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4994 can be vectorized.
4995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4996 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4999 static bool
5000 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5001 slp_tree slp_node)
5003 tree scalar_dest;
5004 tree data_ref;
5005 tree op;
5006 tree vec_oprnd = NULL_TREE;
5007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5008 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5009 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5010 tree elem_type;
5011 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5012 struct loop *loop = NULL;
5013 machine_mode vec_mode;
5014 tree dummy;
5015 enum dr_alignment_support alignment_support_scheme;
5016 tree def;
5017 gimple def_stmt;
5018 enum vect_def_type dt;
5019 stmt_vec_info prev_stmt_info = NULL;
5020 tree dataref_ptr = NULL_TREE;
5021 tree dataref_offset = NULL_TREE;
5022 gimple ptr_incr = NULL;
5023 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5024 int ncopies;
5025 int j;
5026 gimple next_stmt, first_stmt = NULL;
5027 bool grouped_store = false;
5028 bool store_lanes_p = false;
5029 unsigned int group_size, i;
5030 vec<tree> dr_chain = vNULL;
5031 vec<tree> oprnds = vNULL;
5032 vec<tree> result_chain = vNULL;
5033 bool inv_p;
5034 bool negative = false;
5035 tree offset = NULL_TREE;
5036 vec<tree> vec_oprnds = vNULL;
5037 bool slp = (slp_node != NULL);
5038 unsigned int vec_num;
5039 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5040 tree aggr_type;
5042 if (loop_vinfo)
5043 loop = LOOP_VINFO_LOOP (loop_vinfo);
5045 /* Multiple types in SLP are handled by creating the appropriate number of
5046 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5047 case of SLP. */
5048 if (slp || PURE_SLP_STMT (stmt_info))
5049 ncopies = 1;
5050 else
5051 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5053 gcc_assert (ncopies >= 1);
5055 /* FORNOW. This restriction should be relaxed. */
5056 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5060 "multiple types in nested loop.\n");
5061 return false;
5064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5065 return false;
5067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5068 return false;
5070 /* Is vectorizable store? */
5072 if (!is_gimple_assign (stmt))
5073 return false;
5075 scalar_dest = gimple_assign_lhs (stmt);
5076 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5077 && is_pattern_stmt_p (stmt_info))
5078 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5079 if (TREE_CODE (scalar_dest) != ARRAY_REF
5080 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5081 && TREE_CODE (scalar_dest) != INDIRECT_REF
5082 && TREE_CODE (scalar_dest) != COMPONENT_REF
5083 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5084 && TREE_CODE (scalar_dest) != REALPART_EXPR
5085 && TREE_CODE (scalar_dest) != MEM_REF)
5086 return false;
5088 gcc_assert (gimple_assign_single_p (stmt));
5089 op = gimple_assign_rhs1 (stmt);
5090 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5091 &def, &dt))
5093 if (dump_enabled_p ())
5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5095 "use not simple.\n");
5096 return false;
5099 elem_type = TREE_TYPE (vectype);
5100 vec_mode = TYPE_MODE (vectype);
5102 /* FORNOW. In some cases we can vectorize even if the data-type is not
5103 supported (e.g. array initialization with 0). */
5104 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5105 return false;
5107 if (!STMT_VINFO_DATA_REF (stmt_info))
5108 return false;
5110 if (!STMT_VINFO_STRIDED_P (stmt_info))
5112 negative =
5113 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5114 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5115 size_zero_node) < 0;
5116 if (negative && ncopies > 1)
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5120 "multiple types with negative step.\n");
5121 return false;
5123 if (negative)
5125 gcc_assert (!grouped_store);
5126 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5127 if (alignment_support_scheme != dr_aligned
5128 && alignment_support_scheme != dr_unaligned_supported)
5130 if (dump_enabled_p ())
5131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5132 "negative step but alignment required.\n");
5133 return false;
5135 if (dt != vect_constant_def
5136 && dt != vect_external_def
5137 && !perm_mask_for_reverse (vectype))
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 "negative step and reversing not supported.\n");
5142 return false;
5147 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5149 grouped_store = true;
5150 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5151 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5152 if (!slp
5153 && !PURE_SLP_STMT (stmt_info)
5154 && !STMT_VINFO_STRIDED_P (stmt_info))
5156 if (vect_store_lanes_supported (vectype, group_size))
5157 store_lanes_p = true;
5158 else if (!vect_grouped_store_supported (vectype, group_size))
5159 return false;
5162 if (STMT_VINFO_STRIDED_P (stmt_info)
5163 && (slp || PURE_SLP_STMT (stmt_info))
5164 && (group_size > nunits
5165 || nunits % group_size != 0))
5167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5168 "unhandled strided group store\n");
5169 return false;
5172 if (first_stmt == stmt)
5174 /* STMT is the leader of the group. Check the operands of all the
5175 stmts of the group. */
5176 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5177 while (next_stmt)
5179 gcc_assert (gimple_assign_single_p (next_stmt));
5180 op = gimple_assign_rhs1 (next_stmt);
5181 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5182 &def_stmt, &def, &dt))
5184 if (dump_enabled_p ())
5185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5186 "use not simple.\n");
5187 return false;
5189 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5194 if (!vec_stmt) /* transformation not required. */
5196 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5197 /* The SLP costs are calculated during SLP analysis. */
5198 if (!PURE_SLP_STMT (stmt_info))
5199 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5200 NULL, NULL, NULL);
5201 return true;
5204 /** Transform. **/
5206 ensure_base_align (stmt_info, dr);
5208 if (grouped_store)
5210 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5211 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5213 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5215 /* FORNOW */
5216 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5218 /* We vectorize all the stmts of the interleaving group when we
5219 reach the last stmt in the group. */
5220 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5221 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5222 && !slp)
5224 *vec_stmt = NULL;
5225 return true;
5228 if (slp)
5230 grouped_store = false;
5231 /* VEC_NUM is the number of vect stmts to be created for this
5232 group. */
5233 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5234 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5235 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5236 op = gimple_assign_rhs1 (first_stmt);
5238 else
5239 /* VEC_NUM is the number of vect stmts to be created for this
5240 group. */
5241 vec_num = group_size;
5243 else
5245 first_stmt = stmt;
5246 first_dr = dr;
5247 group_size = vec_num = 1;
5250 if (dump_enabled_p ())
5251 dump_printf_loc (MSG_NOTE, vect_location,
5252 "transform store. ncopies = %d\n", ncopies);
5254 if (STMT_VINFO_STRIDED_P (stmt_info))
5256 gimple_stmt_iterator incr_gsi;
5257 bool insert_after;
5258 gimple incr;
5259 tree offvar;
5260 tree ivstep;
5261 tree running_off;
5262 gimple_seq stmts = NULL;
5263 tree stride_base, stride_step, alias_off;
5264 tree vec_oprnd;
5265 unsigned int g;
5267 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5269 stride_base
5270 = fold_build_pointer_plus
5271 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5272 size_binop (PLUS_EXPR,
5273 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5274 convert_to_ptrofftype (DR_INIT (first_dr))));
5275 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5277 /* For a store with loop-invariant (but other than power-of-2)
5278 stride (i.e. not a grouped access) like so:
5280 for (i = 0; i < n; i += stride)
5281 array[i] = ...;
5283 we generate a new induction variable and new stores from
5284 the components of the (vectorized) rhs:
5286 for (j = 0; ; j += VF*stride)
5287 vectemp = ...;
5288 tmp1 = vectemp[0];
5289 array[j] = tmp1;
5290 tmp2 = vectemp[1];
5291 array[j + stride] = tmp2;
5295 unsigned nstores = nunits;
5296 tree ltype = elem_type;
5297 if (slp)
5299 nstores = nunits / group_size;
5300 if (group_size < nunits)
5301 ltype = build_vector_type (elem_type, group_size);
5302 else
5303 ltype = vectype;
5304 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5305 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5306 group_size = 1;
5309 ivstep = stride_step;
5310 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5311 build_int_cst (TREE_TYPE (ivstep),
5312 ncopies * nstores));
5314 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5316 create_iv (stride_base, ivstep, NULL,
5317 loop, &incr_gsi, insert_after,
5318 &offvar, NULL);
5319 incr = gsi_stmt (incr_gsi);
5320 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5322 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5323 if (stmts)
5324 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5326 prev_stmt_info = NULL;
5327 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5328 next_stmt = first_stmt;
5329 for (g = 0; g < group_size; g++)
5331 running_off = offvar;
5332 if (g)
5334 tree size = TYPE_SIZE_UNIT (ltype);
5335 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5336 size);
5337 tree newoff = copy_ssa_name (running_off, NULL);
5338 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5339 running_off, pos);
5340 vect_finish_stmt_generation (stmt, incr, gsi);
5341 running_off = newoff;
5343 for (j = 0; j < ncopies; j++)
5345 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5346 and first_stmt == stmt. */
5347 if (j == 0)
5349 if (slp)
5351 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5352 slp_node, -1);
5353 vec_oprnd = vec_oprnds[0];
5355 else
5357 gcc_assert (gimple_assign_single_p (next_stmt));
5358 op = gimple_assign_rhs1 (next_stmt);
5359 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5360 NULL);
5363 else
5365 if (slp)
5366 vec_oprnd = vec_oprnds[j];
5367 else
5369 vect_is_simple_use (vec_oprnd, NULL, loop_vinfo,
5370 bb_vinfo, &def_stmt, &def, &dt);
5371 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5375 for (i = 0; i < nstores; i++)
5377 tree newref, newoff;
5378 gimple incr, assign;
5379 tree size = TYPE_SIZE (ltype);
5380 /* Extract the i'th component. */
5381 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5382 bitsize_int (i), size);
5383 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5384 size, pos);
5386 elem = force_gimple_operand_gsi (gsi, elem, true,
5387 NULL_TREE, true,
5388 GSI_SAME_STMT);
5390 newref = build2 (MEM_REF, ltype,
5391 running_off, alias_off);
5393 /* And store it to *running_off. */
5394 assign = gimple_build_assign (newref, elem);
5395 vect_finish_stmt_generation (stmt, assign, gsi);
5397 newoff = copy_ssa_name (running_off, NULL);
5398 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5399 running_off, stride_step);
5400 vect_finish_stmt_generation (stmt, incr, gsi);
5402 running_off = newoff;
5403 if (g == group_size - 1)
5405 if (j == 0 && i == 0)
5406 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5407 else
5408 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5409 prev_stmt_info = vinfo_for_stmt (assign);
5413 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5415 return true;
5418 dr_chain.create (group_size);
5419 oprnds.create (group_size);
5421 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5422 gcc_assert (alignment_support_scheme);
5423 /* Targets with store-lane instructions must not require explicit
5424 realignment. */
5425 gcc_assert (!store_lanes_p
5426 || alignment_support_scheme == dr_aligned
5427 || alignment_support_scheme == dr_unaligned_supported);
5429 if (negative)
5430 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5432 if (store_lanes_p)
5433 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5434 else
5435 aggr_type = vectype;
5437 /* In case the vectorization factor (VF) is bigger than the number
5438 of elements that we can fit in a vectype (nunits), we have to generate
5439 more than one vector stmt, i.e., we need to "unroll" the
5440 vector stmt by a factor VF/nunits. For more details see documentation in
5441 vect_get_vec_def_for_copy_stmt. */
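/* E.g. with VF == 8 and four-element vectors, NCOPIES == 2, so each
   scalar store gives rise to two vector stores chained through
   STMT_VINFO_RELATED_STMT.  */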
5443 /* In case of interleaving (non-unit grouped access):
5445 S1: &base + 2 = x2
5446 S2: &base = x0
5447 S3: &base + 1 = x1
5448 S4: &base + 3 = x3
5450 We create vectorized stores starting from the base address (the access of
5451 the first stmt in the chain, S2 in the above example) when the last store
5452 stmt of the chain (S4) is reached:
5454 VS1: &base = vx2
5455 VS2: &base + vec_size*1 = vx0
5456 VS3: &base + vec_size*2 = vx1
5457 VS4: &base + vec_size*3 = vx3
5459 Then permutation statements are generated:
5461 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5462 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5465 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5466 (the order of the data-refs in the output of vect_permute_store_chain
5467 corresponds to the order of scalar stmts in the interleaving chain - see
5468 the documentation of vect_permute_store_chain()).
5470 In case of both multiple types and interleaving, above vector stores and
5471 permutation stmts are created for every copy. The result vector stmts are
5472 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5473 STMT_VINFO_RELATED_STMT for the next copies.
5476 prev_stmt_info = NULL;
5477 for (j = 0; j < ncopies; j++)
5479 gimple new_stmt;
5481 if (j == 0)
5483 if (slp)
5485 /* Get vectorized arguments for SLP_NODE. */
5486 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5487 NULL, slp_node, -1);
5489 vec_oprnd = vec_oprnds[0];
5491 else
5493 /* For interleaved stores we collect vectorized defs for all the
5494 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5495 used as an input to vect_permute_store_chain(), and OPRNDS as
5496 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5498 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5499 OPRNDS are of size 1. */
5500 next_stmt = first_stmt;
5501 for (i = 0; i < group_size; i++)
5503 /* Since gaps are not supported for interleaved stores,
5504 GROUP_SIZE is the exact number of stmts in the chain.
5505 Therefore, NEXT_STMT can't be NULL_TREE. If there is
5506 no interleaving, GROUP_SIZE is 1, and only one
5507 iteration of the loop will be executed. */
5508 gcc_assert (next_stmt
5509 && gimple_assign_single_p (next_stmt));
5510 op = gimple_assign_rhs1 (next_stmt);
5512 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5513 NULL);
5514 dr_chain.quick_push (vec_oprnd);
5515 oprnds.quick_push (vec_oprnd);
5516 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5520 /* We should have caught mismatched types earlier. */
5521 gcc_assert (useless_type_conversion_p (vectype,
5522 TREE_TYPE (vec_oprnd)));
5523 bool simd_lane_access_p
5524 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5525 if (simd_lane_access_p
5526 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5527 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5528 && integer_zerop (DR_OFFSET (first_dr))
5529 && integer_zerop (DR_INIT (first_dr))
5530 && alias_sets_conflict_p (get_alias_set (aggr_type),
5531 get_alias_set (DR_REF (first_dr))))
5533 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5534 dataref_offset = build_int_cst (reference_alias_ptr_type
5535 (DR_REF (first_dr)), 0);
5536 inv_p = false;
5538 else
5539 dataref_ptr
5540 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5541 simd_lane_access_p ? loop : NULL,
5542 offset, &dummy, gsi, &ptr_incr,
5543 simd_lane_access_p, &inv_p);
5544 gcc_assert (bb_vinfo || !inv_p);
5546 else
5548 /* For interleaved stores we created vectorized defs for all the
5549 defs stored in OPRNDS in the previous iteration (previous copy).
5550 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5551 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5552 next copy.
5553 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5554 OPRNDS are of size 1. */
5555 for (i = 0; i < group_size; i++)
5557 op = oprnds[i];
5558 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5559 &def, &dt);
5560 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5561 dr_chain[i] = vec_oprnd;
5562 oprnds[i] = vec_oprnd;
5564 if (dataref_offset)
5565 dataref_offset
5566 = int_const_binop (PLUS_EXPR, dataref_offset,
5567 TYPE_SIZE_UNIT (aggr_type));
5568 else
5569 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5570 TYPE_SIZE_UNIT (aggr_type));
5573 if (store_lanes_p)
5575 tree vec_array;
5577 /* Combine all the vectors into an array. */
5578 vec_array = create_vector_array (vectype, vec_num);
5579 for (i = 0; i < vec_num; i++)
5581 vec_oprnd = dr_chain[i];
5582 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5585 /* Emit:
5586 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5587 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5588 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5589 gimple_call_set_lhs (new_stmt, data_ref);
5590 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5592 else
5594 new_stmt = NULL;
5595 if (grouped_store)
5597 if (j == 0)
5598 result_chain.create (group_size);
5599 /* Permute. */
5600 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5601 &result_chain);
5604 next_stmt = first_stmt;
5605 for (i = 0; i < vec_num; i++)
5607 unsigned align, misalign;
5609 if (i > 0)
5610 /* Bump the vector pointer. */
5611 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5612 stmt, NULL_TREE);
5614 if (slp)
5615 vec_oprnd = vec_oprnds[i];
5616 else if (grouped_store)
5617 /* For grouped stores vectorized defs are interleaved in
5618 vect_permute_store_chain(). */
5619 vec_oprnd = result_chain[i];
5621 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5622 dataref_ptr,
5623 dataref_offset
5624 ? dataref_offset
5625 : build_int_cst (reference_alias_ptr_type
5626 (DR_REF (first_dr)), 0));
5627 align = TYPE_ALIGN_UNIT (vectype);
5628 if (aligned_access_p (first_dr))
5629 misalign = 0;
5630 else if (DR_MISALIGNMENT (first_dr) == -1)
5632 TREE_TYPE (data_ref)
5633 = build_aligned_type (TREE_TYPE (data_ref),
5634 TYPE_ALIGN (elem_type));
5635 align = TYPE_ALIGN_UNIT (elem_type);
5636 misalign = 0;
5638 else
5640 TREE_TYPE (data_ref)
5641 = build_aligned_type (TREE_TYPE (data_ref),
5642 TYPE_ALIGN (elem_type));
5643 misalign = DR_MISALIGNMENT (first_dr);
5645 if (dataref_offset == NULL_TREE
5646 && TREE_CODE (dataref_ptr) == SSA_NAME)
5647 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5648 misalign);
5650 if (negative
5651 && dt != vect_constant_def
5652 && dt != vect_external_def)
5654 tree perm_mask = perm_mask_for_reverse (vectype);
5655 tree perm_dest
5656 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5657 vectype);
5658 tree new_temp = make_ssa_name (perm_dest);
5660 /* Generate the permute statement. */
5661 gimple perm_stmt
5662 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5663 vec_oprnd, perm_mask);
5664 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5666 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5667 vec_oprnd = new_temp;
5670 /* Arguments are ready. Create the new vector stmt. */
5671 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5672 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5674 if (slp)
5675 continue;
5677 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5678 if (!next_stmt)
5679 break;
5682 if (!slp)
5684 if (j == 0)
5685 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5686 else
5687 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5688 prev_stmt_info = vinfo_for_stmt (new_stmt);
5692 dr_chain.release ();
5693 oprnds.release ();
5694 result_chain.release ();
5695 vec_oprnds.release ();
5697 return true;
5700 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5701 VECTOR_CST mask. No checks are made that the target platform supports the
5702 mask, so callers may wish to test can_vec_perm_p separately, or use
5703 vect_gen_perm_mask_checked. */
5705 tree
5706 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5708 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5709 int i, nunits;
5711 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5713 mask_elt_type = lang_hooks.types.type_for_mode
5714 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5715 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5717 mask_elts = XALLOCAVEC (tree, nunits);
5718 for (i = nunits - 1; i >= 0; i--)
5719 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5720 mask_vec = build_vector (mask_type, mask_elts);
5722 return mask_vec;
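/* As an illustrative sketch, for a four-element vector type and
SEL = { 3, 2, 1, 0 } (the reverse mask perm_mask_for_reverse asks for)
this returns the VECTOR_CST { 3, 2, 1, 0 } in the matching integer
vector type, which a later

vect_r = VEC_PERM_EXPR <vect_a, vect_a, { 3, 2, 1, 0 }>;

uses to reverse the element order. */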
5725 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5726 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5728 tree
5729 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5731 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5732 return vect_gen_perm_mask_any (vectype, sel);
5735 /* Given vector variables X and Y that were generated for the scalar
5736 STMT, generate instructions to permute the vector elements of X and Y
5737 using permutation mask MASK_VEC, insert them at *GSI and return the
5738 permuted vector variable. */
5740 static tree
5741 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5742 gimple_stmt_iterator *gsi)
5744 tree vectype = TREE_TYPE (x);
5745 tree perm_dest, data_ref;
5746 gimple perm_stmt;
5748 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5749 data_ref = make_ssa_name (perm_dest);
5751 /* Generate the permute statement. */
5752 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5753 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5755 return data_ref;
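/* For instance, called with X = vx0, Y = vx1 and the four-element mask
{ 0, 4, 1, 5 }, this emits

new_name = VEC_PERM_EXPR <vx0, vx1, { 0, 4, 1, 5 }>;

i.e. the low halves of vx0 and vx1 interleaved, and returns the new
SSA name. */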
5758 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5759 inserting them on the loop's preheader edge. Returns true if we
5760 were successful in doing so (and thus STMT can then be moved),
5761 otherwise returns false. */
5763 static bool
5764 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5766 ssa_op_iter i;
5767 tree op;
5768 bool any = false;
5770 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5772 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5773 if (!gimple_nop_p (def_stmt)
5774 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5776 /* Make sure we don't need to recurse. While we could do
5777 so in simple cases, when there are more complex use webs
5778 we don't have an easy way to preserve stmt order to fulfil
5779 dependencies within them. */
5780 tree op2;
5781 ssa_op_iter i2;
5782 if (gimple_code (def_stmt) == GIMPLE_PHI)
5783 return false;
5784 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5786 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5787 if (!gimple_nop_p (def_stmt2)
5788 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5789 return false;
5791 any = true;
5795 if (!any)
5796 return true;
5798 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5800 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5801 if (!gimple_nop_p (def_stmt)
5802 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5804 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5805 gsi_remove (&gsi, false);
5806 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5810 return true;
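/* A small example: if STMT is the invariant load

_3 = *p_2;

and p_2 is defined inside LOOP by

p_2 = q_1 + 16;

with q_1 defined outside the loop, then the definition of p_2 is moved
onto the preheader edge so that the caller can hoist STMT as well.
A PHI definition or a deeper use web makes us give up instead. */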
5813 /* vectorizable_load.
5815 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5816 can be vectorized.
5817 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5818 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5819 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5821 static bool
5822 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5823 slp_tree slp_node, slp_instance slp_node_instance)
5825 tree scalar_dest;
5826 tree vec_dest = NULL;
5827 tree data_ref = NULL;
5828 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5829 stmt_vec_info prev_stmt_info;
5830 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5831 struct loop *loop = NULL;
5832 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5833 bool nested_in_vect_loop = false;
5834 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5835 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5836 tree elem_type;
5837 tree new_temp;
5838 machine_mode mode;
5839 gimple new_stmt = NULL;
5840 tree dummy;
5841 enum dr_alignment_support alignment_support_scheme;
5842 tree dataref_ptr = NULL_TREE;
5843 tree dataref_offset = NULL_TREE;
5844 gimple ptr_incr = NULL;
5845 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5846 int ncopies;
5847 int i, j, group_size = -1, group_gap_adj;
5848 tree msq = NULL_TREE, lsq;
5849 tree offset = NULL_TREE;
5850 tree byte_offset = NULL_TREE;
5851 tree realignment_token = NULL_TREE;
5852 gphi *phi = NULL;
5853 vec<tree> dr_chain = vNULL;
5854 bool grouped_load = false;
5855 bool load_lanes_p = false;
5856 gimple first_stmt;
5857 bool inv_p;
5858 bool negative = false;
5859 bool compute_in_loop = false;
5860 struct loop *at_loop;
5861 int vec_num;
5862 bool slp = (slp_node != NULL);
5863 bool slp_perm = false;
5864 enum tree_code code;
5865 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5866 int vf;
5867 tree aggr_type;
5868 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5869 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5870 int gather_scale = 1;
5871 enum vect_def_type gather_dt = vect_unknown_def_type;
5873 if (loop_vinfo)
5875 loop = LOOP_VINFO_LOOP (loop_vinfo);
5876 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5877 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5879 else
5880 vf = 1;
5882 /* Multiple types in SLP are handled by creating the appropriate number of
5883 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5884 case of SLP. */
5885 if (slp || PURE_SLP_STMT (stmt_info))
5886 ncopies = 1;
5887 else
5888 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5890 gcc_assert (ncopies >= 1);
5892 /* FORNOW. This restriction should be relaxed. */
5893 if (nested_in_vect_loop && ncopies > 1)
5895 if (dump_enabled_p ())
5896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5897 "multiple types in nested loop.\n");
5898 return false;
5901 /* Invalidate assumptions made by dependence analysis when vectorization
5902 on the unrolled body effectively re-orders stmts. */
5903 if (ncopies > 1
5904 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5905 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5906 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5908 if (dump_enabled_p ())
5909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5910 "cannot perform implicit CSE when unrolling "
5911 "with negative dependence distance\n");
5912 return false;
5915 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5916 return false;
5918 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5919 return false;
5921 /* Is vectorizable load? */
5922 if (!is_gimple_assign (stmt))
5923 return false;
5925 scalar_dest = gimple_assign_lhs (stmt);
5926 if (TREE_CODE (scalar_dest) != SSA_NAME)
5927 return false;
5929 code = gimple_assign_rhs_code (stmt);
5930 if (code != ARRAY_REF
5931 && code != BIT_FIELD_REF
5932 && code != INDIRECT_REF
5933 && code != COMPONENT_REF
5934 && code != IMAGPART_EXPR
5935 && code != REALPART_EXPR
5936 && code != MEM_REF
5937 && TREE_CODE_CLASS (code) != tcc_declaration)
5938 return false;
5940 if (!STMT_VINFO_DATA_REF (stmt_info))
5941 return false;
5943 elem_type = TREE_TYPE (vectype);
5944 mode = TYPE_MODE (vectype);
5946 /* FORNOW. In some cases can vectorize even if data-type not supported
5947 (e.g. - data copies). */
5948 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5950 if (dump_enabled_p ())
5951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5952 "Aligned load, but unsupported type.\n");
5953 return false;
5956 /* Check if the load is a part of an interleaving chain. */
5957 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5959 grouped_load = true;
5960 /* FORNOW */
5961 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5963 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5965 /* If this is single-element interleaving with an element distance
5966 that leaves unused vector loads around, punt - we would at least
5967 create very sub-optimal code in that case (and blow up memory;
5968 see PR65518). */
5969 if (first_stmt == stmt
5970 && !GROUP_NEXT_ELEMENT (stmt_info)
5971 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5973 if (dump_enabled_p ())
5974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5975 "single-element interleaving not supported "
5976 "for non-adjacent vector loads\n");
5977 return false;
5980 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
5981 slp_perm = true;
5983 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5984 if (!slp
5985 && !PURE_SLP_STMT (stmt_info)
5986 && !STMT_VINFO_STRIDED_P (stmt_info))
5988 if (vect_load_lanes_supported (vectype, group_size))
5989 load_lanes_p = true;
5990 else if (!vect_grouped_load_supported (vectype, group_size))
5991 return false;
5994 /* Invalidate assumptions made by dependence analysis when vectorization
5995 on the unrolled body effectively re-orders stmts. */
5996 if (!PURE_SLP_STMT (stmt_info)
5997 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5998 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5999 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6003 "cannot perform implicit CSE when performing "
6004 "group loads with negative dependence distance\n");
6005 return false;
6008 /* Similarly, when the stmt is a load that is both part of an SLP
6009 instance and a loop vectorized stmt via the same-dr mechanism,
6010 we have to give up. */
6011 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6012 && (STMT_SLP_TYPE (stmt_info)
6013 != STMT_SLP_TYPE (vinfo_for_stmt
6014 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6016 if (dump_enabled_p ())
6017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6018 "conflicting SLP types for CSEd load\n");
6019 return false;
6024 if (STMT_VINFO_GATHER_P (stmt_info))
6026 gimple def_stmt;
6027 tree def;
6028 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
6029 &gather_off, &gather_scale);
6030 gcc_assert (gather_decl);
6031 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
6032 &def_stmt, &def, &gather_dt,
6033 &gather_off_vectype))
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6037 "gather index use not simple.\n");
6038 return false;
6041 else if (STMT_VINFO_STRIDED_P (stmt_info))
6043 if ((grouped_load
6044 && (slp || PURE_SLP_STMT (stmt_info)))
6045 && (group_size > nunits
6046 || nunits % group_size != 0))
6048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6049 "unhandled strided group load\n");
6050 return false;
6053 else
6055 negative = tree_int_cst_compare (nested_in_vect_loop
6056 ? STMT_VINFO_DR_STEP (stmt_info)
6057 : DR_STEP (dr),
6058 size_zero_node) < 0;
6059 if (negative && ncopies > 1)
6061 if (dump_enabled_p ())
6062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 "multiple types with negative step.\n");
6064 return false;
6067 if (negative)
6069 if (grouped_load)
6071 if (dump_enabled_p ())
6072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6073 "negative step for group load not supported"
6074 "\n");
6075 return false;
6077 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6078 if (alignment_support_scheme != dr_aligned
6079 && alignment_support_scheme != dr_unaligned_supported)
6081 if (dump_enabled_p ())
6082 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6083 "negative step but alignment required.\n");
6084 return false;
6086 if (!perm_mask_for_reverse (vectype))
6088 if (dump_enabled_p ())
6089 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6090 "negative step and reversing not supported."
6091 "\n");
6092 return false;
6097 if (!vec_stmt) /* transformation not required. */
6099 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6100 /* The SLP costs are calculated during SLP analysis. */
6101 if (!PURE_SLP_STMT (stmt_info))
6102 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6103 NULL, NULL, NULL);
6104 return true;
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_NOTE, vect_location,
6109 "transform load. ncopies = %d\n", ncopies);
6111 /** Transform. **/
6113 ensure_base_align (stmt_info, dr);
6115 if (STMT_VINFO_GATHER_P (stmt_info))
6117 tree vec_oprnd0 = NULL_TREE, op;
6118 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6119 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6120 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6121 edge pe = loop_preheader_edge (loop);
6122 gimple_seq seq;
6123 basic_block new_bb;
6124 enum { NARROW, NONE, WIDEN } modifier;
6125 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6127 if (nunits == gather_off_nunits)
6128 modifier = NONE;
6129 else if (nunits == gather_off_nunits / 2)
6131 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6132 modifier = WIDEN;
6134 for (i = 0; i < gather_off_nunits; ++i)
6135 sel[i] = i | nunits;
6137 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6139 else if (nunits == gather_off_nunits * 2)
6141 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6142 modifier = NARROW;
6144 for (i = 0; i < nunits; ++i)
6145 sel[i] = i < gather_off_nunits
6146 ? i : i + nunits - gather_off_nunits;
6148 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6149 ncopies *= 2;
6151 else
6152 gcc_unreachable ();
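/* For example, with a 4-element result vector and an 8-element offset
vector (the WIDEN case) the mask built above is
{ 4, 5, 6, 7, 4, 5, 6, 7 }: every odd copy re-uses the high half of
the offset vector. With a 4-element result and a 2-element offset
vector (NARROW) the mask is { 0, 1, 4, 5 }, concatenating the low
halves of two consecutive gather results. */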
6154 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6155 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6156 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6157 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6158 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6159 scaletype = TREE_VALUE (arglist);
6160 gcc_checking_assert (types_compatible_p (srctype, rettype));
6162 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6164 ptr = fold_convert (ptrtype, gather_base);
6165 if (!is_gimple_min_invariant (ptr))
6167 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6168 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6169 gcc_assert (!new_bb);
6172 /* Currently we support only unconditional gather loads,
6173 so mask should be all ones. */
6174 if (TREE_CODE (masktype) == INTEGER_TYPE)
6175 mask = build_int_cst (masktype, -1);
6176 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6178 mask = build_int_cst (TREE_TYPE (masktype), -1);
6179 mask = build_vector_from_val (masktype, mask);
6180 mask = vect_init_vector (stmt, mask, masktype, NULL);
6182 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6184 REAL_VALUE_TYPE r;
6185 long tmp[6];
6186 for (j = 0; j < 6; ++j)
6187 tmp[j] = -1;
6188 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6189 mask = build_real (TREE_TYPE (masktype), r);
6190 mask = build_vector_from_val (masktype, mask);
6191 mask = vect_init_vector (stmt, mask, masktype, NULL);
6193 else
6194 gcc_unreachable ();
6196 scale = build_int_cst (scaletype, gather_scale);
6198 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6199 merge = build_int_cst (TREE_TYPE (rettype), 0);
6200 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6202 REAL_VALUE_TYPE r;
6203 long tmp[6];
6204 for (j = 0; j < 6; ++j)
6205 tmp[j] = 0;
6206 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6207 merge = build_real (TREE_TYPE (rettype), r);
6209 else
6210 gcc_unreachable ();
6211 merge = build_vector_from_val (rettype, merge);
6212 merge = vect_init_vector (stmt, merge, rettype, NULL);
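/* Each copy below is emitted as a call of the form

new_temp = GATHER_DECL (merge, ptr, vect_off, mask, scale);

where MERGE is the zero pass-through vector built above, MASK is all
ones (the load is unconditional), and PTR and SCALE come from the
address decomposition done by vect_check_gather. If the builtin's
return type differs from VECTYPE a VIEW_CONVERT_EXPR is added. */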
6214 prev_stmt_info = NULL;
6215 for (j = 0; j < ncopies; ++j)
6217 if (modifier == WIDEN && (j & 1))
6218 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6219 perm_mask, stmt, gsi);
6220 else if (j == 0)
6221 op = vec_oprnd0
6222 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6223 else
6224 op = vec_oprnd0
6225 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6227 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6229 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6230 == TYPE_VECTOR_SUBPARTS (idxtype));
6231 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6232 var = make_ssa_name (var);
6233 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6234 new_stmt
6235 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6236 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6237 op = var;
6240 new_stmt
6241 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6243 if (!useless_type_conversion_p (vectype, rettype))
6245 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6246 == TYPE_VECTOR_SUBPARTS (rettype));
6247 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6248 op = make_ssa_name (var, new_stmt);
6249 gimple_call_set_lhs (new_stmt, op);
6250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6251 var = make_ssa_name (vec_dest);
6252 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6253 new_stmt
6254 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6256 else
6258 var = make_ssa_name (vec_dest, new_stmt);
6259 gimple_call_set_lhs (new_stmt, var);
6262 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6264 if (modifier == NARROW)
6266 if ((j & 1) == 0)
6268 prev_res = var;
6269 continue;
6271 var = permute_vec_elements (prev_res, var,
6272 perm_mask, stmt, gsi);
6273 new_stmt = SSA_NAME_DEF_STMT (var);
6276 if (prev_stmt_info == NULL)
6277 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6278 else
6279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6280 prev_stmt_info = vinfo_for_stmt (new_stmt);
6282 return true;
6284 else if (STMT_VINFO_STRIDED_P (stmt_info))
6286 gimple_stmt_iterator incr_gsi;
6287 bool insert_after;
6288 gimple incr;
6289 tree offvar;
6290 tree ivstep;
6291 tree running_off;
6292 vec<constructor_elt, va_gc> *v = NULL;
6293 gimple_seq stmts = NULL;
6294 tree stride_base, stride_step, alias_off;
6296 gcc_assert (!nested_in_vect_loop);
6298 if (slp && grouped_load)
6299 first_dr = STMT_VINFO_DATA_REF
6300 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6301 else
6302 first_dr = dr;
6304 stride_base
6305 = fold_build_pointer_plus
6306 (DR_BASE_ADDRESS (first_dr),
6307 size_binop (PLUS_EXPR,
6308 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6309 convert_to_ptrofftype (DR_INIT (first_dr))));
6310 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6312 /* For a load with loop-invariant (but other than power-of-2)
6313 stride (i.e. not a grouped access) like so:
6315 for (i = 0; i < n; i += stride)
6316 ... = array[i];
6318 we generate a new induction variable and new accesses to
6319 form a new vector (or vectors, depending on ncopies):
6321 for (j = 0; ; j += VF*stride)
6322 tmp1 = array[j];
6323 tmp2 = array[j + stride];
6325 vectemp = {tmp1, tmp2, ...}
6328 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6329 build_int_cst (TREE_TYPE (stride_step), vf));
6331 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6333 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6334 loop, &incr_gsi, insert_after,
6335 &offvar, NULL);
6336 incr = gsi_stmt (incr_gsi);
6337 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6339 stride_step = force_gimple_operand (unshare_expr (stride_step),
6340 &stmts, true, NULL_TREE);
6341 if (stmts)
6342 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6344 prev_stmt_info = NULL;
6345 running_off = offvar;
6346 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6347 int nloads = nunits;
6348 tree ltype = TREE_TYPE (vectype);
6349 auto_vec<tree> dr_chain;
6350 if (slp)
6352 nloads = nunits / group_size;
6353 if (group_size < nunits)
6354 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6355 else
6356 ltype = vectype;
6357 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6358 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6359 if (slp_perm)
6360 dr_chain.create (ncopies);
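/* A sketch of the SLP case: with a group of two ints and a
four-element vectype, NLOADS is 2 and LTYPE is a two-element vector,
so each vector is assembled roughly as

tmp1 = *(vector(2) int *) off; off += stride;
tmp2 = *(vector(2) int *) off; off += stride;
vectemp = { tmp1, tmp2 }; */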
6362 for (j = 0; j < ncopies; j++)
6364 tree vec_inv;
6366 if (nloads > 1)
6368 vec_alloc (v, nloads);
6369 for (i = 0; i < nloads; i++)
6371 tree newref, newoff;
6372 gimple incr;
6373 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6375 newref = force_gimple_operand_gsi (gsi, newref, true,
6376 NULL_TREE, true,
6377 GSI_SAME_STMT);
6378 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6379 newoff = copy_ssa_name (running_off);
6380 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6381 running_off, stride_step);
6382 vect_finish_stmt_generation (stmt, incr, gsi);
6384 running_off = newoff;
6387 vec_inv = build_constructor (vectype, v);
6388 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6389 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6391 else
6393 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6394 build2 (MEM_REF, ltype,
6395 running_off, alias_off));
6396 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6398 tree newoff = copy_ssa_name (running_off);
6399 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6400 running_off, stride_step);
6401 vect_finish_stmt_generation (stmt, incr, gsi);
6403 running_off = newoff;
6406 if (slp)
6408 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6409 if (slp_perm)
6410 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6412 if (j == 0)
6413 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6414 else
6415 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6416 prev_stmt_info = vinfo_for_stmt (new_stmt);
6418 if (slp_perm)
6419 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6420 slp_node_instance, false);
6421 return true;
6424 if (grouped_load)
6426 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6427 if (slp
6428 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6429 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6430 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6432 /* Check if the chain of loads is already vectorized. */
6433 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6434 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6435 ??? But we can only do so if there is exactly one
6436 as we have no way to get at the rest. Leave the CSE
6437 opportunity alone.
6438 ??? With the group load eventually participating
6439 in multiple different permutations (having multiple
6440 slp nodes which refer to the same group) the CSE
6441 is even wrong code. See PR56270. */
6442 && !slp)
6444 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6445 return true;
6447 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6448 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6449 group_gap_adj = 0;
6451 /* VEC_NUM is the number of vect stmts to be created for this group. */
6452 if (slp)
6454 grouped_load = false;
6455 /* For SLP permutation support we need to load the whole group,
6456 not only the number of vector stmts the permutation result
6457 fits in. */
6458 if (slp_perm)
6459 vec_num = (group_size * vf + nunits - 1) / nunits;
6460 else
6461 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6462 group_gap_adj = vf * group_size - nunits * vec_num;
6464 else
6465 vec_num = group_size;
6467 else
6469 first_stmt = stmt;
6470 first_dr = dr;
6471 group_size = vec_num = 1;
6472 group_gap_adj = 0;
6475 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6476 gcc_assert (alignment_support_scheme);
6477 /* Targets with load-lane instructions must not require explicit
6478 realignment. */
6479 gcc_assert (!load_lanes_p
6480 || alignment_support_scheme == dr_aligned
6481 || alignment_support_scheme == dr_unaligned_supported);
6483 /* In case the vectorization factor (VF) is bigger than the number
6484 of elements that we can fit in a vectype (nunits), we have to generate
6485 more than one vector stmt - i.e., we need to "unroll" the
6486 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6487 from one copy of the vector stmt to the next, in the field
6488 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6489 stages to find the correct vector defs to be used when vectorizing
6490 stmts that use the defs of the current stmt. The example below
6491 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6492 need to create 4 vectorized stmts):
6494 before vectorization:
6495 RELATED_STMT VEC_STMT
6496 S1: x = memref - -
6497 S2: z = x + 1 - -
6499 step 1: vectorize stmt S1:
6500 We first create the vector stmt VS1_0, and, as usual, record a
6501 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6502 Next, we create the vector stmt VS1_1, and record a pointer to
6503 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6504 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6505 stmts and pointers:
6506 RELATED_STMT VEC_STMT
6507 VS1_0: vx0 = memref0 VS1_1 -
6508 VS1_1: vx1 = memref1 VS1_2 -
6509 VS1_2: vx2 = memref2 VS1_3 -
6510 VS1_3: vx3 = memref3 - -
6511 S1: x = load - VS1_0
6512 S2: z = x + 1 - -
6514 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6515 information we recorded in the RELATED_STMT field is used to vectorize
6516 stmt S2. */
6518 /* In case of interleaving (non-unit grouped access):
6520 S1: x2 = &base + 2
6521 S2: x0 = &base
6522 S3: x1 = &base + 1
6523 S4: x3 = &base + 3
6525 Vectorized loads are created in the order of memory accesses
6526 starting from the access of the first stmt of the chain:
6528 VS1: vx0 = &base
6529 VS2: vx1 = &base + vec_size*1
6530 VS3: vx3 = &base + vec_size*2
6531 VS4: vx4 = &base + vec_size*3
6533 Then permutation statements are generated:
6535 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6536 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6539 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6540 (the order of the data-refs in the output of vect_permute_load_chain
6541 corresponds to the order of scalar stmts in the interleaving chain - see
6542 the documentation of vect_permute_load_chain()).
6543 The generation of permutation stmts and recording them in
6544 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6546 In case of both multiple types and interleaving, the vector loads and
6547 permutation stmts above are created for every copy. The result vector
6548 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6549 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6551 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6552 on a target that supports unaligned accesses (dr_unaligned_supported)
6553 we generate the following code:
6554 p = initial_addr;
6555 indx = 0;
6556 loop {
6557 p = p + indx * vectype_size;
6558 vec_dest = *(p);
6559 indx = indx + 1;
6562 Otherwise, the data reference is potentially unaligned on a target that
6563 does not support unaligned accesses (dr_explicit_realign_optimized) -
6564 then generate the following code, in which the data in each iteration is
6565 obtained by two vector loads, one from the previous iteration, and one
6566 from the current iteration:
6567 p1 = initial_addr;
6568 msq_init = *(floor(p1))
6569 p2 = initial_addr + VS - 1;
6570 realignment_token = call target_builtin;
6571 indx = 0;
6572 loop {
6573 p2 = p2 + indx * vectype_size
6574 lsq = *(floor(p2))
6575 vec_dest = realign_load (msq, lsq, realignment_token)
6576 indx = indx + 1;
6577 msq = lsq;
6578 } */
6580 /* If the misalignment remains the same throughout the execution of the
6581 loop, we can create the init_addr and permutation mask at the loop
6582 preheader. Otherwise, it needs to be created inside the loop.
6583 This can only occur when vectorizing memory accesses in the inner-loop
6584 nested within an outer-loop that is being vectorized. */
6586 if (nested_in_vect_loop
6587 && (TREE_INT_CST_LOW (DR_STEP (dr))
6588 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6590 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6591 compute_in_loop = true;
6594 if ((alignment_support_scheme == dr_explicit_realign_optimized
6595 || alignment_support_scheme == dr_explicit_realign)
6596 && !compute_in_loop)
6598 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6599 alignment_support_scheme, NULL_TREE,
6600 &at_loop);
6601 if (alignment_support_scheme == dr_explicit_realign_optimized)
6603 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6604 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6605 size_one_node);
6608 else
6609 at_loop = loop;
6611 if (negative)
6612 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6614 if (load_lanes_p)
6615 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6616 else
6617 aggr_type = vectype;
6619 prev_stmt_info = NULL;
6620 for (j = 0; j < ncopies; j++)
6622 /* 1. Create the vector or array pointer update chain. */
6623 if (j == 0)
6625 bool simd_lane_access_p
6626 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6627 if (simd_lane_access_p
6628 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6629 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6630 && integer_zerop (DR_OFFSET (first_dr))
6631 && integer_zerop (DR_INIT (first_dr))
6632 && alias_sets_conflict_p (get_alias_set (aggr_type),
6633 get_alias_set (DR_REF (first_dr)))
6634 && (alignment_support_scheme == dr_aligned
6635 || alignment_support_scheme == dr_unaligned_supported))
6637 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6638 dataref_offset = build_int_cst (reference_alias_ptr_type
6639 (DR_REF (first_dr)), 0);
6640 inv_p = false;
6642 else
6643 dataref_ptr
6644 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6645 offset, &dummy, gsi, &ptr_incr,
6646 simd_lane_access_p, &inv_p,
6647 byte_offset);
6649 else if (dataref_offset)
6650 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6651 TYPE_SIZE_UNIT (aggr_type));
6652 else
6653 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6654 TYPE_SIZE_UNIT (aggr_type));
6656 if (grouped_load || slp_perm)
6657 dr_chain.create (vec_num);
6659 if (load_lanes_p)
6661 tree vec_array;
6663 vec_array = create_vector_array (vectype, vec_num);
6665 /* Emit:
6666 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6667 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6668 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6669 gimple_call_set_lhs (new_stmt, vec_array);
6670 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6672 /* Extract each vector into an SSA_NAME. */
6673 for (i = 0; i < vec_num; i++)
6675 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6676 vec_array, i);
6677 dr_chain.quick_push (new_temp);
6680 /* Record the mapping between SSA_NAMEs and statements. */
6681 vect_record_grouped_load_vectors (stmt, dr_chain);
6683 else
6685 for (i = 0; i < vec_num; i++)
6687 if (i > 0)
6688 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6689 stmt, NULL_TREE);
6691 /* 2. Create the vector-load in the loop. */
6692 switch (alignment_support_scheme)
6694 case dr_aligned:
6695 case dr_unaligned_supported:
6697 unsigned int align, misalign;
6699 data_ref
6700 = fold_build2 (MEM_REF, vectype, dataref_ptr,
6701 dataref_offset
6702 ? dataref_offset
6703 : build_int_cst (reference_alias_ptr_type
6704 (DR_REF (first_dr)), 0));
6705 align = TYPE_ALIGN_UNIT (vectype);
6706 if (alignment_support_scheme == dr_aligned)
6708 gcc_assert (aligned_access_p (first_dr));
6709 misalign = 0;
6711 else if (DR_MISALIGNMENT (first_dr) == -1)
6713 TREE_TYPE (data_ref)
6714 = build_aligned_type (TREE_TYPE (data_ref),
6715 TYPE_ALIGN (elem_type));
6716 align = TYPE_ALIGN_UNIT (elem_type);
6717 misalign = 0;
6719 else
6721 TREE_TYPE (data_ref)
6722 = build_aligned_type (TREE_TYPE (data_ref),
6723 TYPE_ALIGN (elem_type));
6724 misalign = DR_MISALIGNMENT (first_dr);
6726 if (dataref_offset == NULL_TREE
6727 && TREE_CODE (dataref_ptr) == SSA_NAME)
6728 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6729 align, misalign);
6730 break;
6732 case dr_explicit_realign:
6734 tree ptr, bump;
6736 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6738 if (compute_in_loop)
6739 msq = vect_setup_realignment (first_stmt, gsi,
6740 &realignment_token,
6741 dr_explicit_realign,
6742 dataref_ptr, NULL);
6744 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6745 ptr = copy_ssa_name (dataref_ptr);
6746 else
6747 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
6748 new_stmt = gimple_build_assign
6749 (ptr, BIT_AND_EXPR, dataref_ptr,
6750 build_int_cst
6751 (TREE_TYPE (dataref_ptr),
6752 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6754 data_ref
6755 = build2 (MEM_REF, vectype, ptr,
6756 build_int_cst (reference_alias_ptr_type
6757 (DR_REF (first_dr)), 0));
6758 vec_dest = vect_create_destination_var (scalar_dest,
6759 vectype);
6760 new_stmt = gimple_build_assign (vec_dest, data_ref);
6761 new_temp = make_ssa_name (vec_dest, new_stmt);
6762 gimple_assign_set_lhs (new_stmt, new_temp);
6763 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6764 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6765 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6766 msq = new_temp;
6768 bump = size_binop (MULT_EXPR, vs,
6769 TYPE_SIZE_UNIT (elem_type));
6770 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6771 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6772 new_stmt = gimple_build_assign
6773 (NULL_TREE, BIT_AND_EXPR, ptr,
6774 build_int_cst
6775 (TREE_TYPE (ptr),
6776 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6777 ptr = copy_ssa_name (ptr, new_stmt);
6778 gimple_assign_set_lhs (new_stmt, ptr);
6779 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6780 data_ref
6781 = build2 (MEM_REF, vectype, ptr,
6782 build_int_cst (reference_alias_ptr_type
6783 (DR_REF (first_dr)), 0));
6784 break;
6786 case dr_explicit_realign_optimized:
6787 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6788 new_temp = copy_ssa_name (dataref_ptr);
6789 else
6790 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
6791 new_stmt = gimple_build_assign
6792 (new_temp, BIT_AND_EXPR, dataref_ptr,
6793 build_int_cst
6794 (TREE_TYPE (dataref_ptr),
6795 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6796 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6797 data_ref
6798 = build2 (MEM_REF, vectype, new_temp,
6799 build_int_cst (reference_alias_ptr_type
6800 (DR_REF (first_dr)), 0));
6801 break;
6802 default:
6803 gcc_unreachable ();
6805 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6806 new_stmt = gimple_build_assign (vec_dest, data_ref);
6807 new_temp = make_ssa_name (vec_dest, new_stmt);
6808 gimple_assign_set_lhs (new_stmt, new_temp);
6809 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6811 /* 3. Handle explicit realignment if necessary/supported.
6812 Create in loop:
6813 vec_dest = realign_load (msq, lsq, realignment_token) */
6814 if (alignment_support_scheme == dr_explicit_realign_optimized
6815 || alignment_support_scheme == dr_explicit_realign)
6817 lsq = gimple_assign_lhs (new_stmt);
6818 if (!realignment_token)
6819 realignment_token = dataref_ptr;
6820 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6821 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6822 msq, lsq, realignment_token);
6823 new_temp = make_ssa_name (vec_dest, new_stmt);
6824 gimple_assign_set_lhs (new_stmt, new_temp);
6825 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6827 if (alignment_support_scheme == dr_explicit_realign_optimized)
6829 gcc_assert (phi);
6830 if (i == vec_num - 1 && j == ncopies - 1)
6831 add_phi_arg (phi, lsq,
6832 loop_latch_edge (containing_loop),
6833 UNKNOWN_LOCATION);
6834 msq = lsq;
6838 /* 4. Handle invariant-load. */
6839 if (inv_p && !bb_vinfo)
6841 gcc_assert (!grouped_load);
6842 /* If we have versioned for aliasing or the loop doesn't
6843 have any data dependencies that would preclude this,
6844 then we are sure this is a loop invariant load and
6845 thus we can insert it on the preheader edge. */
6846 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6847 && !nested_in_vect_loop
6848 && hoist_defs_of_uses (stmt, loop))
6850 if (dump_enabled_p ())
6852 dump_printf_loc (MSG_NOTE, vect_location,
6853 "hoisting out of the vectorized "
6854 "loop: ");
6855 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6857 tree tem = copy_ssa_name (scalar_dest);
6858 gsi_insert_on_edge_immediate
6859 (loop_preheader_edge (loop),
6860 gimple_build_assign (tem,
6861 unshare_expr
6862 (gimple_assign_rhs1 (stmt))));
6863 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6865 else
6867 gimple_stmt_iterator gsi2 = *gsi;
6868 gsi_next (&gsi2);
6869 new_temp = vect_init_vector (stmt, scalar_dest,
6870 vectype, &gsi2);
6872 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6873 set_vinfo_for_stmt (new_stmt,
6874 new_stmt_vec_info (new_stmt, loop_vinfo,
6875 bb_vinfo));
6878 if (negative)
6880 tree perm_mask = perm_mask_for_reverse (vectype);
6881 new_temp = permute_vec_elements (new_temp, new_temp,
6882 perm_mask, stmt, gsi);
6883 new_stmt = SSA_NAME_DEF_STMT (new_temp);
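/* E.g. for a four-element vector a negative-step access is handled by
biasing the initial address via OFFSET above, loading the vector
forwards, and reversing it here with the { 3, 2, 1, 0 } mask so the
elements match the scalar iteration order. */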
6886 /* Collect vector loads and later create their permutation in
6887 vect_transform_grouped_load (). */
6888 if (grouped_load || slp_perm)
6889 dr_chain.quick_push (new_temp);
6891 /* Store vector loads in the corresponding SLP_NODE. */
6892 if (slp && !slp_perm)
6893 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6895 /* Bump the vector pointer to account for a gap or for excess
6896 elements loaded for a permuted SLP load. */
6897 if (group_gap_adj != 0)
6899 bool ovf;
6900 tree bump
6901 = wide_int_to_tree (sizetype,
6902 wi::smul (TYPE_SIZE_UNIT (elem_type),
6903 group_gap_adj, &ovf));
6904 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6905 stmt, bump);
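/* For instance, with 4-byte elements and GROUP_GAP_ADJ == 2 the bump
above is 8 bytes, stepping over the two gap elements; for a permuted
SLP load GROUP_GAP_ADJ can be negative, moving the pointer back over
elements that were loaded in excess. */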
6909 if (slp && !slp_perm)
6910 continue;
6912 if (slp_perm)
6914 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6915 slp_node_instance, false))
6917 dr_chain.release ();
6918 return false;
6921 else
6923 if (grouped_load)
6925 if (!load_lanes_p)
6926 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6927 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6929 else
6931 if (j == 0)
6932 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6933 else
6934 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6935 prev_stmt_info = vinfo_for_stmt (new_stmt);
6938 dr_chain.release ();
6941 return true;
6944 /* Function vect_is_simple_cond.
6946 Input:
6947 LOOP - the loop that is being vectorized.
6948 COND - Condition that is checked for simple use.
6950 Output:
6951 *COMP_VECTYPE - the vector type for the comparison.
6953 Returns whether a COND can be vectorized. Checks whether the
6954 condition operands are supportable using vect_is_simple_use. */
6956 static bool
6957 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6958 bb_vec_info bb_vinfo, tree *comp_vectype)
6960 tree lhs, rhs;
6961 tree def;
6962 enum vect_def_type dt;
6963 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6965 if (!COMPARISON_CLASS_P (cond))
6966 return false;
6968 lhs = TREE_OPERAND (cond, 0);
6969 rhs = TREE_OPERAND (cond, 1);
6971 if (TREE_CODE (lhs) == SSA_NAME)
6973 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6974 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6975 &lhs_def_stmt, &def, &dt, &vectype1))
6976 return false;
6978 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6979 && TREE_CODE (lhs) != FIXED_CST)
6980 return false;
6982 if (TREE_CODE (rhs) == SSA_NAME)
6984 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6985 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6986 &rhs_def_stmt, &def, &dt, &vectype2))
6987 return false;
6989 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6990 && TREE_CODE (rhs) != FIXED_CST)
6991 return false;
6993 *comp_vectype = vectype1 ? vectype1 : vectype2;
6994 return true;
6997 /* vectorizable_condition.
6999 Check if STMT is conditional modify expression that can be vectorized.
7000 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7001 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7002 at GSI.
7004 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7005 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7006 the else clause if it is 2).
7008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
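/* As a concrete sketch, a scalar statement

x_5 = a_1 < b_2 ? c_3 : d_4;

is vectorized into a single

vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

where the embedded comparison is given the signed integer vector type
chosen for VEC_CMP_TYPE in the function below. */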
7010 bool
7011 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
7012 gimple *vec_stmt, tree reduc_def, int reduc_index,
7013 slp_tree slp_node)
7015 tree scalar_dest = NULL_TREE;
7016 tree vec_dest = NULL_TREE;
7017 tree cond_expr, then_clause, else_clause;
7018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7019 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7020 tree comp_vectype = NULL_TREE;
7021 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7022 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7023 tree vec_compare, vec_cond_expr;
7024 tree new_temp;
7025 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7026 tree def;
7027 enum vect_def_type dt, dts[4];
7028 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7029 int ncopies;
7030 enum tree_code code;
7031 stmt_vec_info prev_stmt_info = NULL;
7032 int i, j;
7033 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7034 vec<tree> vec_oprnds0 = vNULL;
7035 vec<tree> vec_oprnds1 = vNULL;
7036 vec<tree> vec_oprnds2 = vNULL;
7037 vec<tree> vec_oprnds3 = vNULL;
7038 tree vec_cmp_type;
7040 if (slp_node || PURE_SLP_STMT (stmt_info))
7041 ncopies = 1;
7042 else
7043 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7045 gcc_assert (ncopies >= 1);
7046 if (reduc_index && ncopies > 1)
7047 return false; /* FORNOW */
7049 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7050 return false;
7052 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7053 return false;
7055 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7056 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7057 && reduc_def))
7058 return false;
7060 /* FORNOW: not yet supported. */
7061 if (STMT_VINFO_LIVE_P (stmt_info))
7063 if (dump_enabled_p ())
7064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7065 "value used after loop.\n");
7066 return false;
7069 /* Is vectorizable conditional operation? */
7070 if (!is_gimple_assign (stmt))
7071 return false;
7073 code = gimple_assign_rhs_code (stmt);
7075 if (code != COND_EXPR)
7076 return false;
7078 cond_expr = gimple_assign_rhs1 (stmt);
7079 then_clause = gimple_assign_rhs2 (stmt);
7080 else_clause = gimple_assign_rhs3 (stmt);
7082 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7083 &comp_vectype)
7084 || !comp_vectype)
7085 return false;
7087 if (TREE_CODE (then_clause) == SSA_NAME)
7089 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7090 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7091 &then_def_stmt, &def, &dt))
7092 return false;
7094 else if (TREE_CODE (then_clause) != INTEGER_CST
7095 && TREE_CODE (then_clause) != REAL_CST
7096 && TREE_CODE (then_clause) != FIXED_CST)
7097 return false;
7099 if (TREE_CODE (else_clause) == SSA_NAME)
7101 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7102 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7103 &else_def_stmt, &def, &dt))
7104 return false;
7106 else if (TREE_CODE (else_clause) != INTEGER_CST
7107 && TREE_CODE (else_clause) != REAL_CST
7108 && TREE_CODE (else_clause) != FIXED_CST)
7109 return false;
7111 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7112 /* The result of a vector comparison should be of signed integer type. */
7113 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7114 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7115 if (vec_cmp_type == NULL_TREE)
7116 return false;
7118 if (!vec_stmt)
7120 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7121 return expand_vec_cond_expr_p (vectype, comp_vectype);
7124 /* Transform. */
7126 if (!slp_node)
7128 vec_oprnds0.create (1);
7129 vec_oprnds1.create (1);
7130 vec_oprnds2.create (1);
7131 vec_oprnds3.create (1);
7134 /* Handle def. */
7135 scalar_dest = gimple_assign_lhs (stmt);
7136 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7138 /* Handle cond expr. */
7139 for (j = 0; j < ncopies; j++)
7141 gassign *new_stmt = NULL;
7142 if (j == 0)
7144 if (slp_node)
7146 auto_vec<tree, 4> ops;
7147 auto_vec<vec<tree>, 4> vec_defs;
7149 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7150 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7151 ops.safe_push (then_clause);
7152 ops.safe_push (else_clause);
7153 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7154 vec_oprnds3 = vec_defs.pop ();
7155 vec_oprnds2 = vec_defs.pop ();
7156 vec_oprnds1 = vec_defs.pop ();
7157 vec_oprnds0 = vec_defs.pop ();
7159 ops.release ();
7160 vec_defs.release ();
7162 else
7164 gimple gtemp;
7165 vec_cond_lhs =
7166 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7167 stmt, NULL);
7168 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7169 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7171 vec_cond_rhs =
7172 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7173 stmt, NULL);
7174 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7175 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7176 if (reduc_index == 1)
7177 vec_then_clause = reduc_def;
7178 else
7180 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7181 stmt, NULL);
7182 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7183 NULL, &gtemp, &def, &dts[2]);
7185 if (reduc_index == 2)
7186 vec_else_clause = reduc_def;
7187 else
7189 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7190 stmt, NULL);
7191 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7192 NULL, &gtemp, &def, &dts[3]);
7196 else
7198 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7199 vec_oprnds0.pop ());
7200 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7201 vec_oprnds1.pop ());
7202 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7203 vec_oprnds2.pop ());
7204 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7205 vec_oprnds3.pop ());
7208 if (!slp_node)
7210 vec_oprnds0.quick_push (vec_cond_lhs);
7211 vec_oprnds1.quick_push (vec_cond_rhs);
7212 vec_oprnds2.quick_push (vec_then_clause);
7213 vec_oprnds3.quick_push (vec_else_clause);
7216 /* Arguments are ready. Create the new vector stmt. */
7217 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7219 vec_cond_rhs = vec_oprnds1[i];
7220 vec_then_clause = vec_oprnds2[i];
7221 vec_else_clause = vec_oprnds3[i];
7223 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7224 vec_cond_lhs, vec_cond_rhs);
7225 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7226 vec_compare, vec_then_clause, vec_else_clause);
7228 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7229 new_temp = make_ssa_name (vec_dest, new_stmt);
7230 gimple_assign_set_lhs (new_stmt, new_temp);
7231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7232 if (slp_node)
7233 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7236 if (slp_node)
7237 continue;
7239 if (j == 0)
7240 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7241 else
7242 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7244 prev_stmt_info = vinfo_for_stmt (new_stmt);
7247 vec_oprnds0.release ();
7248 vec_oprnds1.release ();
7249 vec_oprnds2.release ();
7250 vec_oprnds3.release ();
7252 return true;
7256 /* Make sure the statement is vectorizable. */
7258 bool
7259 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7261 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7262 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7263 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7264 bool ok;
7265 tree scalar_type, vectype;
7266 gimple pattern_stmt;
7267 gimple_seq pattern_def_seq;
7269 if (dump_enabled_p ())
7271 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7272 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7275 if (gimple_has_volatile_ops (stmt))
7277 if (dump_enabled_p ())
7278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7279 "not vectorized: stmt has volatile operands\n");
7281 return false;
7284 /* Skip stmts that do not need to be vectorized. In loops this is expected
7285 to include:
7286 - the COND_EXPR which is the loop exit condition
7287 - any LABEL_EXPRs in the loop
7288 - computations that are used only for array indexing or loop control.
7289 In basic blocks we only analyze statements that are a part of some SLP
7290 instance, therefore, all the statements are relevant.
7292 A pattern statement needs to be analyzed instead of the original statement
7293 if the original statement is not relevant. Otherwise, we analyze both
7294 statements. In basic blocks we are called from some SLP instance
7295 traversal; don't analyze pattern stmts instead, as the pattern stmts
7296 will already be part of an SLP instance. */
7298 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7299 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7300 && !STMT_VINFO_LIVE_P (stmt_info))
7302 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7303 && pattern_stmt
7304 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7305 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7307 /* Analyze PATTERN_STMT instead of the original stmt. */
7308 stmt = pattern_stmt;
7309 stmt_info = vinfo_for_stmt (pattern_stmt);
7310 if (dump_enabled_p ())
7312 dump_printf_loc (MSG_NOTE, vect_location,
7313 "==> examining pattern statement: ");
7314 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7317 else
7319 if (dump_enabled_p ())
7320 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7322 return true;
7325 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7326 && node == NULL
7327 && pattern_stmt
7328 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7329 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7331 /* Analyze PATTERN_STMT too. */
7332 if (dump_enabled_p ())
7334 dump_printf_loc (MSG_NOTE, vect_location,
7335 "==> examining pattern statement: ");
7336 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7339 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7340 return false;
7343 if (is_pattern_stmt_p (stmt_info)
7344 && node == NULL
7345 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7347 gimple_stmt_iterator si;
7349 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7351 gimple pattern_def_stmt = gsi_stmt (si);
7352 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7353 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7355 /* Analyze def stmt of STMT if it's a pattern stmt. */
7356 if (dump_enabled_p ())
7358 dump_printf_loc (MSG_NOTE, vect_location,
7359 "==> examining pattern def statement: ");
7360 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7363 if (!vect_analyze_stmt (pattern_def_stmt,
7364 need_to_vectorize, node))
7365 return false;
7370 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7372 case vect_internal_def:
7373 break;
7375 case vect_reduction_def:
7376 case vect_nested_cycle:
7377 gcc_assert (!bb_vinfo
7378 && (relevance == vect_used_in_outer
7379 || relevance == vect_used_in_outer_by_reduction
7380 || relevance == vect_used_by_reduction
7381 || relevance == vect_unused_in_scope));
7382 break;
7384 case vect_induction_def:
7385 case vect_constant_def:
7386 case vect_external_def:
7387 case vect_unknown_def_type:
7388 default:
7389 gcc_unreachable ();
7392 if (bb_vinfo)
7394 gcc_assert (PURE_SLP_STMT (stmt_info));
7396 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7397 if (dump_enabled_p ())
7399 dump_printf_loc (MSG_NOTE, vect_location,
7400 "get vectype for scalar type: ");
7401 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7402 dump_printf (MSG_NOTE, "\n");
7405 vectype = get_vectype_for_scalar_type (scalar_type);
7406 if (!vectype)
7408 if (dump_enabled_p ())
7410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7411 "not SLPed: unsupported data-type ");
7412 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7413 scalar_type);
7414 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7416 return false;
7419 if (dump_enabled_p ())
7421 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7422 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7423 dump_printf (MSG_NOTE, "\n");
7426 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7429 if (STMT_VINFO_RELEVANT_P (stmt_info))
7431 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7432 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7433 || (is_gimple_call (stmt)
7434 && gimple_call_lhs (stmt) == NULL_TREE));
7435 *need_to_vectorize = true;
7438 if (PURE_SLP_STMT (stmt_info) && !node)
7440 dump_printf_loc (MSG_NOTE, vect_location,
7441 "handled only by SLP analysis\n");
7442 return true;
7445 ok = true;
7446 if (!bb_vinfo
7447 && (STMT_VINFO_RELEVANT_P (stmt_info)
7448 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7449 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7450 || vectorizable_conversion (stmt, NULL, NULL, node)
7451 || vectorizable_shift (stmt, NULL, NULL, node)
7452 || vectorizable_operation (stmt, NULL, NULL, node)
7453 || vectorizable_assignment (stmt, NULL, NULL, node)
7454 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7455 || vectorizable_call (stmt, NULL, NULL, node)
7456 || vectorizable_store (stmt, NULL, NULL, node)
7457 || vectorizable_reduction (stmt, NULL, NULL, node)
7458 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7459 else
7461 if (bb_vinfo)
7462 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7463 || vectorizable_conversion (stmt, NULL, NULL, node)
7464 || vectorizable_shift (stmt, NULL, NULL, node)
7465 || vectorizable_operation (stmt, NULL, NULL, node)
7466 || vectorizable_assignment (stmt, NULL, NULL, node)
7467 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7468 || vectorizable_call (stmt, NULL, NULL, node)
7469 || vectorizable_store (stmt, NULL, NULL, node)
7470 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7473 if (!ok)
7475 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7478 "not vectorized: relevant stmt not ");
7479 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7480 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7483 return false;
7486 if (bb_vinfo)
7487 return true;
7489 /* Stmts that are (also) "live" (i.e., that are used out of the loop)
7490 need extra handling, except for vectorizable reductions. */
7491 if (STMT_VINFO_LIVE_P (stmt_info)
7492 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7493 ok = vectorizable_live_operation (stmt, NULL, NULL);
7495 if (!ok)
7497 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7500 "not vectorized: live stmt not ");
7501 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7502 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7505 return false;
7508 return true;
7512 /* Function vect_transform_stmt.
7514 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7516 bool
7517 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7518 bool *grouped_store, slp_tree slp_node,
7519 slp_instance slp_node_instance)
7521 bool is_store = false;
7522 gimple vec_stmt = NULL;
7523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7524 bool done;
7526 switch (STMT_VINFO_TYPE (stmt_info))
7528 case type_demotion_vec_info_type:
7529 case type_promotion_vec_info_type:
7530 case type_conversion_vec_info_type:
7531 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7532 gcc_assert (done);
7533 break;
7535 case induc_vec_info_type:
7536 gcc_assert (!slp_node);
7537 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7538 gcc_assert (done);
7539 break;
7541 case shift_vec_info_type:
7542 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7543 gcc_assert (done);
7544 break;
7546 case op_vec_info_type:
7547 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7548 gcc_assert (done);
7549 break;
7551 case assignment_vec_info_type:
7552 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7553 gcc_assert (done);
7554 break;
7556 case load_vec_info_type:
7557 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7558 slp_node_instance);
7559 gcc_assert (done);
7560 break;
7562 case store_vec_info_type:
7563 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7564 gcc_assert (done);
7565 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7567 /* In case of interleaving, the whole chain is vectorized when the
7568 last store in the chain is reached. Store stmts before the last
7569          one are skipped, and their stmt_vec_info shouldn't be freed
7570 meanwhile. */
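	/* For example (illustrative), for an interleaved store group

	     a[2*i]   = x;
	     a[2*i+1] = y;

	   only the last store of the group triggers generation of the
	   permuted vector stores; the earlier ones are merely marked here
	   and their stmt_vec_infos are kept until vect_remove_stores
	   releases the whole chain.  */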
7571 *grouped_store = true;
7572 if (STMT_VINFO_VEC_STMT (stmt_info))
7573 is_store = true;
7575 else
7576 is_store = true;
7577 break;
7579 case condition_vec_info_type:
7580 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7581 gcc_assert (done);
7582 break;
7584 case call_vec_info_type:
7585 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7586 stmt = gsi_stmt (*gsi);
7587 if (is_gimple_call (stmt)
7588 && gimple_call_internal_p (stmt)
7589 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7590 is_store = true;
7591 break;
7593 case call_simd_clone_vec_info_type:
7594 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7595 stmt = gsi_stmt (*gsi);
7596 break;
7598 case reduc_vec_info_type:
7599 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7600 gcc_assert (done);
7601 break;
7603 default:
7604 if (!STMT_VINFO_LIVE_P (stmt_info))
7606 if (dump_enabled_p ())
7607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7608 "stmt not supported.\n");
7609 gcc_unreachable ();
7613 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7614 is being vectorized, but outside the immediately enclosing loop. */
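  /* E.g. (illustrative, outer-loop vectorization):

       for (i = 0; i < n; i++)        <-- outer loop, being vectorized
         {
           for (j = 0; j < m; j++)    <-- nested inner loop
             s = ...;
           b[i] = s;                  <-- outer-loop use of the inner def S
         }

     The vectorized def of S has to be recorded on the inner-loop exit phi
     so that vectorizing B[I] = S can find it.  */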
7615 if (vec_stmt
7616 && STMT_VINFO_LOOP_VINFO (stmt_info)
7617 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7618 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7619 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7620 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7621 || STMT_VINFO_RELEVANT (stmt_info) ==
7622 vect_used_in_outer_by_reduction))
7624 struct loop *innerloop = LOOP_VINFO_LOOP (
7625 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7626 imm_use_iterator imm_iter;
7627 use_operand_p use_p;
7628 tree scalar_dest;
7629 gimple exit_phi;
7631 if (dump_enabled_p ())
7632 dump_printf_loc (MSG_NOTE, vect_location,
7633 "Record the vdef for outer-loop vectorization.\n");
7635       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7636 (to be used when vectorizing outer-loop stmts that use the DEF of
7637 STMT). */
7638 if (gimple_code (stmt) == GIMPLE_PHI)
7639 scalar_dest = PHI_RESULT (stmt);
7640 else
7641 scalar_dest = gimple_assign_lhs (stmt);
7643 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7645 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7647 exit_phi = USE_STMT (use_p);
7648 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7653 /* Handle stmts whose DEF is used outside the loop-nest that is
7654 being vectorized. */
7655 if (STMT_VINFO_LIVE_P (stmt_info)
7656 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7658 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7659 gcc_assert (done);
7662 if (vec_stmt)
7663 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7665 return is_store;
7669 /* Remove a group of stores (for SLP or interleaving), free their
7670 stmt_vec_info. */
7672 void
7673 vect_remove_stores (gimple first_stmt)
7675 gimple next = first_stmt;
7676 gimple tmp;
7677 gimple_stmt_iterator next_si;
7679 while (next)
7681 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7683 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7684 if (is_pattern_stmt_p (stmt_info))
7685 next = STMT_VINFO_RELATED_STMT (stmt_info);
7686 /* Free the attached stmt_vec_info and remove the stmt. */
7687 next_si = gsi_for_stmt (next);
7688 unlink_stmt_vdef (next);
7689 gsi_remove (&next_si, true);
7690 release_defs (next);
7691 free_stmt_vec_info (next);
7692 next = tmp;
7697 /* Function new_stmt_vec_info.
7699 Create and initialize a new stmt_vec_info struct for STMT. */
7701 stmt_vec_info
7702 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7703 bb_vec_info bb_vinfo)
7705 stmt_vec_info res;
7706 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7708 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7709 STMT_VINFO_STMT (res) = stmt;
7710 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7711 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7712 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7713 STMT_VINFO_LIVE_P (res) = false;
7714 STMT_VINFO_VECTYPE (res) = NULL;
7715 STMT_VINFO_VEC_STMT (res) = NULL;
7716 STMT_VINFO_VECTORIZABLE (res) = true;
7717 STMT_VINFO_IN_PATTERN_P (res) = false;
7718 STMT_VINFO_RELATED_STMT (res) = NULL;
7719 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7720 STMT_VINFO_DATA_REF (res) = NULL;
7722 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7723 STMT_VINFO_DR_OFFSET (res) = NULL;
7724 STMT_VINFO_DR_INIT (res) = NULL;
7725 STMT_VINFO_DR_STEP (res) = NULL;
7726 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7728 if (gimple_code (stmt) == GIMPLE_PHI
7729 && is_loop_header_bb_p (gimple_bb (stmt)))
7730 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7731 else
7732 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7734 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7735 STMT_SLP_TYPE (res) = loop_vect;
7736 GROUP_FIRST_ELEMENT (res) = NULL;
7737 GROUP_NEXT_ELEMENT (res) = NULL;
7738 GROUP_SIZE (res) = 0;
7739 GROUP_STORE_COUNT (res) = 0;
7740 GROUP_GAP (res) = 0;
7741 GROUP_SAME_DR_STMT (res) = NULL;
7743 return res;
7747 /* Create a hash table for stmt_vec_info. */
7749 void
7750 init_stmt_vec_info_vec (void)
7752 gcc_assert (!stmt_vec_info_vec.exists ());
7753 stmt_vec_info_vec.create (50);
7757 /* Free hash table for stmt_vec_info. */
7759 void
7760 free_stmt_vec_info_vec (void)
7762 unsigned int i;
7763 vec_void_p info;
7764 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7765 if (info != NULL)
7766 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7767 gcc_assert (stmt_vec_info_vec.exists ());
7768 stmt_vec_info_vec.release ();
7772 /* Free stmt vectorization related info. */
7774 void
7775 free_stmt_vec_info (gimple stmt)
7777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7779 if (!stmt_info)
7780 return;
7782 /* Check if this statement has a related "pattern stmt"
7783 (introduced by the vectorizer during the pattern recognition
7784 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7785 too. */
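  /* For instance (illustrative), the pattern recognizer may have replaced
     a statement like

       c = (int) a * (int) b;        with A and B of type short

     by a widening-multiplication pattern stmt (WIDEN_MULT_EXPR), possibly
     together with helper statements recorded in its pattern def sequence.
     The stmt_vec_infos of the pattern stmt and of those helpers are
     released here along with the original statement's.  */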
7786 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7788 stmt_vec_info patt_info
7789 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7790 if (patt_info)
7792 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7793 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7794 gimple_set_bb (patt_stmt, NULL);
7795 tree lhs = gimple_get_lhs (patt_stmt);
7796 if (TREE_CODE (lhs) == SSA_NAME)
7797 release_ssa_name (lhs);
7798 if (seq)
7800 gimple_stmt_iterator si;
7801 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7803 gimple seq_stmt = gsi_stmt (si);
7804 gimple_set_bb (seq_stmt, NULL);
7805               lhs = gimple_get_lhs (seq_stmt);
7806 if (TREE_CODE (lhs) == SSA_NAME)
7807 release_ssa_name (lhs);
7808 free_stmt_vec_info (seq_stmt);
7811 free_stmt_vec_info (patt_stmt);
7815 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7816 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7817 set_vinfo_for_stmt (stmt, NULL);
7818 free (stmt_info);
7822 /* Function get_vectype_for_scalar_type_and_size.
7824 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7825 by the target. */
7827 static tree
7828 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7830 machine_mode inner_mode = TYPE_MODE (scalar_type);
7831 machine_mode simd_mode;
7832 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7833 int nunits;
7834 tree vectype;
7836 if (nbytes == 0)
7837 return NULL_TREE;
7839 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7840 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7841 return NULL_TREE;
7843 /* For vector types of elements whose mode precision doesn't
7844      match their type's precision we use an element type of mode
7845 precision. The vectorization routines will have to make sure
7846 they support the proper result truncation/extension.
7847 We also make sure to build vector types with INTEGER_TYPE
7848 component type only. */
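  /* E.g. a 3-bit bit-field type still has QImode, so its vector element
     type becomes the 8-bit nonstandard integer type; likewise an
     ENUMERAL_TYPE or BOOLEAN_TYPE component is replaced by an INTEGER_TYPE
     of the same mode width here.  */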
7849 if (INTEGRAL_TYPE_P (scalar_type)
7850 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7851 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7852 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7853 TYPE_UNSIGNED (scalar_type));
7855 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7856 When the component mode passes the above test simply use a type
7857 corresponding to that mode. The theory is that any use that
7858 would cause problems with this will disable vectorization anyway. */
7859 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7860 && !INTEGRAL_TYPE_P (scalar_type))
7861 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7863 /* We can't build a vector type of elements with alignment bigger than
7864 their size. */
7865 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7866 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7867 TYPE_UNSIGNED (scalar_type));
7869   /* If we fell back to using the mode, fail if there was
7870 no scalar type for it. */
7871 if (scalar_type == NULL_TREE)
7872 return NULL_TREE;
7874 /* If no size was supplied use the mode the target prefers. Otherwise
7875 lookup a vector mode of the specified size. */
7876 if (size == 0)
7877 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7878 else
7879 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7880 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7881 if (nunits <= 1)
7882 return NULL_TREE;
7884 vectype = build_vector_type (scalar_type, nunits);
7886 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7887 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7888 return NULL_TREE;
7890 return vectype;
7893 unsigned int current_vector_size;
7895 /* Function get_vectype_for_scalar_type.
7897 Returns the vector type corresponding to SCALAR_TYPE as supported
7898 by the target. */
7900 tree
7901 get_vectype_for_scalar_type (tree scalar_type)
7903 tree vectype;
7904 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7905 current_vector_size);
7906 if (vectype
7907 && current_vector_size == 0)
7908 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7909 return vectype;
7912 /* Function get_same_sized_vectype
7914    Returns a vector type corresponding to SCALAR_TYPE with the same size
7915    as VECTOR_TYPE, if supported by the target.  */
7917 tree
7918 get_same_sized_vectype (tree scalar_type, tree vector_type)
7920 return get_vectype_for_scalar_type_and_size
7921 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7924 /* Function vect_is_simple_use.
7926 Input:
7927 LOOP_VINFO - the vect info of the loop that is being vectorized.
7928 BB_VINFO - the vect info of the basic block that is being vectorized.
7929 OPERAND - operand of STMT in the loop or bb.
7930 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7932 Returns whether a stmt with OPERAND can be vectorized.
7933 For loops, supportable operands are constants, loop invariants, and operands
7934 that are defined by the current iteration of the loop. Unsupportable
7935 operands are those that are defined by a previous iteration of the loop (as
7936 is the case in reduction/induction computations).
7937 For basic blocks, supportable operands are constants and bb invariants.
7938 For now, operands defined outside the basic block are not supported. */
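/* E.g. (illustrative), when analyzing a[i] = b[i] + x in a loop being
   vectorized, the operand X (loop invariant) yields vect_external_def,
   the load result feeding the addition yields vect_internal_def, and a
   literal constant yields vect_constant_def with *DEF left NULL_TREE.  */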
7940 bool
7941 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7942 bb_vec_info bb_vinfo, gimple *def_stmt,
7943 tree *def, enum vect_def_type *dt)
7945 *def_stmt = NULL;
7946 *def = NULL_TREE;
7947 *dt = vect_unknown_def_type;
7949 if (dump_enabled_p ())
7951 dump_printf_loc (MSG_NOTE, vect_location,
7952 "vect_is_simple_use: operand ");
7953 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7954 dump_printf (MSG_NOTE, "\n");
7957 if (CONSTANT_CLASS_P (operand))
7959 *dt = vect_constant_def;
7960 return true;
7963 if (is_gimple_min_invariant (operand))
7965 *def = operand;
7966 *dt = vect_external_def;
7967 return true;
7970 if (TREE_CODE (operand) != SSA_NAME)
7972 if (dump_enabled_p ())
7973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7974 "not ssa-name.\n");
7975 return false;
7978 if (SSA_NAME_IS_DEFAULT_DEF (operand))
7980 *def = operand;
7981 *dt = vect_external_def;
7982 return true;
7985 *def_stmt = SSA_NAME_DEF_STMT (operand);
7986 if (dump_enabled_p ())
7988 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7989 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7992 basic_block bb = gimple_bb (*def_stmt);
7993 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
7994 || (bb_vinfo
7995 && (bb != BB_VINFO_BB (bb_vinfo)
7996 || gimple_code (*def_stmt) == GIMPLE_PHI)))
7997 *dt = vect_external_def;
7998 else
8000 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8001 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
8002 *dt = vect_external_def;
8003 else
8004 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8007 if (dump_enabled_p ())
8009 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8010 switch (*dt)
8012 case vect_uninitialized_def:
8013 dump_printf (MSG_NOTE, "uninitialized\n");
8014 break;
8015 case vect_constant_def:
8016 dump_printf (MSG_NOTE, "constant\n");
8017 break;
8018 case vect_external_def:
8019 dump_printf (MSG_NOTE, "external\n");
8020 break;
8021 case vect_internal_def:
8022 dump_printf (MSG_NOTE, "internal\n");
8023 break;
8024 case vect_induction_def:
8025 dump_printf (MSG_NOTE, "induction\n");
8026 break;
8027 case vect_reduction_def:
8028 dump_printf (MSG_NOTE, "reduction\n");
8029 break;
8030 case vect_double_reduction_def:
8031 dump_printf (MSG_NOTE, "double reduction\n");
8032 break;
8033 case vect_nested_cycle:
8034 dump_printf (MSG_NOTE, "nested cycle\n");
8035 break;
8036 case vect_unknown_def_type:
8037 dump_printf (MSG_NOTE, "unknown\n");
8038 break;
8042 if (*dt == vect_unknown_def_type
8043 || (stmt
8044 && *dt == vect_double_reduction_def
8045 && gimple_code (stmt) != GIMPLE_PHI))
8047 if (dump_enabled_p ())
8048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8049 "Unsupported pattern.\n");
8050 return false;
8053 switch (gimple_code (*def_stmt))
8055 case GIMPLE_PHI:
8056 *def = gimple_phi_result (*def_stmt);
8057 break;
8059 case GIMPLE_ASSIGN:
8060 *def = gimple_assign_lhs (*def_stmt);
8061 break;
8063 case GIMPLE_CALL:
8064 *def = gimple_call_lhs (*def_stmt);
8065 if (*def != NULL)
8066 break;
8067 /* FALLTHRU */
8068 default:
8069 if (dump_enabled_p ())
8070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8071 "unsupported defining stmt:\n");
8072 return false;
8075 return true;
8078 /* Function vect_is_simple_use_1.
8080    Same as vect_is_simple_use but also determines the vector operand
8081 type of OPERAND and stores it to *VECTYPE. If the definition of
8082 OPERAND is vect_uninitialized_def, vect_constant_def or
8083 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8084    is responsible for computing the best suited vector type for the
8085 scalar operand. */
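/* E.g. (illustrative): for an operand defined by another vectorizable
   statement in the loop, *VECTYPE is taken from that definition's
   stmt_vec_info (say a V4SImode vector type for an int computation with
   VF 4), whereas for a constant or loop-invariant operand *VECTYPE is
   NULL_TREE and the caller picks a vector type matching the use.  */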
8087 bool
8088 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8089 bb_vec_info bb_vinfo, gimple *def_stmt,
8090 tree *def, enum vect_def_type *dt, tree *vectype)
8092 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8093 def, dt))
8094 return false;
8096 /* Now get a vector type if the def is internal, otherwise supply
8097 NULL_TREE and leave it up to the caller to figure out a proper
8098 type for the use stmt. */
8099 if (*dt == vect_internal_def
8100 || *dt == vect_induction_def
8101 || *dt == vect_reduction_def
8102 || *dt == vect_double_reduction_def
8103 || *dt == vect_nested_cycle)
8105 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8107 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8108 && !STMT_VINFO_RELEVANT (stmt_info)
8109 && !STMT_VINFO_LIVE_P (stmt_info))
8110 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8112 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8113 gcc_assert (*vectype != NULL_TREE);
8115 else if (*dt == vect_uninitialized_def
8116 || *dt == vect_constant_def
8117 || *dt == vect_external_def)
8118 *vectype = NULL_TREE;
8119 else
8120 gcc_unreachable ();
8122 return true;
8126 /* Function supportable_widening_operation
8128 Check whether an operation represented by the code CODE is a
8129 widening operation that is supported by the target platform in
8130 vector form (i.e., when operating on arguments of type VECTYPE_IN
8131 producing a result of type VECTYPE_OUT).
8133    Widening operations we currently support are NOP (CONVERT), FLOAT,
8134    WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations are supported
8135 by the target platform either directly (via vector tree-codes), or via
8136 target builtins.
8138 Output:
8139 - CODE1 and CODE2 are codes of vector operations to be used when
8140 vectorizing the operation, if available.
8141 - MULTI_STEP_CVT determines the number of required intermediate steps in
8142 case of multi-step conversion (like char->short->int - in that case
8143 MULTI_STEP_CVT will be 1).
8144 - INTERM_TYPES contains the intermediate type required to perform the
8145 widening operation (short in the above example). */
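/* E.g. (illustrative): for short A, B and the statement

     c = (int) a * (int) b;

   one V8HImode operand vector produces two V4SImode result vectors, with
   CODE1/CODE2 the VEC_WIDEN_MULT_LO/HI_EXPR (or EVEN/ODD) codes and
   MULTI_STEP_CVT == 0; only conversions may need intermediate steps.  */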
8147 bool
8148 supportable_widening_operation (enum tree_code code, gimple stmt,
8149 tree vectype_out, tree vectype_in,
8150 enum tree_code *code1, enum tree_code *code2,
8151 int *multi_step_cvt,
8152 vec<tree> *interm_types)
8154 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8155 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8156 struct loop *vect_loop = NULL;
8157 machine_mode vec_mode;
8158 enum insn_code icode1, icode2;
8159 optab optab1, optab2;
8160 tree vectype = vectype_in;
8161 tree wide_vectype = vectype_out;
8162 enum tree_code c1, c2;
8163 int i;
8164 tree prev_type, intermediate_type;
8165 machine_mode intermediate_mode, prev_mode;
8166 optab optab3, optab4;
8168 *multi_step_cvt = 0;
8169 if (loop_info)
8170 vect_loop = LOOP_VINFO_LOOP (loop_info);
8172 switch (code)
8174 case WIDEN_MULT_EXPR:
8175 /* The result of a vectorized widening operation usually requires
8176 two vectors (because the widened results do not fit into one vector).
8177 The generated vector results would normally be expected to be
8178 generated in the same order as in the original scalar computation,
8179 i.e. if 8 results are generated in each vector iteration, they are
8180 to be organized as follows:
8181 vect1: [res1,res2,res3,res4],
8182 vect2: [res5,res6,res7,res8].
8184 However, in the special case that the result of the widening
8185 operation is used in a reduction computation only, the order doesn't
8186 matter (because when vectorizing a reduction we change the order of
8187 the computation). Some targets can take advantage of this and
8188 generate more efficient code. For example, targets like Altivec,
8189 that support widen_mult using a sequence of {mult_even,mult_odd}
8190 generate the following vectors:
8191 vect1: [res1,res3,res5,res7],
8192 vect2: [res2,res4,res6,res8].
8194 When vectorizing outer-loops, we execute the inner-loop sequentially
8195 (each vectorized inner-loop iteration contributes to VF outer-loop
8196         iterations in parallel).  We therefore don't allow changing the
8197 order of the computation in the inner-loop during outer-loop
8198 vectorization. */
8199 /* TODO: Another case in which order doesn't *really* matter is when we
8200 widen and then contract again, e.g. (short)((int)x * y >> 8).
8201 Normally, pack_trunc performs an even/odd permute, whereas the
8202 repack from an even/odd expansion would be an interleave, which
8203 would be significantly simpler for e.g. AVX2. */
8204 /* In any case, in order to avoid duplicating the code below, recurse
8205 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8206 are properly set up for the caller. If we fail, we'll continue with
8207 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8208 if (vect_loop
8209 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8210 && !nested_in_vect_loop_p (vect_loop, stmt)
8211 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8212 stmt, vectype_out, vectype_in,
8213 code1, code2, multi_step_cvt,
8214 interm_types))
8216 /* Elements in a vector with vect_used_by_reduction property cannot
8217 be reordered if the use chain with this property does not have the
8218            same operation.  One such example is s += a * b, where elements
8219 in a and b cannot be reordered. Here we check if the vector defined
8220 by STMT is only directly used in the reduction statement. */
8221 tree lhs = gimple_assign_lhs (stmt);
8222 use_operand_p dummy;
8223 gimple use_stmt;
8224 stmt_vec_info use_stmt_info = NULL;
8225 if (single_imm_use (lhs, &dummy, &use_stmt)
8226 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8227 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8228 return true;
8230 c1 = VEC_WIDEN_MULT_LO_EXPR;
8231 c2 = VEC_WIDEN_MULT_HI_EXPR;
8232 break;
8234 case VEC_WIDEN_MULT_EVEN_EXPR:
8235 /* Support the recursion induced just above. */
8236 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8237 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8238 break;
8240 case WIDEN_LSHIFT_EXPR:
8241 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8242 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8243 break;
8245 CASE_CONVERT:
8246 c1 = VEC_UNPACK_LO_EXPR;
8247 c2 = VEC_UNPACK_HI_EXPR;
8248 break;
8250 case FLOAT_EXPR:
8251 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8252 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8253 break;
8255 case FIX_TRUNC_EXPR:
8256 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8257 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8258 computing the operation. */
8259 return false;
8261 default:
8262 gcc_unreachable ();
8265 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8266 std::swap (c1, c2);
8268 if (code == FIX_TRUNC_EXPR)
8270       /* The signedness is determined from the output operand.  */
8271 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8272 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8274 else
8276 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8277 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8280 if (!optab1 || !optab2)
8281 return false;
8283 vec_mode = TYPE_MODE (vectype);
8284 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8285 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8286 return false;
8288 *code1 = c1;
8289 *code2 = c2;
8291 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8292 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8293 return true;
8295 /* Check if it's a multi-step conversion that can be done using intermediate
8296 types. */
8298 prev_type = vectype;
8299 prev_mode = vec_mode;
8301 if (!CONVERT_EXPR_CODE_P (code))
8302 return false;
8304 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8305      intermediate steps in the promotion sequence.  We try
8306      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8307 not. */
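  /* E.g. (illustrative) a char -> int conversion on a target that only
     provides single-step unpacking goes V16QImode -> V8HImode -> V4SImode:
     the loop below records the short intermediate type, sets
     *MULTI_STEP_CVT to 1 and succeeds once the intermediate result mode
     matches the mode of WIDE_VECTYPE.  */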
8308 interm_types->create (MAX_INTERM_CVT_STEPS);
8309 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8311 intermediate_mode = insn_data[icode1].operand[0].mode;
8312 intermediate_type
8313 = lang_hooks.types.type_for_mode (intermediate_mode,
8314 TYPE_UNSIGNED (prev_type));
8315 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8316 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8318 if (!optab3 || !optab4
8319 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8320 || insn_data[icode1].operand[0].mode != intermediate_mode
8321 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8322 || insn_data[icode2].operand[0].mode != intermediate_mode
8323 || ((icode1 = optab_handler (optab3, intermediate_mode))
8324 == CODE_FOR_nothing)
8325 || ((icode2 = optab_handler (optab4, intermediate_mode))
8326 == CODE_FOR_nothing))
8327 break;
8329 interm_types->quick_push (intermediate_type);
8330 (*multi_step_cvt)++;
8332 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8333 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8334 return true;
8336 prev_type = intermediate_type;
8337 prev_mode = intermediate_mode;
8340 interm_types->release ();
8341 return false;
8345 /* Function supportable_narrowing_operation
8347 Check whether an operation represented by the code CODE is a
8348 narrowing operation that is supported by the target platform in
8349 vector form (i.e., when operating on arguments of type VECTYPE_IN
8350 and producing a result of type VECTYPE_OUT).
8352 Narrowing operations we currently support are NOP (CONVERT) and
8353 FIX_TRUNC. This function checks if these operations are supported by
8354 the target platform directly via vector tree-codes.
8356 Output:
8357 - CODE1 is the code of a vector operation to be used when
8358 vectorizing the operation, if available.
8359 - MULTI_STEP_CVT determines the number of required intermediate steps in
8360 case of multi-step conversion (like int->short->char - in that case
8361 MULTI_STEP_CVT will be 1).
8362 - INTERM_TYPES contains the intermediate type required to perform the
8363 narrowing operation (short in the above example). */
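/* E.g. (illustrative): narrowing int to char goes through short, so two
   V4SImode vectors are first packed into one V8HImode vector and two of
   those into one V16QImode vector; *CODE1 is VEC_PACK_TRUNC_EXPR,
   *MULTI_STEP_CVT is 1 and INTERM_TYPES records the short type.  */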
8365 bool
8366 supportable_narrowing_operation (enum tree_code code,
8367 tree vectype_out, tree vectype_in,
8368 enum tree_code *code1, int *multi_step_cvt,
8369 vec<tree> *interm_types)
8371 machine_mode vec_mode;
8372 enum insn_code icode1;
8373 optab optab1, interm_optab;
8374 tree vectype = vectype_in;
8375 tree narrow_vectype = vectype_out;
8376 enum tree_code c1;
8377 tree intermediate_type;
8378 machine_mode intermediate_mode, prev_mode;
8379 int i;
8380 bool uns;
8382 *multi_step_cvt = 0;
8383 switch (code)
8385 CASE_CONVERT:
8386 c1 = VEC_PACK_TRUNC_EXPR;
8387 break;
8389 case FIX_TRUNC_EXPR:
8390 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8391 break;
8393 case FLOAT_EXPR:
8394 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8395 tree code and optabs used for computing the operation. */
8396 return false;
8398 default:
8399 gcc_unreachable ();
8402 if (code == FIX_TRUNC_EXPR)
8403     /* The signedness is determined from the output operand.  */
8404 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8405 else
8406 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8408 if (!optab1)
8409 return false;
8411 vec_mode = TYPE_MODE (vectype);
8412 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8413 return false;
8415 *code1 = c1;
8417 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8418 return true;
8420 /* Check if it's a multi-step conversion that can be done using intermediate
8421 types. */
8422 prev_mode = vec_mode;
8423 if (code == FIX_TRUNC_EXPR)
8424 uns = TYPE_UNSIGNED (vectype_out);
8425 else
8426 uns = TYPE_UNSIGNED (vectype);
8428 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8429 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8430 costly than signed. */
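  /* E.g. (illustrative) converting a float vector to unsigned short:
     doing the float-to-int step as a signed conversion and then
     pack-truncating gives the same low-order bits for values that fit in
     the narrow type, and the signed conversion is usually cheaper.  */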
8431 if (code == FIX_TRUNC_EXPR && uns)
8433 enum insn_code icode2;
8435 intermediate_type
8436 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8437 interm_optab
8438 = optab_for_tree_code (c1, intermediate_type, optab_default);
8439 if (interm_optab != unknown_optab
8440 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8441 && insn_data[icode1].operand[0].mode
8442 == insn_data[icode2].operand[0].mode)
8444 uns = false;
8445 optab1 = interm_optab;
8446 icode1 = icode2;
8450 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8451      intermediate steps in the demotion sequence.  We try
8452 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8453 interm_types->create (MAX_INTERM_CVT_STEPS);
8454 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8456 intermediate_mode = insn_data[icode1].operand[0].mode;
8457 intermediate_type
8458 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8459 interm_optab
8460 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8461 optab_default);
8462 if (!interm_optab
8463 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8464 || insn_data[icode1].operand[0].mode != intermediate_mode
8465 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8466 == CODE_FOR_nothing))
8467 break;
8469 interm_types->quick_push (intermediate_type);
8470 (*multi_step_cvt)++;
8472 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8473 return true;
8475 prev_mode = intermediate_mode;
8476 optab1 = interm_optab;
8479 interm_types->release ();
8480 return false;