gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
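/* Added note (illustrative, not from the original source): during analysis
   callers usually pass a cost vector so the recorded entries can be replayed
   into the target cost model later; with a NULL vector the cost is charged
   to the target model immediately via add_stmt_cost.  */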
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
 207      may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
 286    - it is a control stmt in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
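/* Added example for illustration: a store such as "a[i] = x" is relevant
   because it has a vdef; a value that is only read after the loop (through
   a loop-closed PHI) is live; a temporary used purely by other in-loop
   statements becomes relevant only transitively, via the worklist.  */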
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
 375   /* STMT has a data_ref.  FORNOW this means that it is of one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 444    skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
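/* Added example of case 1 above (illustrative): for "x = a[i]", the use of
   "i" only feeds the address computation of the data reference, so the
   statement defining "i" is not marked through this use.  */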
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
583 vect_mark_relevant (worklist, def_stmt, relevant, false);
584 return true;
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
592 for i...
593 for j...
594 1. T0 = i + j
595 2. T1 = a[T0]
597 3. j = j + 1
 599    Stmts 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
604 bool
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
609 unsigned int nbbs = loop->num_nodes;
610 gimple_stmt_iterator si;
611 gimple *stmt;
612 unsigned int i;
613 stmt_vec_info stmt_vinfo;
614 basic_block bb;
615 gimple *phi;
616 bool live_p;
617 enum vect_relevant relevant;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE, vect_location,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec<gimple *, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
628 bb = bbs[i];
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 phi = gsi_stmt (si);
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
638 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
639 vect_mark_relevant (&worklist, phi, relevant, live_p);
641 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
643 stmt = gsi_stmt (si);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
650 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
651 vect_mark_relevant (&worklist, stmt, relevant, live_p);
655 /* 2. Process_worklist */
656 while (worklist.length () > 0)
658 use_operand_p use_p;
659 ssa_op_iter iter;
661 stmt = worklist.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
670 of STMT. */
671 stmt_vinfo = vinfo_for_stmt (stmt);
672 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
687 case vect_reduction_def:
688 gcc_assert (relevant != vect_unused_in_scope);
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_scope
691 && relevant != vect_used_by_reduction
692 && relevant != vect_used_only_live)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of reduction.\n");
697 return false;
699 break;
701 case vect_nested_cycle:
702 if (relevant != vect_unused_in_scope
703 && relevant != vect_used_in_outer_by_reduction
704 && relevant != vect_used_in_outer)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "unsupported use of nested cycle.\n");
710 return false;
712 break;
714 case vect_double_reduction_def:
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_by_reduction
717 && relevant != vect_used_only_live)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of double reduction.\n");
723 return false;
725 break;
727 default:
728 break;
731 if (is_pattern_stmt_p (stmt_vinfo))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt))
738 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
739 tree op = gimple_assign_rhs1 (stmt);
741 i = 1;
742 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
744 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
745 relevant, &worklist, false)
746 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
747 relevant, &worklist, false))
748 return false;
749 i = 2;
751 for (; i < gimple_num_ops (stmt); i++)
753 op = gimple_op (stmt, i);
754 if (TREE_CODE (op) == SSA_NAME
755 && !process_use (stmt, op, loop_vinfo, relevant,
756 &worklist, false))
757 return false;
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, relevant,
766 &worklist, false))
767 return false;
771 else
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, relevant,
776 &worklist, false))
777 return false;
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
782 gather_scatter_info gs_info;
783 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
784 gcc_unreachable ();
785 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
786 &worklist, true))
787 return false;
789 } /* while worklist */
791 return true;
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
801 void
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 stmt_vector_for_cost *prologue_cost_vec,
805 stmt_vector_for_cost *body_cost_vec)
807 int i;
808 int inside_cost = 0, prologue_cost = 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info))
812 return;
814 /* FORNOW: Assuming maximum 2 args per stmts. */
815 for (i = 0; i < 2; i++)
816 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
817 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
818 stmt_info, 0, vect_prologue);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
822 stmt_info, 0, vect_body);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE, vect_location,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
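/* Added rough example: for a two-step promotion (PWR == 1) the loop below
   charges vect_pow2 (1) + vect_pow2 (2) = 2 + 4 vec_promote_demote
   operations; the corresponding two-step demotion is charged 1 + 2.  */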
836 static void
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
838 enum vect_def_type *dt, int pwr)
840 int i, tmp;
841 int inside_cost = 0, prologue_cost = 0;
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
844 void *target_cost_data;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info))
848 return;
850 if (loop_vinfo)
851 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
852 else
853 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
855 for (i = 0; i < pwr + 1; i++)
857 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
858 (i + 1) : i;
859 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
860 vec_promote_demote, stmt_info, 0,
861 vect_body);
864 /* FORNOW: Assuming maximum 2 args per stmts. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
881 void
882 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
883 vect_memory_access_type memory_access_type,
884 enum vect_def_type dt, slp_tree slp_node,
885 stmt_vector_for_cost *prologue_cost_vec,
886 stmt_vector_for_cost *body_cost_vec)
888 unsigned int inside_cost = 0, prologue_cost = 0;
889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
891 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
893 if (dt == vect_constant_def || dt == vect_external_def)
894 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
895 stmt_info, 0, vect_prologue);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node && grouped_access_p)
901 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
902 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
914 if (first_stmt_p
915 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 917       /* Uses high and low interleave or shuffle operations for each
918 needed permute. */
919 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
920 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
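      /* Illustrative numbers: GROUP_SIZE == 4 with NCOPIES == 2 gives
	 2 * ceil_log2 (4) * 4 = 16 vec_perm statements.  */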
921 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
922 stmt_info, 0, vect_body);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE, vect_location,
926 "vect_model_store_cost: strided group_size = %d .\n",
927 group_size);
930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
931 /* Costs of the stores. */
932 if (memory_access_type == VMAT_ELEMENTWISE)
933 /* N scalar stores plus extracting the elements. */
934 inside_cost += record_stmt_cost (body_cost_vec,
935 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
936 scalar_store, stmt_info, 0, vect_body);
937 else
938 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
940 if (memory_access_type == VMAT_ELEMENTWISE
941 || memory_access_type == VMAT_STRIDED_SLP)
942 inside_cost += record_stmt_cost (body_cost_vec,
943 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
944 vec_to_scalar, stmt_info, 0, vect_body);
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE, vect_location,
948 "vect_model_store_cost: inside_cost = %d, "
949 "prologue_cost = %d .\n", inside_cost, prologue_cost);
953 /* Calculate cost of DR's memory access. */
954 void
955 vect_get_store_cost (struct data_reference *dr, int ncopies,
956 unsigned int *inside_cost,
957 stmt_vector_for_cost *body_cost_vec)
959 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
960 gimple *stmt = DR_STMT (dr);
961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
963 switch (alignment_support_scheme)
965 case dr_aligned:
967 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
968 vector_store, stmt_info, 0,
969 vect_body);
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: aligned.\n");
974 break;
977 case dr_unaligned_supported:
979 /* Here, we assign an additional cost for the unaligned store. */
980 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
981 unaligned_store, stmt_info,
982 DR_MISALIGNMENT (dr), vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: unaligned supported by "
986 "hardware.\n");
987 break;
990 case dr_unaligned_unsupported:
992 *inside_cost = VECT_MAX_COST;
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
996 "vect_model_store_cost: unsupported access.\n");
997 break;
1000 default:
1001 gcc_unreachable ();
1006 /* Function vect_model_load_cost
1008 Models cost for loads. In the case of grouped accesses, one access has
1009 the overhead of the grouped access attributed to it. Since unaligned
1010 accesses are supported for loads, we also account for the costs of the
1011 access scheme chosen. */
1013 void
1014 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1015 vect_memory_access_type memory_access_type,
1016 slp_tree slp_node,
1017 stmt_vector_for_cost *prologue_cost_vec,
1018 stmt_vector_for_cost *body_cost_vec)
1020 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1021 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1022 unsigned int inside_cost = 0, prologue_cost = 0;
1023 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1025 /* Grouped loads read all elements in the group at once,
1026 so we want the DR for the first statement. */
1027 if (!slp_node && grouped_access_p)
1029 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1030 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1033 /* True if we should include any once-per-group costs as well as
1034 the cost of the statement itself. For SLP we only get called
1035 once per group anyhow. */
1036 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1038 /* We assume that the cost of a single load-lanes instruction is
1039 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1040 access is instead being provided by a load-and-permute operation,
1041 include the cost of the permutes. */
1042 if (first_stmt_p
1043 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1045       /* Uses even and odd extract operations or shuffle operations
1046 for each needed permute. */
1047 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1048 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
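      /* As for stores, e.g. (illustrative) GROUP_SIZE == 8 with NCOPIES == 1
	 gives 1 * ceil_log2 (8) * 8 = 24 vec_perm statements.  */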
1049 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1050 stmt_info, 0, vect_body);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE, vect_location,
1054 "vect_model_load_cost: strided group_size = %d .\n",
1055 group_size);
1058 /* The loads themselves. */
1059 if (memory_access_type == VMAT_ELEMENTWISE)
1061 /* N scalar loads plus gathering them into a vector. */
1062 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1063 inside_cost += record_stmt_cost (body_cost_vec,
1064 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1065 scalar_load, stmt_info, 0, vect_body);
1067 else
1068 vect_get_load_cost (dr, ncopies, first_stmt_p,
1069 &inside_cost, &prologue_cost,
1070 prologue_cost_vec, body_cost_vec, true);
1071 if (memory_access_type == VMAT_ELEMENTWISE
1072 || memory_access_type == VMAT_STRIDED_SLP)
1073 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1074 stmt_info, 0, vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 /* Calculate cost of DR's memory access. */
1084 void
1085 vect_get_load_cost (struct data_reference *dr, int ncopies,
1086 bool add_realign_cost, unsigned int *inside_cost,
1087 unsigned int *prologue_cost,
1088 stmt_vector_for_cost *prologue_cost_vec,
1089 stmt_vector_for_cost *body_cost_vec,
1090 bool record_prologue_costs)
1092 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1093 gimple *stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 switch (alignment_support_scheme)
1098 case dr_aligned:
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: aligned.\n");
1107 break;
1109 case dr_unaligned_supported:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1113 unaligned_load, stmt_info,
1114 DR_MISALIGNMENT (dr), vect_body);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: unaligned supported by "
1119 "hardware.\n");
1121 break;
1123 case dr_explicit_realign:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1126 vector_load, stmt_info, 0, vect_body);
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 vec_perm, stmt_info, 0, vect_body);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1132 prologue costs. */
1133 if (targetm.vectorize.builtin_mask_for_load)
1134 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: explicit realign\n");
1141 break;
1143 case dr_explicit_realign_optimized:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned software "
1148 "pipelined.\n");
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost && record_prologue_costs)
1159 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1160 vector_stmt, stmt_info,
1161 0, vect_prologue);
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1164 vector_stmt, stmt_info,
1165 0, vect_prologue);
1168 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1169 stmt_info, 0, vect_body);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: explicit realign optimized"
1176 "\n");
1178 break;
1181 case dr_unaligned_unsupported:
1183 *inside_cost = VECT_MAX_COST;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1187 "vect_model_load_cost: unsupported access.\n");
1188 break;
1191 default:
1192 gcc_unreachable ();
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1199 static void
1200 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1202 if (gsi)
1203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1204 else
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1209 if (loop_vinfo)
1211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1212 basic_block new_bb;
1213 edge pe;
1215 if (nested_in_vect_loop_p (loop, stmt))
1216 loop = loop->inner;
1218 pe = loop_preheader_edge (loop);
1219 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1220 gcc_assert (!new_bb);
1222 else
1224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1225 basic_block bb;
1226 gimple_stmt_iterator gsi_bb_start;
1228 gcc_assert (bb_vinfo);
1229 bb = BB_VINFO_BB (bb_vinfo);
1230 gsi_bb_start = gsi_after_labels (bb);
1231 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1247    vector type, a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
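/* Added example (illustrative): vectorizing "x = y + 3" with a V4SI vector
   type would call this with VAL = 3 and create an init stmt along the lines
   of "cst_1 = { 3, 3, 3, 3 }" in the loop preheader.  */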
1253 tree
1254 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1256 gimple *init_stmt;
1257 tree new_temp;
 1259   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1260 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1262 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1263 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
 1265 	  /* A scalar boolean value should be transformed into an
 1266 	     all-zeros or all-ones value before building a vector.  */
1267 if (VECTOR_BOOLEAN_TYPE_P (type))
1269 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1270 tree false_val = build_zero_cst (TREE_TYPE (type));
1272 if (CONSTANT_CLASS_P (val))
1273 val = integer_zerop (val) ? false_val : true_val;
1274 else
1276 new_temp = make_ssa_name (TREE_TYPE (type));
1277 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1278 val, true_val, false_val);
1279 vect_init_vector_1 (stmt, init_stmt, gsi);
1280 val = new_temp;
1283 else if (CONSTANT_CLASS_P (val))
1284 val = fold_convert (TREE_TYPE (type), val);
1285 else
1287 new_temp = make_ssa_name (TREE_TYPE (type));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1289 init_stmt = gimple_build_assign (new_temp,
1290 fold_build1 (VIEW_CONVERT_EXPR,
1291 TREE_TYPE (type),
1292 val));
1293 else
1294 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1295 vect_init_vector_1 (stmt, init_stmt, gsi);
1296 val = new_temp;
1299 val = build_vector_from_val (type, val);
1302 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1303 init_stmt = gimple_build_assign (new_temp, val);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
1305 return new_temp;
1308 /* Function vect_get_vec_def_for_operand_1.
1310 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1311 DT that will be used in the vectorized stmt. */
1313 tree
1314 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1316 tree vec_oprnd;
1317 gimple *vec_stmt;
1318 stmt_vec_info def_stmt_info = NULL;
1320 switch (dt)
1322 /* operand is a constant or a loop invariant. */
1323 case vect_constant_def:
1324 case vect_external_def:
1325 /* Code should use vect_get_vec_def_for_operand. */
1326 gcc_unreachable ();
1328 /* operand is defined inside the loop. */
1329 case vect_internal_def:
1331 /* Get the def from the vectorized stmt. */
1332 def_stmt_info = vinfo_for_stmt (def_stmt);
1334 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1335 /* Get vectorized pattern statement. */
1336 if (!vec_stmt
1337 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1338 && !STMT_VINFO_RELEVANT (def_stmt_info))
1339 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1340 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1341 gcc_assert (vec_stmt);
1342 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1343 vec_oprnd = PHI_RESULT (vec_stmt);
1344 else if (is_gimple_call (vec_stmt))
1345 vec_oprnd = gimple_call_lhs (vec_stmt);
1346 else
1347 vec_oprnd = gimple_assign_lhs (vec_stmt);
1348 return vec_oprnd;
1351 /* operand is defined by a loop header phi - reduction */
1352 case vect_reduction_def:
1353 case vect_double_reduction_def:
1354 case vect_nested_cycle:
1355 /* Code should use get_initial_def_for_reduction. */
1356 gcc_unreachable ();
1358 /* operand is defined by loop-header phi - induction. */
1359 case vect_induction_def:
1361 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1363 /* Get the def from the vectorized stmt. */
1364 def_stmt_info = vinfo_for_stmt (def_stmt);
1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1366 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1367 vec_oprnd = PHI_RESULT (vec_stmt);
1368 else
1369 vec_oprnd = gimple_get_lhs (vec_stmt);
1370 return vec_oprnd;
1373 default:
1374 gcc_unreachable ();
1379 /* Function vect_get_vec_def_for_operand.
1381 OP is an operand in STMT. This function returns a (vector) def that will be
1382 used in the vectorized stmt for STMT.
1384 In the case that OP is an SSA_NAME which is defined in the loop, then
1385 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1387 In case OP is an invariant or constant, a new stmt that creates a vector def
1388 needs to be introduced. VECTYPE may be used to specify a required type for
1389 vector invariant. */
1391 tree
1392 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1394 gimple *def_stmt;
1395 enum vect_def_type dt;
1396 bool is_simple_use;
1397 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1398 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1400 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location,
1403 "vect_get_vec_def_for_operand: ");
1404 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1405 dump_printf (MSG_NOTE, "\n");
1408 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1409 gcc_assert (is_simple_use);
1410 if (def_stmt && dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1416 if (dt == vect_constant_def || dt == vect_external_def)
1418 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1419 tree vector_type;
1421 if (vectype)
1422 vector_type = vectype;
1423 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1424 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1425 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1426 else
1427 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1429 gcc_assert (vector_type);
1430 return vect_init_vector (stmt, op, vector_type, NULL);
1432 else
1433 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1437 /* Function vect_get_vec_def_for_stmt_copy
1439 Return a vector-def for an operand. This function is used when the
1440 vectorized stmt to be created (by the caller to this function) is a "copy"
1441 created in case the vectorized result cannot fit in one vector, and several
1442 copies of the vector-stmt are required. In this case the vector-def is
1443 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1444 of the stmt that defines VEC_OPRND.
1445 DT is the type of the vector def VEC_OPRND.
1447 Context:
1448 In case the vectorization factor (VF) is bigger than the number
1449 of elements that can fit in a vectype (nunits), we have to generate
1450 more than one vector stmt to vectorize the scalar stmt. This situation
1451 arises when there are multiple data-types operated upon in the loop; the
1452 smallest data-type determines the VF, and as a result, when vectorizing
1453 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1454 vector stmt (each computing a vector of 'nunits' results, and together
1455 computing 'VF' results in each iteration). This function is called when
1456 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1457 which VF=16 and nunits=4, so the number of copies required is 4):
1459 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1461 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1462 VS1.1: vx.1 = memref1 VS1.2
1463 VS1.2: vx.2 = memref2 VS1.3
1464 VS1.3: vx.3 = memref3
1466 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1467 VSnew.1: vz1 = vx.1 + ... VSnew.2
1468 VSnew.2: vz2 = vx.2 + ... VSnew.3
1469 VSnew.3: vz3 = vx.3 + ...
1471 The vectorization of S1 is explained in vectorizable_load.
1472 The vectorization of S2:
1473 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1474 the function 'vect_get_vec_def_for_operand' is called to
1475 get the relevant vector-def for each operand of S2. For operand x it
1476 returns the vector-def 'vx.0'.
1478 To create the remaining copies of the vector-stmt (VSnew.j), this
1479 function is called to get the relevant vector-def for each operand. It is
1480 obtained from the respective VS1.j stmt, which is recorded in the
1481 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1483 For example, to obtain the vector-def 'vx.1' in order to create the
1484 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1485 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1486 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1487 and return its def ('vx.1').
1488 Overall, to create the above sequence this function will be called 3 times:
1489 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1490 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1491 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1493 tree
1494 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1496 gimple *vec_stmt_for_operand;
1497 stmt_vec_info def_stmt_info;
1499 /* Do nothing; can reuse same def. */
1500 if (dt == vect_external_def || dt == vect_constant_def )
1501 return vec_oprnd;
1503 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1504 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1505 gcc_assert (def_stmt_info);
1506 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1507 gcc_assert (vec_stmt_for_operand);
1508 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1509 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1510 else
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1512 return vec_oprnd;
1516 /* Get vectorized definitions for the operands to create a copy of an original
1517 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1519 static void
1520 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1521 vec<tree> *vec_oprnds0,
1522 vec<tree> *vec_oprnds1)
1524 tree vec_oprnd = vec_oprnds0->pop ();
1526 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1527 vec_oprnds0->quick_push (vec_oprnd);
1529 if (vec_oprnds1 && vec_oprnds1->length ())
1531 vec_oprnd = vec_oprnds1->pop ();
1532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1533 vec_oprnds1->quick_push (vec_oprnd);
1538 /* Get vectorized definitions for OP0 and OP1.
 1539    REDUC_INDEX is the index of the reduction operand in case of reduction,
1540 and -1 otherwise. */
1542 void
1543 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1544 vec<tree> *vec_oprnds0,
1545 vec<tree> *vec_oprnds1,
1546 slp_tree slp_node, int reduc_index)
1548 if (slp_node)
1550 int nops = (op1 == NULL_TREE) ? 1 : 2;
1551 auto_vec<tree> ops (nops);
1552 auto_vec<vec<tree> > vec_defs (nops);
1554 ops.quick_push (op0);
1555 if (op1)
1556 ops.quick_push (op1);
1558 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1560 *vec_oprnds0 = vec_defs[0];
1561 if (op1)
1562 *vec_oprnds1 = vec_defs[1];
1564 else
1566 tree vec_oprnd;
1568 vec_oprnds0->create (1);
1569 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1570 vec_oprnds0->quick_push (vec_oprnd);
1572 if (op1)
1574 vec_oprnds1->create (1);
1575 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1576 vec_oprnds1->quick_push (vec_oprnd);
1582 /* Function vect_finish_stmt_generation.
1584 Insert a new stmt. */
1586 void
1587 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1588 gimple_stmt_iterator *gsi)
1590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1591 vec_info *vinfo = stmt_info->vinfo;
1593 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1595 if (!gsi_end_p (*gsi)
1596 && gimple_has_mem_ops (vec_stmt))
1598 gimple *at_stmt = gsi_stmt (*gsi);
1599 tree vuse = gimple_vuse (at_stmt);
1600 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1602 tree vdef = gimple_vdef (at_stmt);
1603 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1604 /* If we have an SSA vuse and insert a store, update virtual
1605 SSA form to avoid triggering the renamer. Do so only
1606 if we can easily see all uses - which is what almost always
1607 happens with the way vectorized stmts are inserted. */
1608 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1609 && ((is_gimple_assign (vec_stmt)
1610 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1611 || (is_gimple_call (vec_stmt)
1612 && !(gimple_call_flags (vec_stmt)
1613 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1615 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1616 gimple_set_vdef (vec_stmt, new_vdef);
1617 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1621 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1623 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1625 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1631 gimple_set_location (vec_stmt, gimple_location (stmt));
1633 /* While EH edges will generally prevent vectorization, stmt might
1634 e.g. be in a must-not-throw region. Ensure newly created stmts
1635 that could throw are part of the same region. */
1636 int lp_nr = lookup_stmt_eh_lp (stmt);
1637 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1638 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1641 /* We want to vectorize a call to combined function CFN with function
1642 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1643 as the types of all inputs. Check whether this is possible using
1644 an internal function, returning its code if so or IFN_LAST if not. */
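/* Added example (illustrative): a sqrt call whose combined function maps to
   IFN_SQRT can be handled here when the target provides a vector square-root
   pattern for the given vector types; the exact set depends on the target's
   optabs.  */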
1646 static internal_fn
1647 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1648 tree vectype_out, tree vectype_in)
1650 internal_fn ifn;
1651 if (internal_fn_p (cfn))
1652 ifn = as_internal_fn (cfn);
1653 else
1654 ifn = associated_internal_fn (fndecl);
1655 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1657 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1658 if (info.vectorizable)
1660 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1661 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1662 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1663 OPTIMIZE_FOR_SPEED))
1664 return ifn;
1667 return IFN_LAST;
1671 static tree permute_vec_elements (tree, tree, tree, gimple *,
1672 gimple_stmt_iterator *);
1674 /* STMT is a non-strided load or store, meaning that it accesses
1675 elements with a known constant step. Return -1 if that step
1676 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1678 static int
1679 compare_step_with_zero (gimple *stmt)
1681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1682 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1683 tree step;
1684 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1685 step = STMT_VINFO_DR_STEP (stmt_info);
1686 else
1687 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1688 return tree_int_cst_compare (step, size_zero_node);
1691 /* If the target supports a permute mask that reverses the elements in
1692 a vector of type VECTYPE, return that mask, otherwise return null. */
1694 static tree
1695 perm_mask_for_reverse (tree vectype)
1697 int i, nunits;
1698 unsigned char *sel;
1700 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1701 sel = XALLOCAVEC (unsigned char, nunits);
1703 for (i = 0; i < nunits; ++i)
1704 sel[i] = nunits - 1 - i;
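  /* E.g. with nunits == 4 this builds the selector { 3, 2, 1, 0 },
     i.e. a full element reversal.  */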
1706 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1707 return NULL_TREE;
1708 return vect_gen_perm_mask_checked (vectype, sel);
1711 /* A subroutine of get_load_store_type, with a subset of the same
1712 arguments. Handle the case where STMT is part of a grouped load
1713 or store.
1715 For stores, the statements in the group are all consecutive
1716 and there is no gap at the end. For loads, the statements in the
1717 group might not be consecutive; there can be gaps between statements
1718 as well as at the end. */
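/* Added example (illustrative): a load group reading a[3*i] and a[3*i+1]
   but not a[3*i+2] has a one-element gap at the end of each group; stores
   with such gaps are not handled (see the assert below).  */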
1720 static bool
1721 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1722 vec_load_store_type vls_type,
1723 vect_memory_access_type *memory_access_type)
1725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1726 vec_info *vinfo = stmt_info->vinfo;
1727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1728 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1729 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1730 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1731 bool single_element_p = (stmt == first_stmt
1732 && !GROUP_NEXT_ELEMENT (stmt_info));
1733 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1734 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1736 /* True if the vectorized statements would access beyond the last
1737 statement in the group. */
1738 bool overrun_p = false;
1740 /* True if we can cope with such overrun by peeling for gaps, so that
1741 there is at least one final scalar iteration after the vector loop. */
1742 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1744 /* There can only be a gap at the end of the group if the stride is
1745 known at compile time. */
1746 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1748 /* Stores can't yet have gaps. */
1749 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1751 if (slp)
1753 if (STMT_VINFO_STRIDED_P (stmt_info))
1755 /* Try to use consecutive accesses of GROUP_SIZE elements,
1756 separated by the stride, until we have a complete vector.
1757 Fall back to scalar accesses if that isn't possible. */
1758 if (nunits % group_size == 0)
1759 *memory_access_type = VMAT_STRIDED_SLP;
1760 else
1761 *memory_access_type = VMAT_ELEMENTWISE;
1763 else
1765 overrun_p = loop_vinfo && gap != 0;
1766 if (overrun_p && vls_type != VLS_LOAD)
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "Grouped store with gaps requires"
1770 " non-consecutive accesses\n");
1771 return false;
1773 /* If the access is aligned an overrun is fine. */
1774 if (overrun_p
1775 && aligned_access_p
1776 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1777 overrun_p = false;
1778 if (overrun_p && !can_overrun_p)
1780 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1782 "Peeling for outer loop is not supported\n");
1783 return false;
1785 *memory_access_type = VMAT_CONTIGUOUS;
1788 else
1790 /* We can always handle this case using elementwise accesses,
1791 but see if something more efficient is available. */
1792 *memory_access_type = VMAT_ELEMENTWISE;
1794 /* If there is a gap at the end of the group then these optimizations
1795 would access excess elements in the last iteration. */
1796 bool would_overrun_p = (gap != 0);
1797 /* If the access is aligned an overrun is fine. */
1798 if (would_overrun_p
1799 && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
1800 would_overrun_p = false;
1801 if (!STMT_VINFO_STRIDED_P (stmt_info)
1802 && (can_overrun_p || !would_overrun_p)
1803 && compare_step_with_zero (stmt) > 0)
1805 /* First try using LOAD/STORE_LANES. */
1806 if (vls_type == VLS_LOAD
1807 ? vect_load_lanes_supported (vectype, group_size)
1808 : vect_store_lanes_supported (vectype, group_size))
1810 *memory_access_type = VMAT_LOAD_STORE_LANES;
1811 overrun_p = would_overrun_p;
1814 /* If that fails, try using permuting loads. */
1815 if (*memory_access_type == VMAT_ELEMENTWISE
1816 && (vls_type == VLS_LOAD
1817 ? vect_grouped_load_supported (vectype, single_element_p,
1818 group_size)
1819 : vect_grouped_store_supported (vectype, group_size)))
1821 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1822 overrun_p = would_overrun_p;
1827 if (vls_type != VLS_LOAD && first_stmt == stmt)
1829 /* STMT is the leader of the group. Check the operands of all the
1830 stmts of the group. */
1831 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1832 while (next_stmt)
1834 gcc_assert (gimple_assign_single_p (next_stmt));
1835 tree op = gimple_assign_rhs1 (next_stmt);
1836 gimple *def_stmt;
1837 enum vect_def_type dt;
1838 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1842 "use not simple.\n");
1843 return false;
1845 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1849 if (overrun_p)
1851 gcc_assert (can_overrun_p);
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1854 "Data access with gaps requires scalar "
1855 "epilogue loop\n");
1856 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1859 return true;
1862 /* A subroutine of get_load_store_type, with a subset of the same
1863 arguments. Handle the case where STMT is a load or store that
1864 accesses consecutive elements with a negative step. */
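/* A hedged example of such an access (hypothetical loop, illustration
   only):

     for (i = n - 1; i >= 0; i--)
       a[i] = b[i];

   Both references are contiguous but walk memory downwards.  As decided
   below, the vectorized access either goes through a reversing
   permutation (VMAT_CONTIGUOUS_REVERSE), or, when storing an invariant
   value, is simply emitted downwards (VMAT_CONTIGUOUS_DOWN); otherwise
   it falls back to VMAT_ELEMENTWISE.  */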
1866 static vect_memory_access_type
1867 get_negative_load_store_type (gimple *stmt, tree vectype,
1868 vec_load_store_type vls_type,
1869 unsigned int ncopies)
1871 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1872 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1873 dr_alignment_support alignment_support_scheme;
1875 if (ncopies > 1)
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1879 "multiple types with negative step.\n");
1880 return VMAT_ELEMENTWISE;
1883 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1884 if (alignment_support_scheme != dr_aligned
1885 && alignment_support_scheme != dr_unaligned_supported)
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1889 "negative step but alignment required.\n");
1890 return VMAT_ELEMENTWISE;
1893 if (vls_type == VLS_STORE_INVARIANT)
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_NOTE, vect_location,
1897 "negative step with invariant source;"
1898 " no permute needed.\n");
1899 return VMAT_CONTIGUOUS_DOWN;
1902 if (!perm_mask_for_reverse (vectype))
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1906 "negative step and reversing not supported.\n");
1907 return VMAT_ELEMENTWISE;
1910 return VMAT_CONTIGUOUS_REVERSE;
1913 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1914 if there is a memory access type that the vectorized form can use,
1915 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1916 or scatters, fill in GS_INFO accordingly.
1918 SLP says whether we're performing SLP rather than loop vectorization.
1919 VECTYPE is the vector type that the vectorized statements will use.
1920 NCOPIES is the number of vector statements that will be needed. */
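/* For orientation (a summary of the classification below, not new
   behaviour): gather/scatter accesses become VMAT_GATHER_SCATTER,
   grouped accesses are classified by get_group_load_store_type, and
   strided accesses fall back to VMAT_ELEMENTWISE.  For the remaining
   consecutive accesses the sign of the step decides; for example, a
   load from a loop-invariant address has a zero step and is classified
   VMAT_INVARIANT, while an ordinary unit-stride access becomes
   VMAT_CONTIGUOUS.  */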
1922 static bool
1923 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1924 vec_load_store_type vls_type, unsigned int ncopies,
1925 vect_memory_access_type *memory_access_type,
1926 gather_scatter_info *gs_info)
1928 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1929 vec_info *vinfo = stmt_info->vinfo;
1930 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1931 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1933 *memory_access_type = VMAT_GATHER_SCATTER;
1934 gimple *def_stmt;
1935 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1936 gcc_unreachable ();
1937 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1938 &gs_info->offset_dt,
1939 &gs_info->offset_vectype))
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1943 "%s index use not simple.\n",
1944 vls_type == VLS_LOAD ? "gather" : "scatter");
1945 return false;
1948 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1950 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1951 memory_access_type))
1952 return false;
1954 else if (STMT_VINFO_STRIDED_P (stmt_info))
1956 gcc_assert (!slp);
1957 *memory_access_type = VMAT_ELEMENTWISE;
1959 else
1961 int cmp = compare_step_with_zero (stmt);
1962 if (cmp < 0)
1963 *memory_access_type = get_negative_load_store_type
1964 (stmt, vectype, vls_type, ncopies);
1965 else if (cmp == 0)
1967 gcc_assert (vls_type == VLS_LOAD);
1968 *memory_access_type = VMAT_INVARIANT;
1970 else
1971 *memory_access_type = VMAT_CONTIGUOUS;
1974 /* FIXME: At the moment the cost model seems to underestimate the
1975 cost of using elementwise accesses. This check preserves the
1976 traditional behavior until that can be fixed. */
1977 if (*memory_access_type == VMAT_ELEMENTWISE
1978 && !STMT_VINFO_STRIDED_P (stmt_info))
1980 if (dump_enabled_p ())
1981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1982 "not falling back to elementwise accesses\n");
1983 return false;
1985 return true;
1988 /* Function vectorizable_mask_load_store.
1990 Check if STMT performs a conditional load or store that can be vectorized.
1991 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1992 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1993 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
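/* A hedged sketch of the input handled here (hypothetical source,
   illustration only): if-conversion turns a conditional access such as

     for (i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i];

   into internal calls of roughly the form
   MASK_STORE (&a[i], align, mask, value) and, for conditional reads,
   MASK_LOAD (&b[i], align, mask); those calls are what this function
   analyzes and, during the transformation phase, replaces with their
   vector counterparts.  */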
1995 static bool
1996 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1997 gimple **vec_stmt, slp_tree slp_node)
1999 tree vec_dest = NULL;
2000 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2001 stmt_vec_info prev_stmt_info;
2002 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2003 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2004 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2005 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2006 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2007 tree rhs_vectype = NULL_TREE;
2008 tree mask_vectype;
2009 tree elem_type;
2010 gimple *new_stmt;
2011 tree dummy;
2012 tree dataref_ptr = NULL_TREE;
2013 gimple *ptr_incr;
2014 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2015 int ncopies;
2016 int i, j;
2017 bool inv_p;
2018 gather_scatter_info gs_info;
2019 vec_load_store_type vls_type;
2020 tree mask;
2021 gimple *def_stmt;
2022 enum vect_def_type dt;
2024 if (slp_node != NULL)
2025 return false;
2027 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2028 gcc_assert (ncopies >= 1);
2030 mask = gimple_call_arg (stmt, 2);
2032 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2033 return false;
2035 /* FORNOW. This restriction should be relaxed. */
2036 if (nested_in_vect_loop && ncopies > 1)
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2040 "multiple types in nested loop.");
2041 return false;
2044 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2045 return false;
2047 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2048 && ! vec_stmt)
2049 return false;
2051 if (!STMT_VINFO_DATA_REF (stmt_info))
2052 return false;
2054 elem_type = TREE_TYPE (vectype);
2056 if (TREE_CODE (mask) != SSA_NAME)
2057 return false;
2059 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2060 return false;
2062 if (!mask_vectype)
2063 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2065 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2066 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2067 return false;
2069 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2071 tree rhs = gimple_call_arg (stmt, 3);
2072 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2073 return false;
2074 if (dt == vect_constant_def || dt == vect_external_def)
2075 vls_type = VLS_STORE_INVARIANT;
2076 else
2077 vls_type = VLS_STORE;
2079 else
2080 vls_type = VLS_LOAD;
2082 vect_memory_access_type memory_access_type;
2083 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2084 &memory_access_type, &gs_info))
2085 return false;
2087 if (memory_access_type == VMAT_GATHER_SCATTER)
2089 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2090 tree masktype
2091 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2092 if (TREE_CODE (masktype) == INTEGER_TYPE)
2094 if (dump_enabled_p ())
2095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2096 "masked gather with integer mask not supported.");
2097 return false;
2100 else if (memory_access_type != VMAT_CONTIGUOUS)
2102 if (dump_enabled_p ())
2103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2104 "unsupported access type for masked %s.\n",
2105 vls_type == VLS_LOAD ? "load" : "store");
2106 return false;
2108 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2109 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2110 TYPE_MODE (mask_vectype),
2111 vls_type == VLS_LOAD)
2112 || (rhs_vectype
2113 && !useless_type_conversion_p (vectype, rhs_vectype)))
2114 return false;
2116 if (!vec_stmt) /* transformation not required. */
2118 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2119 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2120 if (vls_type == VLS_LOAD)
2121 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2122 NULL, NULL, NULL);
2123 else
2124 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2125 dt, NULL, NULL, NULL);
2126 return true;
2128 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2130 /** Transform. **/
2132 if (memory_access_type == VMAT_GATHER_SCATTER)
2134 tree vec_oprnd0 = NULL_TREE, op;
2135 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2136 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2137 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2138 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2139 tree mask_perm_mask = NULL_TREE;
2140 edge pe = loop_preheader_edge (loop);
2141 gimple_seq seq;
2142 basic_block new_bb;
2143 enum { NARROW, NONE, WIDEN } modifier;
2144 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2146 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2147 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2148 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2149 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2150 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2151 scaletype = TREE_VALUE (arglist);
2152 gcc_checking_assert (types_compatible_p (srctype, rettype)
2153 && types_compatible_p (srctype, masktype));
2155 if (nunits == gather_off_nunits)
2156 modifier = NONE;
2157 else if (nunits == gather_off_nunits / 2)
2159 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2160 modifier = WIDEN;
2162 for (i = 0; i < gather_off_nunits; ++i)
2163 sel[i] = i | nunits;
2165 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2167 else if (nunits == gather_off_nunits * 2)
2169 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2170 modifier = NARROW;
2172 for (i = 0; i < nunits; ++i)
2173 sel[i] = i < gather_off_nunits
2174 ? i : i + nunits - gather_off_nunits;
2176 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2177 ncopies *= 2;
2178 for (i = 0; i < nunits; ++i)
2179 sel[i] = i | gather_off_nunits;
2180 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2182 else
2183 gcc_unreachable ();
2185 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2187 ptr = fold_convert (ptrtype, gs_info.base);
2188 if (!is_gimple_min_invariant (ptr))
2190 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2191 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2192 gcc_assert (!new_bb);
2195 scale = build_int_cst (scaletype, gs_info.scale);
2197 prev_stmt_info = NULL;
2198 for (j = 0; j < ncopies; ++j)
2200 if (modifier == WIDEN && (j & 1))
2201 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2202 perm_mask, stmt, gsi);
2203 else if (j == 0)
2204 op = vec_oprnd0
2205 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2206 else
2207 op = vec_oprnd0
2208 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2210 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2212 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2213 == TYPE_VECTOR_SUBPARTS (idxtype));
2214 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2215 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2216 new_stmt
2217 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2219 op = var;
2222 if (mask_perm_mask && (j & 1))
2223 mask_op = permute_vec_elements (mask_op, mask_op,
2224 mask_perm_mask, stmt, gsi);
2225 else
2227 if (j == 0)
2228 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2229 else
2231 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2232 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2235 mask_op = vec_mask;
2236 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2238 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2239 == TYPE_VECTOR_SUBPARTS (masktype));
2240 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2241 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2242 new_stmt
2243 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2244 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2245 mask_op = var;
2249 new_stmt
2250 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2251 scale);
2253 if (!useless_type_conversion_p (vectype, rettype))
2255 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2256 == TYPE_VECTOR_SUBPARTS (rettype));
2257 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2258 gimple_call_set_lhs (new_stmt, op);
2259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2260 var = make_ssa_name (vec_dest);
2261 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2262 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2264 else
2266 var = make_ssa_name (vec_dest, new_stmt);
2267 gimple_call_set_lhs (new_stmt, var);
2270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2272 if (modifier == NARROW)
2274 if ((j & 1) == 0)
2276 prev_res = var;
2277 continue;
2279 var = permute_vec_elements (prev_res, var,
2280 perm_mask, stmt, gsi);
2281 new_stmt = SSA_NAME_DEF_STMT (var);
2284 if (prev_stmt_info == NULL)
2285 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2286 else
2287 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2288 prev_stmt_info = vinfo_for_stmt (new_stmt);
2291 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2292 from the IL. */
2293 if (STMT_VINFO_RELATED_STMT (stmt_info))
2295 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2296 stmt_info = vinfo_for_stmt (stmt);
2298 tree lhs = gimple_call_lhs (stmt);
2299 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2300 set_vinfo_for_stmt (new_stmt, stmt_info);
2301 set_vinfo_for_stmt (stmt, NULL);
2302 STMT_VINFO_STMT (stmt_info) = new_stmt;
2303 gsi_replace (gsi, new_stmt, true);
2304 return true;
2306 else if (vls_type != VLS_LOAD)
2308 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2309 prev_stmt_info = NULL;
2310 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2311 for (i = 0; i < ncopies; i++)
2313 unsigned align, misalign;
2315 if (i == 0)
2317 tree rhs = gimple_call_arg (stmt, 3);
2318 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2319 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2320 /* We should have caught mismatched types earlier. */
2321 gcc_assert (useless_type_conversion_p (vectype,
2322 TREE_TYPE (vec_rhs)));
2323 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2324 NULL_TREE, &dummy, gsi,
2325 &ptr_incr, false, &inv_p);
2326 gcc_assert (!inv_p);
2328 else
2330 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2331 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2332 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2333 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2334 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2335 TYPE_SIZE_UNIT (vectype));
2338 align = TYPE_ALIGN_UNIT (vectype);
2339 if (aligned_access_p (dr))
2340 misalign = 0;
2341 else if (DR_MISALIGNMENT (dr) == -1)
2343 align = TYPE_ALIGN_UNIT (elem_type);
2344 misalign = 0;
2346 else
2347 misalign = DR_MISALIGNMENT (dr);
2348 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2349 misalign);
2350 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2351 misalign ? least_bit_hwi (misalign) : align);
2352 new_stmt
2353 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2354 ptr, vec_mask, vec_rhs);
2355 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2356 if (i == 0)
2357 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2358 else
2359 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2360 prev_stmt_info = vinfo_for_stmt (new_stmt);
2363 else
2365 tree vec_mask = NULL_TREE;
2366 prev_stmt_info = NULL;
2367 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2368 for (i = 0; i < ncopies; i++)
2370 unsigned align, misalign;
2372 if (i == 0)
2374 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2375 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2376 NULL_TREE, &dummy, gsi,
2377 &ptr_incr, false, &inv_p);
2378 gcc_assert (!inv_p);
2380 else
2382 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2383 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2384 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2385 TYPE_SIZE_UNIT (vectype));
2388 align = TYPE_ALIGN_UNIT (vectype);
2389 if (aligned_access_p (dr))
2390 misalign = 0;
2391 else if (DR_MISALIGNMENT (dr) == -1)
2393 align = TYPE_ALIGN_UNIT (elem_type);
2394 misalign = 0;
2396 else
2397 misalign = DR_MISALIGNMENT (dr);
2398 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2399 misalign);
2400 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2401 misalign ? least_bit_hwi (misalign) : align);
2402 new_stmt
2403 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2404 ptr, vec_mask);
2405 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2406 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2407 if (i == 0)
2408 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2409 else
2410 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2411 prev_stmt_info = vinfo_for_stmt (new_stmt);
2415 if (vls_type == VLS_LOAD)
2417 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2418 from the IL. */
2419 if (STMT_VINFO_RELATED_STMT (stmt_info))
2421 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2422 stmt_info = vinfo_for_stmt (stmt);
2424 tree lhs = gimple_call_lhs (stmt);
2425 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2426 set_vinfo_for_stmt (new_stmt, stmt_info);
2427 set_vinfo_for_stmt (stmt, NULL);
2428 STMT_VINFO_STMT (stmt_info) = new_stmt;
2429 gsi_replace (gsi, new_stmt, true);
2432 return true;
2435 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
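/* Worked example of the byte selector built below (a restatement, not
   new logic): for __builtin_bswap32 on a vector of four 32-bit elements,
   WORD_BYTES is 4 and the char selector is

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   i.e. the bytes are reversed within each element, implemented as a
   single VEC_PERM_EXPR on the operand viewed as a vector of chars.  */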
2437 static bool
2438 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2439 gimple **vec_stmt, slp_tree slp_node,
2440 tree vectype_in, enum vect_def_type *dt)
2442 tree op, vectype;
2443 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2444 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2445 unsigned ncopies, nunits;
2447 op = gimple_call_arg (stmt, 0);
2448 vectype = STMT_VINFO_VECTYPE (stmt_info);
2449 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2451 /* Multiple types in SLP are handled by creating the appropriate number of
2452 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2453 case of SLP. */
2454 if (slp_node)
2455 ncopies = 1;
2456 else
2457 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2459 gcc_assert (ncopies >= 1);
2461 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2462 if (! char_vectype)
2463 return false;
2465 unsigned char *elts
2466 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2467 unsigned char *elt = elts;
2468 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2469 for (unsigned i = 0; i < nunits; ++i)
2470 for (unsigned j = 0; j < word_bytes; ++j)
2471 *elt++ = (i + 1) * word_bytes - j - 1;
2473 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2474 return false;
2476 if (! vec_stmt)
2478 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2479 if (dump_enabled_p ())
2480 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2481 "\n");
2482 if (! PURE_SLP_STMT (stmt_info))
2484 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2485 1, vector_stmt, stmt_info, 0, vect_prologue);
2486 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2487 ncopies, vec_perm, stmt_info, 0, vect_body);
2489 return true;
2492 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2493 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2494 telts[i] = build_int_cst (char_type_node, elts[i]);
2495 tree bswap_vconst = build_vector (char_vectype, telts);
2497 /* Transform. */
2498 vec<tree> vec_oprnds = vNULL;
2499 gimple *new_stmt = NULL;
2500 stmt_vec_info prev_stmt_info = NULL;
2501 for (unsigned j = 0; j < ncopies; j++)
2503 /* Handle uses. */
2504 if (j == 0)
2505 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2506 else
2507 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2509 /* Arguments are ready. Create the new vector stmt. */
2510 unsigned i;
2511 tree vop;
2512 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2514 tree tem = make_ssa_name (char_vectype);
2515 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2516 char_vectype, vop));
2517 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2518 tree tem2 = make_ssa_name (char_vectype);
2519 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2520 tem, tem, bswap_vconst);
2521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2522 tem = make_ssa_name (vectype);
2523 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2524 vectype, tem2));
2525 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2526 if (slp_node)
2527 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2530 if (slp_node)
2531 continue;
2533 if (j == 0)
2534 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2535 else
2536 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2538 prev_stmt_info = vinfo_for_stmt (new_stmt);
2541 vec_oprnds.release ();
2542 return true;
2545 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2546 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2547 in a single step. On success, store the binary pack code in
2548 *CONVERT_CODE. */
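/* For example (assuming the target provides the usual pack operations,
   which is exactly what supportable_narrowing_operation checks):
   narrowing a vector of 32-bit integers to 16-bit integers can be done
   in one step with a pack code such as VEC_PACK_TRUNC_EXPR, whereas a
   64-bit to 16-bit narrowing needs several steps (MULTI_STEP_CVT != 0)
   and is therefore rejected here.  */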
2550 static bool
2551 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2552 tree_code *convert_code)
2554 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2555 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2556 return false;
2558 tree_code code;
2559 int multi_step_cvt = 0;
2560 auto_vec <tree, 8> interm_types;
2561 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2562 &code, &multi_step_cvt,
2563 &interm_types)
2564 || multi_step_cvt)
2565 return false;
2567 *convert_code = code;
2568 return true;
2571 /* Function vectorizable_call.
2573 Check if GS performs a function call that can be vectorized.
2574 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2575 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2576 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
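/* A hedged example of what this covers (hypothetical loop, illustration
   only): a call such as

     for (i = 0; i < n; i++)
       a[i] = sqrtf (b[i]);

   is vectorized either through an internal function, when the target
   supports a vector square root directly, or through a target-specific
   built-in returned by targetm.vectorize.builtin_vectorized_function.  */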
2578 static bool
2579 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2580 slp_tree slp_node)
2582 gcall *stmt;
2583 tree vec_dest;
2584 tree scalar_dest;
2585 tree op, type;
2586 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2587 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2588 tree vectype_out, vectype_in;
2589 int nunits_in;
2590 int nunits_out;
2591 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2592 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2593 vec_info *vinfo = stmt_info->vinfo;
2594 tree fndecl, new_temp, rhs_type;
2595 gimple *def_stmt;
2596 enum vect_def_type dt[3]
2597 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2598 gimple *new_stmt = NULL;
2599 int ncopies, j;
2600 vec<tree> vargs = vNULL;
2601 enum { NARROW, NONE, WIDEN } modifier;
2602 size_t i, nargs;
2603 tree lhs;
2605 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2606 return false;
2608 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2609 && ! vec_stmt)
2610 return false;
2612 /* Is GS a vectorizable call? */
2613 stmt = dyn_cast <gcall *> (gs);
2614 if (!stmt)
2615 return false;
2617 if (gimple_call_internal_p (stmt)
2618 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2619 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2620 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2621 slp_node);
2623 if (gimple_call_lhs (stmt) == NULL_TREE
2624 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2625 return false;
2627 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2629 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2631 /* Process function arguments. */
2632 rhs_type = NULL_TREE;
2633 vectype_in = NULL_TREE;
2634 nargs = gimple_call_num_args (stmt);
2636 /* Bail out if the function has more than three arguments; we do not have
2637 interesting builtin functions to vectorize with more than two arguments
2638 except for fma. A call with no arguments is not vectorizable either. */
2639 if (nargs == 0 || nargs > 3)
2640 return false;
2642 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2643 if (gimple_call_internal_p (stmt)
2644 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2646 nargs = 0;
2647 rhs_type = unsigned_type_node;
2650 for (i = 0; i < nargs; i++)
2652 tree opvectype;
2654 op = gimple_call_arg (stmt, i);
2656 /* We can only handle calls with arguments of the same type. */
2657 if (rhs_type
2658 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2662 "argument types differ.\n");
2663 return false;
2665 if (!rhs_type)
2666 rhs_type = TREE_TYPE (op);
2668 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2670 if (dump_enabled_p ())
2671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2672 "use not simple.\n");
2673 return false;
2676 if (!vectype_in)
2677 vectype_in = opvectype;
2678 else if (opvectype
2679 && opvectype != vectype_in)
2681 if (dump_enabled_p ())
2682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2683 "argument vector types differ.\n");
2684 return false;
2687 /* If all arguments are external or constant defs, use a vector type with
2688 the same size as the output vector type. */
2689 if (!vectype_in)
2690 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2691 if (vec_stmt)
2692 gcc_assert (vectype_in);
2693 if (!vectype_in)
2695 if (dump_enabled_p ())
2697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2698 "no vectype for scalar type ");
2699 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2700 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2703 return false;
2706 /* FORNOW */
2707 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2708 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2709 if (nunits_in == nunits_out / 2)
2710 modifier = NARROW;
2711 else if (nunits_out == nunits_in)
2712 modifier = NONE;
2713 else if (nunits_out == nunits_in / 2)
2714 modifier = WIDEN;
2715 else
2716 return false;
2718 /* We only handle functions that do not read or clobber memory. */
2719 if (gimple_vuse (stmt))
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2723 "function reads from or writes to memory.\n");
2724 return false;
2727 /* For now, we only vectorize functions if a target-specific builtin
2728 is available. TODO -- in some cases, it might be profitable to
2729 insert the calls for pieces of the vector, in order to be able
2730 to vectorize other operations in the loop. */
2731 fndecl = NULL_TREE;
2732 internal_fn ifn = IFN_LAST;
2733 combined_fn cfn = gimple_call_combined_fn (stmt);
2734 tree callee = gimple_call_fndecl (stmt);
2736 /* First try using an internal function. */
2737 tree_code convert_code = ERROR_MARK;
2738 if (cfn != CFN_LAST
2739 && (modifier == NONE
2740 || (modifier == NARROW
2741 && simple_integer_narrowing (vectype_out, vectype_in,
2742 &convert_code))))
2743 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2744 vectype_in);
2746 /* If that fails, try asking for a target-specific built-in function. */
2747 if (ifn == IFN_LAST)
2749 if (cfn != CFN_LAST)
2750 fndecl = targetm.vectorize.builtin_vectorized_function
2751 (cfn, vectype_out, vectype_in);
2752 else
2753 fndecl = targetm.vectorize.builtin_md_vectorized_function
2754 (callee, vectype_out, vectype_in);
2757 if (ifn == IFN_LAST && !fndecl)
2759 if (cfn == CFN_GOMP_SIMD_LANE
2760 && !slp_node
2761 && loop_vinfo
2762 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2763 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2764 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2765 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2767 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2768 { 0, 1, 2, ... vf - 1 } vector. */
2769 gcc_assert (nargs == 0);
2771 else if (modifier == NONE
2772 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2773 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2774 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2775 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2776 vectype_in, dt);
2777 else
2779 if (dump_enabled_p ())
2780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2781 "function is not vectorizable.\n");
2782 return false;
2786 if (slp_node)
2787 ncopies = 1;
2788 else if (modifier == NARROW && ifn == IFN_LAST)
2789 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2790 else
2791 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2793 /* Sanity check: make sure that at least one copy of the vectorized stmt
2794 needs to be generated. */
2795 gcc_assert (ncopies >= 1);
2797 if (!vec_stmt) /* transformation not required. */
2799 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2800 if (dump_enabled_p ())
2801 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2802 "\n");
2803 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2804 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2805 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2806 vec_promote_demote, stmt_info, 0, vect_body);
2808 return true;
2811 /** Transform. **/
2813 if (dump_enabled_p ())
2814 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2816 /* Handle def. */
2817 scalar_dest = gimple_call_lhs (stmt);
2818 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2820 prev_stmt_info = NULL;
2821 if (modifier == NONE || ifn != IFN_LAST)
2823 tree prev_res = NULL_TREE;
2824 for (j = 0; j < ncopies; ++j)
2826 /* Build argument list for the vectorized call. */
2827 if (j == 0)
2828 vargs.create (nargs);
2829 else
2830 vargs.truncate (0);
2832 if (slp_node)
2834 auto_vec<vec<tree> > vec_defs (nargs);
2835 vec<tree> vec_oprnds0;
2837 for (i = 0; i < nargs; i++)
2838 vargs.quick_push (gimple_call_arg (stmt, i));
2839 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2840 vec_oprnds0 = vec_defs[0];
2842 /* Arguments are ready. Create the new vector stmt. */
2843 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2845 size_t k;
2846 for (k = 0; k < nargs; k++)
2848 vec<tree> vec_oprndsk = vec_defs[k];
2849 vargs[k] = vec_oprndsk[i];
2851 if (modifier == NARROW)
2853 tree half_res = make_ssa_name (vectype_in);
2854 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2855 gimple_call_set_lhs (new_stmt, half_res);
2856 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2857 if ((i & 1) == 0)
2859 prev_res = half_res;
2860 continue;
2862 new_temp = make_ssa_name (vec_dest);
2863 new_stmt = gimple_build_assign (new_temp, convert_code,
2864 prev_res, half_res);
2866 else
2868 if (ifn != IFN_LAST)
2869 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2870 else
2871 new_stmt = gimple_build_call_vec (fndecl, vargs);
2872 new_temp = make_ssa_name (vec_dest, new_stmt);
2873 gimple_call_set_lhs (new_stmt, new_temp);
2875 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2876 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2879 for (i = 0; i < nargs; i++)
2881 vec<tree> vec_oprndsi = vec_defs[i];
2882 vec_oprndsi.release ();
2884 continue;
2887 for (i = 0; i < nargs; i++)
2889 op = gimple_call_arg (stmt, i);
2890 if (j == 0)
2891 vec_oprnd0
2892 = vect_get_vec_def_for_operand (op, stmt);
2893 else
2895 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2896 vec_oprnd0
2897 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2900 vargs.quick_push (vec_oprnd0);
2903 if (gimple_call_internal_p (stmt)
2904 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2906 tree *v = XALLOCAVEC (tree, nunits_out);
2907 int k;
2908 for (k = 0; k < nunits_out; ++k)
2909 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2910 tree cst = build_vector (vectype_out, v);
2911 tree new_var
2912 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2913 gimple *init_stmt = gimple_build_assign (new_var, cst);
2914 vect_init_vector_1 (stmt, init_stmt, NULL);
2915 new_temp = make_ssa_name (vec_dest);
2916 new_stmt = gimple_build_assign (new_temp, new_var);
2918 else if (modifier == NARROW)
2920 tree half_res = make_ssa_name (vectype_in);
2921 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2922 gimple_call_set_lhs (new_stmt, half_res);
2923 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2924 if ((j & 1) == 0)
2926 prev_res = half_res;
2927 continue;
2929 new_temp = make_ssa_name (vec_dest);
2930 new_stmt = gimple_build_assign (new_temp, convert_code,
2931 prev_res, half_res);
2933 else
2935 if (ifn != IFN_LAST)
2936 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2937 else
2938 new_stmt = gimple_build_call_vec (fndecl, vargs);
2939 new_temp = make_ssa_name (vec_dest, new_stmt);
2940 gimple_call_set_lhs (new_stmt, new_temp);
2942 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2944 if (j == (modifier == NARROW ? 1 : 0))
2945 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2946 else
2947 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2949 prev_stmt_info = vinfo_for_stmt (new_stmt);
2952 else if (modifier == NARROW)
2954 for (j = 0; j < ncopies; ++j)
2956 /* Build argument list for the vectorized call. */
2957 if (j == 0)
2958 vargs.create (nargs * 2);
2959 else
2960 vargs.truncate (0);
2962 if (slp_node)
2964 auto_vec<vec<tree> > vec_defs (nargs);
2965 vec<tree> vec_oprnds0;
2967 for (i = 0; i < nargs; i++)
2968 vargs.quick_push (gimple_call_arg (stmt, i));
2969 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2970 vec_oprnds0 = vec_defs[0];
2972 /* Arguments are ready. Create the new vector stmt. */
2973 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2975 size_t k;
2976 vargs.truncate (0);
2977 for (k = 0; k < nargs; k++)
2979 vec<tree> vec_oprndsk = vec_defs[k];
2980 vargs.quick_push (vec_oprndsk[i]);
2981 vargs.quick_push (vec_oprndsk[i + 1]);
2983 if (ifn != IFN_LAST)
2984 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2985 else
2986 new_stmt = gimple_build_call_vec (fndecl, vargs);
2987 new_temp = make_ssa_name (vec_dest, new_stmt);
2988 gimple_call_set_lhs (new_stmt, new_temp);
2989 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2990 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2993 for (i = 0; i < nargs; i++)
2995 vec<tree> vec_oprndsi = vec_defs[i];
2996 vec_oprndsi.release ();
2998 continue;
3001 for (i = 0; i < nargs; i++)
3003 op = gimple_call_arg (stmt, i);
3004 if (j == 0)
3006 vec_oprnd0
3007 = vect_get_vec_def_for_operand (op, stmt);
3008 vec_oprnd1
3009 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3011 else
3013 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3014 vec_oprnd0
3015 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3016 vec_oprnd1
3017 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3020 vargs.quick_push (vec_oprnd0);
3021 vargs.quick_push (vec_oprnd1);
3024 new_stmt = gimple_build_call_vec (fndecl, vargs);
3025 new_temp = make_ssa_name (vec_dest, new_stmt);
3026 gimple_call_set_lhs (new_stmt, new_temp);
3027 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3029 if (j == 0)
3030 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3031 else
3032 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3034 prev_stmt_info = vinfo_for_stmt (new_stmt);
3037 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3039 else
3040 /* No current target implements this case. */
3041 return false;
3043 vargs.release ();
3045 /* The call in STMT might prevent it from being removed in dce.
3046 However, we cannot remove it here, due to the way the ssa name
3047 it defines is mapped to the new definition. So just replace the
3048 rhs of the statement with something harmless. */
3050 if (slp_node)
3051 return true;
3053 type = TREE_TYPE (scalar_dest);
3054 if (is_pattern_stmt_p (stmt_info))
3055 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3056 else
3057 lhs = gimple_call_lhs (stmt);
3059 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3060 set_vinfo_for_stmt (new_stmt, stmt_info);
3061 set_vinfo_for_stmt (stmt, NULL);
3062 STMT_VINFO_STMT (stmt_info) = new_stmt;
3063 gsi_replace (gsi, new_stmt, false);
3065 return true;
3069 struct simd_call_arg_info
3071 tree vectype;
3072 tree op;
3073 enum vect_def_type dt;
3074 HOST_WIDE_INT linear_step;
3075 unsigned int align;
3076 bool simd_lane_linear;
3079 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3080 is linear within a simd lane (but not within the whole loop), note it in
3081 *ARGINFO. */
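/* A hedged example of the pattern recognized here (simplified GIMPLE,
   illustration only):

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &a + _3;

   p_4 is not linear in the loop as a whole, but within one simd lane it
   advances by 4 per lane, so *ARGINFO records &a as the base and 4 as
   the linear step.  */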
3083 static void
3084 vect_simd_lane_linear (tree op, struct loop *loop,
3085 struct simd_call_arg_info *arginfo)
3087 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3089 if (!is_gimple_assign (def_stmt)
3090 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3091 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3092 return;
3094 tree base = gimple_assign_rhs1 (def_stmt);
3095 HOST_WIDE_INT linear_step = 0;
3096 tree v = gimple_assign_rhs2 (def_stmt);
3097 while (TREE_CODE (v) == SSA_NAME)
3099 tree t;
3100 def_stmt = SSA_NAME_DEF_STMT (v);
3101 if (is_gimple_assign (def_stmt))
3102 switch (gimple_assign_rhs_code (def_stmt))
3104 case PLUS_EXPR:
3105 t = gimple_assign_rhs2 (def_stmt);
3106 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3107 return;
3108 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3109 v = gimple_assign_rhs1 (def_stmt);
3110 continue;
3111 case MULT_EXPR:
3112 t = gimple_assign_rhs2 (def_stmt);
3113 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3114 return;
3115 linear_step = tree_to_shwi (t);
3116 v = gimple_assign_rhs1 (def_stmt);
3117 continue;
3118 CASE_CONVERT:
3119 t = gimple_assign_rhs1 (def_stmt);
3120 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3121 || (TYPE_PRECISION (TREE_TYPE (v))
3122 < TYPE_PRECISION (TREE_TYPE (t))))
3123 return;
3124 if (!linear_step)
3125 linear_step = 1;
3126 v = t;
3127 continue;
3128 default:
3129 return;
3131 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3132 && loop->simduid
3133 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3134 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3135 == loop->simduid))
3137 if (!linear_step)
3138 linear_step = 1;
3139 arginfo->linear_step = linear_step;
3140 arginfo->op = base;
3141 arginfo->simd_lane_linear = true;
3142 return;
3147 /* Function vectorizable_simd_clone_call.
3149 Check if STMT performs a function call that can be vectorized
3150 by calling a simd clone of the function.
3151 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3152 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3153 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
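/* A hedged example (hypothetical declaration, illustration only): given

     #pragma omp declare simd notinbranch
     int foo (int x);

   a call to foo inside a vectorizable loop is replaced by a call to one
   of the simd clones recorded in foo's cgraph node; the clone is chosen
   by the badness computation below, and the scalar arguments are
   expanded into the vector, uniform or linear forms that clone
   expects.  */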
3155 static bool
3156 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3157 gimple **vec_stmt, slp_tree slp_node)
3159 tree vec_dest;
3160 tree scalar_dest;
3161 tree op, type;
3162 tree vec_oprnd0 = NULL_TREE;
3163 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3164 tree vectype;
3165 unsigned int nunits;
3166 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3167 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3168 vec_info *vinfo = stmt_info->vinfo;
3169 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3170 tree fndecl, new_temp;
3171 gimple *def_stmt;
3172 gimple *new_stmt = NULL;
3173 int ncopies, j;
3174 auto_vec<simd_call_arg_info> arginfo;
3175 vec<tree> vargs = vNULL;
3176 size_t i, nargs;
3177 tree lhs, rtype, ratype;
3178 vec<constructor_elt, va_gc> *ret_ctor_elts;
3180 /* Is STMT a vectorizable call? */
3181 if (!is_gimple_call (stmt))
3182 return false;
3184 fndecl = gimple_call_fndecl (stmt);
3185 if (fndecl == NULL_TREE)
3186 return false;
3188 struct cgraph_node *node = cgraph_node::get (fndecl);
3189 if (node == NULL || node->simd_clones == NULL)
3190 return false;
3192 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3193 return false;
3195 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3196 && ! vec_stmt)
3197 return false;
3199 if (gimple_call_lhs (stmt)
3200 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3201 return false;
3203 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3205 vectype = STMT_VINFO_VECTYPE (stmt_info);
3207 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3208 return false;
3210 /* FORNOW */
3211 if (slp_node)
3212 return false;
3214 /* Process function arguments. */
3215 nargs = gimple_call_num_args (stmt);
3217 /* Bail out if the function has zero arguments. */
3218 if (nargs == 0)
3219 return false;
3221 arginfo.reserve (nargs, true);
3223 for (i = 0; i < nargs; i++)
3225 simd_call_arg_info thisarginfo;
3226 affine_iv iv;
3228 thisarginfo.linear_step = 0;
3229 thisarginfo.align = 0;
3230 thisarginfo.op = NULL_TREE;
3231 thisarginfo.simd_lane_linear = false;
3233 op = gimple_call_arg (stmt, i);
3234 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3235 &thisarginfo.vectype)
3236 || thisarginfo.dt == vect_uninitialized_def)
3238 if (dump_enabled_p ())
3239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3240 "use not simple.\n");
3241 return false;
3244 if (thisarginfo.dt == vect_constant_def
3245 || thisarginfo.dt == vect_external_def)
3246 gcc_assert (thisarginfo.vectype == NULL_TREE);
3247 else
3248 gcc_assert (thisarginfo.vectype != NULL_TREE);
3250 /* For linear arguments, the analyze phase should have saved
3251 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3252 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3253 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3255 gcc_assert (vec_stmt);
3256 thisarginfo.linear_step
3257 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3258 thisarginfo.op
3259 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3260 thisarginfo.simd_lane_linear
3261 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3262 == boolean_true_node);
3263 /* If the loop has been peeled for alignment, we need to adjust it. */
3264 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3265 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3266 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3268 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3269 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3270 tree opt = TREE_TYPE (thisarginfo.op);
3271 bias = fold_convert (TREE_TYPE (step), bias);
3272 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3273 thisarginfo.op
3274 = fold_build2 (POINTER_TYPE_P (opt)
3275 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3276 thisarginfo.op, bias);
3279 else if (!vec_stmt
3280 && thisarginfo.dt != vect_constant_def
3281 && thisarginfo.dt != vect_external_def
3282 && loop_vinfo
3283 && TREE_CODE (op) == SSA_NAME
3284 && simple_iv (loop, loop_containing_stmt (stmt), op,
3285 &iv, false)
3286 && tree_fits_shwi_p (iv.step))
3288 thisarginfo.linear_step = tree_to_shwi (iv.step);
3289 thisarginfo.op = iv.base;
3291 else if ((thisarginfo.dt == vect_constant_def
3292 || thisarginfo.dt == vect_external_def)
3293 && POINTER_TYPE_P (TREE_TYPE (op)))
3294 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3295 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3296 linear too. */
3297 if (POINTER_TYPE_P (TREE_TYPE (op))
3298 && !thisarginfo.linear_step
3299 && !vec_stmt
3300 && thisarginfo.dt != vect_constant_def
3301 && thisarginfo.dt != vect_external_def
3302 && loop_vinfo
3303 && !slp_node
3304 && TREE_CODE (op) == SSA_NAME)
3305 vect_simd_lane_linear (op, loop, &thisarginfo);
3307 arginfo.quick_push (thisarginfo);
3310 unsigned int badness = 0;
3311 struct cgraph_node *bestn = NULL;
3312 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3313 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3314 else
3315 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3316 n = n->simdclone->next_clone)
3318 unsigned int this_badness = 0;
3319 if (n->simdclone->simdlen
3320 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3321 || n->simdclone->nargs != nargs)
3322 continue;
3323 if (n->simdclone->simdlen
3324 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3325 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3326 - exact_log2 (n->simdclone->simdlen)) * 1024;
3327 if (n->simdclone->inbranch)
3328 this_badness += 2048;
3329 int target_badness = targetm.simd_clone.usable (n);
3330 if (target_badness < 0)
3331 continue;
3332 this_badness += target_badness * 512;
3333 /* FORNOW: Have to add code to add the mask argument. */
3334 if (n->simdclone->inbranch)
3335 continue;
3336 for (i = 0; i < nargs; i++)
3338 switch (n->simdclone->args[i].arg_type)
3340 case SIMD_CLONE_ARG_TYPE_VECTOR:
3341 if (!useless_type_conversion_p
3342 (n->simdclone->args[i].orig_type,
3343 TREE_TYPE (gimple_call_arg (stmt, i))))
3344 i = -1;
3345 else if (arginfo[i].dt == vect_constant_def
3346 || arginfo[i].dt == vect_external_def
3347 || arginfo[i].linear_step)
3348 this_badness += 64;
3349 break;
3350 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3351 if (arginfo[i].dt != vect_constant_def
3352 && arginfo[i].dt != vect_external_def)
3353 i = -1;
3354 break;
3355 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3356 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3357 if (arginfo[i].dt == vect_constant_def
3358 || arginfo[i].dt == vect_external_def
3359 || (arginfo[i].linear_step
3360 != n->simdclone->args[i].linear_step))
3361 i = -1;
3362 break;
3363 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3364 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3365 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3366 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3367 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3368 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3369 /* FORNOW */
3370 i = -1;
3371 break;
3372 case SIMD_CLONE_ARG_TYPE_MASK:
3373 gcc_unreachable ();
3375 if (i == (size_t) -1)
3376 break;
3377 if (n->simdclone->args[i].alignment > arginfo[i].align)
3379 i = -1;
3380 break;
3382 if (arginfo[i].align)
3383 this_badness += (exact_log2 (arginfo[i].align)
3384 - exact_log2 (n->simdclone->args[i].alignment));
3386 if (i == (size_t) -1)
3387 continue;
3388 if (bestn == NULL || this_badness < badness)
3390 bestn = n;
3391 badness = this_badness;
3395 if (bestn == NULL)
3396 return false;
3398 for (i = 0; i < nargs; i++)
3399 if ((arginfo[i].dt == vect_constant_def
3400 || arginfo[i].dt == vect_external_def)
3401 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3403 arginfo[i].vectype
3404 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3405 i)));
3406 if (arginfo[i].vectype == NULL
3407 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3408 > bestn->simdclone->simdlen))
3409 return false;
3412 fndecl = bestn->decl;
3413 nunits = bestn->simdclone->simdlen;
3414 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3416 /* If the function isn't const, only allow it in simd loops where the user
3417 has asserted that at least nunits consecutive iterations can be
3418 performed using SIMD instructions. */
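/* For example (OpenMP source, illustration only): "#pragma omp simd
   safelen(8)" sets loop->safelen to 8, so a non-const clone whose
   simdlen is at most 8 is still acceptable here.  */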
3419 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3420 && gimple_vuse (stmt))
3421 return false;
3423 /* Sanity check: make sure that at least one copy of the vectorized stmt
3424 needs to be generated. */
3425 gcc_assert (ncopies >= 1);
3427 if (!vec_stmt) /* transformation not required. */
3429 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3430 for (i = 0; i < nargs; i++)
3431 if ((bestn->simdclone->args[i].arg_type
3432 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3433 || (bestn->simdclone->args[i].arg_type
3434 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3436 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3437 + 1);
3438 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3439 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3440 ? size_type_node : TREE_TYPE (arginfo[i].op);
3441 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3442 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3443 tree sll = arginfo[i].simd_lane_linear
3444 ? boolean_true_node : boolean_false_node;
3445 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3447 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3448 if (dump_enabled_p ())
3449 dump_printf_loc (MSG_NOTE, vect_location,
3450 "=== vectorizable_simd_clone_call ===\n");
3451 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3452 return true;
3455 /** Transform. **/
3457 if (dump_enabled_p ())
3458 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3460 /* Handle def. */
3461 scalar_dest = gimple_call_lhs (stmt);
3462 vec_dest = NULL_TREE;
3463 rtype = NULL_TREE;
3464 ratype = NULL_TREE;
3465 if (scalar_dest)
3467 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3468 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3469 if (TREE_CODE (rtype) == ARRAY_TYPE)
3471 ratype = rtype;
3472 rtype = TREE_TYPE (ratype);
3476 prev_stmt_info = NULL;
3477 for (j = 0; j < ncopies; ++j)
3479 /* Build argument list for the vectorized call. */
3480 if (j == 0)
3481 vargs.create (nargs);
3482 else
3483 vargs.truncate (0);
3485 for (i = 0; i < nargs; i++)
3487 unsigned int k, l, m, o;
3488 tree atype;
3489 op = gimple_call_arg (stmt, i);
3490 switch (bestn->simdclone->args[i].arg_type)
3492 case SIMD_CLONE_ARG_TYPE_VECTOR:
3493 atype = bestn->simdclone->args[i].vector_type;
3494 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3495 for (m = j * o; m < (j + 1) * o; m++)
3497 if (TYPE_VECTOR_SUBPARTS (atype)
3498 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3500 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3501 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3502 / TYPE_VECTOR_SUBPARTS (atype));
3503 gcc_assert ((k & (k - 1)) == 0);
3504 if (m == 0)
3505 vec_oprnd0
3506 = vect_get_vec_def_for_operand (op, stmt);
3507 else
3509 vec_oprnd0 = arginfo[i].op;
3510 if ((m & (k - 1)) == 0)
3511 vec_oprnd0
3512 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3513 vec_oprnd0);
3515 arginfo[i].op = vec_oprnd0;
3516 vec_oprnd0
3517 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3518 size_int (prec),
3519 bitsize_int ((m & (k - 1)) * prec));
3520 new_stmt
3521 = gimple_build_assign (make_ssa_name (atype),
3522 vec_oprnd0);
3523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3524 vargs.safe_push (gimple_assign_lhs (new_stmt));
3526 else
3528 k = (TYPE_VECTOR_SUBPARTS (atype)
3529 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3530 gcc_assert ((k & (k - 1)) == 0);
3531 vec<constructor_elt, va_gc> *ctor_elts;
3532 if (k != 1)
3533 vec_alloc (ctor_elts, k);
3534 else
3535 ctor_elts = NULL;
3536 for (l = 0; l < k; l++)
3538 if (m == 0 && l == 0)
3539 vec_oprnd0
3540 = vect_get_vec_def_for_operand (op, stmt);
3541 else
3542 vec_oprnd0
3543 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3544 arginfo[i].op);
3545 arginfo[i].op = vec_oprnd0;
3546 if (k == 1)
3547 break;
3548 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3549 vec_oprnd0);
3551 if (k == 1)
3552 vargs.safe_push (vec_oprnd0);
3553 else
3555 vec_oprnd0 = build_constructor (atype, ctor_elts);
3556 new_stmt
3557 = gimple_build_assign (make_ssa_name (atype),
3558 vec_oprnd0);
3559 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3560 vargs.safe_push (gimple_assign_lhs (new_stmt));
3564 break;
3565 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3566 vargs.safe_push (op);
3567 break;
3568 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3569 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3570 if (j == 0)
3572 gimple_seq stmts;
3573 arginfo[i].op
3574 = force_gimple_operand (arginfo[i].op, &stmts, true,
3575 NULL_TREE);
3576 if (stmts != NULL)
3578 basic_block new_bb;
3579 edge pe = loop_preheader_edge (loop);
3580 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3581 gcc_assert (!new_bb);
3583 if (arginfo[i].simd_lane_linear)
3585 vargs.safe_push (arginfo[i].op);
3586 break;
3588 tree phi_res = copy_ssa_name (op);
3589 gphi *new_phi = create_phi_node (phi_res, loop->header);
3590 set_vinfo_for_stmt (new_phi,
3591 new_stmt_vec_info (new_phi, loop_vinfo));
3592 add_phi_arg (new_phi, arginfo[i].op,
3593 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3594 enum tree_code code
3595 = POINTER_TYPE_P (TREE_TYPE (op))
3596 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3597 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3598 ? sizetype : TREE_TYPE (op);
3599 widest_int cst
3600 = wi::mul (bestn->simdclone->args[i].linear_step,
3601 ncopies * nunits);
3602 tree tcst = wide_int_to_tree (type, cst);
3603 tree phi_arg = copy_ssa_name (op);
3604 new_stmt
3605 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3606 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3607 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3608 set_vinfo_for_stmt (new_stmt,
3609 new_stmt_vec_info (new_stmt, loop_vinfo));
3610 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3611 UNKNOWN_LOCATION);
3612 arginfo[i].op = phi_res;
3613 vargs.safe_push (phi_res);
3615 else
3617 enum tree_code code
3618 = POINTER_TYPE_P (TREE_TYPE (op))
3619 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3620 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3621 ? sizetype : TREE_TYPE (op);
3622 widest_int cst
3623 = wi::mul (bestn->simdclone->args[i].linear_step,
3624 j * nunits);
3625 tree tcst = wide_int_to_tree (type, cst);
3626 new_temp = make_ssa_name (TREE_TYPE (op));
3627 new_stmt = gimple_build_assign (new_temp, code,
3628 arginfo[i].op, tcst);
3629 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3630 vargs.safe_push (new_temp);
3632 break;
3633 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3634 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3635 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3636 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3637 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3638 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3639 default:
3640 gcc_unreachable ();
3644 new_stmt = gimple_build_call_vec (fndecl, vargs);
3645 if (vec_dest)
3647 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3648 if (ratype)
3649 new_temp = create_tmp_var (ratype);
3650 else if (TYPE_VECTOR_SUBPARTS (vectype)
3651 == TYPE_VECTOR_SUBPARTS (rtype))
3652 new_temp = make_ssa_name (vec_dest, new_stmt);
3653 else
3654 new_temp = make_ssa_name (rtype, new_stmt);
3655 gimple_call_set_lhs (new_stmt, new_temp);
3657 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3659 if (vec_dest)
3661 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3663 unsigned int k, l;
3664 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3665 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3666 gcc_assert ((k & (k - 1)) == 0);
3667 for (l = 0; l < k; l++)
3669 tree t;
3670 if (ratype)
3672 t = build_fold_addr_expr (new_temp);
3673 t = build2 (MEM_REF, vectype, t,
3674 build_int_cst (TREE_TYPE (t),
3675 l * prec / BITS_PER_UNIT));
3677 else
3678 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3679 size_int (prec), bitsize_int (l * prec));
3680 new_stmt
3681 = gimple_build_assign (make_ssa_name (vectype), t);
3682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3683 if (j == 0 && l == 0)
3684 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3685 else
3686 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3688 prev_stmt_info = vinfo_for_stmt (new_stmt);
3691 if (ratype)
3693 tree clobber = build_constructor (ratype, NULL);
3694 TREE_THIS_VOLATILE (clobber) = 1;
3695 new_stmt = gimple_build_assign (new_temp, clobber);
3696 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3698 continue;
3700 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3702 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3703 / TYPE_VECTOR_SUBPARTS (rtype));
3704 gcc_assert ((k & (k - 1)) == 0);
3705 if ((j & (k - 1)) == 0)
3706 vec_alloc (ret_ctor_elts, k);
3707 if (ratype)
3709 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3710 for (m = 0; m < o; m++)
3712 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3713 size_int (m), NULL_TREE, NULL_TREE);
3714 new_stmt
3715 = gimple_build_assign (make_ssa_name (rtype), tem);
3716 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3717 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3718 gimple_assign_lhs (new_stmt));
3720 tree clobber = build_constructor (ratype, NULL);
3721 TREE_THIS_VOLATILE (clobber) = 1;
3722 new_stmt = gimple_build_assign (new_temp, clobber);
3723 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3725 else
3726 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3727 if ((j & (k - 1)) != k - 1)
3728 continue;
3729 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3730 new_stmt
3731 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3732 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3734 if ((unsigned) j == k - 1)
3735 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3736 else
3737 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3739 prev_stmt_info = vinfo_for_stmt (new_stmt);
3740 continue;
3742 else if (ratype)
3744 tree t = build_fold_addr_expr (new_temp);
3745 t = build2 (MEM_REF, vectype, t,
3746 build_int_cst (TREE_TYPE (t), 0));
3747 new_stmt
3748 = gimple_build_assign (make_ssa_name (vec_dest), t);
3749 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3750 tree clobber = build_constructor (ratype, NULL);
3751 TREE_THIS_VOLATILE (clobber) = 1;
3752 vect_finish_stmt_generation (stmt,
3753 gimple_build_assign (new_temp,
3754 clobber), gsi);
3758 if (j == 0)
3759 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3760 else
3761 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3763 prev_stmt_info = vinfo_for_stmt (new_stmt);
3766 vargs.release ();
3768 /* The call in STMT might prevent it from being removed in DCE.
3769 We cannot remove it here, however, due to the way the SSA name
3770 it defines is mapped to the new definition. So just replace the
3771 rhs of the statement with something harmless. */
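/* For example: if the original call was "res_1 = foo (x_2);", the
   statement is typically left behind as "res_1 = 0;" (or as a GIMPLE_NOP
   when there is no scalar lhs), which DCE can then clean up. */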
3773 if (slp_node)
3774 return true;
3776 if (scalar_dest)
3778 type = TREE_TYPE (scalar_dest);
3779 if (is_pattern_stmt_p (stmt_info))
3780 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3781 else
3782 lhs = gimple_call_lhs (stmt);
3783 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3785 else
3786 new_stmt = gimple_build_nop ();
3787 set_vinfo_for_stmt (new_stmt, stmt_info);
3788 set_vinfo_for_stmt (stmt, NULL);
3789 STMT_VINFO_STMT (stmt_info) = new_stmt;
3790 gsi_replace (gsi, new_stmt, true);
3791 unlink_stmt_vdef (stmt);
3793 return true;
3797 /* Function vect_gen_widened_results_half
3799 Create a vector stmt whose code, operand type, and result variable are
3800 CODE, OP_TYPE, and VEC_DEST, and whose arguments are VEC_OPRND0 and
3801 VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3802 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3803 needs to be created (DECL is a function-decl of a target-builtin).
3804 STMT is the original scalar stmt that we are vectorizing. */
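/* Illustrative example: when a V8HI multiplication is widened to V4SI
   results, the caller typically invokes this function twice, roughly

     vect_lo_1 = VEC_WIDEN_MULT_LO_EXPR <vx_1, vy_1>;
     vect_hi_1 = VEC_WIDEN_MULT_HI_EXPR <vx_1, vy_1>;

   one call per half of the widened result (or two calls to a target
   builtin when CODE is CALL_EXPR). */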
3806 static gimple *
3807 vect_gen_widened_results_half (enum tree_code code,
3808 tree decl,
3809 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3810 tree vec_dest, gimple_stmt_iterator *gsi,
3811 gimple *stmt)
3813 gimple *new_stmt;
3814 tree new_temp;
3816 /* Generate half of the widened result: */
3817 if (code == CALL_EXPR)
3819 /* Target specific support */
3820 if (op_type == binary_op)
3821 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3822 else
3823 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3824 new_temp = make_ssa_name (vec_dest, new_stmt);
3825 gimple_call_set_lhs (new_stmt, new_temp);
3827 else
3829 /* Generic support */
3830 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3831 if (op_type != binary_op)
3832 vec_oprnd1 = NULL;
3833 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3834 new_temp = make_ssa_name (vec_dest, new_stmt);
3835 gimple_assign_set_lhs (new_stmt, new_temp);
3837 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3839 return new_stmt;
3843 /* Get vectorized definitions for loop-based vectorization. For the first
3844 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3845 the scalar operand), and for the rest we get a copy with
3846 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3847 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3848 The vectors are collected into VEC_OPRNDS. */
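/* For instance, with MULTI_STEP_CVT == 1 this pushes two vector defs and
   the recursive call pushes two more, so VEC_OPRNDS ends up with the four
   source vectors that one copy of a two-step narrowing consumes. */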
3850 static void
3851 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3852 vec<tree> *vec_oprnds, int multi_step_cvt)
3854 tree vec_oprnd;
3856 /* Get first vector operand. */
3857 /* All the vector operands except the very first one (that is the scalar oprnd)
3858 are stmt copies. */
3859 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3860 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3861 else
3862 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3864 vec_oprnds->quick_push (vec_oprnd);
3866 /* Get second vector operand. */
3867 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3868 vec_oprnds->quick_push (vec_oprnd);
3870 *oprnd = vec_oprnd;
3872 /* For conversion in multiple steps, continue to get operands
3873 recursively. */
3874 if (multi_step_cvt)
3875 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3879 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3880 For multi-step conversions store the resulting vectors and call the function
3881 recursively. */
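/* E.g. narrowing int to short pairs two V4SI operands into one V8HI via
   VEC_PACK_TRUNC_EXPR; for a two-step int -> char narrowing the V8HI
   results are stored back into VEC_OPRNDS and packed once more by the
   recursive call. */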
3883 static void
3884 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3885 int multi_step_cvt, gimple *stmt,
3886 vec<tree> vec_dsts,
3887 gimple_stmt_iterator *gsi,
3888 slp_tree slp_node, enum tree_code code,
3889 stmt_vec_info *prev_stmt_info)
3891 unsigned int i;
3892 tree vop0, vop1, new_tmp, vec_dest;
3893 gimple *new_stmt;
3894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3896 vec_dest = vec_dsts.pop ();
3898 for (i = 0; i < vec_oprnds->length (); i += 2)
3900 /* Create demotion operation. */
3901 vop0 = (*vec_oprnds)[i];
3902 vop1 = (*vec_oprnds)[i + 1];
3903 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3904 new_tmp = make_ssa_name (vec_dest, new_stmt);
3905 gimple_assign_set_lhs (new_stmt, new_tmp);
3906 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3908 if (multi_step_cvt)
3909 /* Store the resulting vector for next recursive call. */
3910 (*vec_oprnds)[i/2] = new_tmp;
3911 else
3913 /* This is the last step of the conversion sequence. Store the
3914 vectors in SLP_NODE or in the vector info of the scalar statement
3915 (or in the STMT_VINFO_RELATED_STMT chain). */
3916 if (slp_node)
3917 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3918 else
3920 if (!*prev_stmt_info)
3921 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3922 else
3923 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3925 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3930 /* For multi-step demotion operations we first generate demotion operations
3931 from the source type to the intermediate types, and then combine the
3932 results (stored in VEC_OPRNDS) with a demotion operation to the
3933 destination type. */
3934 if (multi_step_cvt)
3936 /* At each level of recursion we have half of the operands we had at the
3937 previous level. */
3938 vec_oprnds->truncate ((i+1)/2);
3939 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3940 stmt, vec_dsts, gsi, slp_node,
3941 VEC_PACK_TRUNC_EXPR,
3942 prev_stmt_info);
3945 vec_dsts.quick_push (vec_dest);
3949 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3950 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3951 the resulting vectors and call the function recursively. */
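/* E.g. promoting V8HI operands to V4SI results produces two output
   vectors per input vector (the LO and HI halves), so VEC_OPRNDS0
   roughly doubles in length at each step. */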
3953 static void
3954 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3955 vec<tree> *vec_oprnds1,
3956 gimple *stmt, tree vec_dest,
3957 gimple_stmt_iterator *gsi,
3958 enum tree_code code1,
3959 enum tree_code code2, tree decl1,
3960 tree decl2, int op_type)
3962 int i;
3963 tree vop0, vop1, new_tmp1, new_tmp2;
3964 gimple *new_stmt1, *new_stmt2;
3965 vec<tree> vec_tmp = vNULL;
3967 vec_tmp.create (vec_oprnds0->length () * 2);
3968 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3970 if (op_type == binary_op)
3971 vop1 = (*vec_oprnds1)[i];
3972 else
3973 vop1 = NULL_TREE;
3975 /* Generate the two halves of promotion operation. */
3976 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3977 op_type, vec_dest, gsi, stmt);
3978 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3979 op_type, vec_dest, gsi, stmt);
3980 if (is_gimple_call (new_stmt1))
3982 new_tmp1 = gimple_call_lhs (new_stmt1);
3983 new_tmp2 = gimple_call_lhs (new_stmt2);
3985 else
3987 new_tmp1 = gimple_assign_lhs (new_stmt1);
3988 new_tmp2 = gimple_assign_lhs (new_stmt2);
3991 /* Store the results for the next step. */
3992 vec_tmp.quick_push (new_tmp1);
3993 vec_tmp.quick_push (new_tmp2);
3996 vec_oprnds0->release ();
3997 *vec_oprnds0 = vec_tmp;
4001 /* Check if STMT performs a conversion operation that can be vectorized.
4002 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4003 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4004 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
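/* Illustrative examples with 128-bit vectors: short -> int is a WIDEN
   conversion (one V8HI feeds two V4SI), int -> short is a NARROW
   conversion (two V4SI feed one V8HI), and int -> float is handled as
   NONE (V4SI -> V4SF, same number of elements). */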
4006 static bool
4007 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4008 gimple **vec_stmt, slp_tree slp_node)
4010 tree vec_dest;
4011 tree scalar_dest;
4012 tree op0, op1 = NULL_TREE;
4013 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4015 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4016 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4017 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4018 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4019 tree new_temp;
4020 gimple *def_stmt;
4021 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4022 gimple *new_stmt = NULL;
4023 stmt_vec_info prev_stmt_info;
4024 int nunits_in;
4025 int nunits_out;
4026 tree vectype_out, vectype_in;
4027 int ncopies, i, j;
4028 tree lhs_type, rhs_type;
4029 enum { NARROW, NONE, WIDEN } modifier;
4030 vec<tree> vec_oprnds0 = vNULL;
4031 vec<tree> vec_oprnds1 = vNULL;
4032 tree vop0;
4033 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4034 vec_info *vinfo = stmt_info->vinfo;
4035 int multi_step_cvt = 0;
4036 vec<tree> interm_types = vNULL;
4037 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4038 int op_type;
4039 machine_mode rhs_mode;
4040 unsigned short fltsz;
4042 /* Is STMT a vectorizable conversion? */
4044 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4045 return false;
4047 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4048 && ! vec_stmt)
4049 return false;
4051 if (!is_gimple_assign (stmt))
4052 return false;
4054 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4055 return false;
4057 code = gimple_assign_rhs_code (stmt);
4058 if (!CONVERT_EXPR_CODE_P (code)
4059 && code != FIX_TRUNC_EXPR
4060 && code != FLOAT_EXPR
4061 && code != WIDEN_MULT_EXPR
4062 && code != WIDEN_LSHIFT_EXPR)
4063 return false;
4065 op_type = TREE_CODE_LENGTH (code);
4067 /* Check types of lhs and rhs. */
4068 scalar_dest = gimple_assign_lhs (stmt);
4069 lhs_type = TREE_TYPE (scalar_dest);
4070 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4072 op0 = gimple_assign_rhs1 (stmt);
4073 rhs_type = TREE_TYPE (op0);
4075 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4076 && !((INTEGRAL_TYPE_P (lhs_type)
4077 && INTEGRAL_TYPE_P (rhs_type))
4078 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4079 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4080 return false;
4082 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4083 && ((INTEGRAL_TYPE_P (lhs_type)
4084 && (TYPE_PRECISION (lhs_type)
4085 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4086 || (INTEGRAL_TYPE_P (rhs_type)
4087 && (TYPE_PRECISION (rhs_type)
4088 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4090 if (dump_enabled_p ())
4091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4092 "type conversion to/from bit-precision unsupported."
4093 "\n");
4094 return false;
4097 /* Check the operands of the operation. */
4098 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4100 if (dump_enabled_p ())
4101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4102 "use not simple.\n");
4103 return false;
4105 if (op_type == binary_op)
4107 bool ok;
4109 op1 = gimple_assign_rhs2 (stmt);
4110 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4111 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4112 OP1. */
4113 if (CONSTANT_CLASS_P (op0))
4114 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4115 else
4116 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4118 if (!ok)
4120 if (dump_enabled_p ())
4121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4122 "use not simple.\n");
4123 return false;
4127 /* If op0 is an external or constant def, use a vector type of
4128 the same size as the output vector type. */
4129 if (!vectype_in)
4130 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4131 if (vec_stmt)
4132 gcc_assert (vectype_in);
4133 if (!vectype_in)
4135 if (dump_enabled_p ())
4137 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4138 "no vectype for scalar type ");
4139 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4140 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4143 return false;
4146 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4147 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4149 if (dump_enabled_p ())
4151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4152 "can't convert between boolean and non "
4153 "boolean vectors");
4154 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4155 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4158 return false;
4161 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4162 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4163 if (nunits_in < nunits_out)
4164 modifier = NARROW;
4165 else if (nunits_out == nunits_in)
4166 modifier = NONE;
4167 else
4168 modifier = WIDEN;
4170 /* Multiple types in SLP are handled by creating the appropriate number of
4171 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4172 case of SLP. */
4173 if (slp_node)
4174 ncopies = 1;
4175 else if (modifier == NARROW)
4176 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4177 else
4178 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4180 /* Sanity check: make sure that at least one copy of the vectorized stmt
4181 needs to be generated. */
4182 gcc_assert (ncopies >= 1);
4184 /* Supportable by target? */
4185 switch (modifier)
4187 case NONE:
4188 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4189 return false;
4190 if (supportable_convert_operation (code, vectype_out, vectype_in,
4191 &decl1, &code1))
4192 break;
4193 /* FALLTHRU */
4194 unsupported:
4195 if (dump_enabled_p ())
4196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4197 "conversion not supported by target.\n");
4198 return false;
4200 case WIDEN:
4201 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4202 &code1, &code2, &multi_step_cvt,
4203 &interm_types))
4205 /* Binary widening operation can only be supported directly by the
4206 architecture. */
4207 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4208 break;
4211 if (code != FLOAT_EXPR
4212 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4213 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4214 goto unsupported;
4216 rhs_mode = TYPE_MODE (rhs_type);
4217 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4218 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4219 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4220 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4222 cvt_type
4223 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4224 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4225 if (cvt_type == NULL_TREE)
4226 goto unsupported;
4228 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4230 if (!supportable_convert_operation (code, vectype_out,
4231 cvt_type, &decl1, &codecvt1))
4232 goto unsupported;
4234 else if (!supportable_widening_operation (code, stmt, vectype_out,
4235 cvt_type, &codecvt1,
4236 &codecvt2, &multi_step_cvt,
4237 &interm_types))
4238 continue;
4239 else
4240 gcc_assert (multi_step_cvt == 0);
4242 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4243 vectype_in, &code1, &code2,
4244 &multi_step_cvt, &interm_types))
4245 break;
4248 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4249 goto unsupported;
4251 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4252 codecvt2 = ERROR_MARK;
4253 else
4255 multi_step_cvt++;
4256 interm_types.safe_push (cvt_type);
4257 cvt_type = NULL_TREE;
4259 break;
4261 case NARROW:
4262 gcc_assert (op_type == unary_op);
4263 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4264 &code1, &multi_step_cvt,
4265 &interm_types))
4266 break;
4268 if (code != FIX_TRUNC_EXPR
4269 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4270 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4271 goto unsupported;
4273 rhs_mode = TYPE_MODE (rhs_type);
4274 cvt_type
4275 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4276 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4277 if (cvt_type == NULL_TREE)
4278 goto unsupported;
4279 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4280 &decl1, &codecvt1))
4281 goto unsupported;
4282 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4283 &code1, &multi_step_cvt,
4284 &interm_types))
4285 break;
4286 goto unsupported;
4288 default:
4289 gcc_unreachable ();
4292 if (!vec_stmt) /* transformation not required. */
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_NOTE, vect_location,
4296 "=== vectorizable_conversion ===\n");
4297 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4299 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4300 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4302 else if (modifier == NARROW)
4304 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4305 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4307 else
4309 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4310 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4312 interm_types.release ();
4313 return true;
4316 /** Transform. **/
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_NOTE, vect_location,
4319 "transform conversion. ncopies = %d.\n", ncopies);
4321 if (op_type == binary_op)
4323 if (CONSTANT_CLASS_P (op0))
4324 op0 = fold_convert (TREE_TYPE (op1), op0);
4325 else if (CONSTANT_CLASS_P (op1))
4326 op1 = fold_convert (TREE_TYPE (op0), op1);
4329 /* In case of multi-step conversion, we first generate conversion operations
4330 to the intermediate types, and then from those types to the final one.
4331 We create vector destinations for the intermediate types (TYPES) received
4332 from supportable_*_operation, and store them in the correct order
4333 for future use in vect_create_vectorized_*_stmts (). */
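/* E.g. a WIDEN float conversion such as short -> double may go
   short -> int (a widening step) followed by int -> double (FLOAT_EXPR
   on the CVT_TYPE vectors), so destinations are needed both for the
   intermediate integer vectors and for the final double vectors. */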
4334 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4335 vec_dest = vect_create_destination_var (scalar_dest,
4336 (cvt_type && modifier == WIDEN)
4337 ? cvt_type : vectype_out);
4338 vec_dsts.quick_push (vec_dest);
4340 if (multi_step_cvt)
4342 for (i = interm_types.length () - 1;
4343 interm_types.iterate (i, &intermediate_type); i--)
4345 vec_dest = vect_create_destination_var (scalar_dest,
4346 intermediate_type);
4347 vec_dsts.quick_push (vec_dest);
4351 if (cvt_type)
4352 vec_dest = vect_create_destination_var (scalar_dest,
4353 modifier == WIDEN
4354 ? vectype_out : cvt_type);
4356 if (!slp_node)
4358 if (modifier == WIDEN)
4360 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4361 if (op_type == binary_op)
4362 vec_oprnds1.create (1);
4364 else if (modifier == NARROW)
4365 vec_oprnds0.create (
4366 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4368 else if (code == WIDEN_LSHIFT_EXPR)
4369 vec_oprnds1.create (slp_node->vec_stmts_size);
4371 last_oprnd = op0;
4372 prev_stmt_info = NULL;
4373 switch (modifier)
4375 case NONE:
4376 for (j = 0; j < ncopies; j++)
4378 if (j == 0)
4379 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4380 -1);
4381 else
4382 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4384 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4386 /* Arguments are ready, create the new vector stmt. */
4387 if (code1 == CALL_EXPR)
4389 new_stmt = gimple_build_call (decl1, 1, vop0);
4390 new_temp = make_ssa_name (vec_dest, new_stmt);
4391 gimple_call_set_lhs (new_stmt, new_temp);
4393 else
4395 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4396 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4397 new_temp = make_ssa_name (vec_dest, new_stmt);
4398 gimple_assign_set_lhs (new_stmt, new_temp);
4401 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4402 if (slp_node)
4403 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4404 else
4406 if (!prev_stmt_info)
4407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4408 else
4409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4410 prev_stmt_info = vinfo_for_stmt (new_stmt);
4414 break;
4416 case WIDEN:
4417 /* In case the vectorization factor (VF) is bigger than the number
4418 of elements that we can fit in a vectype (nunits), we have to
4419 generate more than one vector stmt - i.e., we need to "unroll"
4420 the vector stmt by a factor VF/nunits. */
4421 for (j = 0; j < ncopies; j++)
4423 /* Handle uses. */
4424 if (j == 0)
4426 if (slp_node)
4428 if (code == WIDEN_LSHIFT_EXPR)
4430 unsigned int k;
4432 vec_oprnd1 = op1;
4433 /* Store vec_oprnd1 for every vector stmt to be created
4434 for SLP_NODE. We check during the analysis that all
4435 the shift arguments are the same. */
4436 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4437 vec_oprnds1.quick_push (vec_oprnd1);
4439 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4440 slp_node, -1);
4442 else
4443 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4444 &vec_oprnds1, slp_node, -1);
4446 else
4448 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4449 vec_oprnds0.quick_push (vec_oprnd0);
4450 if (op_type == binary_op)
4452 if (code == WIDEN_LSHIFT_EXPR)
4453 vec_oprnd1 = op1;
4454 else
4455 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4456 vec_oprnds1.quick_push (vec_oprnd1);
4460 else
4462 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4463 vec_oprnds0.truncate (0);
4464 vec_oprnds0.quick_push (vec_oprnd0);
4465 if (op_type == binary_op)
4467 if (code == WIDEN_LSHIFT_EXPR)
4468 vec_oprnd1 = op1;
4469 else
4470 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4471 vec_oprnd1);
4472 vec_oprnds1.truncate (0);
4473 vec_oprnds1.quick_push (vec_oprnd1);
4477 /* Arguments are ready. Create the new vector stmts. */
4478 for (i = multi_step_cvt; i >= 0; i--)
4480 tree this_dest = vec_dsts[i];
4481 enum tree_code c1 = code1, c2 = code2;
4482 if (i == 0 && codecvt2 != ERROR_MARK)
4484 c1 = codecvt1;
4485 c2 = codecvt2;
4487 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4488 &vec_oprnds1,
4489 stmt, this_dest, gsi,
4490 c1, c2, decl1, decl2,
4491 op_type);
4494 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4496 if (cvt_type)
4498 if (codecvt1 == CALL_EXPR)
4500 new_stmt = gimple_build_call (decl1, 1, vop0);
4501 new_temp = make_ssa_name (vec_dest, new_stmt);
4502 gimple_call_set_lhs (new_stmt, new_temp);
4504 else
4506 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4507 new_temp = make_ssa_name (vec_dest);
4508 new_stmt = gimple_build_assign (new_temp, codecvt1,
4509 vop0);
4512 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4514 else
4515 new_stmt = SSA_NAME_DEF_STMT (vop0);
4517 if (slp_node)
4518 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4519 else
4521 if (!prev_stmt_info)
4522 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4523 else
4524 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4525 prev_stmt_info = vinfo_for_stmt (new_stmt);
4530 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4531 break;
4533 case NARROW:
4534 /* In case the vectorization factor (VF) is bigger than the number
4535 of elements that we can fit in a vectype (nunits), we have to
4536 generate more than one vector stmt - i.e., we need to "unroll"
4537 the vector stmt by a factor VF/nunits. */
4538 for (j = 0; j < ncopies; j++)
4540 /* Handle uses. */
4541 if (slp_node)
4542 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4543 slp_node, -1);
4544 else
4546 vec_oprnds0.truncate (0);
4547 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4548 vect_pow2 (multi_step_cvt) - 1);
4551 /* Arguments are ready. Create the new vector stmts. */
4552 if (cvt_type)
4553 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4555 if (codecvt1 == CALL_EXPR)
4557 new_stmt = gimple_build_call (decl1, 1, vop0);
4558 new_temp = make_ssa_name (vec_dest, new_stmt);
4559 gimple_call_set_lhs (new_stmt, new_temp);
4561 else
4563 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4564 new_temp = make_ssa_name (vec_dest);
4565 new_stmt = gimple_build_assign (new_temp, codecvt1,
4566 vop0);
4569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4570 vec_oprnds0[i] = new_temp;
4573 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4574 stmt, vec_dsts, gsi,
4575 slp_node, code1,
4576 &prev_stmt_info);
4579 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4580 break;
4583 vec_oprnds0.release ();
4584 vec_oprnds1.release ();
4585 interm_types.release ();
4587 return true;
4591 /* Function vectorizable_assignment.
4593 Check if STMT performs an assignment (copy) that can be vectorized.
4594 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4595 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4596 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
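/* E.g. a no-op conversion such as "y_1 = (unsigned int) x_2" or a
   VIEW_CONVERT_EXPR between types whose vectors have the same size and
   number of elements is vectorized as a plain vector copy, possibly
   wrapped in a VIEW_CONVERT_EXPR. */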
4598 static bool
4599 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4600 gimple **vec_stmt, slp_tree slp_node)
4602 tree vec_dest;
4603 tree scalar_dest;
4604 tree op;
4605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4606 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4607 tree new_temp;
4608 gimple *def_stmt;
4609 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4610 int ncopies;
4611 int i, j;
4612 vec<tree> vec_oprnds = vNULL;
4613 tree vop;
4614 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4615 vec_info *vinfo = stmt_info->vinfo;
4616 gimple *new_stmt = NULL;
4617 stmt_vec_info prev_stmt_info = NULL;
4618 enum tree_code code;
4619 tree vectype_in;
4621 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4622 return false;
4624 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4625 && ! vec_stmt)
4626 return false;
4628 /* Is vectorizable assignment? */
4629 if (!is_gimple_assign (stmt))
4630 return false;
4632 scalar_dest = gimple_assign_lhs (stmt);
4633 if (TREE_CODE (scalar_dest) != SSA_NAME)
4634 return false;
4636 code = gimple_assign_rhs_code (stmt);
4637 if (gimple_assign_single_p (stmt)
4638 || code == PAREN_EXPR
4639 || CONVERT_EXPR_CODE_P (code))
4640 op = gimple_assign_rhs1 (stmt);
4641 else
4642 return false;
4644 if (code == VIEW_CONVERT_EXPR)
4645 op = TREE_OPERAND (op, 0);
4647 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4648 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4650 /* Multiple types in SLP are handled by creating the appropriate number of
4651 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4652 case of SLP. */
4653 if (slp_node)
4654 ncopies = 1;
4655 else
4656 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4658 gcc_assert (ncopies >= 1);
4660 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4664 "use not simple.\n");
4665 return false;
4668 /* We can handle NOP_EXPR conversions that do not change the number
4669 of elements or the vector size. */
4670 if ((CONVERT_EXPR_CODE_P (code)
4671 || code == VIEW_CONVERT_EXPR)
4672 && (!vectype_in
4673 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4674 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4675 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4676 return false;
4678 /* We do not handle bit-precision changes. */
4679 if ((CONVERT_EXPR_CODE_P (code)
4680 || code == VIEW_CONVERT_EXPR)
4681 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4682 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4683 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4684 || ((TYPE_PRECISION (TREE_TYPE (op))
4685 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4686 /* But a conversion that does not change the bit-pattern is ok. */
4687 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4688 > TYPE_PRECISION (TREE_TYPE (op)))
4689 && TYPE_UNSIGNED (TREE_TYPE (op)))
4690 /* Conversion between boolean types of different sizes is
4691 a simple assignment in case their vectypes are the same
4692 boolean vectors. */
4693 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4694 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4696 if (dump_enabled_p ())
4697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4698 "type conversion to/from bit-precision "
4699 "unsupported.\n");
4700 return false;
4703 if (!vec_stmt) /* transformation not required. */
4705 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4706 if (dump_enabled_p ())
4707 dump_printf_loc (MSG_NOTE, vect_location,
4708 "=== vectorizable_assignment ===\n");
4709 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4710 return true;
4713 /** Transform. **/
4714 if (dump_enabled_p ())
4715 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4717 /* Handle def. */
4718 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4720 /* Handle use. */
4721 for (j = 0; j < ncopies; j++)
4723 /* Handle uses. */
4724 if (j == 0)
4725 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4726 else
4727 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4729 /* Arguments are ready. Create the new vector stmt. */
4730 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4732 if (CONVERT_EXPR_CODE_P (code)
4733 || code == VIEW_CONVERT_EXPR)
4734 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4735 new_stmt = gimple_build_assign (vec_dest, vop);
4736 new_temp = make_ssa_name (vec_dest, new_stmt);
4737 gimple_assign_set_lhs (new_stmt, new_temp);
4738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4739 if (slp_node)
4740 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4743 if (slp_node)
4744 continue;
4746 if (j == 0)
4747 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4748 else
4749 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4751 prev_stmt_info = vinfo_for_stmt (new_stmt);
4754 vec_oprnds.release ();
4755 return true;
4759 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4760 either as shift by a scalar or by a vector. */
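/* E.g. for LSHIFT_EXPR on int this returns true if the target provides
   either a vector-shift-by-scalar pattern (optab_scalar) or a
   vector-shift-by-vector pattern (optab_vector) for the corresponding
   vector mode. */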
4762 bool
4763 vect_supportable_shift (enum tree_code code, tree scalar_type)
4766 machine_mode vec_mode;
4767 optab optab;
4768 int icode;
4769 tree vectype;
4771 vectype = get_vectype_for_scalar_type (scalar_type);
4772 if (!vectype)
4773 return false;
4775 optab = optab_for_tree_code (code, vectype, optab_scalar);
4776 if (!optab
4777 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4779 optab = optab_for_tree_code (code, vectype, optab_vector);
4780 if (!optab
4781 || (optab_handler (optab, TYPE_MODE (vectype))
4782 == CODE_FOR_nothing))
4783 return false;
4786 vec_mode = TYPE_MODE (vectype);
4787 icode = (int) optab_handler (optab, vec_mode);
4788 if (icode == CODE_FOR_nothing)
4789 return false;
4791 return true;
4795 /* Function vectorizable_shift.
4797 Check if STMT performs a shift operation that can be vectorized.
4798 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4799 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4800 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4802 static bool
4803 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4804 gimple **vec_stmt, slp_tree slp_node)
4806 tree vec_dest;
4807 tree scalar_dest;
4808 tree op0, op1 = NULL;
4809 tree vec_oprnd1 = NULL_TREE;
4810 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4811 tree vectype;
4812 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4813 enum tree_code code;
4814 machine_mode vec_mode;
4815 tree new_temp;
4816 optab optab;
4817 int icode;
4818 machine_mode optab_op2_mode;
4819 gimple *def_stmt;
4820 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4821 gimple *new_stmt = NULL;
4822 stmt_vec_info prev_stmt_info;
4823 int nunits_in;
4824 int nunits_out;
4825 tree vectype_out;
4826 tree op1_vectype;
4827 int ncopies;
4828 int j, i;
4829 vec<tree> vec_oprnds0 = vNULL;
4830 vec<tree> vec_oprnds1 = vNULL;
4831 tree vop0, vop1;
4832 unsigned int k;
4833 bool scalar_shift_arg = true;
4834 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4835 vec_info *vinfo = stmt_info->vinfo;
4836 int vf;
4838 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4839 return false;
4841 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4842 && ! vec_stmt)
4843 return false;
4845 /* Is STMT a vectorizable binary/unary operation? */
4846 if (!is_gimple_assign (stmt))
4847 return false;
4849 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4850 return false;
4852 code = gimple_assign_rhs_code (stmt);
4854 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4855 || code == RROTATE_EXPR))
4856 return false;
4858 scalar_dest = gimple_assign_lhs (stmt);
4859 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4860 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4861 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4863 if (dump_enabled_p ())
4864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4865 "bit-precision shifts not supported.\n");
4866 return false;
4869 op0 = gimple_assign_rhs1 (stmt);
4870 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874 "use not simple.\n");
4875 return false;
4877 /* If op0 is an external or constant def use a vector type with
4878 the same size as the output vector type. */
4879 if (!vectype)
4880 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4881 if (vec_stmt)
4882 gcc_assert (vectype);
4883 if (!vectype)
4885 if (dump_enabled_p ())
4886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4887 "no vectype for scalar type\n");
4888 return false;
4891 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4892 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4893 if (nunits_out != nunits_in)
4894 return false;
4896 op1 = gimple_assign_rhs2 (stmt);
4897 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "use not simple.\n");
4902 return false;
4905 if (loop_vinfo)
4906 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4907 else
4908 vf = 1;
4910 /* Multiple types in SLP are handled by creating the appropriate number of
4911 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4912 case of SLP. */
4913 if (slp_node)
4914 ncopies = 1;
4915 else
4916 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4918 gcc_assert (ncopies >= 1);
4920 /* Determine whether the shift amount is a vector or a scalar. If the
4921 shift/rotate amount is a vector, use the vector/vector shift optabs. */
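/* Illustrative: in "a[i] << b[i]" the shift amount is a loop-varying
   vector operand, whereas in "a[i] << 3" or "a[i] << n" with a
   loop-invariant n the scalar amount can normally be used directly. */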
4923 if ((dt[1] == vect_internal_def
4924 || dt[1] == vect_induction_def)
4925 && !slp_node)
4926 scalar_shift_arg = false;
4927 else if (dt[1] == vect_constant_def
4928 || dt[1] == vect_external_def
4929 || dt[1] == vect_internal_def)
4931 /* In SLP, we need to check whether the shift count is the same;
4932 in loops, if it is a constant or invariant, it is always
4933 a scalar shift. */
4934 if (slp_node)
4936 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4937 gimple *slpstmt;
4939 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4940 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4941 scalar_shift_arg = false;
4944 /* If the shift amount is computed by a pattern stmt we cannot
4945 use the scalar amount directly; thus give up and use a vector
4946 shift. */
4947 if (dt[1] == vect_internal_def)
4949 gimple *def = SSA_NAME_DEF_STMT (op1);
4950 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4951 scalar_shift_arg = false;
4954 else
4956 if (dump_enabled_p ())
4957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4958 "operand mode requires invariant argument.\n");
4959 return false;
4962 /* Vector shifted by vector. */
4963 if (!scalar_shift_arg)
4965 optab = optab_for_tree_code (code, vectype, optab_vector);
4966 if (dump_enabled_p ())
4967 dump_printf_loc (MSG_NOTE, vect_location,
4968 "vector/vector shift/rotate found.\n");
4970 if (!op1_vectype)
4971 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4972 if (op1_vectype == NULL_TREE
4973 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4975 if (dump_enabled_p ())
4976 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4977 "unusable type for last operand in"
4978 " vector/vector shift/rotate.\n");
4979 return false;
4982 /* See if the machine has a vector shifted by scalar insn, and if not,
4983 see if it has a vector shifted by vector insn. */
4984 else
4986 optab = optab_for_tree_code (code, vectype, optab_scalar);
4987 if (optab
4988 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4990 if (dump_enabled_p ())
4991 dump_printf_loc (MSG_NOTE, vect_location,
4992 "vector/scalar shift/rotate found.\n");
4994 else
4996 optab = optab_for_tree_code (code, vectype, optab_vector);
4997 if (optab
4998 && (optab_handler (optab, TYPE_MODE (vectype))
4999 != CODE_FOR_nothing))
5001 scalar_shift_arg = false;
5003 if (dump_enabled_p ())
5004 dump_printf_loc (MSG_NOTE, vect_location,
5005 "vector/vector shift/rotate found.\n");
5007 /* Unlike the other binary operators, shifts/rotates have
5008 the rhs being int, instead of the same type as the lhs,
5009 so make sure the scalar is the right type if we are
5010 dealing with vectors of long long/long/short/char. */
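/* E.g. for "long_var << 2" the constant 2 (an int) is folded to the
   vector element type below, so that a vector/vector shift gets
   operands of matching modes. */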
5011 if (dt[1] == vect_constant_def)
5012 op1 = fold_convert (TREE_TYPE (vectype), op1);
5013 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5014 TREE_TYPE (op1)))
5016 if (slp_node
5017 && TYPE_MODE (TREE_TYPE (vectype))
5018 != TYPE_MODE (TREE_TYPE (op1)))
5020 if (dump_enabled_p ())
5021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5022 "unusable type for last operand in"
5023 " vector/vector shift/rotate.\n");
5024 return false;
5026 if (vec_stmt && !slp_node)
5028 op1 = fold_convert (TREE_TYPE (vectype), op1);
5029 op1 = vect_init_vector (stmt, op1,
5030 TREE_TYPE (vectype), NULL);
5037 /* Supportable by target? */
5038 if (!optab)
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5042 "no optab.\n");
5043 return false;
5045 vec_mode = TYPE_MODE (vectype);
5046 icode = (int) optab_handler (optab, vec_mode);
5047 if (icode == CODE_FOR_nothing)
5049 if (dump_enabled_p ())
5050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5051 "op not supported by target.\n");
5052 /* Check only during analysis. */
5053 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5054 || (vf < vect_min_worthwhile_factor (code)
5055 && !vec_stmt))
5056 return false;
5057 if (dump_enabled_p ())
5058 dump_printf_loc (MSG_NOTE, vect_location,
5059 "proceeding using word mode.\n");
5062 /* Worthwhile without SIMD support? Check only during analysis. */
5063 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5064 && vf < vect_min_worthwhile_factor (code)
5065 && !vec_stmt)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "not worthwhile without SIMD support.\n");
5070 return false;
5073 if (!vec_stmt) /* transformation not required. */
5075 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5076 if (dump_enabled_p ())
5077 dump_printf_loc (MSG_NOTE, vect_location,
5078 "=== vectorizable_shift ===\n");
5079 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5080 return true;
5083 /** Transform. **/
5085 if (dump_enabled_p ())
5086 dump_printf_loc (MSG_NOTE, vect_location,
5087 "transform binary/unary operation.\n");
5089 /* Handle def. */
5090 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5092 prev_stmt_info = NULL;
5093 for (j = 0; j < ncopies; j++)
5095 /* Handle uses. */
5096 if (j == 0)
5098 if (scalar_shift_arg)
5100 /* Vector shl and shr insn patterns can be defined with scalar
5101 operand 2 (shift operand). In this case, use constant or loop
5102 invariant op1 directly, without extending it to vector mode
5103 first. */
5104 optab_op2_mode = insn_data[icode].operand[2].mode;
5105 if (!VECTOR_MODE_P (optab_op2_mode))
5107 if (dump_enabled_p ())
5108 dump_printf_loc (MSG_NOTE, vect_location,
5109 "operand 1 using scalar mode.\n");
5110 vec_oprnd1 = op1;
5111 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5112 vec_oprnds1.quick_push (vec_oprnd1);
5113 if (slp_node)
5115 /* Store vec_oprnd1 for every vector stmt to be created
5116 for SLP_NODE. We check during the analysis that all
5117 the shift arguments are the same.
5118 TODO: Allow different constants for different vector
5119 stmts generated for an SLP instance. */
5120 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5121 vec_oprnds1.quick_push (vec_oprnd1);
5126 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5127 (a special case for certain kinds of vector shifts); otherwise,
5128 operand 1 should be of a vector type (the usual case). */
5129 if (vec_oprnd1)
5130 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5131 slp_node, -1);
5132 else
5133 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5134 slp_node, -1);
5136 else
5137 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5139 /* Arguments are ready. Create the new vector stmt. */
5140 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5142 vop1 = vec_oprnds1[i];
5143 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5144 new_temp = make_ssa_name (vec_dest, new_stmt);
5145 gimple_assign_set_lhs (new_stmt, new_temp);
5146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5147 if (slp_node)
5148 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5151 if (slp_node)
5152 continue;
5154 if (j == 0)
5155 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5156 else
5157 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5158 prev_stmt_info = vinfo_for_stmt (new_stmt);
5161 vec_oprnds0.release ();
5162 vec_oprnds1.release ();
5164 return true;
5168 /* Function vectorizable_operation.
5170 Check if STMT performs a binary, unary or ternary operation that can
5171 be vectorized.
5172 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5173 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5174 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5176 static bool
5177 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5178 gimple **vec_stmt, slp_tree slp_node)
5180 tree vec_dest;
5181 tree scalar_dest;
5182 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5183 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5184 tree vectype;
5185 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5186 enum tree_code code;
5187 machine_mode vec_mode;
5188 tree new_temp;
5189 int op_type;
5190 optab optab;
5191 bool target_support_p;
5192 gimple *def_stmt;
5193 enum vect_def_type dt[3]
5194 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5195 gimple *new_stmt = NULL;
5196 stmt_vec_info prev_stmt_info;
5197 int nunits_in;
5198 int nunits_out;
5199 tree vectype_out;
5200 int ncopies;
5201 int j, i;
5202 vec<tree> vec_oprnds0 = vNULL;
5203 vec<tree> vec_oprnds1 = vNULL;
5204 vec<tree> vec_oprnds2 = vNULL;
5205 tree vop0, vop1, vop2;
5206 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5207 vec_info *vinfo = stmt_info->vinfo;
5208 int vf;
5210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5211 return false;
5213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5214 && ! vec_stmt)
5215 return false;
5217 /* Is STMT a vectorizable binary/unary operation? */
5218 if (!is_gimple_assign (stmt))
5219 return false;
5221 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5222 return false;
5224 code = gimple_assign_rhs_code (stmt);
5226 /* For pointer addition, we should use the normal plus for
5227 the vector addition. */
5228 if (code == POINTER_PLUS_EXPR)
5229 code = PLUS_EXPR;
5231 /* Support only unary, binary, or ternary operations. */
5232 op_type = TREE_CODE_LENGTH (code);
5233 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5235 if (dump_enabled_p ())
5236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5237 "num. args = %d (not unary/binary/ternary op).\n",
5238 op_type);
5239 return false;
5242 scalar_dest = gimple_assign_lhs (stmt);
5243 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5245 /* Most operations cannot handle bit-precision types without extra
5246 truncations. */
5247 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5248 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5249 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5250 /* The exceptions are bitwise binary operations. */
5251 && code != BIT_IOR_EXPR
5252 && code != BIT_XOR_EXPR
5253 && code != BIT_AND_EXPR)
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5257 "bit-precision arithmetic not supported.\n");
5258 return false;
5261 op0 = gimple_assign_rhs1 (stmt);
5262 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5264 if (dump_enabled_p ())
5265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5266 "use not simple.\n");
5267 return false;
5269 /* If op0 is an external or constant def use a vector type with
5270 the same size as the output vector type. */
5271 if (!vectype)
5273 /* For a boolean type we cannot determine the vectype from an
5274 invariant value (we don't know whether it is a vector
5275 of booleans or a vector of integers). We use the output
5276 vectype because operations on booleans don't change the
5277 type. */
5278 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5280 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5284 "not supported operation on bool value.\n");
5285 return false;
5287 vectype = vectype_out;
5289 else
5290 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5292 if (vec_stmt)
5293 gcc_assert (vectype);
5294 if (!vectype)
5296 if (dump_enabled_p ())
5298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5299 "no vectype for scalar type ");
5300 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5301 TREE_TYPE (op0));
5302 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5305 return false;
5308 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5309 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5310 if (nunits_out != nunits_in)
5311 return false;
5313 if (op_type == binary_op || op_type == ternary_op)
5315 op1 = gimple_assign_rhs2 (stmt);
5316 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5318 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5320 "use not simple.\n");
5321 return false;
5324 if (op_type == ternary_op)
5326 op2 = gimple_assign_rhs3 (stmt);
5327 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5329 if (dump_enabled_p ())
5330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5331 "use not simple.\n");
5332 return false;
5336 if (loop_vinfo)
5337 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5338 else
5339 vf = 1;
5341 /* Multiple types in SLP are handled by creating the appropriate number of
5342 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5343 case of SLP. */
5344 if (slp_node)
5345 ncopies = 1;
5346 else
5347 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5349 gcc_assert (ncopies >= 1);
5351 /* Shifts are handled in vectorizable_shift (). */
5352 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5353 || code == RROTATE_EXPR)
5354 return false;
5356 /* Supportable by target? */
5358 vec_mode = TYPE_MODE (vectype);
5359 if (code == MULT_HIGHPART_EXPR)
5360 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5361 else
5363 optab = optab_for_tree_code (code, vectype, optab_default);
5364 if (!optab)
5366 if (dump_enabled_p ())
5367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5368 "no optab.\n");
5369 return false;
5371 target_support_p = (optab_handler (optab, vec_mode)
5372 != CODE_FOR_nothing);
5375 if (!target_support_p)
5377 if (dump_enabled_p ())
5378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5379 "op not supported by target.\n");
5380 /* Check only during analysis. */
5381 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5382 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5383 return false;
5384 if (dump_enabled_p ())
5385 dump_printf_loc (MSG_NOTE, vect_location,
5386 "proceeding using word mode.\n");
5389 /* Worthwhile without SIMD support? Check only during analysis. */
5390 if (!VECTOR_MODE_P (vec_mode)
5391 && !vec_stmt
5392 && vf < vect_min_worthwhile_factor (code))
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5396 "not worthwhile without SIMD support.\n");
5397 return false;
5400 if (!vec_stmt) /* transformation not required. */
5402 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_NOTE, vect_location,
5405 "=== vectorizable_operation ===\n");
5406 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5407 return true;
5410 /** Transform. **/
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_NOTE, vect_location,
5414 "transform binary/unary operation.\n");
5416 /* Handle def. */
5417 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5419 /* In case the vectorization factor (VF) is bigger than the number
5420 of elements that we can fit in a vectype (nunits), we have to generate
5421 more than one vector stmt - i.e., we need to "unroll" the
5422 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5423 from one copy of the vector stmt to the next, in the field
5424 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5425 stages to find the correct vector defs to be used when vectorizing
5426 stmts that use the defs of the current stmt. The example below
5427 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5428 we need to create 4 vectorized stmts):
5430 before vectorization:
5431 RELATED_STMT VEC_STMT
5432 S1: x = memref - -
5433 S2: z = x + 1 - -
5435 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5436 there):
5437 RELATED_STMT VEC_STMT
5438 VS1_0: vx0 = memref0 VS1_1 -
5439 VS1_1: vx1 = memref1 VS1_2 -
5440 VS1_2: vx2 = memref2 VS1_3 -
5441 VS1_3: vx3 = memref3 - -
5442 S1: x = load - VS1_0
5443 S2: z = x + 1 - -
5445 step 2: vectorize stmt S2 (done here):
5446 To vectorize stmt S2 we first need to find the relevant vector
5447 def for the first operand 'x'. This is, as usual, obtained from
5448 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5449 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5450 relevant vector def 'vx0'. Having found 'vx0' we can generate
5451 the vector stmt VS2_0, and as usual, record it in the
5452 STMT_VINFO_VEC_STMT of stmt S2.
5453 When creating the second copy (VS2_1), we obtain the relevant vector
5454 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5455 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5456 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5457 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5458 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5459 chain of stmts and pointers:
5460 RELATED_STMT VEC_STMT
5461 VS1_0: vx0 = memref0 VS1_1 -
5462 VS1_1: vx1 = memref1 VS1_2 -
5463 VS1_2: vx2 = memref2 VS1_3 -
5464 VS1_3: vx3 = memref3 - -
5465 S1: x = load - VS1_0
5466 VS2_0: vz0 = vx0 + v1 VS2_1 -
5467 VS2_1: vz1 = vx1 + v1 VS2_2 -
5468 VS2_2: vz2 = vx2 + v1 VS2_3 -
5469 VS2_3: vz3 = vx3 + v1 - -
5470 S2: z = x + 1 - VS2_0 */
5472 prev_stmt_info = NULL;
5473 for (j = 0; j < ncopies; j++)
5475 /* Handle uses. */
5476 if (j == 0)
5478 if (op_type == binary_op || op_type == ternary_op)
5479 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5480 slp_node, -1);
5481 else
5482 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5483 slp_node, -1);
5484 if (op_type == ternary_op)
5485 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5486 slp_node, -1);
5488 else
5490 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5491 if (op_type == ternary_op)
5493 tree vec_oprnd = vec_oprnds2.pop ();
5494 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5495 vec_oprnd));
5499 /* Arguments are ready. Create the new vector stmt. */
5500 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5502 vop1 = ((op_type == binary_op || op_type == ternary_op)
5503 ? vec_oprnds1[i] : NULL_TREE);
5504 vop2 = ((op_type == ternary_op)
5505 ? vec_oprnds2[i] : NULL_TREE);
5506 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5507 new_temp = make_ssa_name (vec_dest, new_stmt);
5508 gimple_assign_set_lhs (new_stmt, new_temp);
5509 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5510 if (slp_node)
5511 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5514 if (slp_node)
5515 continue;
5517 if (j == 0)
5518 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5519 else
5520 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5521 prev_stmt_info = vinfo_for_stmt (new_stmt);
5524 vec_oprnds0.release ();
5525 vec_oprnds1.release ();
5526 vec_oprnds2.release ();
5528 return true;
5531 /* A helper function to ensure data reference DR's base alignment
5532 for STMT_INFO. */
5534 static void
5535 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5537 if (!dr->aux)
5538 return;
5540 if (DR_VECT_AUX (dr)->base_misaligned)
5542 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5543 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5545 if (decl_in_symtab_p (base_decl))
5546 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5547 else
5549 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5550 DECL_USER_ALIGN (base_decl) = 1;
5552 DR_VECT_AUX (dr)->base_misaligned = false;
5557 /* Function get_group_alias_ptr_type.
5559 Return the alias type for the group starting at FIRST_STMT. */
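/* E.g. if one member of an interleaved group is accessed as "int" and
   another through a type with a different alias set, ptr_type_node is
   returned so the combined vector accesses conservatively alias
   everything; otherwise the alias pointer type of the first reference
   is reused. */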
5561 static tree
5562 get_group_alias_ptr_type (gimple *first_stmt)
5564 struct data_reference *first_dr, *next_dr;
5565 gimple *next_stmt;
5567 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5568 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5569 while (next_stmt)
5571 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5572 if (get_alias_set (DR_REF (first_dr))
5573 != get_alias_set (DR_REF (next_dr)))
5575 if (dump_enabled_p ())
5576 dump_printf_loc (MSG_NOTE, vect_location,
5577 "conflicting alias set types.\n");
5578 return ptr_type_node;
5580 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5582 return reference_alias_ptr_type (DR_REF (first_dr));
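/* As an added illustration: for a group that stores adjacent members of
   different types, e.g.

     struct S { int i; float f; } *p;
     p->i = 1;      <-- FIRST_STMT
     p->f = 2.0f;   <-- GROUP_NEXT_ELEMENT

   the two DR_REFs typically have different alias sets, so ptr_type_node is
   returned; otherwise the alias pointer type of the first reference is used
   for the whole group.  */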
5586 /* Function vectorizable_store.
5588 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5589 can be vectorized.
5590 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5591 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5592 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5594 static bool
5595 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5596 slp_tree slp_node)
5598 tree scalar_dest;
5599 tree data_ref;
5600 tree op;
5601 tree vec_oprnd = NULL_TREE;
5602 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5603 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5604 tree elem_type;
5605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5606 struct loop *loop = NULL;
5607 machine_mode vec_mode;
5608 tree dummy;
5609 enum dr_alignment_support alignment_support_scheme;
5610 gimple *def_stmt;
5611 enum vect_def_type dt;
5612 stmt_vec_info prev_stmt_info = NULL;
5613 tree dataref_ptr = NULL_TREE;
5614 tree dataref_offset = NULL_TREE;
5615 gimple *ptr_incr = NULL;
5616 int ncopies;
5617 int j;
5618 gimple *next_stmt, *first_stmt;
5619 bool grouped_store;
5620 unsigned int group_size, i;
5621 vec<tree> oprnds = vNULL;
5622 vec<tree> result_chain = vNULL;
5623 bool inv_p;
5624 tree offset = NULL_TREE;
5625 vec<tree> vec_oprnds = vNULL;
5626 bool slp = (slp_node != NULL);
5627 unsigned int vec_num;
5628 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5629 vec_info *vinfo = stmt_info->vinfo;
5630 tree aggr_type;
5631 gather_scatter_info gs_info;
5632 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5633 gimple *new_stmt;
5634 int vf;
5635 vec_load_store_type vls_type;
5636 tree ref_type;
5638 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5639 return false;
5641 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5642 && ! vec_stmt)
5643 return false;
5645 /* Is vectorizable store? */
5647 if (!is_gimple_assign (stmt))
5648 return false;
5650 scalar_dest = gimple_assign_lhs (stmt);
5651 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5652 && is_pattern_stmt_p (stmt_info))
5653 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5654 if (TREE_CODE (scalar_dest) != ARRAY_REF
5655 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5656 && TREE_CODE (scalar_dest) != INDIRECT_REF
5657 && TREE_CODE (scalar_dest) != COMPONENT_REF
5658 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5659 && TREE_CODE (scalar_dest) != REALPART_EXPR
5660 && TREE_CODE (scalar_dest) != MEM_REF)
5661 return false;
5663 /* Cannot have hybrid store SLP -- that would mean storing to the
5664 same location twice. */
5665 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5667 gcc_assert (gimple_assign_single_p (stmt));
5669 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5670 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5672 if (loop_vinfo)
5674 loop = LOOP_VINFO_LOOP (loop_vinfo);
5675 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5677 else
5678 vf = 1;
5680 /* Multiple types in SLP are handled by creating the appropriate number of
5681 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5682 case of SLP. */
5683 if (slp)
5684 ncopies = 1;
5685 else
5686 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5688 gcc_assert (ncopies >= 1);
5690 /* FORNOW. This restriction should be relaxed. */
5691 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5695 "multiple types in nested loop.\n");
5696 return false;
5699 op = gimple_assign_rhs1 (stmt);
5701 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5705 "use not simple.\n");
5706 return false;
5709 if (dt == vect_constant_def || dt == vect_external_def)
5710 vls_type = VLS_STORE_INVARIANT;
5711 else
5712 vls_type = VLS_STORE;
5714 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5715 return false;
5717 elem_type = TREE_TYPE (vectype);
5718 vec_mode = TYPE_MODE (vectype);
5720 /* FORNOW. In some cases we can vectorize even if the data type is not
5721 supported (e.g. array initialization with 0). */
5722 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5723 return false;
5725 if (!STMT_VINFO_DATA_REF (stmt_info))
5726 return false;
5728 vect_memory_access_type memory_access_type;
5729 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5730 &memory_access_type, &gs_info))
5731 return false;
5733 if (!vec_stmt) /* transformation not required. */
5735 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5736 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5737 /* The SLP costs are calculated during SLP analysis. */
5738 if (!PURE_SLP_STMT (stmt_info))
5739 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5740 NULL, NULL, NULL);
5741 return true;
5743 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5745 /** Transform. **/
5747 ensure_base_align (stmt_info, dr);
5749 if (memory_access_type == VMAT_GATHER_SCATTER)
5751 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5752 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5753 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5754 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5755 edge pe = loop_preheader_edge (loop);
5756 gimple_seq seq;
5757 basic_block new_bb;
5758 enum { NARROW, NONE, WIDEN } modifier;
5759 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5761 if (nunits == (unsigned int) scatter_off_nunits)
5762 modifier = NONE;
5763 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5765 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5766 modifier = WIDEN;
5768 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5769 sel[i] = i | nunits;
5771 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5772 gcc_assert (perm_mask != NULL_TREE);
5774 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5776 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5777 modifier = NARROW;
5779 for (i = 0; i < (unsigned int) nunits; ++i)
5780 sel[i] = i | scatter_off_nunits;
5782 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5783 gcc_assert (perm_mask != NULL_TREE);
5784 ncopies *= 2;
5786 else
5787 gcc_unreachable ();
5789 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5790 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5791 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5792 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5793 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5794 scaletype = TREE_VALUE (arglist);
5796 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5797 && TREE_CODE (rettype) == VOID_TYPE);
5799 ptr = fold_convert (ptrtype, gs_info.base);
5800 if (!is_gimple_min_invariant (ptr))
5802 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5803 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5804 gcc_assert (!new_bb);
5807 /* Currently we support only unconditional scatter stores,
5808 so mask should be all ones. */
5809 mask = build_int_cst (masktype, -1);
5810 mask = vect_init_vector (stmt, mask, masktype, NULL);
5812 scale = build_int_cst (scaletype, gs_info.scale);
5814 prev_stmt_info = NULL;
5815 for (j = 0; j < ncopies; ++j)
5817 if (j == 0)
5819 src = vec_oprnd1
5820 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5821 op = vec_oprnd0
5822 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5824 else if (modifier != NONE && (j & 1))
5826 if (modifier == WIDEN)
5828 src = vec_oprnd1
5829 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5830 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5831 stmt, gsi);
5833 else if (modifier == NARROW)
5835 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5836 stmt, gsi);
5837 op = vec_oprnd0
5838 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5839 vec_oprnd0);
5841 else
5842 gcc_unreachable ();
5844 else
5846 src = vec_oprnd1
5847 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5848 op = vec_oprnd0
5849 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5850 vec_oprnd0);
5853 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5855 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5856 == TYPE_VECTOR_SUBPARTS (srctype));
5857 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5858 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5859 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5860 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5861 src = var;
5864 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5866 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5867 == TYPE_VECTOR_SUBPARTS (idxtype));
5868 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5869 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5870 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5871 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5872 op = var;
5875 new_stmt
5876 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5878 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5880 if (prev_stmt_info == NULL)
5881 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5882 else
5883 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5884 prev_stmt_info = vinfo_for_stmt (new_stmt);
5886 return true;
5889 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5890 if (grouped_store)
5892 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5893 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5894 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5896 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5898 /* FORNOW */
5899 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5901 /* We vectorize all the stmts of the interleaving group when we
5902 reach the last stmt in the group. */
5903 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5904 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5905 && !slp)
5907 *vec_stmt = NULL;
5908 return true;
5911 if (slp)
5913 grouped_store = false;
5914 /* VEC_NUM is the number of vect stmts to be created for this
5915 group. */
5916 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5917 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5918 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5919 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5920 op = gimple_assign_rhs1 (first_stmt);
5922 else
5923 /* VEC_NUM is the number of vect stmts to be created for this
5924 group. */
5925 vec_num = group_size;
5927 ref_type = get_group_alias_ptr_type (first_stmt);
5929 else
5931 first_stmt = stmt;
5932 first_dr = dr;
5933 group_size = vec_num = 1;
5934 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5937 if (dump_enabled_p ())
5938 dump_printf_loc (MSG_NOTE, vect_location,
5939 "transform store. ncopies = %d\n", ncopies);
5941 if (memory_access_type == VMAT_ELEMENTWISE
5942 || memory_access_type == VMAT_STRIDED_SLP)
5944 gimple_stmt_iterator incr_gsi;
5945 bool insert_after;
5946 gimple *incr;
5947 tree offvar;
5948 tree ivstep;
5949 tree running_off;
5950 gimple_seq stmts = NULL;
5951 tree stride_base, stride_step, alias_off;
5952 tree vec_oprnd;
5953 unsigned int g;
5955 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5957 stride_base
5958 = fold_build_pointer_plus
5959 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5960 size_binop (PLUS_EXPR,
5961 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5962 convert_to_ptrofftype (DR_INIT (first_dr))));
5963 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5965 /* For a store with a loop-invariant (but other than power-of-2)
5966 stride, i.e. not a grouped access, like so:
5968 for (i = 0; i < n; i += stride)
5969 array[i] = ...;
5971 we generate a new induction variable and new stores from
5972 the components of the (vectorized) rhs:
5974 for (j = 0; ; j += VF*stride)
5975 vectemp = ...;
5976 tmp1 = vectemp[0];
5977 array[j] = tmp1;
5978 tmp2 = vectemp[1];
5979 array[j + stride] = tmp2;
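As an added, concrete illustration (assuming a 4-element vectype and
stride == 3), the code emitted below behaves like

     for (j = 0; ; j += 4 * 3)
       vectemp = ...;
       array[j]     = vectemp[0];
       array[j + 3] = vectemp[1];
       array[j + 6] = vectemp[2];
       array[j + 9] = vectemp[3];

i.e. NSTORES element stores per copy, each offset by STRIDE_STEP, with the
induction variable advanced by VF * stride.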
5983 unsigned nstores = nunits;
5984 unsigned lnel = 1;
5985 tree ltype = elem_type;
5986 if (slp)
5988 if (group_size < nunits
5989 && nunits % group_size == 0)
5991 nstores = nunits / group_size;
5992 lnel = group_size;
5993 ltype = build_vector_type (elem_type, group_size);
5995 else if (group_size >= nunits
5996 && group_size % nunits == 0)
5998 nstores = 1;
5999 lnel = nunits;
6000 ltype = vectype;
6002 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6003 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6006 ivstep = stride_step;
6007 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6008 build_int_cst (TREE_TYPE (ivstep), vf));
6010 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6012 create_iv (stride_base, ivstep, NULL,
6013 loop, &incr_gsi, insert_after,
6014 &offvar, NULL);
6015 incr = gsi_stmt (incr_gsi);
6016 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6018 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6019 if (stmts)
6020 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6022 prev_stmt_info = NULL;
6023 alias_off = build_int_cst (ref_type, 0);
6024 next_stmt = first_stmt;
6025 for (g = 0; g < group_size; g++)
6027 running_off = offvar;
6028 if (g)
6030 tree size = TYPE_SIZE_UNIT (ltype);
6031 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6032 size);
6033 tree newoff = copy_ssa_name (running_off, NULL);
6034 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6035 running_off, pos);
6036 vect_finish_stmt_generation (stmt, incr, gsi);
6037 running_off = newoff;
6039 unsigned int group_el = 0;
6040 unsigned HOST_WIDE_INT
6041 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6042 for (j = 0; j < ncopies; j++)
6044 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6045 and first_stmt == stmt. */
6046 if (j == 0)
6048 if (slp)
6050 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6051 slp_node, -1);
6052 vec_oprnd = vec_oprnds[0];
6054 else
6056 gcc_assert (gimple_assign_single_p (next_stmt));
6057 op = gimple_assign_rhs1 (next_stmt);
6058 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6061 else
6063 if (slp)
6064 vec_oprnd = vec_oprnds[j];
6065 else
6067 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6068 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6072 for (i = 0; i < nstores; i++)
6074 tree newref, newoff;
6075 gimple *incr, *assign;
6076 tree size = TYPE_SIZE (ltype);
6077 /* Extract the i'th component. */
6078 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6079 bitsize_int (i), size);
6080 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6081 size, pos);
6083 elem = force_gimple_operand_gsi (gsi, elem, true,
6084 NULL_TREE, true,
6085 GSI_SAME_STMT);
6087 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6088 group_el * elsz);
6089 newref = build2 (MEM_REF, ltype,
6090 running_off, this_off);
6092 /* And store it to *running_off. */
6093 assign = gimple_build_assign (newref, elem);
6094 vect_finish_stmt_generation (stmt, assign, gsi);
6096 group_el += lnel;
6097 if (! slp
6098 || group_el == group_size)
6100 newoff = copy_ssa_name (running_off, NULL);
6101 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6102 running_off, stride_step);
6103 vect_finish_stmt_generation (stmt, incr, gsi);
6105 running_off = newoff;
6106 group_el = 0;
6108 if (g == group_size - 1
6109 && !slp)
6111 if (j == 0 && i == 0)
6112 STMT_VINFO_VEC_STMT (stmt_info)
6113 = *vec_stmt = assign;
6114 else
6115 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6116 prev_stmt_info = vinfo_for_stmt (assign);
6120 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6121 if (slp)
6122 break;
6124 return true;
6127 auto_vec<tree> dr_chain (group_size);
6128 oprnds.create (group_size);
6130 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6131 gcc_assert (alignment_support_scheme);
6132 /* Targets with store-lane instructions must not require explicit
6133 realignment. */
6134 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6135 || alignment_support_scheme == dr_aligned
6136 || alignment_support_scheme == dr_unaligned_supported);
6138 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6139 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6140 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6142 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6143 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6144 else
6145 aggr_type = vectype;
6147 /* In case the vectorization factor (VF) is bigger than the number
6148 of elements that we can fit in a vectype (nunits), we have to generate
6149 more than one vector stmt, i.e., we need to "unroll" the
6150 vector stmt by a factor VF/nunits. For more details see documentation in
6151 vect_get_vec_def_for_stmt_copy. */
6153 /* In case of interleaving (non-unit grouped access):
6155 S1: &base + 2 = x2
6156 S2: &base = x0
6157 S3: &base + 1 = x1
6158 S4: &base + 3 = x3
6160 We create vectorized stores starting from base address (the access of the
6161 first stmt in the chain (S2 in the above example), when the last store stmt
6162 of the chain (S4) is reached:
6164 VS1: &base = vx2
6165 VS2: &base + vec_size*1 = vx0
6166 VS3: &base + vec_size*2 = vx1
6167 VS4: &base + vec_size*3 = vx3
6169 Then permutation statements are generated:
6171 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6172 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6175 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6176 (the order of the data-refs in the output of vect_permute_store_chain
6177 corresponds to the order of scalar stmts in the interleaving chain - see
6178 the documentation of vect_permute_store_chain()).
6180 In case of both multiple types and interleaving, above vector stores and
6181 permutation stmts are created for every copy. The result vector stmts are
6182 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6183 STMT_VINFO_RELATED_STMT for the next copies.
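As an added illustration (assuming an 8-element vectype and a group of two
stores), the interleaving masks used for VS5/VS6 above follow the pattern

     for (i = 0; i < nunits / 2; i++)
       {
         sel_lo[2 * i] = i;               sel_lo[2 * i + 1] = i + nunits;
         sel_hi[2 * i] = i + nunits / 2;  sel_hi[2 * i + 1] = i + 3 * nunits / 2;
       }

which for nunits == 8 yields {0, 8, 1, 9, 2, 10, 3, 11} and
{4, 12, 5, 13, 6, 14, 7, 15}; vect_permute_store_chain builds the
equivalent VECTOR_CST masks.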
6186 prev_stmt_info = NULL;
6187 for (j = 0; j < ncopies; j++)
6190 if (j == 0)
6192 if (slp)
6194 /* Get vectorized arguments for SLP_NODE. */
6195 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6196 NULL, slp_node, -1);
6198 vec_oprnd = vec_oprnds[0];
6200 else
6202 /* For interleaved stores we collect vectorized defs for all the
6203 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6204 used as an input to vect_permute_store_chain(), and OPRNDS as
6205 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6207 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6208 OPRNDS are of size 1. */
6209 next_stmt = first_stmt;
6210 for (i = 0; i < group_size; i++)
6212 /* Since gaps are not supported for interleaved stores,
6213 GROUP_SIZE is the exact number of stmts in the chain.
6214 Therefore, NEXT_STMT cannot be NULL. If there is no
6215 interleaving, GROUP_SIZE is 1, and only one iteration
6216 of the loop will be executed. */
6217 gcc_assert (next_stmt
6218 && gimple_assign_single_p (next_stmt));
6219 op = gimple_assign_rhs1 (next_stmt);
6221 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6222 dr_chain.quick_push (vec_oprnd);
6223 oprnds.quick_push (vec_oprnd);
6224 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6228 /* We should have caught mismatched types earlier. */
6229 gcc_assert (useless_type_conversion_p (vectype,
6230 TREE_TYPE (vec_oprnd)));
6231 bool simd_lane_access_p
6232 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6233 if (simd_lane_access_p
6234 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6235 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6236 && integer_zerop (DR_OFFSET (first_dr))
6237 && integer_zerop (DR_INIT (first_dr))
6238 && alias_sets_conflict_p (get_alias_set (aggr_type),
6239 get_alias_set (TREE_TYPE (ref_type))))
6241 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6242 dataref_offset = build_int_cst (ref_type, 0);
6243 inv_p = false;
6245 else
6246 dataref_ptr
6247 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6248 simd_lane_access_p ? loop : NULL,
6249 offset, &dummy, gsi, &ptr_incr,
6250 simd_lane_access_p, &inv_p);
6251 gcc_assert (bb_vinfo || !inv_p);
6253 else
6255 /* For interleaved stores we created vectorized defs for all the
6256 defs stored in OPRNDS in the previous iteration (previous copy).
6257 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6258 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6259 next copy.
6260 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6261 OPRNDS are of size 1. */
6262 for (i = 0; i < group_size; i++)
6264 op = oprnds[i];
6265 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6266 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6267 dr_chain[i] = vec_oprnd;
6268 oprnds[i] = vec_oprnd;
6270 if (dataref_offset)
6271 dataref_offset
6272 = int_const_binop (PLUS_EXPR, dataref_offset,
6273 TYPE_SIZE_UNIT (aggr_type));
6274 else
6275 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6276 TYPE_SIZE_UNIT (aggr_type));
6279 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6281 tree vec_array;
6283 /* Combine all the vectors into an array. */
6284 vec_array = create_vector_array (vectype, vec_num);
6285 for (i = 0; i < vec_num; i++)
6287 vec_oprnd = dr_chain[i];
6288 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6291 /* Emit:
6292 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6293 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6294 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6295 gimple_call_set_lhs (new_stmt, data_ref);
6296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6298 else
6300 new_stmt = NULL;
6301 if (grouped_store)
6303 if (j == 0)
6304 result_chain.create (group_size);
6305 /* Permute. */
6306 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6307 &result_chain);
6310 next_stmt = first_stmt;
6311 for (i = 0; i < vec_num; i++)
6313 unsigned align, misalign;
6315 if (i > 0)
6316 /* Bump the vector pointer. */
6317 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6318 stmt, NULL_TREE);
6320 if (slp)
6321 vec_oprnd = vec_oprnds[i];
6322 else if (grouped_store)
6323 /* For grouped stores vectorized defs are interleaved in
6324 vect_permute_store_chain(). */
6325 vec_oprnd = result_chain[i];
6327 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
6328 dataref_ptr,
6329 dataref_offset
6330 ? dataref_offset
6331 : build_int_cst (ref_type, 0));
6332 align = TYPE_ALIGN_UNIT (vectype);
6333 if (aligned_access_p (first_dr))
6334 misalign = 0;
6335 else if (DR_MISALIGNMENT (first_dr) == -1)
6337 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6338 align = TYPE_ALIGN_UNIT (elem_type);
6339 else
6340 align = get_object_alignment (DR_REF (first_dr))
6341 / BITS_PER_UNIT;
6342 misalign = 0;
6343 TREE_TYPE (data_ref)
6344 = build_aligned_type (TREE_TYPE (data_ref),
6345 align * BITS_PER_UNIT);
6347 else
6349 TREE_TYPE (data_ref)
6350 = build_aligned_type (TREE_TYPE (data_ref),
6351 TYPE_ALIGN (elem_type));
6352 misalign = DR_MISALIGNMENT (first_dr);
6354 if (dataref_offset == NULL_TREE
6355 && TREE_CODE (dataref_ptr) == SSA_NAME)
6356 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6357 misalign);
6359 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6361 tree perm_mask = perm_mask_for_reverse (vectype);
6362 tree perm_dest
6363 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6364 vectype);
6365 tree new_temp = make_ssa_name (perm_dest);
6367 /* Generate the permute statement. */
6368 gimple *perm_stmt
6369 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6370 vec_oprnd, perm_mask);
6371 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6373 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6374 vec_oprnd = new_temp;
6377 /* Arguments are ready. Create the new vector stmt. */
6378 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6381 if (slp)
6382 continue;
6384 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6385 if (!next_stmt)
6386 break;
6389 if (!slp)
6391 if (j == 0)
6392 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6393 else
6394 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6395 prev_stmt_info = vinfo_for_stmt (new_stmt);
6399 oprnds.release ();
6400 result_chain.release ();
6401 vec_oprnds.release ();
6403 return true;
6406 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6407 VECTOR_CST mask. No checks are made that the target platform supports the
6408 mask, so callers may wish to test can_vec_perm_p separately, or use
6409 vect_gen_perm_mask_checked. */
6411 tree
6412 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6414 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6415 int i, nunits;
6417 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6419 mask_elt_type = lang_hooks.types.type_for_mode
6420 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6421 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6423 mask_elts = XALLOCAVEC (tree, nunits);
6424 for (i = nunits - 1; i >= 0; i--)
6425 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6426 mask_vec = build_vector (mask_type, mask_elts);
6428 return mask_vec;
6431 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6432 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6434 tree
6435 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6437 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6438 return vect_gen_perm_mask_any (vectype, sel);
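/* As an added usage illustration: a reversal mask for a 4-element vector
   type could be obtained with

     unsigned char sel[4] = { 3, 2, 1, 0 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   provided the target's can_vec_perm_p accepts that permutation; this is
   essentially what perm_mask_for_reverse does for arbitrary NUNITS.  */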
6441 /* Given vector variables X and Y that were generated for the scalar
6442 STMT, generate instructions to permute the vector elements of X and Y
6443 using permutation mask MASK_VEC, insert them at *GSI and return the
6444 permuted vector variable. */
6446 static tree
6447 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6448 gimple_stmt_iterator *gsi)
6450 tree vectype = TREE_TYPE (x);
6451 tree perm_dest, data_ref;
6452 gimple *perm_stmt;
6454 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6455 data_ref = make_ssa_name (perm_dest);
6457 /* Generate the permute statement. */
6458 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6459 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6461 return data_ref;
6464 /* Hoist the definitions of all SSA uses of STMT out of the loop LOOP,
6465 inserting them on the loop's preheader edge. Returns true if we
6466 were successful in doing so (and thus STMT can then be moved),
6467 otherwise returns false. */
6469 static bool
6470 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6472 ssa_op_iter i;
6473 tree op;
6474 bool any = false;
6476 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6478 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6479 if (!gimple_nop_p (def_stmt)
6480 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6482 /* Make sure we don't need to recurse. While we could do
6483 so in simple cases, when there are more complex use webs
6484 we don't have an easy way to preserve stmt order to fulfil
6485 dependencies within them. */
6486 tree op2;
6487 ssa_op_iter i2;
6488 if (gimple_code (def_stmt) == GIMPLE_PHI)
6489 return false;
6490 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6492 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6493 if (!gimple_nop_p (def_stmt2)
6494 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6495 return false;
6497 any = true;
6501 if (!any)
6502 return true;
6504 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6506 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6507 if (!gimple_nop_p (def_stmt)
6508 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6510 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6511 gsi_remove (&gsi, false);
6512 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6516 return true;
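/* As an added illustration: for an invariant load STMT in a loop body such
   as

     _1 = p_6 + 4;      <-- def of a use of STMT; p_6 is defined outside
     x_2 = *_1;         <-- STMT

   the definition of _1 depends only on values defined outside the loop, so
   it is moved to the preheader and true is returned.  If _1 instead used
   another SSA name defined inside the loop, the function would give up
   rather than recurse.  */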
6519 /* vectorizable_load.
6521 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6522 can be vectorized.
6523 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6524 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6525 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6527 static bool
6528 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6529 slp_tree slp_node, slp_instance slp_node_instance)
6531 tree scalar_dest;
6532 tree vec_dest = NULL;
6533 tree data_ref = NULL;
6534 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6535 stmt_vec_info prev_stmt_info;
6536 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6537 struct loop *loop = NULL;
6538 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6539 bool nested_in_vect_loop = false;
6540 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6541 tree elem_type;
6542 tree new_temp;
6543 machine_mode mode;
6544 gimple *new_stmt = NULL;
6545 tree dummy;
6546 enum dr_alignment_support alignment_support_scheme;
6547 tree dataref_ptr = NULL_TREE;
6548 tree dataref_offset = NULL_TREE;
6549 gimple *ptr_incr = NULL;
6550 int ncopies;
6551 int i, j, group_size, group_gap_adj;
6552 tree msq = NULL_TREE, lsq;
6553 tree offset = NULL_TREE;
6554 tree byte_offset = NULL_TREE;
6555 tree realignment_token = NULL_TREE;
6556 gphi *phi = NULL;
6557 vec<tree> dr_chain = vNULL;
6558 bool grouped_load = false;
6559 gimple *first_stmt;
6560 gimple *first_stmt_for_drptr = NULL;
6561 bool inv_p;
6562 bool compute_in_loop = false;
6563 struct loop *at_loop;
6564 int vec_num;
6565 bool slp = (slp_node != NULL);
6566 bool slp_perm = false;
6567 enum tree_code code;
6568 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6569 int vf;
6570 tree aggr_type;
6571 gather_scatter_info gs_info;
6572 vec_info *vinfo = stmt_info->vinfo;
6573 tree ref_type;
6575 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6576 return false;
6578 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6579 && ! vec_stmt)
6580 return false;
6582 /* Is vectorizable load? */
6583 if (!is_gimple_assign (stmt))
6584 return false;
6586 scalar_dest = gimple_assign_lhs (stmt);
6587 if (TREE_CODE (scalar_dest) != SSA_NAME)
6588 return false;
6590 code = gimple_assign_rhs_code (stmt);
6591 if (code != ARRAY_REF
6592 && code != BIT_FIELD_REF
6593 && code != INDIRECT_REF
6594 && code != COMPONENT_REF
6595 && code != IMAGPART_EXPR
6596 && code != REALPART_EXPR
6597 && code != MEM_REF
6598 && TREE_CODE_CLASS (code) != tcc_declaration)
6599 return false;
6601 if (!STMT_VINFO_DATA_REF (stmt_info))
6602 return false;
6604 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6605 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6607 if (loop_vinfo)
6609 loop = LOOP_VINFO_LOOP (loop_vinfo);
6610 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6611 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6613 else
6614 vf = 1;
6616 /* Multiple types in SLP are handled by creating the appropriate number of
6617 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6618 case of SLP. */
6619 if (slp)
6620 ncopies = 1;
6621 else
6622 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6624 gcc_assert (ncopies >= 1);
6626 /* FORNOW. This restriction should be relaxed. */
6627 if (nested_in_vect_loop && ncopies > 1)
6629 if (dump_enabled_p ())
6630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6631 "multiple types in nested loop.\n");
6632 return false;
6635 /* Invalidate assumptions made by dependence analysis when vectorization
6636 on the unrolled body effectively re-orders stmts. */
6637 if (ncopies > 1
6638 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6639 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6640 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6642 if (dump_enabled_p ())
6643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6644 "cannot perform implicit CSE when unrolling "
6645 "with negative dependence distance\n");
6646 return false;
6649 elem_type = TREE_TYPE (vectype);
6650 mode = TYPE_MODE (vectype);
6652 /* FORNOW. In some cases we can vectorize even if the data type is not
6653 supported (e.g. data copies). */
6654 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6656 if (dump_enabled_p ())
6657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6658 "Aligned load, but unsupported type.\n");
6659 return false;
6662 /* Check if the load is a part of an interleaving chain. */
6663 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6665 grouped_load = true;
6666 /* FORNOW */
6667 gcc_assert (!nested_in_vect_loop);
6668 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6670 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6671 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6673 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6674 slp_perm = true;
6676 /* Invalidate assumptions made by dependence analysis when vectorization
6677 on the unrolled body effectively re-orders stmts. */
6678 if (!PURE_SLP_STMT (stmt_info)
6679 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6680 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6681 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6683 if (dump_enabled_p ())
6684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6685 "cannot perform implicit CSE when performing "
6686 "group loads with negative dependence distance\n");
6687 return false;
6690 /* Similarly, when the stmt is a load that is both part of an SLP
6691 instance and a loop-vectorized stmt via the same-dr mechanism,
6692 we have to give up. */
6693 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6694 && (STMT_SLP_TYPE (stmt_info)
6695 != STMT_SLP_TYPE (vinfo_for_stmt
6696 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6698 if (dump_enabled_p ())
6699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6700 "conflicting SLP types for CSEd load\n");
6701 return false;
6705 vect_memory_access_type memory_access_type;
6706 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6707 &memory_access_type, &gs_info))
6708 return false;
6710 if (!vec_stmt) /* transformation not required. */
6712 if (!slp)
6713 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6714 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6715 /* The SLP costs are calculated during SLP analysis. */
6716 if (!PURE_SLP_STMT (stmt_info))
6717 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6718 NULL, NULL, NULL);
6719 return true;
6722 if (!slp)
6723 gcc_assert (memory_access_type
6724 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6726 if (dump_enabled_p ())
6727 dump_printf_loc (MSG_NOTE, vect_location,
6728 "transform load. ncopies = %d\n", ncopies);
6730 /** Transform. **/
6732 ensure_base_align (stmt_info, dr);
6734 if (memory_access_type == VMAT_GATHER_SCATTER)
6736 tree vec_oprnd0 = NULL_TREE, op;
6737 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6738 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6739 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6740 edge pe = loop_preheader_edge (loop);
6741 gimple_seq seq;
6742 basic_block new_bb;
6743 enum { NARROW, NONE, WIDEN } modifier;
6744 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6746 if (nunits == gather_off_nunits)
6747 modifier = NONE;
6748 else if (nunits == gather_off_nunits / 2)
6750 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6751 modifier = WIDEN;
6753 for (i = 0; i < gather_off_nunits; ++i)
6754 sel[i] = i | nunits;
6756 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6758 else if (nunits == gather_off_nunits * 2)
6760 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6761 modifier = NARROW;
6763 for (i = 0; i < nunits; ++i)
6764 sel[i] = i < gather_off_nunits
6765 ? i : i + nunits - gather_off_nunits;
6767 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6768 ncopies *= 2;
6770 else
6771 gcc_unreachable ();
6773 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6774 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6775 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6776 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6777 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6778 scaletype = TREE_VALUE (arglist);
6779 gcc_checking_assert (types_compatible_p (srctype, rettype));
6781 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6783 ptr = fold_convert (ptrtype, gs_info.base);
6784 if (!is_gimple_min_invariant (ptr))
6786 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6787 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6788 gcc_assert (!new_bb);
6791 /* Currently we support only unconditional gather loads,
6792 so mask should be all ones. */
6793 if (TREE_CODE (masktype) == INTEGER_TYPE)
6794 mask = build_int_cst (masktype, -1);
6795 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6797 mask = build_int_cst (TREE_TYPE (masktype), -1);
6798 mask = build_vector_from_val (masktype, mask);
6799 mask = vect_init_vector (stmt, mask, masktype, NULL);
6801 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6803 REAL_VALUE_TYPE r;
6804 long tmp[6];
6805 for (j = 0; j < 6; ++j)
6806 tmp[j] = -1;
6807 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6808 mask = build_real (TREE_TYPE (masktype), r);
6809 mask = build_vector_from_val (masktype, mask);
6810 mask = vect_init_vector (stmt, mask, masktype, NULL);
6812 else
6813 gcc_unreachable ();
6815 scale = build_int_cst (scaletype, gs_info.scale);
6817 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6818 merge = build_int_cst (TREE_TYPE (rettype), 0);
6819 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6821 REAL_VALUE_TYPE r;
6822 long tmp[6];
6823 for (j = 0; j < 6; ++j)
6824 tmp[j] = 0;
6825 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6826 merge = build_real (TREE_TYPE (rettype), r);
6828 else
6829 gcc_unreachable ();
6830 merge = build_vector_from_val (rettype, merge);
6831 merge = vect_init_vector (stmt, merge, rettype, NULL);
6833 prev_stmt_info = NULL;
6834 for (j = 0; j < ncopies; ++j)
6836 if (modifier == WIDEN && (j & 1))
6837 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6838 perm_mask, stmt, gsi);
6839 else if (j == 0)
6840 op = vec_oprnd0
6841 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6842 else
6843 op = vec_oprnd0
6844 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6846 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6848 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6849 == TYPE_VECTOR_SUBPARTS (idxtype));
6850 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6851 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6852 new_stmt
6853 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6854 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6855 op = var;
6858 new_stmt
6859 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6861 if (!useless_type_conversion_p (vectype, rettype))
6863 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6864 == TYPE_VECTOR_SUBPARTS (rettype));
6865 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6866 gimple_call_set_lhs (new_stmt, op);
6867 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6868 var = make_ssa_name (vec_dest);
6869 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6870 new_stmt
6871 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6873 else
6875 var = make_ssa_name (vec_dest, new_stmt);
6876 gimple_call_set_lhs (new_stmt, var);
6879 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6881 if (modifier == NARROW)
6883 if ((j & 1) == 0)
6885 prev_res = var;
6886 continue;
6888 var = permute_vec_elements (prev_res, var,
6889 perm_mask, stmt, gsi);
6890 new_stmt = SSA_NAME_DEF_STMT (var);
6893 if (prev_stmt_info == NULL)
6894 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6895 else
6896 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6897 prev_stmt_info = vinfo_for_stmt (new_stmt);
6899 return true;
6902 if (memory_access_type == VMAT_ELEMENTWISE
6903 || memory_access_type == VMAT_STRIDED_SLP)
6905 gimple_stmt_iterator incr_gsi;
6906 bool insert_after;
6907 gimple *incr;
6908 tree offvar;
6909 tree ivstep;
6910 tree running_off;
6911 vec<constructor_elt, va_gc> *v = NULL;
6912 gimple_seq stmts = NULL;
6913 tree stride_base, stride_step, alias_off;
6915 gcc_assert (!nested_in_vect_loop);
6917 if (slp && grouped_load)
6919 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6920 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6921 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6922 ref_type = get_group_alias_ptr_type (first_stmt);
6924 else
6926 first_stmt = stmt;
6927 first_dr = dr;
6928 group_size = 1;
6929 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6932 stride_base
6933 = fold_build_pointer_plus
6934 (DR_BASE_ADDRESS (first_dr),
6935 size_binop (PLUS_EXPR,
6936 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6937 convert_to_ptrofftype (DR_INIT (first_dr))));
6938 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6940 /* For a load with a loop-invariant (but other than power-of-2)
6941 stride, i.e. not a grouped access, like so:
6943 for (i = 0; i < n; i += stride)
6944 ... = array[i];
6946 we generate a new induction variable and new accesses to
6947 form a new vector (or vectors, depending on ncopies):
6949 for (j = 0; ; j += VF*stride)
6950 tmp1 = array[j];
6951 tmp2 = array[j + stride];
6953 vectemp = {tmp1, tmp2, ...}
6956 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6957 build_int_cst (TREE_TYPE (stride_step), vf));
6959 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6961 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6962 loop, &incr_gsi, insert_after,
6963 &offvar, NULL);
6964 incr = gsi_stmt (incr_gsi);
6965 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6967 stride_step = force_gimple_operand (unshare_expr (stride_step),
6968 &stmts, true, NULL_TREE);
6969 if (stmts)
6970 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6972 prev_stmt_info = NULL;
6973 running_off = offvar;
6974 alias_off = build_int_cst (ref_type, 0);
6975 int nloads = nunits;
6976 int lnel = 1;
6977 tree ltype = TREE_TYPE (vectype);
6978 tree lvectype = vectype;
6979 auto_vec<tree> dr_chain;
6980 if (memory_access_type == VMAT_STRIDED_SLP)
6982 if (group_size < nunits)
6984 /* Avoid emitting a constructor of vector elements by performing
6985 the loads using an integer type of the same size,
6986 constructing a vector of those and then re-interpreting it
6987 as the original vector type. This works around the fact
6988 that the vec_init optab was only designed for scalar
6989 element modes and thus expansion goes through memory.
6990 This avoids a huge runtime penalty due to the general
6991 inability to perform store forwarding from smaller stores
6992 to a larger load. */
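/* As an added illustration: for an SLP group of two 32-bit floats loaded
   into a 4-element V4SF vector, lsize == 64, so each pair of group elements
   is loaded as a single 64-bit integer, a 2-element integer vector is built
   from those (which the target can expand with vec_init), and a
   VIEW_CONVERT_EXPR turns the result back into V4SF -- instead of four
   scalar loads feeding an element-wise constructor.  */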
6993 unsigned lsize
6994 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
6995 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
6996 enum machine_mode vmode = mode_for_vector (elmode,
6997 nunits / group_size);
6998 /* If we can't construct such a vector fall back to
6999 element loads of the original vector type. */
7000 if (VECTOR_MODE_P (vmode)
7001 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7003 nloads = nunits / group_size;
7004 lnel = group_size;
7005 ltype = build_nonstandard_integer_type (lsize, 1);
7006 lvectype = build_vector_type (ltype, nloads);
7009 else
7011 nloads = 1;
7012 lnel = nunits;
7013 ltype = vectype;
7015 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7017 if (slp)
7019 /* For SLP permutation support we need to load the whole group,
7020 not only the number of vector stmts the permutation result
7021 fits in. */
7022 if (slp_perm)
7024 ncopies = (group_size * vf + nunits - 1) / nunits;
7025 dr_chain.create (ncopies);
7027 else
7028 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7030 int group_el = 0;
7031 unsigned HOST_WIDE_INT
7032 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7033 for (j = 0; j < ncopies; j++)
7035 if (nloads > 1)
7036 vec_alloc (v, nloads);
7037 for (i = 0; i < nloads; i++)
7039 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7040 group_el * elsz);
7041 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7042 build2 (MEM_REF, ltype,
7043 running_off, this_off));
7044 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7045 if (nloads > 1)
7046 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7047 gimple_assign_lhs (new_stmt));
7049 group_el += lnel;
7050 if (! slp
7051 || group_el == group_size)
7053 tree newoff = copy_ssa_name (running_off);
7054 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7055 running_off, stride_step);
7056 vect_finish_stmt_generation (stmt, incr, gsi);
7058 running_off = newoff;
7059 group_el = 0;
7062 if (nloads > 1)
7064 tree vec_inv = build_constructor (lvectype, v);
7065 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7066 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7067 if (lvectype != vectype)
7069 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7070 VIEW_CONVERT_EXPR,
7071 build1 (VIEW_CONVERT_EXPR,
7072 vectype, new_temp));
7073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7077 if (slp)
7079 if (slp_perm)
7080 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7081 else
7082 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7084 else
7086 if (j == 0)
7087 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7088 else
7089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7090 prev_stmt_info = vinfo_for_stmt (new_stmt);
7093 if (slp_perm)
7095 unsigned n_perms;
7096 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7097 slp_node_instance, false, &n_perms);
7099 return true;
7102 if (grouped_load)
7104 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7105 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7106 /* For SLP vectorization we directly vectorize a subchain
7107 without permutation. */
7108 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7109 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7110 /* For BB vectorization always use the first stmt to base
7111 the data ref pointer on. */
7112 if (bb_vinfo)
7113 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7115 /* Check if the chain of loads is already vectorized. */
7116 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7117 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7118 ??? But we can only do so if there is exactly one
7119 as we have no way to get at the rest. Leave the CSE
7120 opportunity alone.
7121 ??? With the group load eventually participating
7122 in multiple different permutations (having multiple
7123 slp nodes which refer to the same group) the CSE
7124 is even wrong code. See PR56270. */
7125 && !slp)
7127 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7128 return true;
7130 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7131 group_gap_adj = 0;
7133 /* VEC_NUM is the number of vect stmts to be created for this group. */
7134 if (slp)
7136 grouped_load = false;
7137 /* For SLP permutation support we need to load the whole group,
7138 not only the number of vector stmts the permutation result
7139 fits in. */
7140 if (slp_perm)
7141 vec_num = (group_size * vf + nunits - 1) / nunits;
7142 else
7143 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7144 group_gap_adj = vf * group_size - nunits * vec_num;
7146 else
7147 vec_num = group_size;
7149 ref_type = get_group_alias_ptr_type (first_stmt);
7151 else
7153 first_stmt = stmt;
7154 first_dr = dr;
7155 group_size = vec_num = 1;
7156 group_gap_adj = 0;
7157 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7160 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7161 gcc_assert (alignment_support_scheme);
7162 /* Targets with load-lane instructions must not require explicit
7163 realignment. */
7164 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7165 || alignment_support_scheme == dr_aligned
7166 || alignment_support_scheme == dr_unaligned_supported);
7168 /* In case the vectorization factor (VF) is bigger than the number
7169 of elements that we can fit in a vectype (nunits), we have to generate
7170 more than one vector stmt, i.e., we need to "unroll" the
7171 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7172 from one copy of the vector stmt to the next, in the field
7173 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7174 stages to find the correct vector defs to be used when vectorizing
7175 stmts that use the defs of the current stmt. The example below
7176 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7177 need to create 4 vectorized stmts):
7179 before vectorization:
7180 RELATED_STMT VEC_STMT
7181 S1: x = memref - -
7182 S2: z = x + 1 - -
7184 step 1: vectorize stmt S1:
7185 We first create the vector stmt VS1_0, and, as usual, record a
7186 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7187 Next, we create the vector stmt VS1_1, and record a pointer to
7188 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7189 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7190 stmts and pointers:
7191 RELATED_STMT VEC_STMT
7192 VS1_0: vx0 = memref0 VS1_1 -
7193 VS1_1: vx1 = memref1 VS1_2 -
7194 VS1_2: vx2 = memref2 VS1_3 -
7195 VS1_3: vx3 = memref3 - -
7196 S1: x = load - VS1_0
7197 S2: z = x + 1 - -
7199 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7200 information we recorded in RELATED_STMT field is used to vectorize
7201 stmt S2. */
7203 /* In case of interleaving (non-unit grouped access):
7205 S1: x2 = &base + 2
7206 S2: x0 = &base
7207 S3: x1 = &base + 1
7208 S4: x3 = &base + 3
7210 Vectorized loads are created in the order of memory accesses
7211 starting from the access of the first stmt of the chain:
7213 VS1: vx0 = &base
7214 VS2: vx1 = &base + vec_size*1
7215 VS3: vx3 = &base + vec_size*2
7216 VS4: vx4 = &base + vec_size*3
7218 Then permutation statements are generated:
7220 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7221 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7224 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7225 (the order of the data-refs in the output of vect_permute_load_chain
7226 corresponds to the order of scalar stmts in the interleaving chain - see
7227 the documentation of vect_permute_load_chain()).
7228 The generation of permutation stmts and recording them in
7229 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7231 In case of both multiple types and interleaving, the vector loads and
7232 permutation stmts above are created for every copy. The result vector
7233 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7234 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
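/* As an added illustration, continuing the above with 4-element vectors and
   a group of two loads: the concatenation of vx0 and vx1 is permuted with
   the extract-even mask {0, 2, 4, 6} to recover the elements belonging to
   the first scalar stmt and with the extract-odd mask {1, 3, 5, 7} for the
   second; vect_permute_load_chain generates those VEC_PERM_EXPRs.  */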
7236 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7237 on a target that supports unaligned accesses (dr_unaligned_supported)
7238 we generate the following code:
7239 p = initial_addr;
7240 indx = 0;
7241 loop {
7242 p = p + indx * vectype_size;
7243 vec_dest = *(p);
7244 indx = indx + 1;
7247 Otherwise, the data reference is potentially unaligned on a target that
7248 does not support unaligned accesses (dr_explicit_realign_optimized) -
7249 then generate the following code, in which the data in each iteration is
7250 obtained by two vector loads, one from the previous iteration, and one
7251 from the current iteration:
7252 p1 = initial_addr;
7253 msq_init = *(floor(p1))
7254 p2 = initial_addr + VS - 1;
7255 realignment_token = call target_builtin;
7256 indx = 0;
7257 loop {
7258 p2 = p2 + indx * vectype_size
7259 lsq = *(floor(p2))
7260 vec_dest = realign_load (msq, lsq, realignment_token)
7261 indx = indx + 1;
7262 msq = lsq;
7263 } */
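/* As an added illustration (the exact semantics of realign_load are target
   specific): with 16-byte vectors and an access at address p where
   p % 16 == 4, the scheme above amounts to

     msq = *(p & -16);                loads bytes [p-4,  p+12)
     lsq = *((p + 15) & -16);         loads bytes [p+12, p+28)
     vec_dest = realign_load (msq, lsq, realignment_token);
                                      yields bytes [p, p+16)

   so every iteration uses only aligned loads, and the previous iteration's
   lsq is reused as the next msq.  */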
7265 /* If the misalignment remains the same throughout the execution of the
7266 loop, we can create the init_addr and permutation mask at the loop
7267 preheader. Otherwise, it needs to be created inside the loop.
7268 This can only occur when vectorizing memory accesses in the inner-loop
7269 nested within an outer-loop that is being vectorized. */
7271 if (nested_in_vect_loop
7272 && (TREE_INT_CST_LOW (DR_STEP (dr))
7273 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7275 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7276 compute_in_loop = true;
7279 if ((alignment_support_scheme == dr_explicit_realign_optimized
7280 || alignment_support_scheme == dr_explicit_realign)
7281 && !compute_in_loop)
7283 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7284 alignment_support_scheme, NULL_TREE,
7285 &at_loop);
7286 if (alignment_support_scheme == dr_explicit_realign_optimized)
7288 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7289 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7290 size_one_node);
7293 else
7294 at_loop = loop;
7296 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7297 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7299 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7300 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7301 else
7302 aggr_type = vectype;
7304 prev_stmt_info = NULL;
7305 for (j = 0; j < ncopies; j++)
7307 /* 1. Create the vector or array pointer update chain. */
7308 if (j == 0)
7310 bool simd_lane_access_p
7311 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7312 if (simd_lane_access_p
7313 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7314 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7315 && integer_zerop (DR_OFFSET (first_dr))
7316 && integer_zerop (DR_INIT (first_dr))
7317 && alias_sets_conflict_p (get_alias_set (aggr_type),
7318 get_alias_set (TREE_TYPE (ref_type)))
7319 && (alignment_support_scheme == dr_aligned
7320 || alignment_support_scheme == dr_unaligned_supported))
7322 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7323 dataref_offset = build_int_cst (ref_type, 0);
7324 inv_p = false;
7326 else if (first_stmt_for_drptr
7327 && first_stmt != first_stmt_for_drptr)
7329 dataref_ptr
7330 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7331 at_loop, offset, &dummy, gsi,
7332 &ptr_incr, simd_lane_access_p,
7333 &inv_p, byte_offset);
7334 /* Adjust the pointer by the difference to first_stmt. */
7335 data_reference_p ptrdr
7336 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7337 tree diff = fold_convert (sizetype,
7338 size_binop (MINUS_EXPR,
7339 DR_INIT (first_dr),
7340 DR_INIT (ptrdr)));
7341 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7342 stmt, diff);
7344 else
7345 dataref_ptr
7346 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7347 offset, &dummy, gsi, &ptr_incr,
7348 simd_lane_access_p, &inv_p,
7349 byte_offset);
7351 else if (dataref_offset)
7352 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7353 TYPE_SIZE_UNIT (aggr_type));
7354 else
7355 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7356 TYPE_SIZE_UNIT (aggr_type));
7358 if (grouped_load || slp_perm)
7359 dr_chain.create (vec_num);
7361 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7363 tree vec_array;
7365 vec_array = create_vector_array (vectype, vec_num);
7367 /* Emit:
7368 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7369 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7370 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7371 gimple_call_set_lhs (new_stmt, vec_array);
7372 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7374 /* Extract each vector into an SSA_NAME. */
7375 for (i = 0; i < vec_num; i++)
7377 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7378 vec_array, i);
7379 dr_chain.quick_push (new_temp);
7382 /* Record the mapping between SSA_NAMEs and statements. */
7383 vect_record_grouped_load_vectors (stmt, dr_chain);
7385 else
7387 for (i = 0; i < vec_num; i++)
7389 if (i > 0)
7390 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7391 stmt, NULL_TREE);
7393 /* 2. Create the vector-load in the loop. */
7394 switch (alignment_support_scheme)
7396 case dr_aligned:
7397 case dr_unaligned_supported:
7399 unsigned int align, misalign;
7401 data_ref
7402 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7403 dataref_offset
7404 ? dataref_offset
7405 : build_int_cst (ref_type, 0));
7406 align = TYPE_ALIGN_UNIT (vectype);
7407 if (alignment_support_scheme == dr_aligned)
7409 gcc_assert (aligned_access_p (first_dr));
7410 misalign = 0;
7412 else if (DR_MISALIGNMENT (first_dr) == -1)
7414 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7415 align = TYPE_ALIGN_UNIT (elem_type);
7416 else
7417 align = (get_object_alignment (DR_REF (first_dr))
7418 / BITS_PER_UNIT);
7419 misalign = 0;
7420 TREE_TYPE (data_ref)
7421 = build_aligned_type (TREE_TYPE (data_ref),
7422 align * BITS_PER_UNIT);
7424 else
7426 TREE_TYPE (data_ref)
7427 = build_aligned_type (TREE_TYPE (data_ref),
7428 TYPE_ALIGN (elem_type));
7429 misalign = DR_MISALIGNMENT (first_dr);
7431 if (dataref_offset == NULL_TREE
7432 && TREE_CODE (dataref_ptr) == SSA_NAME)
7433 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7434 align, misalign);
7435 break;
7437 case dr_explicit_realign:
7439 tree ptr, bump;
7441 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7443 if (compute_in_loop)
7444 msq = vect_setup_realignment (first_stmt, gsi,
7445 &realignment_token,
7446 dr_explicit_realign,
7447 dataref_ptr, NULL);
7449 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7450 ptr = copy_ssa_name (dataref_ptr);
7451 else
7452 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7453 new_stmt = gimple_build_assign
7454 (ptr, BIT_AND_EXPR, dataref_ptr,
7455 build_int_cst
7456 (TREE_TYPE (dataref_ptr),
7457 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7458 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7459 data_ref
7460 = build2 (MEM_REF, vectype, ptr,
7461 build_int_cst (ref_type, 0));
7462 vec_dest = vect_create_destination_var (scalar_dest,
7463 vectype);
7464 new_stmt = gimple_build_assign (vec_dest, data_ref);
7465 new_temp = make_ssa_name (vec_dest, new_stmt);
7466 gimple_assign_set_lhs (new_stmt, new_temp);
7467 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7468 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7469 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7470 msq = new_temp;
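/* Now build the address for the second, aligned load (LSQ): advance the
   pointer by one vector size minus one byte and mask it down to the
   vector alignment.  */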
7472 bump = size_binop (MULT_EXPR, vs,
7473 TYPE_SIZE_UNIT (elem_type));
7474 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7475 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7476 new_stmt = gimple_build_assign
7477 (NULL_TREE, BIT_AND_EXPR, ptr,
7478 build_int_cst
7479 (TREE_TYPE (ptr),
7480 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7481 ptr = copy_ssa_name (ptr, new_stmt);
7482 gimple_assign_set_lhs (new_stmt, ptr);
7483 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7484 data_ref
7485 = build2 (MEM_REF, vectype, ptr,
7486 build_int_cst (ref_type, 0));
7487 break;
7489 case dr_explicit_realign_optimized:
7490 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7491 new_temp = copy_ssa_name (dataref_ptr);
7492 else
7493 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7494 new_stmt = gimple_build_assign
7495 (new_temp, BIT_AND_EXPR, dataref_ptr,
7496 build_int_cst
7497 (TREE_TYPE (dataref_ptr),
7498 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7500 data_ref
7501 = build2 (MEM_REF, vectype, new_temp,
7502 build_int_cst (ref_type, 0));
7503 break;
7504 default:
7505 gcc_unreachable ();
7507 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7508 new_stmt = gimple_build_assign (vec_dest, data_ref);
7509 new_temp = make_ssa_name (vec_dest, new_stmt);
7510 gimple_assign_set_lhs (new_stmt, new_temp);
7511 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7513 /* 3. Handle explicit realignment if necessary/supported.
7514 Create in loop:
7515 vec_dest = realign_load (msq, lsq, realignment_token) */
7516 if (alignment_support_scheme == dr_explicit_realign_optimized
7517 || alignment_support_scheme == dr_explicit_realign)
7519 lsq = gimple_assign_lhs (new_stmt);
7520 if (!realignment_token)
7521 realignment_token = dataref_ptr;
7522 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7523 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7524 msq, lsq, realignment_token);
7525 new_temp = make_ssa_name (vec_dest, new_stmt);
7526 gimple_assign_set_lhs (new_stmt, new_temp);
7527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7529 if (alignment_support_scheme == dr_explicit_realign_optimized)
7531 gcc_assert (phi);
7532 if (i == vec_num - 1 && j == ncopies - 1)
7533 add_phi_arg (phi, lsq,
7534 loop_latch_edge (containing_loop),
7535 UNKNOWN_LOCATION);
7536 msq = lsq;
7540 /* 4. Handle invariant-load. */
7541 if (inv_p && !bb_vinfo)
7543 gcc_assert (!grouped_load);
7544 /* If we have versioned for aliasing or the loop doesn't
7545 have any data dependencies that would preclude this,
7546 then we are sure this is a loop invariant load and
7547 thus we can insert it on the preheader edge. */
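/* A sketch of the hoisting, assuming a scalar load "x = *p" with an
   invariant address:
     preheader:  tem = *p;
                 vec = { tem, tem, ... };   <-- vect_init_vector
   The vectorized uses in the loop then read VEC instead of reloading *p.  */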
7548 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7549 && !nested_in_vect_loop
7550 && hoist_defs_of_uses (stmt, loop))
7552 if (dump_enabled_p ())
7554 dump_printf_loc (MSG_NOTE, vect_location,
7555 "hoisting out of the vectorized "
7556 "loop: ");
7557 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7559 tree tem = copy_ssa_name (scalar_dest);
7560 gsi_insert_on_edge_immediate
7561 (loop_preheader_edge (loop),
7562 gimple_build_assign (tem,
7563 unshare_expr
7564 (gimple_assign_rhs1 (stmt))));
7565 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7566 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7567 set_vinfo_for_stmt (new_stmt,
7568 new_stmt_vec_info (new_stmt, vinfo));
7570 else
7572 gimple_stmt_iterator gsi2 = *gsi;
7573 gsi_next (&gsi2);
7574 new_temp = vect_init_vector (stmt, scalar_dest,
7575 vectype, &gsi2);
7576 new_stmt = SSA_NAME_DEF_STMT (new_temp);
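/* For a contiguous access with a negative step the vector elements come out
   in the reverse of the scalar iteration order; the permutation below
   restores that order.  */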
7580 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7582 tree perm_mask = perm_mask_for_reverse (vectype);
7583 new_temp = permute_vec_elements (new_temp, new_temp,
7584 perm_mask, stmt, gsi);
7585 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7588 /* Collect vector loads and later create their permutation in
7589 vect_transform_grouped_load (). */
7590 if (grouped_load || slp_perm)
7591 dr_chain.quick_push (new_temp);
7593 /* Store vector loads in the corresponding SLP_NODE. */
7594 if (slp && !slp_perm)
7595 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7597 /* Bump the vector pointer to account for a gap or for excess
7598 elements loaded for a permuted SLP load. */
7599 if (group_gap_adj != 0)
7601 bool ovf;
7602 tree bump
7603 = wide_int_to_tree (sizetype,
7604 wi::smul (TYPE_SIZE_UNIT (elem_type),
7605 group_gap_adj, &ovf));
7606 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7607 stmt, bump);
7611 if (slp && !slp_perm)
7612 continue;
7614 if (slp_perm)
7616 unsigned n_perms;
7617 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7618 slp_node_instance, false,
7619 &n_perms))
7621 dr_chain.release ();
7622 return false;
7625 else
7627 if (grouped_load)
7629 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7630 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7631 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7633 else
7635 if (j == 0)
7636 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7637 else
7638 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7639 prev_stmt_info = vinfo_for_stmt (new_stmt);
7642 dr_chain.release ();
7645 return true;
7648 /* Function vect_is_simple_cond.
7650 Input:
7651 LOOP - the loop that is being vectorized.
7652 COND - Condition that is checked for simple use.
7654 Output:
7655 *COMP_VECTYPE - the vector type for the comparison.
7657 Returns whether a COND can be vectorized.  Checks whether the
7658 condition operands are supportable using vect_is_simple_use. */
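/* For example, COND may be a comparison such as "a_1 < b_2" (each operand is
   then checked with vect_is_simple_use), or, in the mask case, a boolean
   SSA_NAME whose vector type must already be a boolean vector type.  */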
7660 static bool
7661 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7663 tree lhs, rhs;
7664 enum vect_def_type dt;
7665 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7667 /* Mask case. */
7668 if (TREE_CODE (cond) == SSA_NAME
7669 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7671 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7672 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7673 &dt, comp_vectype)
7674 || !*comp_vectype
7675 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7676 return false;
7677 return true;
7680 if (!COMPARISON_CLASS_P (cond))
7681 return false;
7683 lhs = TREE_OPERAND (cond, 0);
7684 rhs = TREE_OPERAND (cond, 1);
7686 if (TREE_CODE (lhs) == SSA_NAME)
7688 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7689 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7690 return false;
7692 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7693 && TREE_CODE (lhs) != FIXED_CST)
7694 return false;
7696 if (TREE_CODE (rhs) == SSA_NAME)
7698 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7699 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7700 return false;
7702 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7703 && TREE_CODE (rhs) != FIXED_CST)
7704 return false;
7706 if (vectype1 && vectype2
7707 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7708 return false;
7710 *comp_vectype = vectype1 ? vectype1 : vectype2;
7711 return true;
7714 /* vectorizable_condition.
7716 Check if STMT is conditional modify expression that can be vectorized.
7717 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7718 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7719 at GSI.
7721 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7722 to be used at REDUC_INDEX (in the THEN clause if REDUC_INDEX is 1, and in
7723 the ELSE clause if it is 2).
7725 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
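/* For example (illustrative only): a scalar statement such as
     x_1 = a_2 < b_3 ? c_4 : d_5
   is replaced by a VEC_COND_EXPR operating on the corresponding vector
   (or mask) operands.  */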
7727 bool
7728 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7729 gimple **vec_stmt, tree reduc_def, int reduc_index,
7730 slp_tree slp_node)
7732 tree scalar_dest = NULL_TREE;
7733 tree vec_dest = NULL_TREE;
7734 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7735 tree then_clause, else_clause;
7736 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7737 tree comp_vectype = NULL_TREE;
7738 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7739 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7740 tree vec_compare;
7741 tree new_temp;
7742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7743 enum vect_def_type dt, dts[4];
7744 int ncopies;
7745 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7746 stmt_vec_info prev_stmt_info = NULL;
7747 int i, j;
7748 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7749 vec<tree> vec_oprnds0 = vNULL;
7750 vec<tree> vec_oprnds1 = vNULL;
7751 vec<tree> vec_oprnds2 = vNULL;
7752 vec<tree> vec_oprnds3 = vNULL;
7753 tree vec_cmp_type;
7754 bool masked = false;
7756 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7757 return false;
7759 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7761 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7762 return false;
7764 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7765 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7766 && reduc_def))
7767 return false;
7769 /* FORNOW: not yet supported. */
7770 if (STMT_VINFO_LIVE_P (stmt_info))
7772 if (dump_enabled_p ())
7773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7774 "value used after loop.\n");
7775 return false;
7779 /* Is this a vectorizable conditional operation? */
7780 if (!is_gimple_assign (stmt))
7781 return false;
7783 code = gimple_assign_rhs_code (stmt);
7785 if (code != COND_EXPR)
7786 return false;
7788 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7789 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7790 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7792 if (slp_node)
7793 ncopies = 1;
7794 else
7795 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7797 gcc_assert (ncopies >= 1);
7798 if (reduc_index && ncopies > 1)
7799 return false; /* FORNOW */
7801 cond_expr = gimple_assign_rhs1 (stmt);
7802 then_clause = gimple_assign_rhs2 (stmt);
7803 else_clause = gimple_assign_rhs3 (stmt);
7805 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7806 || !comp_vectype)
7807 return false;
7809 gimple *def_stmt;
7810 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7811 &vectype1))
7812 return false;
7813 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7814 &vectype2))
7815 return false;
7817 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7818 return false;
7820 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7821 return false;
7823 masked = !COMPARISON_CLASS_P (cond_expr);
7824 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7826 if (vec_cmp_type == NULL_TREE)
7827 return false;
7829 cond_code = TREE_CODE (cond_expr);
7830 if (!masked)
7832 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7833 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7836 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7838 /* Boolean values may have another representation in vectors
7839 and therefore we prefer bit operations over comparison for
7840 them (which also works for scalar masks). We store opcodes
7841 to use in bitop1 and bitop2. Statement is vectorized as
7842 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7843 depending on bitop1 and bitop2 arity. */
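/* For example, with boolean operands GT_EXPR is computed as
   "rhs1 & ~rhs2" (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR) and
   EQ_EXPR as "~(rhs1 ^ rhs2)" (bitop1 = BIT_XOR_EXPR,
   bitop2 = BIT_NOT_EXPR), as encoded by the switch below.  */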
7844 switch (cond_code)
7846 case GT_EXPR:
7847 bitop1 = BIT_NOT_EXPR;
7848 bitop2 = BIT_AND_EXPR;
7849 break;
7850 case GE_EXPR:
7851 bitop1 = BIT_NOT_EXPR;
7852 bitop2 = BIT_IOR_EXPR;
7853 break;
7854 case LT_EXPR:
7855 bitop1 = BIT_NOT_EXPR;
7856 bitop2 = BIT_AND_EXPR;
7857 std::swap (cond_expr0, cond_expr1);
7858 break;
7859 case LE_EXPR:
7860 bitop1 = BIT_NOT_EXPR;
7861 bitop2 = BIT_IOR_EXPR;
7862 std::swap (cond_expr0, cond_expr1);
7863 break;
7864 case NE_EXPR:
7865 bitop1 = BIT_XOR_EXPR;
7866 break;
7867 case EQ_EXPR:
7868 bitop1 = BIT_XOR_EXPR;
7869 bitop2 = BIT_NOT_EXPR;
7870 break;
7871 default:
7872 return false;
7874 cond_code = SSA_NAME;
7877 if (!vec_stmt)
7879 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7880 if (bitop1 != NOP_EXPR)
7882 machine_mode mode = TYPE_MODE (comp_vectype);
7883 optab optab;
7885 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7886 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7887 return false;
7889 if (bitop2 != NOP_EXPR)
7891 optab = optab_for_tree_code (bitop2, comp_vectype,
7892 optab_default);
7893 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7894 return false;
7897 return expand_vec_cond_expr_p (vectype, comp_vectype,
7898 cond_code);
7901 /* Transform. */
7903 if (!slp_node)
7905 vec_oprnds0.create (1);
7906 vec_oprnds1.create (1);
7907 vec_oprnds2.create (1);
7908 vec_oprnds3.create (1);
7911 /* Handle def. */
7912 scalar_dest = gimple_assign_lhs (stmt);
7913 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7915 /* Handle cond expr. */
7916 for (j = 0; j < ncopies; j++)
7918 gassign *new_stmt = NULL;
7919 if (j == 0)
7921 if (slp_node)
7923 auto_vec<tree, 4> ops;
7924 auto_vec<vec<tree>, 4> vec_defs;
7926 if (masked)
7927 ops.safe_push (cond_expr);
7928 else
7930 ops.safe_push (cond_expr0);
7931 ops.safe_push (cond_expr1);
7933 ops.safe_push (then_clause);
7934 ops.safe_push (else_clause);
7935 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7936 vec_oprnds3 = vec_defs.pop ();
7937 vec_oprnds2 = vec_defs.pop ();
7938 if (!masked)
7939 vec_oprnds1 = vec_defs.pop ();
7940 vec_oprnds0 = vec_defs.pop ();
7942 else
7944 gimple *gtemp;
7945 if (masked)
7947 vec_cond_lhs
7948 = vect_get_vec_def_for_operand (cond_expr, stmt,
7949 comp_vectype);
7950 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7951 &gtemp, &dts[0]);
7953 else
7955 vec_cond_lhs
7956 = vect_get_vec_def_for_operand (cond_expr0,
7957 stmt, comp_vectype);
7958 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
7960 vec_cond_rhs
7961 = vect_get_vec_def_for_operand (cond_expr1,
7962 stmt, comp_vectype);
7963 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
7965 if (reduc_index == 1)
7966 vec_then_clause = reduc_def;
7967 else
7969 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7970 stmt);
7971 vect_is_simple_use (then_clause, loop_vinfo,
7972 &gtemp, &dts[2]);
7974 if (reduc_index == 2)
7975 vec_else_clause = reduc_def;
7976 else
7978 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7979 stmt);
7980 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7984 else
7986 vec_cond_lhs
7987 = vect_get_vec_def_for_stmt_copy (dts[0],
7988 vec_oprnds0.pop ());
7989 if (!masked)
7990 vec_cond_rhs
7991 = vect_get_vec_def_for_stmt_copy (dts[1],
7992 vec_oprnds1.pop ());
7994 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7995 vec_oprnds2.pop ());
7996 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7997 vec_oprnds3.pop ());
8000 if (!slp_node)
8002 vec_oprnds0.quick_push (vec_cond_lhs);
8003 if (!masked)
8004 vec_oprnds1.quick_push (vec_cond_rhs);
8005 vec_oprnds2.quick_push (vec_then_clause);
8006 vec_oprnds3.quick_push (vec_else_clause);
8009 /* Arguments are ready. Create the new vector stmt. */
8010 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8012 vec_then_clause = vec_oprnds2[i];
8013 vec_else_clause = vec_oprnds3[i];
8015 if (masked)
8016 vec_compare = vec_cond_lhs;
8017 else
8019 vec_cond_rhs = vec_oprnds1[i];
8020 if (bitop1 == NOP_EXPR)
8021 vec_compare = build2 (cond_code, vec_cmp_type,
8022 vec_cond_lhs, vec_cond_rhs);
8023 else
8025 new_temp = make_ssa_name (vec_cmp_type);
8026 if (bitop1 == BIT_NOT_EXPR)
8027 new_stmt = gimple_build_assign (new_temp, bitop1,
8028 vec_cond_rhs);
8029 else
8030 new_stmt
8031 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8032 vec_cond_rhs);
8033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8034 if (bitop2 == NOP_EXPR)
8035 vec_compare = new_temp;
8036 else if (bitop2 == BIT_NOT_EXPR)
8038 /* Instead of doing ~x ? y : z do x ? z : y. */
8039 vec_compare = new_temp;
8040 std::swap (vec_then_clause, vec_else_clause);
8042 else
8044 vec_compare = make_ssa_name (vec_cmp_type);
8045 new_stmt
8046 = gimple_build_assign (vec_compare, bitop2,
8047 vec_cond_lhs, new_temp);
8048 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8052 new_temp = make_ssa_name (vec_dest);
8053 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8054 vec_compare, vec_then_clause,
8055 vec_else_clause);
8056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8057 if (slp_node)
8058 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8061 if (slp_node)
8062 continue;
8064 if (j == 0)
8065 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8066 else
8067 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8069 prev_stmt_info = vinfo_for_stmt (new_stmt);
8072 vec_oprnds0.release ();
8073 vec_oprnds1.release ();
8074 vec_oprnds2.release ();
8075 vec_oprnds3.release ();
8077 return true;
8080 /* vectorizable_comparison.
8082 Check if STMT is comparison expression that can be vectorized.
8083 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8084 comparison, put it in VEC_STMT, and insert it at GSI.
8086 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
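/* For example (illustrative only): a scalar statement such as
     mask_1 = a_2 > b_3
   where the result type of MASK_1 maps to a vector boolean (mask) type.  */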
8088 static bool
8089 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8090 gimple **vec_stmt, tree reduc_def,
8091 slp_tree slp_node)
8093 tree lhs, rhs1, rhs2;
8094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8095 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8096 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8097 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8098 tree new_temp;
8099 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8100 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8101 unsigned nunits;
8102 int ncopies;
8103 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8104 stmt_vec_info prev_stmt_info = NULL;
8105 int i, j;
8106 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8107 vec<tree> vec_oprnds0 = vNULL;
8108 vec<tree> vec_oprnds1 = vNULL;
8109 gimple *def_stmt;
8110 tree mask_type;
8111 tree mask;
8113 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8114 return false;
8116 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8117 return false;
8119 mask_type = vectype;
8120 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8122 if (slp_node)
8123 ncopies = 1;
8124 else
8125 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8127 gcc_assert (ncopies >= 1);
8128 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8129 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8130 && reduc_def))
8131 return false;
8133 if (STMT_VINFO_LIVE_P (stmt_info))
8135 if (dump_enabled_p ())
8136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8137 "value used after loop.\n");
8138 return false;
8141 if (!is_gimple_assign (stmt))
8142 return false;
8144 code = gimple_assign_rhs_code (stmt);
8146 if (TREE_CODE_CLASS (code) != tcc_comparison)
8147 return false;
8149 rhs1 = gimple_assign_rhs1 (stmt);
8150 rhs2 = gimple_assign_rhs2 (stmt);
8152 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8153 &dts[0], &vectype1))
8154 return false;
8156 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8157 &dts[1], &vectype2))
8158 return false;
8160 if (vectype1 && vectype2
8161 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8162 return false;
8164 vectype = vectype1 ? vectype1 : vectype2;
8166 /* Invariant comparison. */
8167 if (!vectype)
8169 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8170 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8171 return false;
8173 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8174 return false;
8176 /* Can't compare mask and non-mask types. */
8177 if (vectype1 && vectype2
8178 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8179 return false;
8181 /* Boolean values may have another representation in vectors
8182 and therefore we prefer bit operations over comparison for
8183 them (which also works for scalar masks). We store opcodes
8184 to use in bitop1 and bitop2. Statement is vectorized as
8185 BITOP2 (rhs1 BITOP1 rhs2) or
8186 rhs1 BITOP2 (BITOP1 rhs2)
8187 depending on bitop1 and bitop2 arity. */
8188 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8190 if (code == GT_EXPR)
8192 bitop1 = BIT_NOT_EXPR;
8193 bitop2 = BIT_AND_EXPR;
8195 else if (code == GE_EXPR)
8197 bitop1 = BIT_NOT_EXPR;
8198 bitop2 = BIT_IOR_EXPR;
8200 else if (code == LT_EXPR)
8202 bitop1 = BIT_NOT_EXPR;
8203 bitop2 = BIT_AND_EXPR;
8204 std::swap (rhs1, rhs2);
8205 std::swap (dts[0], dts[1]);
8207 else if (code == LE_EXPR)
8209 bitop1 = BIT_NOT_EXPR;
8210 bitop2 = BIT_IOR_EXPR;
8211 std::swap (rhs1, rhs2);
8212 std::swap (dts[0], dts[1]);
8214 else
8216 bitop1 = BIT_XOR_EXPR;
8217 if (code == EQ_EXPR)
8218 bitop2 = BIT_NOT_EXPR;
8222 if (!vec_stmt)
8224 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8225 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8226 dts, NULL, NULL);
8227 if (bitop1 == NOP_EXPR)
8228 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8229 else
8231 machine_mode mode = TYPE_MODE (vectype);
8232 optab optab;
8234 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8235 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8236 return false;
8238 if (bitop2 != NOP_EXPR)
8240 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8241 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8242 return false;
8244 return true;
8248 /* Transform. */
8249 if (!slp_node)
8251 vec_oprnds0.create (1);
8252 vec_oprnds1.create (1);
8255 /* Handle def. */
8256 lhs = gimple_assign_lhs (stmt);
8257 mask = vect_create_destination_var (lhs, mask_type);
8259 /* Handle cmp expr. */
8260 for (j = 0; j < ncopies; j++)
8262 gassign *new_stmt = NULL;
8263 if (j == 0)
8265 if (slp_node)
8267 auto_vec<tree, 2> ops;
8268 auto_vec<vec<tree>, 2> vec_defs;
8270 ops.safe_push (rhs1);
8271 ops.safe_push (rhs2);
8272 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8273 vec_oprnds1 = vec_defs.pop ();
8274 vec_oprnds0 = vec_defs.pop ();
8276 else
8278 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8279 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8282 else
8284 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8285 vec_oprnds0.pop ());
8286 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8287 vec_oprnds1.pop ());
8290 if (!slp_node)
8292 vec_oprnds0.quick_push (vec_rhs1);
8293 vec_oprnds1.quick_push (vec_rhs2);
8296 /* Arguments are ready. Create the new vector stmt. */
8297 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8299 vec_rhs2 = vec_oprnds1[i];
8301 new_temp = make_ssa_name (mask);
8302 if (bitop1 == NOP_EXPR)
8304 new_stmt = gimple_build_assign (new_temp, code,
8305 vec_rhs1, vec_rhs2);
8306 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8308 else
8310 if (bitop1 == BIT_NOT_EXPR)
8311 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8312 else
8313 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8314 vec_rhs2);
8315 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8316 if (bitop2 != NOP_EXPR)
8318 tree res = make_ssa_name (mask);
8319 if (bitop2 == BIT_NOT_EXPR)
8320 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8321 else
8322 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8323 new_temp);
8324 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8327 if (slp_node)
8328 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8331 if (slp_node)
8332 continue;
8334 if (j == 0)
8335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8336 else
8337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8339 prev_stmt_info = vinfo_for_stmt (new_stmt);
8342 vec_oprnds0.release ();
8343 vec_oprnds1.release ();
8345 return true;
8348 /* Make sure the statement is vectorizable. */
8350 bool
8351 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8353 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8354 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8355 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8356 bool ok;
8357 tree scalar_type, vectype;
8358 gimple *pattern_stmt;
8359 gimple_seq pattern_def_seq;
8361 if (dump_enabled_p ())
8363 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8364 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8367 if (gimple_has_volatile_ops (stmt))
8369 if (dump_enabled_p ())
8370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8371 "not vectorized: stmt has volatile operands\n");
8373 return false;
8376 /* Skip stmts that do not need to be vectorized. In loops this is expected
8377 to include:
8378 - the COND_EXPR which is the loop exit condition
8379 - any LABEL_EXPRs in the loop
8380 - computations that are used only for array indexing or loop control.
8381 In basic blocks we only analyze statements that are a part of some SLP
8382 instance, therefore, all the statements are relevant.
8384 Pattern statement needs to be analyzed instead of the original statement
8385 if the original statement is not relevant. Otherwise, we analyze both
8386 statements. In basic blocks we are called from some SLP instance
8387 traversal; in that case don't analyze the pattern stmts separately,
8388 as they will already be part of the SLP instance. */
8390 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8391 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8392 && !STMT_VINFO_LIVE_P (stmt_info))
8394 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8395 && pattern_stmt
8396 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8397 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8399 /* Analyze PATTERN_STMT instead of the original stmt. */
8400 stmt = pattern_stmt;
8401 stmt_info = vinfo_for_stmt (pattern_stmt);
8402 if (dump_enabled_p ())
8404 dump_printf_loc (MSG_NOTE, vect_location,
8405 "==> examining pattern statement: ");
8406 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8409 else
8411 if (dump_enabled_p ())
8412 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8414 return true;
8417 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8418 && node == NULL
8419 && pattern_stmt
8420 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8421 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8423 /* Analyze PATTERN_STMT too. */
8424 if (dump_enabled_p ())
8426 dump_printf_loc (MSG_NOTE, vect_location,
8427 "==> examining pattern statement: ");
8428 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8431 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8432 return false;
8435 if (is_pattern_stmt_p (stmt_info)
8436 && node == NULL
8437 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8439 gimple_stmt_iterator si;
8441 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8443 gimple *pattern_def_stmt = gsi_stmt (si);
8444 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8445 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8447 /* Analyze def stmt of STMT if it's a pattern stmt. */
8448 if (dump_enabled_p ())
8450 dump_printf_loc (MSG_NOTE, vect_location,
8451 "==> examining pattern def statement: ");
8452 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8455 if (!vect_analyze_stmt (pattern_def_stmt,
8456 need_to_vectorize, node))
8457 return false;
8462 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8464 case vect_internal_def:
8465 break;
8467 case vect_reduction_def:
8468 case vect_nested_cycle:
8469 gcc_assert (!bb_vinfo
8470 && (relevance == vect_used_in_outer
8471 || relevance == vect_used_in_outer_by_reduction
8472 || relevance == vect_used_by_reduction
8473 || relevance == vect_unused_in_scope
8474 || relevance == vect_used_only_live));
8475 break;
8477 case vect_induction_def:
8478 case vect_constant_def:
8479 case vect_external_def:
8480 case vect_unknown_def_type:
8481 default:
8482 gcc_unreachable ();
8485 if (bb_vinfo)
8487 gcc_assert (PURE_SLP_STMT (stmt_info));
8489 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8490 if (dump_enabled_p ())
8492 dump_printf_loc (MSG_NOTE, vect_location,
8493 "get vectype for scalar type: ");
8494 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8495 dump_printf (MSG_NOTE, "\n");
8498 vectype = get_vectype_for_scalar_type (scalar_type);
8499 if (!vectype)
8501 if (dump_enabled_p ())
8503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8504 "not SLPed: unsupported data-type ");
8505 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8506 scalar_type);
8507 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8509 return false;
8512 if (dump_enabled_p ())
8514 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8515 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8516 dump_printf (MSG_NOTE, "\n");
8519 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8522 if (STMT_VINFO_RELEVANT_P (stmt_info))
8524 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8525 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8526 || (is_gimple_call (stmt)
8527 && gimple_call_lhs (stmt) == NULL_TREE));
8528 *need_to_vectorize = true;
8531 if (PURE_SLP_STMT (stmt_info) && !node)
8533 dump_printf_loc (MSG_NOTE, vect_location,
8534 "handled only by SLP analysis\n");
8535 return true;
8538 ok = true;
8539 if (!bb_vinfo
8540 && (STMT_VINFO_RELEVANT_P (stmt_info)
8541 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8542 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8543 || vectorizable_conversion (stmt, NULL, NULL, node)
8544 || vectorizable_shift (stmt, NULL, NULL, node)
8545 || vectorizable_operation (stmt, NULL, NULL, node)
8546 || vectorizable_assignment (stmt, NULL, NULL, node)
8547 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8548 || vectorizable_call (stmt, NULL, NULL, node)
8549 || vectorizable_store (stmt, NULL, NULL, node)
8550 || vectorizable_reduction (stmt, NULL, NULL, node)
8551 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8552 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8553 else
8555 if (bb_vinfo)
8556 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8557 || vectorizable_conversion (stmt, NULL, NULL, node)
8558 || vectorizable_shift (stmt, NULL, NULL, node)
8559 || vectorizable_operation (stmt, NULL, NULL, node)
8560 || vectorizable_assignment (stmt, NULL, NULL, node)
8561 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8562 || vectorizable_call (stmt, NULL, NULL, node)
8563 || vectorizable_store (stmt, NULL, NULL, node)
8564 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8565 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8568 if (!ok)
8570 if (dump_enabled_p ())
8572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8573 "not vectorized: relevant stmt not ");
8574 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8575 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8578 return false;
8581 if (bb_vinfo)
8582 return true;
8584 /* Stmts that are (also) "live" (i.e., used outside the loop)
8585 need extra handling, except for vectorizable reductions. */
8586 if (STMT_VINFO_LIVE_P (stmt_info)
8587 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8588 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8590 if (!ok)
8592 if (dump_enabled_p ())
8594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8595 "not vectorized: live stmt not ");
8596 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8597 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8600 return false;
8603 return true;
8607 /* Function vect_transform_stmt.
8609 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8611 bool
8612 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8613 bool *grouped_store, slp_tree slp_node,
8614 slp_instance slp_node_instance)
8616 bool is_store = false;
8617 gimple *vec_stmt = NULL;
8618 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8619 bool done;
8621 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8622 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8624 switch (STMT_VINFO_TYPE (stmt_info))
8626 case type_demotion_vec_info_type:
8627 case type_promotion_vec_info_type:
8628 case type_conversion_vec_info_type:
8629 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8630 gcc_assert (done);
8631 break;
8633 case induc_vec_info_type:
8634 gcc_assert (!slp_node);
8635 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8636 gcc_assert (done);
8637 break;
8639 case shift_vec_info_type:
8640 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8641 gcc_assert (done);
8642 break;
8644 case op_vec_info_type:
8645 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8646 gcc_assert (done);
8647 break;
8649 case assignment_vec_info_type:
8650 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8651 gcc_assert (done);
8652 break;
8654 case load_vec_info_type:
8655 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8656 slp_node_instance);
8657 gcc_assert (done);
8658 break;
8660 case store_vec_info_type:
8661 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8662 gcc_assert (done);
8663 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8665 /* In case of interleaving, the whole chain is vectorized when the
8666 last store in the chain is reached. Store stmts before the last
8667 one are skipped, and their vec_stmt_info shouldn't be freed
8668 meanwhile. */
8669 *grouped_store = true;
8670 if (STMT_VINFO_VEC_STMT (stmt_info))
8671 is_store = true;
8673 else
8674 is_store = true;
8675 break;
8677 case condition_vec_info_type:
8678 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8679 gcc_assert (done);
8680 break;
8682 case comparison_vec_info_type:
8683 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8684 gcc_assert (done);
8685 break;
8687 case call_vec_info_type:
8688 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8689 stmt = gsi_stmt (*gsi);
8690 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8691 is_store = true;
8692 break;
8694 case call_simd_clone_vec_info_type:
8695 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8696 stmt = gsi_stmt (*gsi);
8697 break;
8699 case reduc_vec_info_type:
8700 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8701 gcc_assert (done);
8702 break;
8704 default:
8705 if (!STMT_VINFO_LIVE_P (stmt_info))
8707 if (dump_enabled_p ())
8708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8709 "stmt not supported.\n");
8710 gcc_unreachable ();
8714 /* Verify that SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT;
8715 doing so would break hybrid SLP vectorization. */
8716 if (slp_node)
8717 gcc_assert (!vec_stmt
8718 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8720 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8721 is being vectorized, but outside the immediately enclosing loop. */
8722 if (vec_stmt
8723 && STMT_VINFO_LOOP_VINFO (stmt_info)
8724 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8725 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8726 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8727 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8728 || STMT_VINFO_RELEVANT (stmt_info) ==
8729 vect_used_in_outer_by_reduction))
8731 struct loop *innerloop = LOOP_VINFO_LOOP (
8732 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8733 imm_use_iterator imm_iter;
8734 use_operand_p use_p;
8735 tree scalar_dest;
8736 gimple *exit_phi;
8738 if (dump_enabled_p ())
8739 dump_printf_loc (MSG_NOTE, vect_location,
8740 "Record the vdef for outer-loop vectorization.\n");
8742 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8743 (to be used when vectorizing outer-loop stmts that use the DEF of
8744 STMT). */
8745 if (gimple_code (stmt) == GIMPLE_PHI)
8746 scalar_dest = PHI_RESULT (stmt);
8747 else
8748 scalar_dest = gimple_assign_lhs (stmt);
8750 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8752 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8754 exit_phi = USE_STMT (use_p);
8755 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8760 /* Handle stmts whose DEF is used outside the loop-nest that is
8761 being vectorized. */
8762 if (slp_node)
8764 gimple *slp_stmt;
8765 int i;
8766 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8768 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8769 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8770 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8772 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8773 &vec_stmt);
8774 gcc_assert (done);
8778 else if (STMT_VINFO_LIVE_P (stmt_info)
8779 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8781 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8782 gcc_assert (done);
8785 if (vec_stmt)
8786 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8788 return is_store;
8792 /* Remove a group of stores (for SLP or interleaving), free their
8793 stmt_vec_info. */
8795 void
8796 vect_remove_stores (gimple *first_stmt)
8798 gimple *next = first_stmt;
8799 gimple *tmp;
8800 gimple_stmt_iterator next_si;
8802 while (next)
8804 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8806 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8807 if (is_pattern_stmt_p (stmt_info))
8808 next = STMT_VINFO_RELATED_STMT (stmt_info);
8809 /* Free the attached stmt_vec_info and remove the stmt. */
8810 next_si = gsi_for_stmt (next);
8811 unlink_stmt_vdef (next);
8812 gsi_remove (&next_si, true);
8813 release_defs (next);
8814 free_stmt_vec_info (next);
8815 next = tmp;
8820 /* Function new_stmt_vec_info.
8822 Create and initialize a new stmt_vec_info struct for STMT. */
8824 stmt_vec_info
8825 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8827 stmt_vec_info res;
8828 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8830 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8831 STMT_VINFO_STMT (res) = stmt;
8832 res->vinfo = vinfo;
8833 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8834 STMT_VINFO_LIVE_P (res) = false;
8835 STMT_VINFO_VECTYPE (res) = NULL;
8836 STMT_VINFO_VEC_STMT (res) = NULL;
8837 STMT_VINFO_VECTORIZABLE (res) = true;
8838 STMT_VINFO_IN_PATTERN_P (res) = false;
8839 STMT_VINFO_RELATED_STMT (res) = NULL;
8840 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8841 STMT_VINFO_DATA_REF (res) = NULL;
8842 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8843 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8845 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8846 STMT_VINFO_DR_OFFSET (res) = NULL;
8847 STMT_VINFO_DR_INIT (res) = NULL;
8848 STMT_VINFO_DR_STEP (res) = NULL;
8849 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8851 if (gimple_code (stmt) == GIMPLE_PHI
8852 && is_loop_header_bb_p (gimple_bb (stmt)))
8853 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8854 else
8855 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8857 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8858 STMT_SLP_TYPE (res) = loop_vect;
8859 STMT_VINFO_NUM_SLP_USES (res) = 0;
8861 GROUP_FIRST_ELEMENT (res) = NULL;
8862 GROUP_NEXT_ELEMENT (res) = NULL;
8863 GROUP_SIZE (res) = 0;
8864 GROUP_STORE_COUNT (res) = 0;
8865 GROUP_GAP (res) = 0;
8866 GROUP_SAME_DR_STMT (res) = NULL;
8868 return res;
8872 /* Create the vector holding the stmt_vec_info structs. */
8874 void
8875 init_stmt_vec_info_vec (void)
8877 gcc_assert (!stmt_vec_info_vec.exists ());
8878 stmt_vec_info_vec.create (50);
8882 /* Free the vector of stmt_vec_info structs. */
8884 void
8885 free_stmt_vec_info_vec (void)
8887 unsigned int i;
8888 stmt_vec_info info;
8889 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8890 if (info != NULL)
8891 free_stmt_vec_info (STMT_VINFO_STMT (info));
8892 gcc_assert (stmt_vec_info_vec.exists ());
8893 stmt_vec_info_vec.release ();
8897 /* Free stmt vectorization related info. */
8899 void
8900 free_stmt_vec_info (gimple *stmt)
8902 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8904 if (!stmt_info)
8905 return;
8907 /* Check if this statement has a related "pattern stmt"
8908 (introduced by the vectorizer during the pattern recognition
8909 pass). Free the pattern's stmt_vec_info and its def stmts' stmt_vec_info
8910 too. */
8911 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8913 stmt_vec_info patt_info
8914 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8915 if (patt_info)
8917 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8918 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8919 gimple_set_bb (patt_stmt, NULL);
8920 tree lhs = gimple_get_lhs (patt_stmt);
8921 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8922 release_ssa_name (lhs);
8923 if (seq)
8925 gimple_stmt_iterator si;
8926 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8928 gimple *seq_stmt = gsi_stmt (si);
8929 gimple_set_bb (seq_stmt, NULL);
8930 lhs = gimple_get_lhs (seq_stmt);
8931 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8932 release_ssa_name (lhs);
8933 free_stmt_vec_info (seq_stmt);
8936 free_stmt_vec_info (patt_stmt);
8940 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8941 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8942 set_vinfo_for_stmt (stmt, NULL);
8943 free (stmt_info);
8947 /* Function get_vectype_for_scalar_type_and_size.
8949 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8950 by the target. */
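/* For example, assuming a target whose preferred/selected vector size is
   16 bytes: for SCALAR_TYPE "int" and SIZE 16 this returns a 4-element
   integer vector type, or NULL_TREE if no suitable vector mode exists.  */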
8952 static tree
8953 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8955 machine_mode inner_mode = TYPE_MODE (scalar_type);
8956 machine_mode simd_mode;
8957 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8958 int nunits;
8959 tree vectype;
8961 if (nbytes == 0)
8962 return NULL_TREE;
8964 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8965 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8966 return NULL_TREE;
8968 /* For vector types of elements whose mode precision doesn't
8969 match their type's precision we use an element type of mode
8970 precision. The vectorization routines will have to make sure
8971 they support the proper result truncation/extension.
8972 We also make sure to build vector types with INTEGER_TYPE
8973 component type only. */
8974 if (INTEGRAL_TYPE_P (scalar_type)
8975 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8976 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8977 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8978 TYPE_UNSIGNED (scalar_type));
8980 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8981 When the component mode passes the above test simply use a type
8982 corresponding to that mode. The theory is that any use that
8983 would cause problems with this will disable vectorization anyway. */
8984 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8985 && !INTEGRAL_TYPE_P (scalar_type))
8986 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8988 /* We can't build a vector type of elements with alignment bigger than
8989 their size. */
8990 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8991 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8992 TYPE_UNSIGNED (scalar_type));
8994 /* If we fell back to using the mode, fail if there was
8995 no scalar type for it. */
8996 if (scalar_type == NULL_TREE)
8997 return NULL_TREE;
8999 /* If no size was supplied, use the mode the target prefers.  Otherwise
9000 look up a vector mode of the specified size. */
9001 if (size == 0)
9002 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9003 else
9004 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9005 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9006 if (nunits <= 1)
9007 return NULL_TREE;
9009 vectype = build_vector_type (scalar_type, nunits);
9011 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9012 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9013 return NULL_TREE;
9015 return vectype;
9018 unsigned int current_vector_size;
9020 /* Function get_vectype_for_scalar_type.
9022 Returns the vector type corresponding to SCALAR_TYPE as supported
9023 by the target. */
9025 tree
9026 get_vectype_for_scalar_type (tree scalar_type)
9028 tree vectype;
9029 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9030 current_vector_size);
9031 if (vectype
9032 && current_vector_size == 0)
9033 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9034 return vectype;
9037 /* Function get_mask_type_for_scalar_type.
9039 Returns the mask type corresponding to a result of comparison
9040 of vectors of specified SCALAR_TYPE as supported by target. */
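/* For example, assuming 16-byte vectors and SCALAR_TYPE "int", the result is
   a boolean vector type with the same number of elements (4 here) as the
   corresponding data vector type.  */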
9042 tree
9043 get_mask_type_for_scalar_type (tree scalar_type)
9045 tree vectype = get_vectype_for_scalar_type (scalar_type);
9047 if (!vectype)
9048 return NULL;
9050 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9051 current_vector_size);
9054 /* Function get_same_sized_vectype
9056 Returns a vector type corresponding to SCALAR_TYPE of size
9057 VECTOR_TYPE if supported by the target. */
9059 tree
9060 get_same_sized_vectype (tree scalar_type, tree vector_type)
9062 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9063 return build_same_sized_truth_vector_type (vector_type);
9065 return get_vectype_for_scalar_type_and_size
9066 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9069 /* Function vect_is_simple_use.
9071 Input:
9072 VINFO - the vect info of the loop or basic block that is being vectorized.
9073 OPERAND - operand in the loop or bb.
9074 Output:
9075 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9076 DT - the type of definition
9078 Returns whether a stmt with OPERAND can be vectorized.
9079 For loops, supportable operands are constants, loop invariants, and operands
9080 that are defined by the current iteration of the loop. Unsupportable
9081 operands are those that are defined by a previous iteration of the loop (as
9082 is the case in reduction/induction computations).
9083 For basic blocks, supportable operands are constants and bb invariants.
9084 For now, operands defined outside the basic block are not supported. */
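/* For example: a constant operand yields vect_constant_def, an SSA_NAME
   defined outside the region being vectorized yields vect_external_def, and
   an SSA_NAME defined by a statement inside the region gets the def type
   recorded in that statement's stmt_vec_info.  */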
9086 bool
9087 vect_is_simple_use (tree operand, vec_info *vinfo,
9088 gimple **def_stmt, enum vect_def_type *dt)
9090 *def_stmt = NULL;
9091 *dt = vect_unknown_def_type;
9093 if (dump_enabled_p ())
9095 dump_printf_loc (MSG_NOTE, vect_location,
9096 "vect_is_simple_use: operand ");
9097 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9098 dump_printf (MSG_NOTE, "\n");
9101 if (CONSTANT_CLASS_P (operand))
9103 *dt = vect_constant_def;
9104 return true;
9107 if (is_gimple_min_invariant (operand))
9109 *dt = vect_external_def;
9110 return true;
9113 if (TREE_CODE (operand) != SSA_NAME)
9115 if (dump_enabled_p ())
9116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9117 "not ssa-name.\n");
9118 return false;
9121 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9123 *dt = vect_external_def;
9124 return true;
9127 *def_stmt = SSA_NAME_DEF_STMT (operand);
9128 if (dump_enabled_p ())
9130 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9131 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9134 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9135 *dt = vect_external_def;
9136 else
9138 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9139 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9142 if (dump_enabled_p ())
9144 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9145 switch (*dt)
9147 case vect_uninitialized_def:
9148 dump_printf (MSG_NOTE, "uninitialized\n");
9149 break;
9150 case vect_constant_def:
9151 dump_printf (MSG_NOTE, "constant\n");
9152 break;
9153 case vect_external_def:
9154 dump_printf (MSG_NOTE, "external\n");
9155 break;
9156 case vect_internal_def:
9157 dump_printf (MSG_NOTE, "internal\n");
9158 break;
9159 case vect_induction_def:
9160 dump_printf (MSG_NOTE, "induction\n");
9161 break;
9162 case vect_reduction_def:
9163 dump_printf (MSG_NOTE, "reduction\n");
9164 break;
9165 case vect_double_reduction_def:
9166 dump_printf (MSG_NOTE, "double reduction\n");
9167 break;
9168 case vect_nested_cycle:
9169 dump_printf (MSG_NOTE, "nested cycle\n");
9170 break;
9171 case vect_unknown_def_type:
9172 dump_printf (MSG_NOTE, "unknown\n");
9173 break;
9177 if (*dt == vect_unknown_def_type)
9179 if (dump_enabled_p ())
9180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9181 "Unsupported pattern.\n");
9182 return false;
9185 switch (gimple_code (*def_stmt))
9187 case GIMPLE_PHI:
9188 case GIMPLE_ASSIGN:
9189 case GIMPLE_CALL:
9190 break;
9191 default:
9192 if (dump_enabled_p ())
9193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9194 "unsupported defining stmt:\n");
9195 return false;
9198 return true;
9201 /* Function vect_is_simple_use.
9203 Same as vect_is_simple_use but also determines the vector operand
9204 type of OPERAND and stores it to *VECTYPE. If the definition of
9205 OPERAND is vect_uninitialized_def, vect_constant_def or
9206 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9207 is responsible for computing the best suited vector type for the
9208 scalar operand. */
9210 bool
9211 vect_is_simple_use (tree operand, vec_info *vinfo,
9212 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9214 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9215 return false;
9217 /* Now get a vector type if the def is internal, otherwise supply
9218 NULL_TREE and leave it up to the caller to figure out a proper
9219 type for the use stmt. */
9220 if (*dt == vect_internal_def
9221 || *dt == vect_induction_def
9222 || *dt == vect_reduction_def
9223 || *dt == vect_double_reduction_def
9224 || *dt == vect_nested_cycle)
9226 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9228 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9229 && !STMT_VINFO_RELEVANT (stmt_info)
9230 && !STMT_VINFO_LIVE_P (stmt_info))
9231 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9233 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9234 gcc_assert (*vectype != NULL_TREE);
9236 else if (*dt == vect_uninitialized_def
9237 || *dt == vect_constant_def
9238 || *dt == vect_external_def)
9239 *vectype = NULL_TREE;
9240 else
9241 gcc_unreachable ();
9243 return true;
9247 /* Function supportable_widening_operation
9249 Check whether an operation represented by the code CODE is a
9250 widening operation that is supported by the target platform in
9251 vector form (i.e., when operating on arguments of type VECTYPE_IN
9252 producing a result of type VECTYPE_OUT).
9254 Widening operations we currently support are NOP (CONVERT), FLOAT
9255 and WIDEN_MULT. This function checks if these operations are supported
9256 by the target platform either directly (via vector tree-codes), or via
9257 target builtins.
9259 Output:
9260 - CODE1 and CODE2 are codes of vector operations to be used when
9261 vectorizing the operation, if available.
9262 - MULTI_STEP_CVT determines the number of required intermediate steps in
9263 case of multi-step conversion (like char->short->int - in that case
9264 MULTI_STEP_CVT will be 1).
9265 - INTERM_TYPES contains the intermediate type required to perform the
9266 widening operation (short in the above example). */
9268 bool
9269 supportable_widening_operation (enum tree_code code, gimple *stmt,
9270 tree vectype_out, tree vectype_in,
9271 enum tree_code *code1, enum tree_code *code2,
9272 int *multi_step_cvt,
9273 vec<tree> *interm_types)
9275 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9276 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9277 struct loop *vect_loop = NULL;
9278 machine_mode vec_mode;
9279 enum insn_code icode1, icode2;
9280 optab optab1, optab2;
9281 tree vectype = vectype_in;
9282 tree wide_vectype = vectype_out;
9283 enum tree_code c1, c2;
9284 int i;
9285 tree prev_type, intermediate_type;
9286 machine_mode intermediate_mode, prev_mode;
9287 optab optab3, optab4;
9289 *multi_step_cvt = 0;
9290 if (loop_info)
9291 vect_loop = LOOP_VINFO_LOOP (loop_info);
9293 switch (code)
9295 case WIDEN_MULT_EXPR:
9296 /* The result of a vectorized widening operation usually requires
9297 two vectors (because the widened results do not fit into one vector).
9298 The generated vector results would normally be expected to be
9299 generated in the same order as in the original scalar computation,
9300 i.e. if 8 results are generated in each vector iteration, they are
9301 to be organized as follows:
9302 vect1: [res1,res2,res3,res4],
9303 vect2: [res5,res6,res7,res8].
9305 However, in the special case that the result of the widening
9306 operation is used in a reduction computation only, the order doesn't
9307 matter (because when vectorizing a reduction we change the order of
9308 the computation). Some targets can take advantage of this and
9309 generate more efficient code. For example, targets like Altivec,
9310 that support widen_mult using a sequence of {mult_even,mult_odd}
9311 generate the following vectors:
9312 vect1: [res1,res3,res5,res7],
9313 vect2: [res2,res4,res6,res8].
9315 When vectorizing outer-loops, we execute the inner-loop sequentially
9316 (each vectorized inner-loop iteration contributes to VF outer-loop
9317 iterations in parallel). We therefore don't allow changing the
9318 order of the computation in the inner-loop during outer-loop
9319 vectorization. */
9320 /* TODO: Another case in which order doesn't *really* matter is when we
9321 widen and then contract again, e.g. (short)((int)x * y >> 8).
9322 Normally, pack_trunc performs an even/odd permute, whereas the
9323 repack from an even/odd expansion would be an interleave, which
9324 would be significantly simpler for e.g. AVX2. */
9325 /* In any case, in order to avoid duplicating the code below, recurse
9326 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9327 are properly set up for the caller. If we fail, we'll continue with
9328 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
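/* (Illustration: when the recursion below succeeds the caller ends up
   with *CODE1 == VEC_WIDEN_MULT_EVEN_EXPR and *CODE2 ==
   VEC_WIDEN_MULT_ODD_EXPR, whereas the fallthrough path yields the
   VEC_WIDEN_MULT_LO_EXPR/VEC_WIDEN_MULT_HI_EXPR pair, possibly swapped
   for big-endian targets further below.) */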
9329 if (vect_loop
9330 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9331 && !nested_in_vect_loop_p (vect_loop, stmt)
9332 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9333 stmt, vectype_out, vectype_in,
9334 code1, code2, multi_step_cvt,
9335 interm_types))
9336 {
9337 /* Elements in a vector with the vect_used_by_reduction property cannot
9338 be reordered if the use chain with this property does not have the
9339 same operation. One such example is s += a * b, where elements
9340 in a and b cannot be reordered. Here we check if the vector defined
9341 by STMT is only directly used in the reduction statement. */
9342 tree lhs = gimple_assign_lhs (stmt);
9343 use_operand_p dummy;
9344 gimple *use_stmt;
9345 stmt_vec_info use_stmt_info = NULL;
9346 if (single_imm_use (lhs, &dummy, &use_stmt)
9347 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9348 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9349 return true;
9350 }
9351 c1 = VEC_WIDEN_MULT_LO_EXPR;
9352 c2 = VEC_WIDEN_MULT_HI_EXPR;
9353 break;
9355 case DOT_PROD_EXPR:
9356 c1 = DOT_PROD_EXPR;
9357 c2 = DOT_PROD_EXPR;
9358 break;
9360 case SAD_EXPR:
9361 c1 = SAD_EXPR;
9362 c2 = SAD_EXPR;
9363 break;
9365 case VEC_WIDEN_MULT_EVEN_EXPR:
9366 /* Support the recursion induced just above. */
9367 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9368 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9369 break;
9371 case WIDEN_LSHIFT_EXPR:
9372 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9373 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9374 break;
9376 CASE_CONVERT:
9377 c1 = VEC_UNPACK_LO_EXPR;
9378 c2 = VEC_UNPACK_HI_EXPR;
9379 break;
9381 case FLOAT_EXPR:
9382 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9383 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9384 break;
9386 case FIX_TRUNC_EXPR:
9387 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9388 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9389 computing the operation. */
9390 return false;
9392 default:
9393 gcc_unreachable ();
9394 }
9396 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9397 std::swap (c1, c2);
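/* Note that the EVEN/ODD pair chosen on the reduction path above is
   deliberately not swapped: selecting even vs. odd elements does not
   depend on endianness, whereas which half of a vector the LO/HI codes
   refer to does. */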
9399 if (code == FIX_TRUNC_EXPR)
9400 {
9401 /* The signedness is determined from the output operand. */
9402 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9403 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9404 }
9405 else
9406 {
9407 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9408 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9409 }
9411 if (!optab1 || !optab2)
9412 return false;
9414 vec_mode = TYPE_MODE (vectype);
9415 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9416 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9417 return false;
9419 *code1 = c1;
9420 *code2 = c2;
9422 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9423 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9424 /* For scalar masks we may have different boolean
9425 vector types having the same QImode. Thus we add an
9426 additional check on the number of elements. */
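/* (E.g. with scalar mask registers a 4-element and an 8-element
   boolean vector can both have QImode, so comparing the modes alone
   would not be enough.) */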
9427 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9428 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9429 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9431 /* Check if it's a multi-step conversion that can be done using intermediate
9432 types. */
9434 prev_type = vectype;
9435 prev_mode = vec_mode;
9437 if (!CONVERT_EXPR_CODE_P (code))
9438 return false;
9440 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9441 intermediate steps in the promotion sequence. We try
9442 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9443 not. */
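/* Sketch of the loop below for the char -> int example above: the
   first iteration checks that the target can unpack the char vector
   into two short vectors (OPTAB1/OPTAB2 on PREV_MODE) and the short
   vectors into int vectors (OPTAB3/OPTAB4 on the intermediate mode);
   if so, the short vector type is pushed onto INTERM_TYPES,
   *MULTI_STEP_CVT becomes 1 and, since the int vectors already match
   WIDE_VECTYPE, the function returns successfully. */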
9444 interm_types->create (MAX_INTERM_CVT_STEPS);
9445 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9446 {
9447 intermediate_mode = insn_data[icode1].operand[0].mode;
9448 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9449 {
9450 intermediate_type
9451 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9452 current_vector_size);
9453 if (intermediate_mode != TYPE_MODE (intermediate_type))
9454 return false;
9455 }
9456 else
9457 intermediate_type
9458 = lang_hooks.types.type_for_mode (intermediate_mode,
9459 TYPE_UNSIGNED (prev_type));
9461 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9462 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9464 if (!optab3 || !optab4
9465 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9466 || insn_data[icode1].operand[0].mode != intermediate_mode
9467 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9468 || insn_data[icode2].operand[0].mode != intermediate_mode
9469 || ((icode1 = optab_handler (optab3, intermediate_mode))
9470 == CODE_FOR_nothing)
9471 || ((icode2 = optab_handler (optab4, intermediate_mode))
9472 == CODE_FOR_nothing))
9473 break;
9475 interm_types->quick_push (intermediate_type);
9476 (*multi_step_cvt)++;
9478 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9479 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9480 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9481 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9482 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9484 prev_type = intermediate_type;
9485 prev_mode = intermediate_mode;
9486 }
9488 interm_types->release ();
9489 return false;
9490 }
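/* Illustrative caller sketch (not taken from this file; STMT,
   VECTYPE_OUT and VECTYPE_IN stand for whatever the caller has at
   hand):

     enum tree_code code1, code2;
     int nsteps;
     vec<tree> interm_types = vNULL;
     if (supportable_widening_operation (CONVERT_EXPR, stmt, vectype_out,
                                         vectype_in, &code1, &code2,
                                         &nsteps, &interm_types))
       {
         ... generate NSTEPS + 1 rounds of CODE1/CODE2 statements, using
         the vector types recorded in INTERM_TYPES for the intermediate
         results ...
       }
     interm_types.release (); */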
9493 /* Function supportable_narrowing_operation
9495 Check whether an operation represented by the code CODE is a
9496 narrowing operation that is supported by the target platform in
9497 vector form (i.e., when operating on arguments of type VECTYPE_IN
9498 and producing a result of type VECTYPE_OUT).
9500 Narrowing operations we currently support are NOP (CONVERT) and
9501 FIX_TRUNC. This function checks if these operations are supported by
9502 the target platform directly via vector tree-codes.
9504 Output:
9505 - CODE1 is the code of a vector operation to be used when
9506 vectorizing the operation, if available.
9507 - MULTI_STEP_CVT determines the number of required intermediate steps in
9508 case of multi-step conversion (like int->short->char - in that case
9509 MULTI_STEP_CVT will be 1).
9510 - INTERM_TYPES contains the intermediate type required to perform the
9511 narrowing operation (short in the above example). */
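/* For example (again assuming 128-bit vectors): an int -> char
   conversion, i.e. a V4SI input and a V16QI result, would report
   CODE1 == VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1 and INTERM_TYPES
   containing the short vector type: pairs of int vectors are first
   packed into short vectors, which are then packed into char
   vectors. */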
9513 bool
9514 supportable_narrowing_operation (enum tree_code code,
9515 tree vectype_out, tree vectype_in,
9516 enum tree_code *code1, int *multi_step_cvt,
9517 vec<tree> *interm_types)
9518 {
9519 machine_mode vec_mode;
9520 enum insn_code icode1;
9521 optab optab1, interm_optab;
9522 tree vectype = vectype_in;
9523 tree narrow_vectype = vectype_out;
9524 enum tree_code c1;
9525 tree intermediate_type, prev_type;
9526 machine_mode intermediate_mode, prev_mode;
9527 int i;
9528 bool uns;
9530 *multi_step_cvt = 0;
9531 switch (code)
9532 {
9533 CASE_CONVERT:
9534 c1 = VEC_PACK_TRUNC_EXPR;
9535 break;
9537 case FIX_TRUNC_EXPR:
9538 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9539 break;
9541 case FLOAT_EXPR:
9542 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9543 tree code and optabs used for computing the operation. */
9544 return false;
9546 default:
9547 gcc_unreachable ();
9548 }
9550 if (code == FIX_TRUNC_EXPR)
9551 /* The signedness is determined from the output operand. */
9552 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9553 else
9554 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9556 if (!optab1)
9557 return false;
9559 vec_mode = TYPE_MODE (vectype);
9560 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9561 return false;
9563 *code1 = c1;
9565 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9566 /* For scalar masks we may have different boolean
9567 vector types having the same QImode. Thus we add an
9568 additional check on the number of elements. */
9569 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9570 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9571 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9573 /* Check if it's a multi-step conversion that can be done using intermediate
9574 types. */
9575 prev_mode = vec_mode;
9576 prev_type = vectype;
9577 if (code == FIX_TRUNC_EXPR)
9578 uns = TYPE_UNSIGNED (vectype_out);
9579 else
9580 uns = TYPE_UNSIGNED (vectype);
9582 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9583 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9584 costly than signed. */
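/* (The switch to a signed intermediate below is assumed to be safe
   because every value representable in the narrower unsigned result
   type also fits in the wider signed intermediate type, and
   out-of-range conversions are undefined anyway.) */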
9585 if (code == FIX_TRUNC_EXPR && uns)
9586 {
9587 enum insn_code icode2;
9589 intermediate_type
9590 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9591 interm_optab
9592 = optab_for_tree_code (c1, intermediate_type, optab_default);
9593 if (interm_optab != unknown_optab
9594 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9595 && insn_data[icode1].operand[0].mode
9596 == insn_data[icode2].operand[0].mode)
9597 {
9598 uns = false;
9599 optab1 = interm_optab;
9600 icode1 = icode2;
9601 }
9602 }
9604 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9605 intermediate steps in the narrowing sequence. We try
9606 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9607 interm_types->create (MAX_INTERM_CVT_STEPS);
9608 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9609 {
9610 intermediate_mode = insn_data[icode1].operand[0].mode;
9611 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9612 {
9613 intermediate_type
9614 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9615 current_vector_size);
9616 if (intermediate_mode != TYPE_MODE (intermediate_type))
9617 return false;
9618 }
9619 else
9620 intermediate_type
9621 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9622 interm_optab
9623 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9624 optab_default);
9625 if (!interm_optab
9626 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9627 || insn_data[icode1].operand[0].mode != intermediate_mode
9628 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9629 == CODE_FOR_nothing))
9630 break;
9632 interm_types->quick_push (intermediate_type);
9633 (*multi_step_cvt)++;
9635 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9636 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9637 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9638 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9640 prev_mode = intermediate_mode;
9641 prev_type = intermediate_type;
9642 optab1 = interm_optab;
9643 }
9645 interm_types->release ();
9646 return false;