gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
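/* A rough illustration of the intended classification (a sketch based on
   the comment above, not an authoritative description): in a scalar loop
   such as

     for (i = 0; i < n; i++)
       {
         a[i] = b[i];
         c[i] = 42;
       }

   the read of b[i] is a VLS_LOAD, the write of a[i] stores a vectorized
   result and is a VLS_STORE, and the write of c[i] stores a
   loop-invariant value and is a VLS_STORE_INVARIANT.  */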
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
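/* A typical call, as used by the cost-model helpers later in this file
   (shown only as an illustration):

     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   With a non-NULL cost vector the entry is queued for later processing
   and the return value is only the preliminary estimate from
   builtin_vectorization_cost; with a NULL vector the cost is handed
   directly to the target via add_stmt_cost.  */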
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form).  */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
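/* Rough examples of the criteria above (illustrative only): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;
         s = s + b[i];
       }

   the store to a[i] has a vdef and is therefore marked
   vect_used_in_scope, while the update of s, if s is read after the
   loop, reaches a loop-exit phi and so sets *live_p; if it is live but
   otherwise unused in the scope and not invariant, it is marked
   vect_used_only_live.  */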
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that it is of one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
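/* For example (a sketch of the two forms described above): in a scalar
   store "a[i] = x_1" the use of x_1 is the copied rhs, so the function
   returns true for USE == x_1, while the use of i only feeds the
   address computation and yields false.  In a load "x_1 = a[i]" the
   lhs is an SSA_NAME, so false is returned for every use.  */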
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
583 vect_mark_relevant (worklist, def_stmt, relevant, false);
584 return true;
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
592 for i...
593 for j...
594 1. T0 = i + j
595 2. T1 = a[T0]
597 3. j = j + 1
599 Stmts 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
604 bool
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
609 unsigned int nbbs = loop->num_nodes;
610 gimple_stmt_iterator si;
611 gimple *stmt;
612 unsigned int i;
613 stmt_vec_info stmt_vinfo;
614 basic_block bb;
615 gimple *phi;
616 bool live_p;
617 enum vect_relevant relevant;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE, vect_location,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec<gimple *, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
628 bb = bbs[i];
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 phi = gsi_stmt (si);
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
638 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
639 vect_mark_relevant (&worklist, phi, relevant, live_p);
641 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
643 stmt = gsi_stmt (si);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
650 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
651 vect_mark_relevant (&worklist, stmt, relevant, live_p);
655 /* 2. Process_worklist */
656 while (worklist.length () > 0)
658 use_operand_p use_p;
659 ssa_op_iter iter;
661 stmt = worklist.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
670 of STMT. */
671 stmt_vinfo = vinfo_for_stmt (stmt);
672 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
687 case vect_reduction_def:
688 gcc_assert (relevant != vect_unused_in_scope);
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_scope
691 && relevant != vect_used_by_reduction
692 && relevant != vect_used_only_live)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of reduction.\n");
697 return false;
699 break;
701 case vect_nested_cycle:
702 if (relevant != vect_unused_in_scope
703 && relevant != vect_used_in_outer_by_reduction
704 && relevant != vect_used_in_outer)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "unsupported use of nested cycle.\n");
710 return false;
712 break;
714 case vect_double_reduction_def:
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_by_reduction
717 && relevant != vect_used_only_live)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of double reduction.\n");
723 return false;
725 break;
727 default:
728 break;
731 if (is_pattern_stmt_p (stmt_vinfo))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt))
738 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
739 tree op = gimple_assign_rhs1 (stmt);
741 i = 1;
742 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
744 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
745 relevant, &worklist, false)
746 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
747 relevant, &worklist, false))
748 return false;
749 i = 2;
751 for (; i < gimple_num_ops (stmt); i++)
753 op = gimple_op (stmt, i);
754 if (TREE_CODE (op) == SSA_NAME
755 && !process_use (stmt, op, loop_vinfo, relevant,
756 &worklist, false))
757 return false;
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, relevant,
766 &worklist, false))
767 return false;
771 else
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, relevant,
776 &worklist, false))
777 return false;
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
782 gather_scatter_info gs_info;
783 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
784 gcc_unreachable ();
785 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
786 &worklist, true))
787 return false;
789 } /* while worklist */
791 return true;
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
801 void
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 int ndts,
805 stmt_vector_for_cost *prologue_cost_vec,
806 stmt_vector_for_cost *body_cost_vec)
808 int i;
809 int inside_cost = 0, prologue_cost = 0;
811 /* The SLP costs were already calculated during SLP tree build. */
812 if (PURE_SLP_STMT (stmt_info))
813 return;
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is normally
835 zero for single-step promotions and demotions. It will be one if
836 two-step promotion/demotion is required, and so on. Each additional
837 step doubles the number of instructions required. */
839 static void
840 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
841 enum vect_def_type *dt, int pwr)
843 int i, tmp;
844 int inside_cost = 0, prologue_cost = 0;
845 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
846 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
847 void *target_cost_data;
849 /* The SLP costs were already calculated during SLP tree build. */
850 if (PURE_SLP_STMT (stmt_info))
851 return;
853 if (loop_vinfo)
854 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
855 else
856 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
858 for (i = 0; i < pwr + 1; i++)
860 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
861 (i + 1) : i;
862 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
863 vec_promote_demote, stmt_info, 0,
864 vect_body);
867 /* FORNOW: Assuming maximum 2 args per stmt. */
868 for (i = 0; i < 2; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
871 stmt_info, 0, vect_prologue);
873 if (dump_enabled_p ())
874 dump_printf_loc (MSG_NOTE, vect_location,
875 "vect_model_promotion_demotion_cost: inside_cost = %d, "
876 "prologue_cost = %d .\n", inside_cost, prologue_cost);
879 /* Function vect_model_store_cost
881 Models cost for stores. In the case of grouped accesses, one access
882 has the overhead of the grouped access attributed to it. */
884 void
885 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
886 vect_memory_access_type memory_access_type,
887 enum vect_def_type dt, slp_tree slp_node,
888 stmt_vector_for_cost *prologue_cost_vec,
889 stmt_vector_for_cost *body_cost_vec)
891 unsigned int inside_cost = 0, prologue_cost = 0;
892 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
893 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
894 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
896 if (dt == vect_constant_def || dt == vect_external_def)
897 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
898 stmt_info, 0, vect_prologue);
900 /* Grouped stores update all elements in the group at once,
901 so we want the DR for the first statement. */
902 if (!slp_node && grouped_access_p)
904 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
905 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
908 /* True if we should include any once-per-group costs as well as
909 the cost of the statement itself. For SLP we only get called
910 once per group anyhow. */
911 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
913 /* We assume that the cost of a single store-lanes instruction is
914 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
915 access is instead being provided by a permute-and-store operation,
916 include the cost of the permutes. */
917 if (first_stmt_p
918 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
920 /* Uses high and low interleave or shuffle operations for each
921 needed permute. */
922 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
923 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
924 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
925 stmt_info, 0, vect_body);
927 if (dump_enabled_p ())
928 dump_printf_loc (MSG_NOTE, vect_location,
929 "vect_model_store_cost: strided group_size = %d .\n",
930 group_size);
933 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
934 /* Costs of the stores. */
935 if (memory_access_type == VMAT_ELEMENTWISE
936 || memory_access_type == VMAT_GATHER_SCATTER)
937 /* N scalar stores plus extracting the elements. */
938 inside_cost += record_stmt_cost (body_cost_vec,
939 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
940 scalar_store, stmt_info, 0, vect_body);
941 else
942 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
944 if (memory_access_type == VMAT_ELEMENTWISE
945 || memory_access_type == VMAT_STRIDED_SLP)
946 inside_cost += record_stmt_cost (body_cost_vec,
947 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
948 vec_to_scalar, stmt_info, 0, vect_body);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: inside_cost = %d, "
953 "prologue_cost = %d .\n", inside_cost, prologue_cost);
957 /* Calculate cost of DR's memory access. */
958 void
959 vect_get_store_cost (struct data_reference *dr, int ncopies,
960 unsigned int *inside_cost,
961 stmt_vector_for_cost *body_cost_vec)
963 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
964 gimple *stmt = DR_STMT (dr);
965 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
967 switch (alignment_support_scheme)
969 case dr_aligned:
971 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
972 vector_store, stmt_info, 0,
973 vect_body);
975 if (dump_enabled_p ())
976 dump_printf_loc (MSG_NOTE, vect_location,
977 "vect_model_store_cost: aligned.\n");
978 break;
981 case dr_unaligned_supported:
983 /* Here, we assign an additional cost for the unaligned store. */
984 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
985 unaligned_store, stmt_info,
986 DR_MISALIGNMENT (dr), vect_body);
987 if (dump_enabled_p ())
988 dump_printf_loc (MSG_NOTE, vect_location,
989 "vect_model_store_cost: unaligned supported by "
990 "hardware.\n");
991 break;
994 case dr_unaligned_unsupported:
996 *inside_cost = VECT_MAX_COST;
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1000 "vect_model_store_cost: unsupported access.\n");
1001 break;
1004 default:
1005 gcc_unreachable ();
1010 /* Function vect_model_load_cost
1012 Models cost for loads. In the case of grouped accesses, one access has
1013 the overhead of the grouped access attributed to it. Since unaligned
1014 accesses are supported for loads, we also account for the costs of the
1015 access scheme chosen. */
1017 void
1018 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1019 vect_memory_access_type memory_access_type,
1020 slp_tree slp_node,
1021 stmt_vector_for_cost *prologue_cost_vec,
1022 stmt_vector_for_cost *body_cost_vec)
1024 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1025 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1026 unsigned int inside_cost = 0, prologue_cost = 0;
1027 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1029 /* Grouped loads read all elements in the group at once,
1030 so we want the DR for the first statement. */
1031 if (!slp_node && grouped_access_p)
1033 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1034 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1037 /* True if we should include any once-per-group costs as well as
1038 the cost of the statement itself. For SLP we only get called
1039 once per group anyhow. */
1040 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1042 /* We assume that the cost of a single load-lanes instruction is
1043 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1044 access is instead being provided by a load-and-permute operation,
1045 include the cost of the permutes. */
1046 if (first_stmt_p
1047 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1049 /* Uses even and odd extract operations or shuffle operations
1050 for each needed permute. */
1051 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1052 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1053 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: strided group_size = %d .\n",
1059 group_size);
1062 /* The loads themselves. */
1063 if (memory_access_type == VMAT_ELEMENTWISE
1064 || memory_access_type == VMAT_GATHER_SCATTER)
1066 /* N scalar loads plus gathering them into a vector. */
1067 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1068 inside_cost += record_stmt_cost (body_cost_vec,
1069 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1070 scalar_load, stmt_info, 0, vect_body);
1072 else
1073 vect_get_load_cost (dr, ncopies, first_stmt_p,
1074 &inside_cost, &prologue_cost,
1075 prologue_cost_vec, body_cost_vec, true);
1076 if (memory_access_type == VMAT_ELEMENTWISE
1077 || memory_access_type == VMAT_STRIDED_SLP)
1078 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1079 stmt_info, 0, vect_body);
1081 if (dump_enabled_p ())
1082 dump_printf_loc (MSG_NOTE, vect_location,
1083 "vect_model_load_cost: inside_cost = %d, "
1084 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1088 /* Calculate cost of DR's memory access. */
1089 void
1090 vect_get_load_cost (struct data_reference *dr, int ncopies,
1091 bool add_realign_cost, unsigned int *inside_cost,
1092 unsigned int *prologue_cost,
1093 stmt_vector_for_cost *prologue_cost_vec,
1094 stmt_vector_for_cost *body_cost_vec,
1095 bool record_prologue_costs)
1097 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1098 gimple *stmt = DR_STMT (dr);
1099 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1101 switch (alignment_support_scheme)
1103 case dr_aligned:
1105 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1106 stmt_info, 0, vect_body);
1108 if (dump_enabled_p ())
1109 dump_printf_loc (MSG_NOTE, vect_location,
1110 "vect_model_load_cost: aligned.\n");
1112 break;
1114 case dr_unaligned_supported:
1116 /* Here, we assign an additional cost for the unaligned load. */
1117 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1118 unaligned_load, stmt_info,
1119 DR_MISALIGNMENT (dr), vect_body);
1121 if (dump_enabled_p ())
1122 dump_printf_loc (MSG_NOTE, vect_location,
1123 "vect_model_load_cost: unaligned supported by "
1124 "hardware.\n");
1126 break;
1128 case dr_explicit_realign:
1130 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1131 vector_load, stmt_info, 0, vect_body);
1132 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1133 vec_perm, stmt_info, 0, vect_body);
1135 /* FIXME: If the misalignment remains fixed across the iterations of
1136 the containing loop, the following cost should be added to the
1137 prologue costs. */
1138 if (targetm.vectorize.builtin_mask_for_load)
1139 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1140 stmt_info, 0, vect_body);
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "vect_model_load_cost: explicit realign\n");
1146 break;
1148 case dr_explicit_realign_optimized:
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_NOTE, vect_location,
1152 "vect_model_load_cost: unaligned software "
1153 "pipelined.\n");
1155 /* Unaligned software pipeline has a load of an address, an initial
1156 load, and possibly a mask operation to "prime" the loop. However,
1157 if this is an access in a group of loads, which provide grouped
1158 access, then the above cost should only be considered for one
1159 access in the group. Inside the loop, there is a load op
1160 and a realignment op. */
1162 if (add_realign_cost && record_prologue_costs)
1164 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1165 vector_stmt, stmt_info,
1166 0, vect_prologue);
1167 if (targetm.vectorize.builtin_mask_for_load)
1168 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1169 vector_stmt, stmt_info,
1170 0, vect_prologue);
1173 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1174 stmt_info, 0, vect_body);
1175 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1176 stmt_info, 0, vect_body);
1178 if (dump_enabled_p ())
1179 dump_printf_loc (MSG_NOTE, vect_location,
1180 "vect_model_load_cost: explicit realign optimized"
1181 "\n");
1183 break;
1186 case dr_unaligned_unsupported:
1188 *inside_cost = VECT_MAX_COST;
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1192 "vect_model_load_cost: unsupported access.\n");
1193 break;
1196 default:
1197 gcc_unreachable ();
1201 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1202 the loop preheader for the vectorized stmt STMT. */
1204 static void
1205 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1207 if (gsi)
1208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1209 else
1211 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1214 if (loop_vinfo)
1216 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1217 basic_block new_bb;
1218 edge pe;
1220 if (nested_in_vect_loop_p (loop, stmt))
1221 loop = loop->inner;
1223 pe = loop_preheader_edge (loop);
1224 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1225 gcc_assert (!new_bb);
1227 else
1229 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1230 basic_block bb;
1231 gimple_stmt_iterator gsi_bb_start;
1233 gcc_assert (bb_vinfo);
1234 bb = BB_VINFO_BB (bb_vinfo);
1235 gsi_bb_start = gsi_after_labels (bb);
1236 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1240 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE, vect_location,
1243 "created new init_stmt: ");
1244 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1248 /* Function vect_init_vector.
1250 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1251 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1252 vector type a vector with all elements equal to VAL is created first.
1253 Place the initialization at BSI if it is not NULL. Otherwise, place the
1254 initialization at the loop preheader.
1255 Return the DEF of INIT_STMT.
1256 It will be used in the vectorization of STMT. */
1258 tree
1259 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1261 gimple *init_stmt;
1262 tree new_temp;
1264 /* We abuse this function to push something to an SSA name with initial 'val'. */
1265 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1267 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1268 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1270 /* Scalar boolean value should be transformed into
1271 all zeros or all ones value before building a vector. */
1272 if (VECTOR_BOOLEAN_TYPE_P (type))
1274 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1275 tree false_val = build_zero_cst (TREE_TYPE (type));
1277 if (CONSTANT_CLASS_P (val))
1278 val = integer_zerop (val) ? false_val : true_val;
1279 else
1281 new_temp = make_ssa_name (TREE_TYPE (type));
1282 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1283 val, true_val, false_val);
1284 vect_init_vector_1 (stmt, init_stmt, gsi);
1285 val = new_temp;
1288 else if (CONSTANT_CLASS_P (val))
1289 val = fold_convert (TREE_TYPE (type), val);
1290 else
1292 new_temp = make_ssa_name (TREE_TYPE (type));
1293 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1294 init_stmt = gimple_build_assign (new_temp,
1295 fold_build1 (VIEW_CONVERT_EXPR,
1296 TREE_TYPE (type),
1297 val));
1298 else
1299 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1300 vect_init_vector_1 (stmt, init_stmt, gsi);
1301 val = new_temp;
1304 val = build_vector_from_val (type, val);
1307 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1308 init_stmt = gimple_build_assign (new_temp, val);
1309 vect_init_vector_1 (stmt, init_stmt, gsi);
1310 return new_temp;
1313 /* Function vect_get_vec_def_for_operand_1.
1315 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1316 DT that will be used in the vectorized stmt. */
1318 tree
1319 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1321 tree vec_oprnd;
1322 gimple *vec_stmt;
1323 stmt_vec_info def_stmt_info = NULL;
1325 switch (dt)
1327 /* operand is a constant or a loop invariant. */
1328 case vect_constant_def:
1329 case vect_external_def:
1330 /* Code should use vect_get_vec_def_for_operand. */
1331 gcc_unreachable ();
1333 /* operand is defined inside the loop. */
1334 case vect_internal_def:
1336 /* Get the def from the vectorized stmt. */
1337 def_stmt_info = vinfo_for_stmt (def_stmt);
1339 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1340 /* Get vectorized pattern statement. */
1341 if (!vec_stmt
1342 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1343 && !STMT_VINFO_RELEVANT (def_stmt_info))
1344 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1345 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1346 gcc_assert (vec_stmt);
1347 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1348 vec_oprnd = PHI_RESULT (vec_stmt);
1349 else if (is_gimple_call (vec_stmt))
1350 vec_oprnd = gimple_call_lhs (vec_stmt);
1351 else
1352 vec_oprnd = gimple_assign_lhs (vec_stmt);
1353 return vec_oprnd;
1356 /* operand is defined by a loop header phi - reduction */
1357 case vect_reduction_def:
1358 case vect_double_reduction_def:
1359 case vect_nested_cycle:
1360 /* Code should use get_initial_def_for_reduction. */
1361 gcc_unreachable ();
1363 /* operand is defined by loop-header phi - induction. */
1364 case vect_induction_def:
1366 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1368 /* Get the def from the vectorized stmt. */
1369 def_stmt_info = vinfo_for_stmt (def_stmt);
1370 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1371 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt);
1373 else
1374 vec_oprnd = gimple_get_lhs (vec_stmt);
1375 return vec_oprnd;
1378 default:
1379 gcc_unreachable ();
1384 /* Function vect_get_vec_def_for_operand.
1386 OP is an operand in STMT. This function returns a (vector) def that will be
1387 used in the vectorized stmt for STMT.
1389 In the case that OP is an SSA_NAME which is defined in the loop, then
1390 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1392 In case OP is an invariant or constant, a new stmt that creates a vector def
1393 needs to be introduced. VECTYPE may be used to specify a required type for
1394 vector invariant. */
1396 tree
1397 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1399 gimple *def_stmt;
1400 enum vect_def_type dt;
1401 bool is_simple_use;
1402 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1403 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1405 if (dump_enabled_p ())
1407 dump_printf_loc (MSG_NOTE, vect_location,
1408 "vect_get_vec_def_for_operand: ");
1409 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1410 dump_printf (MSG_NOTE, "\n");
1413 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1414 gcc_assert (is_simple_use);
1415 if (def_stmt && dump_enabled_p ())
1417 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1418 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1421 if (dt == vect_constant_def || dt == vect_external_def)
1423 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1424 tree vector_type;
1426 if (vectype)
1427 vector_type = vectype;
1428 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1429 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1430 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1431 else
1432 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1434 gcc_assert (vector_type);
1435 return vect_init_vector (stmt, op, vector_type, NULL);
1437 else
1438 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1442 /* Function vect_get_vec_def_for_stmt_copy
1444 Return a vector-def for an operand. This function is used when the
1445 vectorized stmt to be created (by the caller to this function) is a "copy"
1446 created in case the vectorized result cannot fit in one vector, and several
1447 copies of the vector-stmt are required. In this case the vector-def is
1448 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1449 of the stmt that defines VEC_OPRND.
1450 DT is the type of the vector def VEC_OPRND.
1452 Context:
1453 In case the vectorization factor (VF) is bigger than the number
1454 of elements that can fit in a vectype (nunits), we have to generate
1455 more than one vector stmt to vectorize the scalar stmt. This situation
1456 arises when there are multiple data-types operated upon in the loop; the
1457 smallest data-type determines the VF, and as a result, when vectorizing
1458 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1459 vector stmt (each computing a vector of 'nunits' results, and together
1460 computing 'VF' results in each iteration). This function is called when
1461 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1462 which VF=16 and nunits=4, so the number of copies required is 4):
1464 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1466 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1467 VS1.1: vx.1 = memref1 VS1.2
1468 VS1.2: vx.2 = memref2 VS1.3
1469 VS1.3: vx.3 = memref3
1471 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1472 VSnew.1: vz1 = vx.1 + ... VSnew.2
1473 VSnew.2: vz2 = vx.2 + ... VSnew.3
1474 VSnew.3: vz3 = vx.3 + ...
1476 The vectorization of S1 is explained in vectorizable_load.
1477 The vectorization of S2:
1478 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1479 the function 'vect_get_vec_def_for_operand' is called to
1480 get the relevant vector-def for each operand of S2. For operand x it
1481 returns the vector-def 'vx.0'.
1483 To create the remaining copies of the vector-stmt (VSnew.j), this
1484 function is called to get the relevant vector-def for each operand. It is
1485 obtained from the respective VS1.j stmt, which is recorded in the
1486 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1488 For example, to obtain the vector-def 'vx.1' in order to create the
1489 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1490 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1491 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1492 and return its def ('vx.1').
1493 Overall, to create the above sequence this function will be called 3 times:
1494 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1495 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1496 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1498 tree
1499 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1501 gimple *vec_stmt_for_operand;
1502 stmt_vec_info def_stmt_info;
1504 /* Do nothing; can reuse same def. */
1505 if (dt == vect_external_def || dt == vect_constant_def )
1506 return vec_oprnd;
1508 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1509 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1510 gcc_assert (def_stmt_info);
1511 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1512 gcc_assert (vec_stmt_for_operand);
1513 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1514 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1515 else
1516 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1517 return vec_oprnd;
1521 /* Get vectorized definitions for the operands to create a copy of an original
1522 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1524 static void
1525 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1526 vec<tree> *vec_oprnds0,
1527 vec<tree> *vec_oprnds1)
1529 tree vec_oprnd = vec_oprnds0->pop ();
1531 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1532 vec_oprnds0->quick_push (vec_oprnd);
1534 if (vec_oprnds1 && vec_oprnds1->length ())
1536 vec_oprnd = vec_oprnds1->pop ();
1537 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1538 vec_oprnds1->quick_push (vec_oprnd);
1543 /* Get vectorized definitions for OP0 and OP1.
1544 REDUC_INDEX is the index of the reduction operand in case of reduction,
1545 and -1 otherwise. */
1547 void
1548 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1549 vec<tree> *vec_oprnds0,
1550 vec<tree> *vec_oprnds1,
1551 slp_tree slp_node, int reduc_index)
1553 if (slp_node)
1555 int nops = (op1 == NULL_TREE) ? 1 : 2;
1556 auto_vec<tree> ops (nops);
1557 auto_vec<vec<tree> > vec_defs (nops);
1559 ops.quick_push (op0);
1560 if (op1)
1561 ops.quick_push (op1);
1563 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1565 *vec_oprnds0 = vec_defs[0];
1566 if (op1)
1567 *vec_oprnds1 = vec_defs[1];
1569 else
1571 tree vec_oprnd;
1573 vec_oprnds0->create (1);
1574 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1575 vec_oprnds0->quick_push (vec_oprnd);
1577 if (op1)
1579 vec_oprnds1->create (1);
1580 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1581 vec_oprnds1->quick_push (vec_oprnd);
1587 /* Function vect_finish_stmt_generation.
1589 Insert a new stmt. */
1591 void
1592 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1593 gimple_stmt_iterator *gsi)
1595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1596 vec_info *vinfo = stmt_info->vinfo;
1598 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1600 if (!gsi_end_p (*gsi)
1601 && gimple_has_mem_ops (vec_stmt))
1603 gimple *at_stmt = gsi_stmt (*gsi);
1604 tree vuse = gimple_vuse (at_stmt);
1605 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1607 tree vdef = gimple_vdef (at_stmt);
1608 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1609 /* If we have an SSA vuse and insert a store, update virtual
1610 SSA form to avoid triggering the renamer. Do so only
1611 if we can easily see all uses - which is what almost always
1612 happens with the way vectorized stmts are inserted. */
1613 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1614 && ((is_gimple_assign (vec_stmt)
1615 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1616 || (is_gimple_call (vec_stmt)
1617 && !(gimple_call_flags (vec_stmt)
1618 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1620 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1621 gimple_set_vdef (vec_stmt, new_vdef);
1622 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1626 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1628 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1630 if (dump_enabled_p ())
1632 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1633 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1636 gimple_set_location (vec_stmt, gimple_location (stmt));
1638 /* While EH edges will generally prevent vectorization, stmt might
1639 e.g. be in a must-not-throw region. Ensure newly created stmts
1640 that could throw are part of the same region. */
1641 int lp_nr = lookup_stmt_eh_lp (stmt);
1642 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1643 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1646 /* We want to vectorize a call to combined function CFN with function
1647 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1648 as the types of all inputs. Check whether this is possible using
1649 an internal function, returning its code if so or IFN_LAST if not. */
1651 static internal_fn
1652 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1653 tree vectype_out, tree vectype_in)
1655 internal_fn ifn;
1656 if (internal_fn_p (cfn))
1657 ifn = as_internal_fn (cfn);
1658 else
1659 ifn = associated_internal_fn (fndecl);
1660 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1662 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1663 if (info.vectorizable)
1665 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1666 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1667 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1668 OPTIMIZE_FOR_SPEED))
1669 return ifn;
1672 return IFN_LAST;
1676 static tree permute_vec_elements (tree, tree, tree, gimple *,
1677 gimple_stmt_iterator *);
1679 /* STMT is a non-strided load or store, meaning that it accesses
1680 elements with a known constant step. Return -1 if that step
1681 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1683 static int
1684 compare_step_with_zero (gimple *stmt)
1686 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1687 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1688 tree step;
1689 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1690 step = STMT_VINFO_DR_STEP (stmt_info);
1691 else
1692 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1693 return tree_int_cst_compare (step, size_zero_node);
1696 /* If the target supports a permute mask that reverses the elements in
1697 a vector of type VECTYPE, return that mask, otherwise return null. */
1699 static tree
1700 perm_mask_for_reverse (tree vectype)
1702 int i, nunits;
1703 unsigned char *sel;
1705 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1706 sel = XALLOCAVEC (unsigned char, nunits);
1708 for (i = 0; i < nunits; ++i)
1709 sel[i] = nunits - 1 - i;
1711 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1712 return NULL_TREE;
1713 return vect_gen_perm_mask_checked (vectype, sel);
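/* For example (a sketch): with a four-element vector type the selector
   built above is { 3, 2, 1, 0 }, i.e. element i of the result is taken
   from element nunits - 1 - i of the input, which reverses the vector
   provided can_vec_perm_p accepts the mask for the vector mode.  */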
1716 /* A subroutine of get_load_store_type, with a subset of the same
1717 arguments. Handle the case where STMT is part of a grouped load
1718 or store.
1720 For stores, the statements in the group are all consecutive
1721 and there is no gap at the end. For loads, the statements in the
1722 group might not be consecutive; there can be gaps between statements
1723 as well as at the end. */
1725 static bool
1726 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1727 vec_load_store_type vls_type,
1728 vect_memory_access_type *memory_access_type)
1730 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1731 vec_info *vinfo = stmt_info->vinfo;
1732 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1733 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1734 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1735 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1736 bool single_element_p = (stmt == first_stmt
1737 && !GROUP_NEXT_ELEMENT (stmt_info));
1738 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1739 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1741 /* True if the vectorized statements would access beyond the last
1742 statement in the group. */
1743 bool overrun_p = false;
1745 /* True if we can cope with such overrun by peeling for gaps, so that
1746 there is at least one final scalar iteration after the vector loop. */
1747 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1749 /* There can only be a gap at the end of the group if the stride is
1750 known at compile time. */
1751 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1753 /* Stores can't yet have gaps. */
1754 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1756 if (slp)
1758 if (STMT_VINFO_STRIDED_P (stmt_info))
1760 /* Try to use consecutive accesses of GROUP_SIZE elements,
1761 separated by the stride, until we have a complete vector.
1762 Fall back to scalar accesses if that isn't possible. */
1763 if (nunits % group_size == 0)
1764 *memory_access_type = VMAT_STRIDED_SLP;
1765 else
1766 *memory_access_type = VMAT_ELEMENTWISE;
1768 else
1770 overrun_p = loop_vinfo && gap != 0;
1771 if (overrun_p && vls_type != VLS_LOAD)
1773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1774 "Grouped store with gaps requires"
1775 " non-consecutive accesses\n");
1776 return false;
1778 /* If the access is aligned an overrun is fine. */
1779 if (overrun_p
1780 && aligned_access_p
1781 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1782 overrun_p = false;
1783 if (overrun_p && !can_overrun_p)
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787 "Peeling for outer loop is not supported\n");
1788 return false;
1790 *memory_access_type = VMAT_CONTIGUOUS;
1793 else
1795 /* We can always handle this case using elementwise accesses,
1796 but see if something more efficient is available. */
1797 *memory_access_type = VMAT_ELEMENTWISE;
1799 /* If there is a gap at the end of the group then these optimizations
1800 would access excess elements in the last iteration. */
1801 bool would_overrun_p = (gap != 0);
1802 /* If the access is aligned, an overrun is fine, but only if the
1803 overrun is not inside an unused vector (i.e. if the gap is as
1804 large as or larger than a vector). */
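/* E.g. with a 4-element vector, a gap of 1 to 3 only over-reads within
   the last vector that still contains useful elements, whereas a gap of 4
   or more would mean reading a trailing vector with no useful element at
   all, which the GAP < NUNITS test below rejects. */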
1805 if (would_overrun_p
1806 && gap < nunits
1807 && aligned_access_p
1808 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1809 would_overrun_p = false;
1810 if (!STMT_VINFO_STRIDED_P (stmt_info)
1811 && (can_overrun_p || !would_overrun_p)
1812 && compare_step_with_zero (stmt) > 0)
1814 /* First try using LOAD/STORE_LANES. */
1815 if (vls_type == VLS_LOAD
1816 ? vect_load_lanes_supported (vectype, group_size)
1817 : vect_store_lanes_supported (vectype, group_size))
1819 *memory_access_type = VMAT_LOAD_STORE_LANES;
1820 overrun_p = would_overrun_p;
1823 /* If that fails, try using permuting loads or stores. */
1824 if (*memory_access_type == VMAT_ELEMENTWISE
1825 && (vls_type == VLS_LOAD
1826 ? vect_grouped_load_supported (vectype, single_element_p,
1827 group_size)
1828 : vect_grouped_store_supported (vectype, group_size)))
1830 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1831 overrun_p = would_overrun_p;
1836 if (vls_type != VLS_LOAD && first_stmt == stmt)
1838 /* STMT is the leader of the group. Check the operands of all the
1839 stmts of the group. */
1840 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1841 while (next_stmt)
1843 gcc_assert (gimple_assign_single_p (next_stmt));
1844 tree op = gimple_assign_rhs1 (next_stmt);
1845 gimple *def_stmt;
1846 enum vect_def_type dt;
1847 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1849 if (dump_enabled_p ())
1850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1851 "use not simple.\n");
1852 return false;
1854 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1858 if (overrun_p)
1860 gcc_assert (can_overrun_p);
1861 if (dump_enabled_p ())
1862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1863 "Data access with gaps requires scalar "
1864 "epilogue loop\n");
1865 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1868 return true;
1871 /* A subroutine of get_load_store_type, with a subset of the same
1872 arguments. Handle the case where STMT is a load or store that
1873 accesses consecutive elements with a negative step. */
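/* The possible results below are VMAT_CONTIGUOUS_DOWN for invariant
   stores, VMAT_CONTIGUOUS_REVERSE when a reversing permutation is
   available, and VMAT_ELEMENTWISE as the fallback. */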
1875 static vect_memory_access_type
1876 get_negative_load_store_type (gimple *stmt, tree vectype,
1877 vec_load_store_type vls_type,
1878 unsigned int ncopies)
1880 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1881 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1882 dr_alignment_support alignment_support_scheme;
1884 if (ncopies > 1)
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "multiple types with negative step.\n");
1889 return VMAT_ELEMENTWISE;
1892 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1893 if (alignment_support_scheme != dr_aligned
1894 && alignment_support_scheme != dr_unaligned_supported)
1896 if (dump_enabled_p ())
1897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1898 "negative step but alignment required.\n");
1899 return VMAT_ELEMENTWISE;
1902 if (vls_type == VLS_STORE_INVARIANT)
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_NOTE, vect_location,
1906 "negative step with invariant source;"
1907 " no permute needed.\n");
1908 return VMAT_CONTIGUOUS_DOWN;
1911 if (!perm_mask_for_reverse (vectype))
1913 if (dump_enabled_p ())
1914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1915 "negative step and reversing not supported.\n");
1916 return VMAT_ELEMENTWISE;
1919 return VMAT_CONTIGUOUS_REVERSE;
1922 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1923 if there is a memory access type that the vectorized form can use,
1924 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1925 or scatters, fill in GS_INFO accordingly.
1927 SLP says whether we're performing SLP rather than loop vectorization.
1928 VECTYPE is the vector type that the vectorized statements will use.
1929 NCOPIES is the number of vector statements that will be needed. */
1931 static bool
1932 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1933 vec_load_store_type vls_type, unsigned int ncopies,
1934 vect_memory_access_type *memory_access_type,
1935 gather_scatter_info *gs_info)
1937 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1938 vec_info *vinfo = stmt_info->vinfo;
1939 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1940 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1942 *memory_access_type = VMAT_GATHER_SCATTER;
1943 gimple *def_stmt;
1944 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1945 gcc_unreachable ();
1946 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1947 &gs_info->offset_dt,
1948 &gs_info->offset_vectype))
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "%s index use not simple.\n",
1953 vls_type == VLS_LOAD ? "gather" : "scatter");
1954 return false;
1957 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1959 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1960 memory_access_type))
1961 return false;
1963 else if (STMT_VINFO_STRIDED_P (stmt_info))
1965 gcc_assert (!slp);
1966 *memory_access_type = VMAT_ELEMENTWISE;
1968 else
1970 int cmp = compare_step_with_zero (stmt);
1971 if (cmp < 0)
1972 *memory_access_type = get_negative_load_store_type
1973 (stmt, vectype, vls_type, ncopies);
1974 else if (cmp == 0)
1976 gcc_assert (vls_type == VLS_LOAD);
1977 *memory_access_type = VMAT_INVARIANT;
1979 else
1980 *memory_access_type = VMAT_CONTIGUOUS;
1983 /* FIXME: At the moment the cost model seems to underestimate the
1984 cost of using elementwise accesses. This check preserves the
1985 traditional behavior until that can be fixed. */
1986 if (*memory_access_type == VMAT_ELEMENTWISE
1987 && !STMT_VINFO_STRIDED_P (stmt_info))
1989 if (dump_enabled_p ())
1990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1991 "not falling back to elementwise accesses\n");
1992 return false;
1994 return true;
1997 /* Function vectorizable_mask_load_store.
1999 Check if STMT performs a conditional load or store that can be vectorized.
2000 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2001 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2002 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2004 static bool
2005 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2006 gimple **vec_stmt, slp_tree slp_node)
2008 tree vec_dest = NULL;
2009 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2010 stmt_vec_info prev_stmt_info;
2011 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2012 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2013 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2014 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2016 tree rhs_vectype = NULL_TREE;
2017 tree mask_vectype;
2018 tree elem_type;
2019 gimple *new_stmt;
2020 tree dummy;
2021 tree dataref_ptr = NULL_TREE;
2022 gimple *ptr_incr;
2023 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2024 int ncopies;
2025 int i, j;
2026 bool inv_p;
2027 gather_scatter_info gs_info;
2028 vec_load_store_type vls_type;
2029 tree mask;
2030 gimple *def_stmt;
2031 enum vect_def_type dt;
2033 if (slp_node != NULL)
2034 return false;
2036 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2037 gcc_assert (ncopies >= 1);
2039 mask = gimple_call_arg (stmt, 2);
2041 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2042 return false;
2044 /* FORNOW. This restriction should be relaxed. */
2045 if (nested_in_vect_loop && ncopies > 1)
2047 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2049 "multiple types in nested loop.");
2050 return false;
2053 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2054 return false;
2056 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2057 && ! vec_stmt)
2058 return false;
2060 if (!STMT_VINFO_DATA_REF (stmt_info))
2061 return false;
2063 elem_type = TREE_TYPE (vectype);
2065 if (TREE_CODE (mask) != SSA_NAME)
2066 return false;
2068 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2069 return false;
2071 if (!mask_vectype)
2072 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2074 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2075 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2076 return false;
2078 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2080 tree rhs = gimple_call_arg (stmt, 3);
2081 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2082 return false;
2083 if (dt == vect_constant_def || dt == vect_external_def)
2084 vls_type = VLS_STORE_INVARIANT;
2085 else
2086 vls_type = VLS_STORE;
2088 else
2089 vls_type = VLS_LOAD;
2091 vect_memory_access_type memory_access_type;
2092 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2093 &memory_access_type, &gs_info))
2094 return false;
2096 if (memory_access_type == VMAT_GATHER_SCATTER)
2098 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2099 tree masktype
2100 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2101 if (TREE_CODE (masktype) == INTEGER_TYPE)
2103 if (dump_enabled_p ())
2104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2105 "masked gather with integer mask not supported.");
2106 return false;
2109 else if (memory_access_type != VMAT_CONTIGUOUS)
2111 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2113 "unsupported access type for masked %s.\n",
2114 vls_type == VLS_LOAD ? "load" : "store");
2115 return false;
2117 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2118 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2119 TYPE_MODE (mask_vectype),
2120 vls_type == VLS_LOAD)
2121 || (rhs_vectype
2122 && !useless_type_conversion_p (vectype, rhs_vectype)))
2123 return false;
2125 if (!vec_stmt) /* transformation not required. */
2127 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2128 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2129 if (vls_type == VLS_LOAD)
2130 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2131 NULL, NULL, NULL);
2132 else
2133 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2134 dt, NULL, NULL, NULL);
2135 return true;
2137 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2139 /** Transform. **/
2141 if (memory_access_type == VMAT_GATHER_SCATTER)
2143 tree vec_oprnd0 = NULL_TREE, op;
2144 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2145 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2146 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2147 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2148 tree mask_perm_mask = NULL_TREE;
2149 edge pe = loop_preheader_edge (loop);
2150 gimple_seq seq;
2151 basic_block new_bb;
2152 enum { NARROW, NONE, WIDEN } modifier;
2153 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2155 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2156 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2157 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2158 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2159 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2160 scaletype = TREE_VALUE (arglist);
2161 gcc_checking_assert (types_compatible_p (srctype, rettype)
2162 && types_compatible_p (srctype, masktype));
2164 if (nunits == gather_off_nunits)
2165 modifier = NONE;
2166 else if (nunits == gather_off_nunits / 2)
2168 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2169 modifier = WIDEN;
2171 for (i = 0; i < gather_off_nunits; ++i)
2172 sel[i] = i | nunits;
2174 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2176 else if (nunits == gather_off_nunits * 2)
2178 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2179 modifier = NARROW;
2181 for (i = 0; i < nunits; ++i)
2182 sel[i] = i < gather_off_nunits
2183 ? i : i + nunits - gather_off_nunits;
2185 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2186 ncopies *= 2;
2187 for (i = 0; i < nunits; ++i)
2188 sel[i] = i | gather_off_nunits;
2189 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2191 else
2192 gcc_unreachable ();
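/* To summarize the three cases above: with NONE the offset vector has as
   many elements as the data vector; with WIDEN it has twice as many, so
   each data copy consumes half of an offset vector (PERM_MASK moves the
   high half into place for the odd copies); with NARROW it has half as
   many, so two gather results are combined per data vector, which is why
   NCOPIES is doubled. */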
2194 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2196 ptr = fold_convert (ptrtype, gs_info.base);
2197 if (!is_gimple_min_invariant (ptr))
2199 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2200 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2201 gcc_assert (!new_bb);
2204 scale = build_int_cst (scaletype, gs_info.scale);
2206 prev_stmt_info = NULL;
2207 for (j = 0; j < ncopies; ++j)
2209 if (modifier == WIDEN && (j & 1))
2210 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2211 perm_mask, stmt, gsi);
2212 else if (j == 0)
2213 op = vec_oprnd0
2214 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2215 else
2216 op = vec_oprnd0
2217 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2219 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2221 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2222 == TYPE_VECTOR_SUBPARTS (idxtype));
2223 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2224 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2225 new_stmt
2226 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2227 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2228 op = var;
2231 if (mask_perm_mask && (j & 1))
2232 mask_op = permute_vec_elements (mask_op, mask_op,
2233 mask_perm_mask, stmt, gsi);
2234 else
2236 if (j == 0)
2237 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2238 else
2240 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2241 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2244 mask_op = vec_mask;
2245 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2248 == TYPE_VECTOR_SUBPARTS (masktype));
2249 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2250 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2251 new_stmt
2252 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2253 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2254 mask_op = var;
2258 new_stmt
2259 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2260 scale);
2262 if (!useless_type_conversion_p (vectype, rettype))
2264 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2265 == TYPE_VECTOR_SUBPARTS (rettype));
2266 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2267 gimple_call_set_lhs (new_stmt, op);
2268 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2269 var = make_ssa_name (vec_dest);
2270 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2271 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2273 else
2275 var = make_ssa_name (vec_dest, new_stmt);
2276 gimple_call_set_lhs (new_stmt, var);
2279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2281 if (modifier == NARROW)
2283 if ((j & 1) == 0)
2285 prev_res = var;
2286 continue;
2288 var = permute_vec_elements (prev_res, var,
2289 perm_mask, stmt, gsi);
2290 new_stmt = SSA_NAME_DEF_STMT (var);
2293 if (prev_stmt_info == NULL)
2294 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2295 else
2296 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2297 prev_stmt_info = vinfo_for_stmt (new_stmt);
2300 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2301 from the IL. */
2302 if (STMT_VINFO_RELATED_STMT (stmt_info))
2304 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2305 stmt_info = vinfo_for_stmt (stmt);
2307 tree lhs = gimple_call_lhs (stmt);
2308 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2309 set_vinfo_for_stmt (new_stmt, stmt_info);
2310 set_vinfo_for_stmt (stmt, NULL);
2311 STMT_VINFO_STMT (stmt_info) = new_stmt;
2312 gsi_replace (gsi, new_stmt, true);
2313 return true;
2315 else if (vls_type != VLS_LOAD)
2317 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2318 prev_stmt_info = NULL;
2319 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2320 for (i = 0; i < ncopies; i++)
2322 unsigned align, misalign;
2324 if (i == 0)
2326 tree rhs = gimple_call_arg (stmt, 3);
2327 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2328 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2329 /* We should have caught mismatched types earlier. */
2330 gcc_assert (useless_type_conversion_p (vectype,
2331 TREE_TYPE (vec_rhs)));
2332 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2333 NULL_TREE, &dummy, gsi,
2334 &ptr_incr, false, &inv_p);
2335 gcc_assert (!inv_p);
2337 else
2339 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2340 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2341 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2342 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2343 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2344 TYPE_SIZE_UNIT (vectype));
2347 align = TYPE_ALIGN_UNIT (vectype);
2348 if (aligned_access_p (dr))
2349 misalign = 0;
2350 else if (DR_MISALIGNMENT (dr) == -1)
2352 align = TYPE_ALIGN_UNIT (elem_type);
2353 misalign = 0;
2355 else
2356 misalign = DR_MISALIGNMENT (dr);
2357 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2358 misalign);
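/* The alignment operand passed to IFN_MASK_STORE below is what the target
   may assume: the full vector alignment when the access is known to be
   aligned, otherwise the largest power of two dividing the known
   misalignment, falling back to the element alignment when the
   misalignment is unknown (DR_MISALIGNMENT == -1). */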
2359 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2360 misalign ? least_bit_hwi (misalign) : align);
2361 new_stmt
2362 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2363 ptr, vec_mask, vec_rhs);
2364 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2365 if (i == 0)
2366 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2367 else
2368 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2369 prev_stmt_info = vinfo_for_stmt (new_stmt);
2372 else
2374 tree vec_mask = NULL_TREE;
2375 prev_stmt_info = NULL;
2376 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2377 for (i = 0; i < ncopies; i++)
2379 unsigned align, misalign;
2381 if (i == 0)
2383 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2384 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2385 NULL_TREE, &dummy, gsi,
2386 &ptr_incr, false, &inv_p);
2387 gcc_assert (!inv_p);
2389 else
2391 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2392 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2393 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2394 TYPE_SIZE_UNIT (vectype));
2397 align = TYPE_ALIGN_UNIT (vectype);
2398 if (aligned_access_p (dr))
2399 misalign = 0;
2400 else if (DR_MISALIGNMENT (dr) == -1)
2402 align = TYPE_ALIGN_UNIT (elem_type);
2403 misalign = 0;
2405 else
2406 misalign = DR_MISALIGNMENT (dr);
2407 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2408 misalign);
2409 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2410 misalign ? least_bit_hwi (misalign) : align);
2411 new_stmt
2412 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2413 ptr, vec_mask);
2414 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2415 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2416 if (i == 0)
2417 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2418 else
2419 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2420 prev_stmt_info = vinfo_for_stmt (new_stmt);
2424 if (vls_type == VLS_LOAD)
2426 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2427 from the IL. */
2428 if (STMT_VINFO_RELATED_STMT (stmt_info))
2430 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2431 stmt_info = vinfo_for_stmt (stmt);
2433 tree lhs = gimple_call_lhs (stmt);
2434 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2435 set_vinfo_for_stmt (new_stmt, stmt_info);
2436 set_vinfo_for_stmt (stmt, NULL);
2437 STMT_VINFO_STMT (stmt_info) = new_stmt;
2438 gsi_replace (gsi, new_stmt, true);
2441 return true;
2444 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2446 static bool
2447 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2448 gimple **vec_stmt, slp_tree slp_node,
2449 tree vectype_in, enum vect_def_type *dt)
2451 tree op, vectype;
2452 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2453 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2454 unsigned ncopies, nunits;
2456 op = gimple_call_arg (stmt, 0);
2457 vectype = STMT_VINFO_VECTYPE (stmt_info);
2458 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2460 /* Multiple types in SLP are handled by creating the appropriate number of
2461 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2462 case of SLP. */
2463 if (slp_node)
2464 ncopies = 1;
2465 else
2466 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2468 gcc_assert (ncopies >= 1);
2470 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2471 if (! char_vectype)
2472 return false;
2474 unsigned char *elts
2475 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2476 unsigned char *elt = elts;
2477 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2478 for (unsigned i = 0; i < nunits; ++i)
2479 for (unsigned j = 0; j < word_bytes; ++j)
2480 *elt++ = (i + 1) * word_bytes - j - 1;
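/* E.g. for bswap32 on a vector of four 32-bit elements viewed as sixteen
   bytes, WORD_BYTES == 4 and the selector built above is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }. */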
2482 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2483 return false;
2485 if (! vec_stmt)
2487 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2490 "\n");
2491 if (! PURE_SLP_STMT (stmt_info))
2493 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2494 1, vector_stmt, stmt_info, 0, vect_prologue);
2495 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2496 ncopies, vec_perm, stmt_info, 0, vect_body);
2498 return true;
2501 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2502 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2503 telts[i] = build_int_cst (char_type_node, elts[i]);
2504 tree bswap_vconst = build_vector (char_vectype, telts);
2506 /* Transform. */
2507 vec<tree> vec_oprnds = vNULL;
2508 gimple *new_stmt = NULL;
2509 stmt_vec_info prev_stmt_info = NULL;
2510 for (unsigned j = 0; j < ncopies; j++)
2512 /* Handle uses. */
2513 if (j == 0)
2514 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2515 else
2516 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2518 /* Arguments are ready. Create the new vector stmt. */
2519 unsigned i;
2520 tree vop;
2521 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2523 tree tem = make_ssa_name (char_vectype);
2524 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2525 char_vectype, vop));
2526 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2527 tree tem2 = make_ssa_name (char_vectype);
2528 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2529 tem, tem, bswap_vconst);
2530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2531 tem = make_ssa_name (vectype);
2532 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2533 vectype, tem2));
2534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2535 if (slp_node)
2536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2539 if (slp_node)
2540 continue;
2542 if (j == 0)
2543 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2544 else
2545 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2547 prev_stmt_info = vinfo_for_stmt (new_stmt);
2550 vec_oprnds.release ();
2551 return true;
2554 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2555 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2556 in a single step. On success, store the binary pack code in
2557 *CONVERT_CODE. */
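/* E.g. narrowing a vector of four 32-bit integers to (half of) a vector
   of eight 16-bit integers is such a single step; the code returned is
   the binary pack operation that combines two input vectors into one
   output vector. */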
2559 static bool
2560 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2561 tree_code *convert_code)
2563 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2564 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2565 return false;
2567 tree_code code;
2568 int multi_step_cvt = 0;
2569 auto_vec <tree, 8> interm_types;
2570 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2571 &code, &multi_step_cvt,
2572 &interm_types)
2573 || multi_step_cvt)
2574 return false;
2576 *convert_code = code;
2577 return true;
2580 /* Function vectorizable_call.
2582 Check if GS performs a function call that can be vectorized.
2583 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2584 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2585 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2587 static bool
2588 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2589 slp_tree slp_node)
2591 gcall *stmt;
2592 tree vec_dest;
2593 tree scalar_dest;
2594 tree op, type;
2595 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2596 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2597 tree vectype_out, vectype_in;
2598 int nunits_in;
2599 int nunits_out;
2600 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2601 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2602 vec_info *vinfo = stmt_info->vinfo;
2603 tree fndecl, new_temp, rhs_type;
2604 gimple *def_stmt;
2605 enum vect_def_type dt[3]
2606 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2607 int ndts = 3;
2608 gimple *new_stmt = NULL;
2609 int ncopies, j;
2610 vec<tree> vargs = vNULL;
2611 enum { NARROW, NONE, WIDEN } modifier;
2612 size_t i, nargs;
2613 tree lhs;
2615 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2616 return false;
2618 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2619 && ! vec_stmt)
2620 return false;
2622 /* Is GS a vectorizable call? */
2623 stmt = dyn_cast <gcall *> (gs);
2624 if (!stmt)
2625 return false;
2627 if (gimple_call_internal_p (stmt)
2628 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2629 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2630 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2631 slp_node);
2633 if (gimple_call_lhs (stmt) == NULL_TREE
2634 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2635 return false;
2637 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2639 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2641 /* Process function arguments. */
2642 rhs_type = NULL_TREE;
2643 vectype_in = NULL_TREE;
2644 nargs = gimple_call_num_args (stmt);
2646 /* Bail out if the function has more than three arguments; we do not have
2647 interesting builtin functions to vectorize with more than two arguments
2648 except for fma. Calls with no arguments are not handled either. */
2649 if (nargs == 0 || nargs > 3)
2650 return false;
2652 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2653 if (gimple_call_internal_p (stmt)
2654 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2656 nargs = 0;
2657 rhs_type = unsigned_type_node;
2660 for (i = 0; i < nargs; i++)
2662 tree opvectype;
2664 op = gimple_call_arg (stmt, i);
2666 /* We can only handle calls with arguments of the same type. */
2667 if (rhs_type
2668 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2670 if (dump_enabled_p ())
2671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2672 "argument types differ.\n");
2673 return false;
2675 if (!rhs_type)
2676 rhs_type = TREE_TYPE (op);
2678 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2680 if (dump_enabled_p ())
2681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2682 "use not simple.\n");
2683 return false;
2686 if (!vectype_in)
2687 vectype_in = opvectype;
2688 else if (opvectype
2689 && opvectype != vectype_in)
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2693 "argument vector types differ.\n");
2694 return false;
2697 /* If all arguments are external or constant defs, use a vector type with
2698 the same size as the output vector type. */
2699 if (!vectype_in)
2700 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2701 if (vec_stmt)
2702 gcc_assert (vectype_in);
2703 if (!vectype_in)
2705 if (dump_enabled_p ())
2707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2708 "no vectype for scalar type ");
2709 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2710 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2713 return false;
2716 /* FORNOW */
2717 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2718 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2719 if (nunits_in == nunits_out / 2)
2720 modifier = NARROW;
2721 else if (nunits_out == nunits_in)
2722 modifier = NONE;
2723 else if (nunits_out == nunits_in / 2)
2724 modifier = WIDEN;
2725 else
2726 return false;
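/* I.e. the output vector has the same number of elements as the input
   vectors (NONE), twice as many (NARROW, handled below either by packing
   two input-width results or by feeding two input vectors per call), or
   half as many (WIDEN). */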
2728 /* We only handle functions that do not read or clobber memory. */
2729 if (gimple_vuse (stmt))
2731 if (dump_enabled_p ())
2732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2733 "function reads from or writes to memory.\n");
2734 return false;
2737 /* For now, we only vectorize functions if a target-specific builtin
2738 is available. TODO -- in some cases, it might be profitable to
2739 insert the calls for pieces of the vector, in order to be able
2740 to vectorize other operations in the loop. */
2741 fndecl = NULL_TREE;
2742 internal_fn ifn = IFN_LAST;
2743 combined_fn cfn = gimple_call_combined_fn (stmt);
2744 tree callee = gimple_call_fndecl (stmt);
2746 /* First try using an internal function. */
2747 tree_code convert_code = ERROR_MARK;
2748 if (cfn != CFN_LAST
2749 && (modifier == NONE
2750 || (modifier == NARROW
2751 && simple_integer_narrowing (vectype_out, vectype_in,
2752 &convert_code))))
2753 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2754 vectype_in);
2756 /* If that fails, try asking for a target-specific built-in function. */
2757 if (ifn == IFN_LAST)
2759 if (cfn != CFN_LAST)
2760 fndecl = targetm.vectorize.builtin_vectorized_function
2761 (cfn, vectype_out, vectype_in);
2762 else
2763 fndecl = targetm.vectorize.builtin_md_vectorized_function
2764 (callee, vectype_out, vectype_in);
2767 if (ifn == IFN_LAST && !fndecl)
2769 if (cfn == CFN_GOMP_SIMD_LANE
2770 && !slp_node
2771 && loop_vinfo
2772 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2773 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2774 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2775 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2777 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2778 { 0, 1, 2, ... vf - 1 } vector. */
2779 gcc_assert (nargs == 0);
2781 else if (modifier == NONE
2782 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2783 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2784 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2785 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2786 vectype_in, dt);
2787 else
2789 if (dump_enabled_p ())
2790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2791 "function is not vectorizable.\n");
2792 return false;
2796 if (slp_node)
2797 ncopies = 1;
2798 else if (modifier == NARROW && ifn == IFN_LAST)
2799 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2800 else
2801 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2803 /* Sanity check: make sure that at least one copy of the vectorized stmt
2804 needs to be generated. */
2805 gcc_assert (ncopies >= 1);
2807 if (!vec_stmt) /* transformation not required. */
2809 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2810 if (dump_enabled_p ())
2811 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2812 "\n");
2813 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2814 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2815 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2816 vec_promote_demote, stmt_info, 0, vect_body);
2818 return true;
2821 /** Transform. **/
2823 if (dump_enabled_p ())
2824 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2826 /* Handle def. */
2827 scalar_dest = gimple_call_lhs (stmt);
2828 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2830 prev_stmt_info = NULL;
2831 if (modifier == NONE || ifn != IFN_LAST)
2833 tree prev_res = NULL_TREE;
2834 for (j = 0; j < ncopies; ++j)
2836 /* Build argument list for the vectorized call. */
2837 if (j == 0)
2838 vargs.create (nargs);
2839 else
2840 vargs.truncate (0);
2842 if (slp_node)
2844 auto_vec<vec<tree> > vec_defs (nargs);
2845 vec<tree> vec_oprnds0;
2847 for (i = 0; i < nargs; i++)
2848 vargs.quick_push (gimple_call_arg (stmt, i));
2849 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2850 vec_oprnds0 = vec_defs[0];
2852 /* Arguments are ready. Create the new vector stmt. */
2853 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2855 size_t k;
2856 for (k = 0; k < nargs; k++)
2858 vec<tree> vec_oprndsk = vec_defs[k];
2859 vargs[k] = vec_oprndsk[i];
2861 if (modifier == NARROW)
2863 tree half_res = make_ssa_name (vectype_in);
2864 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2865 gimple_call_set_lhs (new_stmt, half_res);
2866 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2867 if ((i & 1) == 0)
2869 prev_res = half_res;
2870 continue;
2872 new_temp = make_ssa_name (vec_dest);
2873 new_stmt = gimple_build_assign (new_temp, convert_code,
2874 prev_res, half_res);
2876 else
2878 if (ifn != IFN_LAST)
2879 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2880 else
2881 new_stmt = gimple_build_call_vec (fndecl, vargs);
2882 new_temp = make_ssa_name (vec_dest, new_stmt);
2883 gimple_call_set_lhs (new_stmt, new_temp);
2885 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2886 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2889 for (i = 0; i < nargs; i++)
2891 vec<tree> vec_oprndsi = vec_defs[i];
2892 vec_oprndsi.release ();
2894 continue;
2897 for (i = 0; i < nargs; i++)
2899 op = gimple_call_arg (stmt, i);
2900 if (j == 0)
2901 vec_oprnd0
2902 = vect_get_vec_def_for_operand (op, stmt);
2903 else
2905 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2906 vec_oprnd0
2907 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2910 vargs.quick_push (vec_oprnd0);
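/* For IFN_GOMP_SIMD_LANE, copy number J simply materializes the constant
   lane-number vector { j * nunits_out, ..., j * nunits_out + nunits_out - 1 }
   instead of emitting a call. */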
2913 if (gimple_call_internal_p (stmt)
2914 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2916 tree *v = XALLOCAVEC (tree, nunits_out);
2917 int k;
2918 for (k = 0; k < nunits_out; ++k)
2919 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2920 tree cst = build_vector (vectype_out, v);
2921 tree new_var
2922 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2923 gimple *init_stmt = gimple_build_assign (new_var, cst);
2924 vect_init_vector_1 (stmt, init_stmt, NULL);
2925 new_temp = make_ssa_name (vec_dest);
2926 new_stmt = gimple_build_assign (new_temp, new_var);
2928 else if (modifier == NARROW)
2930 tree half_res = make_ssa_name (vectype_in);
2931 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2932 gimple_call_set_lhs (new_stmt, half_res);
2933 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2934 if ((j & 1) == 0)
2936 prev_res = half_res;
2937 continue;
2939 new_temp = make_ssa_name (vec_dest);
2940 new_stmt = gimple_build_assign (new_temp, convert_code,
2941 prev_res, half_res);
2943 else
2945 if (ifn != IFN_LAST)
2946 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2947 else
2948 new_stmt = gimple_build_call_vec (fndecl, vargs);
2949 new_temp = make_ssa_name (vec_dest, new_stmt);
2950 gimple_call_set_lhs (new_stmt, new_temp);
2952 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2954 if (j == (modifier == NARROW ? 1 : 0))
2955 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2956 else
2957 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2959 prev_stmt_info = vinfo_for_stmt (new_stmt);
2962 else if (modifier == NARROW)
2964 for (j = 0; j < ncopies; ++j)
2966 /* Build argument list for the vectorized call. */
2967 if (j == 0)
2968 vargs.create (nargs * 2);
2969 else
2970 vargs.truncate (0);
2972 if (slp_node)
2974 auto_vec<vec<tree> > vec_defs (nargs);
2975 vec<tree> vec_oprnds0;
2977 for (i = 0; i < nargs; i++)
2978 vargs.quick_push (gimple_call_arg (stmt, i));
2979 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2980 vec_oprnds0 = vec_defs[0];
2982 /* Arguments are ready. Create the new vector stmt. */
2983 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2985 size_t k;
2986 vargs.truncate (0);
2987 for (k = 0; k < nargs; k++)
2989 vec<tree> vec_oprndsk = vec_defs[k];
2990 vargs.quick_push (vec_oprndsk[i]);
2991 vargs.quick_push (vec_oprndsk[i + 1]);
2993 if (ifn != IFN_LAST)
2994 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2995 else
2996 new_stmt = gimple_build_call_vec (fndecl, vargs);
2997 new_temp = make_ssa_name (vec_dest, new_stmt);
2998 gimple_call_set_lhs (new_stmt, new_temp);
2999 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3000 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3003 for (i = 0; i < nargs; i++)
3005 vec<tree> vec_oprndsi = vec_defs[i];
3006 vec_oprndsi.release ();
3008 continue;
3011 for (i = 0; i < nargs; i++)
3013 op = gimple_call_arg (stmt, i);
3014 if (j == 0)
3016 vec_oprnd0
3017 = vect_get_vec_def_for_operand (op, stmt);
3018 vec_oprnd1
3019 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3021 else
3023 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3024 vec_oprnd0
3025 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3026 vec_oprnd1
3027 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3030 vargs.quick_push (vec_oprnd0);
3031 vargs.quick_push (vec_oprnd1);
3034 new_stmt = gimple_build_call_vec (fndecl, vargs);
3035 new_temp = make_ssa_name (vec_dest, new_stmt);
3036 gimple_call_set_lhs (new_stmt, new_temp);
3037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3039 if (j == 0)
3040 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3041 else
3042 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3044 prev_stmt_info = vinfo_for_stmt (new_stmt);
3047 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3049 else
3050 /* No current target implements this case. */
3051 return false;
3053 vargs.release ();
3055 /* The call in STMT might prevent it from being removed in dce.
3056 However, we cannot remove it here because of the way the ssa name
3057 it defines is mapped to the new definition. So just replace the
3058 rhs of the statement with something harmless. */
3060 if (slp_node)
3061 return true;
3063 type = TREE_TYPE (scalar_dest);
3064 if (is_pattern_stmt_p (stmt_info))
3065 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3066 else
3067 lhs = gimple_call_lhs (stmt);
3069 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3070 set_vinfo_for_stmt (new_stmt, stmt_info);
3071 set_vinfo_for_stmt (stmt, NULL);
3072 STMT_VINFO_STMT (stmt_info) = new_stmt;
3073 gsi_replace (gsi, new_stmt, false);
3075 return true;
3079 struct simd_call_arg_info
3081 tree vectype;
3082 tree op;
3083 HOST_WIDE_INT linear_step;
3084 enum vect_def_type dt;
3085 unsigned int align;
3086 bool simd_lane_linear;
3089 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3090 is linear within a simd lane (but not within the whole loop), note it in
3091 *ARGINFO. */
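/* In other words, recognize OP as essentially BASE + GOMP_SIMD_LANE * STEP
   (possibly through intermediate conversions, with constant offsets folded
   into BASE), and record BASE and STEP in *ARGINFO so the argument can be
   treated as linear when matching simd clones. */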
3093 static void
3094 vect_simd_lane_linear (tree op, struct loop *loop,
3095 struct simd_call_arg_info *arginfo)
3097 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3099 if (!is_gimple_assign (def_stmt)
3100 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3101 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3102 return;
3104 tree base = gimple_assign_rhs1 (def_stmt);
3105 HOST_WIDE_INT linear_step = 0;
3106 tree v = gimple_assign_rhs2 (def_stmt);
3107 while (TREE_CODE (v) == SSA_NAME)
3109 tree t;
3110 def_stmt = SSA_NAME_DEF_STMT (v);
3111 if (is_gimple_assign (def_stmt))
3112 switch (gimple_assign_rhs_code (def_stmt))
3114 case PLUS_EXPR:
3115 t = gimple_assign_rhs2 (def_stmt);
3116 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3117 return;
3118 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3119 v = gimple_assign_rhs1 (def_stmt);
3120 continue;
3121 case MULT_EXPR:
3122 t = gimple_assign_rhs2 (def_stmt);
3123 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3124 return;
3125 linear_step = tree_to_shwi (t);
3126 v = gimple_assign_rhs1 (def_stmt);
3127 continue;
3128 CASE_CONVERT:
3129 t = gimple_assign_rhs1 (def_stmt);
3130 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3131 || (TYPE_PRECISION (TREE_TYPE (v))
3132 < TYPE_PRECISION (TREE_TYPE (t))))
3133 return;
3134 if (!linear_step)
3135 linear_step = 1;
3136 v = t;
3137 continue;
3138 default:
3139 return;
3141 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3142 && loop->simduid
3143 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3144 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3145 == loop->simduid))
3147 if (!linear_step)
3148 linear_step = 1;
3149 arginfo->linear_step = linear_step;
3150 arginfo->op = base;
3151 arginfo->simd_lane_linear = true;
3152 return;
3157 /* Function vectorizable_simd_clone_call.
3159 Check if STMT performs a function call that can be vectorized
3160 by calling a simd clone of the function.
3161 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3162 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3163 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3165 static bool
3166 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3167 gimple **vec_stmt, slp_tree slp_node)
3169 tree vec_dest;
3170 tree scalar_dest;
3171 tree op, type;
3172 tree vec_oprnd0 = NULL_TREE;
3173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3174 tree vectype;
3175 unsigned int nunits;
3176 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3177 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3178 vec_info *vinfo = stmt_info->vinfo;
3179 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3180 tree fndecl, new_temp;
3181 gimple *def_stmt;
3182 gimple *new_stmt = NULL;
3183 int ncopies, j;
3184 auto_vec<simd_call_arg_info> arginfo;
3185 vec<tree> vargs = vNULL;
3186 size_t i, nargs;
3187 tree lhs, rtype, ratype;
3188 vec<constructor_elt, va_gc> *ret_ctor_elts;
3190 /* Is STMT a vectorizable call? */
3191 if (!is_gimple_call (stmt))
3192 return false;
3194 fndecl = gimple_call_fndecl (stmt);
3195 if (fndecl == NULL_TREE)
3196 return false;
3198 struct cgraph_node *node = cgraph_node::get (fndecl);
3199 if (node == NULL || node->simd_clones == NULL)
3200 return false;
3202 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3203 return false;
3205 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3206 && ! vec_stmt)
3207 return false;
3209 if (gimple_call_lhs (stmt)
3210 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3211 return false;
3213 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3215 vectype = STMT_VINFO_VECTYPE (stmt_info);
3217 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3218 return false;
3220 /* FORNOW */
3221 if (slp_node)
3222 return false;
3224 /* Process function arguments. */
3225 nargs = gimple_call_num_args (stmt);
3227 /* Bail out if the function has zero arguments. */
3228 if (nargs == 0)
3229 return false;
3231 arginfo.reserve (nargs, true);
3233 for (i = 0; i < nargs; i++)
3235 simd_call_arg_info thisarginfo;
3236 affine_iv iv;
3238 thisarginfo.linear_step = 0;
3239 thisarginfo.align = 0;
3240 thisarginfo.op = NULL_TREE;
3241 thisarginfo.simd_lane_linear = false;
3243 op = gimple_call_arg (stmt, i);
3244 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3245 &thisarginfo.vectype)
3246 || thisarginfo.dt == vect_uninitialized_def)
3248 if (dump_enabled_p ())
3249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3250 "use not simple.\n");
3251 return false;
3254 if (thisarginfo.dt == vect_constant_def
3255 || thisarginfo.dt == vect_external_def)
3256 gcc_assert (thisarginfo.vectype == NULL_TREE);
3257 else
3258 gcc_assert (thisarginfo.vectype != NULL_TREE);
3260 /* For linear arguments, the analyze phase should have saved
3261 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3262 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3263 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3265 gcc_assert (vec_stmt);
3266 thisarginfo.linear_step
3267 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3268 thisarginfo.op
3269 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3270 thisarginfo.simd_lane_linear
3271 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3272 == boolean_true_node);
3273 /* If the loop has been peeled for alignment, we need to adjust it. */
3274 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3275 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3276 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3278 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3279 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3280 tree opt = TREE_TYPE (thisarginfo.op);
3281 bias = fold_convert (TREE_TYPE (step), bias);
3282 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3283 thisarginfo.op
3284 = fold_build2 (POINTER_TYPE_P (opt)
3285 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3286 thisarginfo.op, bias);
3289 else if (!vec_stmt
3290 && thisarginfo.dt != vect_constant_def
3291 && thisarginfo.dt != vect_external_def
3292 && loop_vinfo
3293 && TREE_CODE (op) == SSA_NAME
3294 && simple_iv (loop, loop_containing_stmt (stmt), op,
3295 &iv, false)
3296 && tree_fits_shwi_p (iv.step))
3298 thisarginfo.linear_step = tree_to_shwi (iv.step);
3299 thisarginfo.op = iv.base;
3301 else if ((thisarginfo.dt == vect_constant_def
3302 || thisarginfo.dt == vect_external_def)
3303 && POINTER_TYPE_P (TREE_TYPE (op)))
3304 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3305 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3306 linear too. */
3307 if (POINTER_TYPE_P (TREE_TYPE (op))
3308 && !thisarginfo.linear_step
3309 && !vec_stmt
3310 && thisarginfo.dt != vect_constant_def
3311 && thisarginfo.dt != vect_external_def
3312 && loop_vinfo
3313 && !slp_node
3314 && TREE_CODE (op) == SSA_NAME)
3315 vect_simd_lane_linear (op, loop, &thisarginfo);
3317 arginfo.quick_push (thisarginfo);
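/* Select the best simd clone for this call: clones whose simdlen exceeds
   the vectorization factor or whose argument kinds, linear steps or
   alignments do not match the operands are skipped, while shorter
   simdlens, in-branch clones and a nonzero target preference score all
   add to the badness. */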
3320 unsigned int badness = 0;
3321 struct cgraph_node *bestn = NULL;
3322 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3323 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3324 else
3325 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3326 n = n->simdclone->next_clone)
3328 unsigned int this_badness = 0;
3329 if (n->simdclone->simdlen
3330 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3331 || n->simdclone->nargs != nargs)
3332 continue;
3333 if (n->simdclone->simdlen
3334 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3335 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3336 - exact_log2 (n->simdclone->simdlen)) * 1024;
3337 if (n->simdclone->inbranch)
3338 this_badness += 2048;
3339 int target_badness = targetm.simd_clone.usable (n);
3340 if (target_badness < 0)
3341 continue;
3342 this_badness += target_badness * 512;
3343 /* FORNOW: Have to add code to add the mask argument. */
3344 if (n->simdclone->inbranch)
3345 continue;
3346 for (i = 0; i < nargs; i++)
3348 switch (n->simdclone->args[i].arg_type)
3350 case SIMD_CLONE_ARG_TYPE_VECTOR:
3351 if (!useless_type_conversion_p
3352 (n->simdclone->args[i].orig_type,
3353 TREE_TYPE (gimple_call_arg (stmt, i))))
3354 i = -1;
3355 else if (arginfo[i].dt == vect_constant_def
3356 || arginfo[i].dt == vect_external_def
3357 || arginfo[i].linear_step)
3358 this_badness += 64;
3359 break;
3360 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3361 if (arginfo[i].dt != vect_constant_def
3362 && arginfo[i].dt != vect_external_def)
3363 i = -1;
3364 break;
3365 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3366 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3367 if (arginfo[i].dt == vect_constant_def
3368 || arginfo[i].dt == vect_external_def
3369 || (arginfo[i].linear_step
3370 != n->simdclone->args[i].linear_step))
3371 i = -1;
3372 break;
3373 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3374 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3375 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3376 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3377 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3378 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3379 /* FORNOW */
3380 i = -1;
3381 break;
3382 case SIMD_CLONE_ARG_TYPE_MASK:
3383 gcc_unreachable ();
3385 if (i == (size_t) -1)
3386 break;
3387 if (n->simdclone->args[i].alignment > arginfo[i].align)
3389 i = -1;
3390 break;
3392 if (arginfo[i].align)
3393 this_badness += (exact_log2 (arginfo[i].align)
3394 - exact_log2 (n->simdclone->args[i].alignment));
3396 if (i == (size_t) -1)
3397 continue;
3398 if (bestn == NULL || this_badness < badness)
3400 bestn = n;
3401 badness = this_badness;
3405 if (bestn == NULL)
3406 return false;
3408 for (i = 0; i < nargs; i++)
3409 if ((arginfo[i].dt == vect_constant_def
3410 || arginfo[i].dt == vect_external_def)
3411 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3413 arginfo[i].vectype
3414 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3415 i)));
3416 if (arginfo[i].vectype == NULL
3417 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3418 > bestn->simdclone->simdlen))
3419 return false;
3422 fndecl = bestn->decl;
3423 nunits = bestn->simdclone->simdlen;
3424 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3426 /* If the function isn't const, only allow it in simd loops where the user
3427 has asserted that at least nunits consecutive iterations can be
3428 performed using SIMD instructions. */
3429 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3430 && gimple_vuse (stmt))
3431 return false;
3433 /* Sanity check: make sure that at least one copy of the vectorized stmt
3434 needs to be generated. */
3435 gcc_assert (ncopies >= 1);
3437 if (!vec_stmt) /* transformation not required. */
3439 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3440 for (i = 0; i < nargs; i++)
3441 if ((bestn->simdclone->args[i].arg_type
3442 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3443 || (bestn->simdclone->args[i].arg_type
3444 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3446 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3447 + 1);
3448 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3449 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3450 ? size_type_node : TREE_TYPE (arginfo[i].op);
3451 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3452 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3453 tree sll = arginfo[i].simd_lane_linear
3454 ? boolean_true_node : boolean_false_node;
3455 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3457 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3458 if (dump_enabled_p ())
3459 dump_printf_loc (MSG_NOTE, vect_location,
3460 "=== vectorizable_simd_clone_call ===\n");
3461 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3462 return true;
3465 /** Transform. **/
3467 if (dump_enabled_p ())
3468 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3470 /* Handle def. */
3471 scalar_dest = gimple_call_lhs (stmt);
3472 vec_dest = NULL_TREE;
3473 rtype = NULL_TREE;
3474 ratype = NULL_TREE;
3475 if (scalar_dest)
3477 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3478 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3479 if (TREE_CODE (rtype) == ARRAY_TYPE)
3481 ratype = rtype;
3482 rtype = TREE_TYPE (ratype);
3486 prev_stmt_info = NULL;
3487 for (j = 0; j < ncopies; ++j)
3489 /* Build argument list for the vectorized call. */
3490 if (j == 0)
3491 vargs.create (nargs);
3492 else
3493 vargs.truncate (0);
3495 for (i = 0; i < nargs; i++)
3497 unsigned int k, l, m, o;
3498 tree atype;
3499 op = gimple_call_arg (stmt, i);
3500 switch (bestn->simdclone->args[i].arg_type)
3502 case SIMD_CLONE_ARG_TYPE_VECTOR:
3503 atype = bestn->simdclone->args[i].vector_type;
3504 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
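/* Each clone call receives O arguments of type ATYPE for this parameter.
   If ATYPE is narrower than the operand's vector type, pieces are
   extracted with BIT_FIELD_REF below; if it is wider, several vector defs
   are glued together with a CONSTRUCTOR. */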
3505 for (m = j * o; m < (j + 1) * o; m++)
3507 if (TYPE_VECTOR_SUBPARTS (atype)
3508 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3510 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3511 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3512 / TYPE_VECTOR_SUBPARTS (atype));
3513 gcc_assert ((k & (k - 1)) == 0);
3514 if (m == 0)
3515 vec_oprnd0
3516 = vect_get_vec_def_for_operand (op, stmt);
3517 else
3519 vec_oprnd0 = arginfo[i].op;
3520 if ((m & (k - 1)) == 0)
3521 vec_oprnd0
3522 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3523 vec_oprnd0);
3525 arginfo[i].op = vec_oprnd0;
3526 vec_oprnd0
3527 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3528 size_int (prec),
3529 bitsize_int ((m & (k - 1)) * prec));
3530 new_stmt
3531 = gimple_build_assign (make_ssa_name (atype),
3532 vec_oprnd0);
3533 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3534 vargs.safe_push (gimple_assign_lhs (new_stmt));
3536 else
3538 k = (TYPE_VECTOR_SUBPARTS (atype)
3539 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3540 gcc_assert ((k & (k - 1)) == 0);
3541 vec<constructor_elt, va_gc> *ctor_elts;
3542 if (k != 1)
3543 vec_alloc (ctor_elts, k);
3544 else
3545 ctor_elts = NULL;
3546 for (l = 0; l < k; l++)
3548 if (m == 0 && l == 0)
3549 vec_oprnd0
3550 = vect_get_vec_def_for_operand (op, stmt);
3551 else
3552 vec_oprnd0
3553 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3554 arginfo[i].op);
3555 arginfo[i].op = vec_oprnd0;
3556 if (k == 1)
3557 break;
3558 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3559 vec_oprnd0);
3561 if (k == 1)
3562 vargs.safe_push (vec_oprnd0);
3563 else
3565 vec_oprnd0 = build_constructor (atype, ctor_elts);
3566 new_stmt
3567 = gimple_build_assign (make_ssa_name (atype),
3568 vec_oprnd0);
3569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3570 vargs.safe_push (gimple_assign_lhs (new_stmt));
3574 break;
3575 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3576 vargs.safe_push (op);
3577 break;
3578 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3579 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
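/* A linear argument advances by the clone's LINEAR_STEP per scalar lane.
   For the first copy, unless the value is already linear within the simd
   lane, create a PHI in the loop header that steps the starting value by
   LINEAR_STEP * NCOPIES * NUNITS per vector iteration; later copies just
   offset that value by J * NUNITS * LINEAR_STEP. */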
3580 if (j == 0)
3582 gimple_seq stmts;
3583 arginfo[i].op
3584 = force_gimple_operand (arginfo[i].op, &stmts, true,
3585 NULL_TREE);
3586 if (stmts != NULL)
3588 basic_block new_bb;
3589 edge pe = loop_preheader_edge (loop);
3590 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3591 gcc_assert (!new_bb);
3593 if (arginfo[i].simd_lane_linear)
3595 vargs.safe_push (arginfo[i].op);
3596 break;
3598 tree phi_res = copy_ssa_name (op);
3599 gphi *new_phi = create_phi_node (phi_res, loop->header);
3600 set_vinfo_for_stmt (new_phi,
3601 new_stmt_vec_info (new_phi, loop_vinfo));
3602 add_phi_arg (new_phi, arginfo[i].op,
3603 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3604 enum tree_code code
3605 = POINTER_TYPE_P (TREE_TYPE (op))
3606 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3607 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3608 ? sizetype : TREE_TYPE (op);
3609 widest_int cst
3610 = wi::mul (bestn->simdclone->args[i].linear_step,
3611 ncopies * nunits);
3612 tree tcst = wide_int_to_tree (type, cst);
3613 tree phi_arg = copy_ssa_name (op);
3614 new_stmt
3615 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3616 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3617 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3618 set_vinfo_for_stmt (new_stmt,
3619 new_stmt_vec_info (new_stmt, loop_vinfo));
3620 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3621 UNKNOWN_LOCATION);
3622 arginfo[i].op = phi_res;
3623 vargs.safe_push (phi_res);
3625 else
3627 enum tree_code code
3628 = POINTER_TYPE_P (TREE_TYPE (op))
3629 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3630 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3631 ? sizetype : TREE_TYPE (op);
3632 widest_int cst
3633 = wi::mul (bestn->simdclone->args[i].linear_step,
3634 j * nunits);
3635 tree tcst = wide_int_to_tree (type, cst);
3636 new_temp = make_ssa_name (TREE_TYPE (op));
3637 new_stmt = gimple_build_assign (new_temp, code,
3638 arginfo[i].op, tcst);
3639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3640 vargs.safe_push (new_temp);
3642 break;
3643 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3644 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3645 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3646 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3647 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3648 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3649 default:
3650 gcc_unreachable ();
3654 new_stmt = gimple_build_call_vec (fndecl, vargs);
3655 if (vec_dest)
3657 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3658 if (ratype)
3659 new_temp = create_tmp_var (ratype);
3660 else if (TYPE_VECTOR_SUBPARTS (vectype)
3661 == TYPE_VECTOR_SUBPARTS (rtype))
3662 new_temp = make_ssa_name (vec_dest, new_stmt);
3663 else
3664 new_temp = make_ssa_name (rtype, new_stmt);
3665 gimple_call_set_lhs (new_stmt, new_temp);
3667 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3669 if (vec_dest)
3671 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3673 unsigned int k, l;
3674 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3675 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3676 gcc_assert ((k & (k - 1)) == 0);
3677 for (l = 0; l < k; l++)
3679 tree t;
3680 if (ratype)
3682 t = build_fold_addr_expr (new_temp);
3683 t = build2 (MEM_REF, vectype, t,
3684 build_int_cst (TREE_TYPE (t),
3685 l * prec / BITS_PER_UNIT));
3687 else
3688 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3689 size_int (prec), bitsize_int (l * prec));
3690 new_stmt
3691 = gimple_build_assign (make_ssa_name (vectype), t);
3692 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3693 if (j == 0 && l == 0)
3694 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3695 else
3696 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3698 prev_stmt_info = vinfo_for_stmt (new_stmt);
3701 if (ratype)
3703 tree clobber = build_constructor (ratype, NULL);
3704 TREE_THIS_VOLATILE (clobber) = 1;
3705 new_stmt = gimple_build_assign (new_temp, clobber);
3706 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3708 continue;
3710 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3712 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3713 / TYPE_VECTOR_SUBPARTS (rtype));
3714 gcc_assert ((k & (k - 1)) == 0);
3715 if ((j & (k - 1)) == 0)
3716 vec_alloc (ret_ctor_elts, k);
3717 if (ratype)
3719 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3720 for (m = 0; m < o; m++)
3722 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3723 size_int (m), NULL_TREE, NULL_TREE);
3724 new_stmt
3725 = gimple_build_assign (make_ssa_name (rtype), tem);
3726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3727 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3728 gimple_assign_lhs (new_stmt));
3730 tree clobber = build_constructor (ratype, NULL);
3731 TREE_THIS_VOLATILE (clobber) = 1;
3732 new_stmt = gimple_build_assign (new_temp, clobber);
3733 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3735 else
3736 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3737 if ((j & (k - 1)) != k - 1)
3738 continue;
3739 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3740 new_stmt
3741 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3744 if ((unsigned) j == k - 1)
3745 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3746 else
3747 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3749 prev_stmt_info = vinfo_for_stmt (new_stmt);
3750 continue;
3752 else if (ratype)
3754 tree t = build_fold_addr_expr (new_temp);
3755 t = build2 (MEM_REF, vectype, t,
3756 build_int_cst (TREE_TYPE (t), 0));
3757 new_stmt
3758 = gimple_build_assign (make_ssa_name (vec_dest), t);
3759 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3760 tree clobber = build_constructor (ratype, NULL);
3761 TREE_THIS_VOLATILE (clobber) = 1;
3762 vect_finish_stmt_generation (stmt,
3763 gimple_build_assign (new_temp,
3764 clobber), gsi);
3768 if (j == 0)
3769 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3770 else
3771 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3773 prev_stmt_info = vinfo_for_stmt (new_stmt);
3776 vargs.release ();
3778 /* The call in STMT might prevent it from being removed in DCE.
3779 We, however, cannot remove it here because of the way the SSA name
3780 it defines is mapped to the new definition. So just replace the
3781 rhs of the statement with something harmless. */
3783 if (slp_node)
3784 return true;
3786 if (scalar_dest)
3788 type = TREE_TYPE (scalar_dest);
3789 if (is_pattern_stmt_p (stmt_info))
3790 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3791 else
3792 lhs = gimple_call_lhs (stmt);
3793 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3795 else
3796 new_stmt = gimple_build_nop ();
3797 set_vinfo_for_stmt (new_stmt, stmt_info);
3798 set_vinfo_for_stmt (stmt, NULL);
3799 STMT_VINFO_STMT (stmt_info) = new_stmt;
3800 gsi_replace (gsi, new_stmt, true);
3801 unlink_stmt_vdef (stmt);
3803 return true;
3807 /* Function vect_gen_widened_results_half
3809 Create a vector stmt whose code, type and number of arguments are
3810 CODE and OP_TYPE, whose result variable is VEC_DEST, and whose arguments
3811 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3812 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3813 needs to be created (DECL is a function-decl of a target-builtin).
3814 STMT is the original scalar stmt that we are vectorizing. */
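/* As an illustration (a sketch only, not lifted from any particular
   target): for a widening multiply of two V8HI operands with
   CODE == VEC_WIDEN_MULT_LO_EXPR, the stmt generated here is

     vres_lo_1 = VEC_WIDEN_MULT_LO_EXPR <vx_1, vy_1>;

   and the caller invokes this function a second time with the
   corresponding _HI code to obtain the other half of the result.  */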
3816 static gimple *
3817 vect_gen_widened_results_half (enum tree_code code,
3818 tree decl,
3819 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3820 tree vec_dest, gimple_stmt_iterator *gsi,
3821 gimple *stmt)
3823 gimple *new_stmt;
3824 tree new_temp;
3826 /* Generate half of the widened result: */
3827 if (code == CALL_EXPR)
3829 /* Target specific support */
3830 if (op_type == binary_op)
3831 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3832 else
3833 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3834 new_temp = make_ssa_name (vec_dest, new_stmt);
3835 gimple_call_set_lhs (new_stmt, new_temp);
3837 else
3839 /* Generic support */
3840 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3841 if (op_type != binary_op)
3842 vec_oprnd1 = NULL;
3843 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3844 new_temp = make_ssa_name (vec_dest, new_stmt);
3845 gimple_assign_set_lhs (new_stmt, new_temp);
3847 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3849 return new_stmt;
3853 /* Get vectorized definitions for loop-based vectorization. For the first
3854 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3855 the scalar operand), and for the rest we get a copy with
3856 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3857 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3858 The vectors are collected into VEC_OPRNDS. */
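/* A sketch of the defs collected by one call (assuming
   MULTI_STEP_CVT == 1, i.e. one level of recursion):

     vx0 = vect_get_vec_def_for_operand (x, stmt);
     vx1 = vect_get_vec_def_for_stmt_copy (dt, vx0);
     vx2 = vect_get_vec_def_for_stmt_copy (dt, vx1);
     vx3 = vect_get_vec_def_for_stmt_copy (dt, vx2);

   i.e. VEC_OPRNDS receives four vector defs covering four consecutive
   vector-sized chunks of the scalar operand.  */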
3860 static void
3861 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3862 vec<tree> *vec_oprnds, int multi_step_cvt)
3864 tree vec_oprnd;
3866 /* Get first vector operand. */
3867 /* All the vector operands except the very first one (that is the scalar oprnd)
3868 are stmt copies. */
3869 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3870 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3871 else
3872 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3874 vec_oprnds->quick_push (vec_oprnd);
3876 /* Get second vector operand. */
3877 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3878 vec_oprnds->quick_push (vec_oprnd);
3880 *oprnd = vec_oprnd;
3882 /* For conversion in multiple steps, continue to get operands
3883 recursively. */
3884 if (multi_step_cvt)
3885 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3889 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3890 For multi-step conversions store the resulting vectors and call the function
3891 recursively. */
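/* A minimal sketch (assuming a single-step narrowing with
   CODE == VEC_PACK_TRUNC_EXPR): consecutive pairs of V4SI operands in
   VEC_OPRNDS are combined into single V8HI results:

     vy0 = VEC_PACK_TRUNC_EXPR <vx0, vx1>;
     vy1 = VEC_PACK_TRUNC_EXPR <vx2, vx3>;

   For a multi-step conversion the vy* results are stored back into
   VEC_OPRNDS and packed again by the recursive call.  */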
3893 static void
3894 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3895 int multi_step_cvt, gimple *stmt,
3896 vec<tree> vec_dsts,
3897 gimple_stmt_iterator *gsi,
3898 slp_tree slp_node, enum tree_code code,
3899 stmt_vec_info *prev_stmt_info)
3901 unsigned int i;
3902 tree vop0, vop1, new_tmp, vec_dest;
3903 gimple *new_stmt;
3904 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3906 vec_dest = vec_dsts.pop ();
3908 for (i = 0; i < vec_oprnds->length (); i += 2)
3910 /* Create demotion operation. */
3911 vop0 = (*vec_oprnds)[i];
3912 vop1 = (*vec_oprnds)[i + 1];
3913 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3914 new_tmp = make_ssa_name (vec_dest, new_stmt);
3915 gimple_assign_set_lhs (new_stmt, new_tmp);
3916 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3918 if (multi_step_cvt)
3919 /* Store the resulting vector for next recursive call. */
3920 (*vec_oprnds)[i/2] = new_tmp;
3921 else
3923 /* This is the last step of the conversion sequence. Store the
3924 vectors in SLP_NODE or in the vector info of the scalar statement
3925 (or in the STMT_VINFO_RELATED_STMT chain). */
3926 if (slp_node)
3927 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3928 else
3930 if (!*prev_stmt_info)
3931 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3932 else
3933 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3935 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3940 /* For multi-step demotion operations we first generate demotion operations
3941 from the source type to the intermediate types, and then combine the
3942 results (stored in VEC_OPRNDS) with a demotion operation to the
3943 destination type. */
3944 if (multi_step_cvt)
3946 /* At each level of recursion we have half of the operands we had at the
3947 previous level. */
3948 vec_oprnds->truncate ((i+1)/2);
3949 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3950 stmt, vec_dsts, gsi, slp_node,
3951 VEC_PACK_TRUNC_EXPR,
3952 prev_stmt_info);
3955 vec_dsts.quick_push (vec_dest);
3959 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3960 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3961 the resulting vectors and call the function recursively. */
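/* A minimal sketch (assuming a unary widening from V8HI to V4SI using
   the hi/lo unpack codes as CODE1/CODE2): each operand vx in
   VEC_OPRNDS0 yields two results,

     vy_lo = VEC_UNPACK_LO_EXPR <vx>;
     vy_hi = VEC_UNPACK_HI_EXPR <vx>;

   which replace vx in VEC_OPRNDS0 for the next conversion step.  */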
3963 static void
3964 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3965 vec<tree> *vec_oprnds1,
3966 gimple *stmt, tree vec_dest,
3967 gimple_stmt_iterator *gsi,
3968 enum tree_code code1,
3969 enum tree_code code2, tree decl1,
3970 tree decl2, int op_type)
3972 int i;
3973 tree vop0, vop1, new_tmp1, new_tmp2;
3974 gimple *new_stmt1, *new_stmt2;
3975 vec<tree> vec_tmp = vNULL;
3977 vec_tmp.create (vec_oprnds0->length () * 2);
3978 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3980 if (op_type == binary_op)
3981 vop1 = (*vec_oprnds1)[i];
3982 else
3983 vop1 = NULL_TREE;
3985 /* Generate the two halves of the promotion operation. */
3986 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3987 op_type, vec_dest, gsi, stmt);
3988 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3989 op_type, vec_dest, gsi, stmt);
3990 if (is_gimple_call (new_stmt1))
3992 new_tmp1 = gimple_call_lhs (new_stmt1);
3993 new_tmp2 = gimple_call_lhs (new_stmt2);
3995 else
3997 new_tmp1 = gimple_assign_lhs (new_stmt1);
3998 new_tmp2 = gimple_assign_lhs (new_stmt2);
4001 /* Store the results for the next step. */
4002 vec_tmp.quick_push (new_tmp1);
4003 vec_tmp.quick_push (new_tmp2);
4006 vec_oprnds0->release ();
4007 *vec_oprnds0 = vec_tmp;
4011 /* Check if STMT performs a conversion operation that can be vectorized.
4012 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4013 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4014 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
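/* An example of the kind of scalar statement handled here (a sketch
   only):

     f_2 = (float) i_1;

   With V4SI/V4SF vectypes the NONE case below emits one FLOAT_EXPR per
   vector copy; conversions that change the element width go through
   the WIDEN or NARROW cases instead.  */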
4016 static bool
4017 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4018 gimple **vec_stmt, slp_tree slp_node)
4020 tree vec_dest;
4021 tree scalar_dest;
4022 tree op0, op1 = NULL_TREE;
4023 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4024 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4025 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4026 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4027 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4028 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4029 tree new_temp;
4030 gimple *def_stmt;
4031 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4032 int ndts = 2;
4033 gimple *new_stmt = NULL;
4034 stmt_vec_info prev_stmt_info;
4035 int nunits_in;
4036 int nunits_out;
4037 tree vectype_out, vectype_in;
4038 int ncopies, i, j;
4039 tree lhs_type, rhs_type;
4040 enum { NARROW, NONE, WIDEN } modifier;
4041 vec<tree> vec_oprnds0 = vNULL;
4042 vec<tree> vec_oprnds1 = vNULL;
4043 tree vop0;
4044 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4045 vec_info *vinfo = stmt_info->vinfo;
4046 int multi_step_cvt = 0;
4047 vec<tree> interm_types = vNULL;
4048 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4049 int op_type;
4050 machine_mode rhs_mode;
4051 unsigned short fltsz;
4053 /* Is STMT a vectorizable conversion? */
4055 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4056 return false;
4058 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4059 && ! vec_stmt)
4060 return false;
4062 if (!is_gimple_assign (stmt))
4063 return false;
4065 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4066 return false;
4068 code = gimple_assign_rhs_code (stmt);
4069 if (!CONVERT_EXPR_CODE_P (code)
4070 && code != FIX_TRUNC_EXPR
4071 && code != FLOAT_EXPR
4072 && code != WIDEN_MULT_EXPR
4073 && code != WIDEN_LSHIFT_EXPR)
4074 return false;
4076 op_type = TREE_CODE_LENGTH (code);
4078 /* Check types of lhs and rhs. */
4079 scalar_dest = gimple_assign_lhs (stmt);
4080 lhs_type = TREE_TYPE (scalar_dest);
4081 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4083 op0 = gimple_assign_rhs1 (stmt);
4084 rhs_type = TREE_TYPE (op0);
4086 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4087 && !((INTEGRAL_TYPE_P (lhs_type)
4088 && INTEGRAL_TYPE_P (rhs_type))
4089 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4090 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4091 return false;
4093 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4094 && ((INTEGRAL_TYPE_P (lhs_type)
4095 && (TYPE_PRECISION (lhs_type)
4096 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4097 || (INTEGRAL_TYPE_P (rhs_type)
4098 && (TYPE_PRECISION (rhs_type)
4099 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4101 if (dump_enabled_p ())
4102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4103 "type conversion to/from bit-precision unsupported."
4104 "\n");
4105 return false;
4108 /* Check the operands of the operation. */
4109 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4111 if (dump_enabled_p ())
4112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4113 "use not simple.\n");
4114 return false;
4116 if (op_type == binary_op)
4118 bool ok;
4120 op1 = gimple_assign_rhs2 (stmt);
4121 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4122 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4123 OP1. */
4124 if (CONSTANT_CLASS_P (op0))
4125 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4126 else
4127 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4129 if (!ok)
4131 if (dump_enabled_p ())
4132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4133 "use not simple.\n");
4134 return false;
4138 /* If op0 is an external or constant def, use a vector type of
4139 the same size as the output vector type. */
4140 if (!vectype_in)
4141 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4142 if (vec_stmt)
4143 gcc_assert (vectype_in);
4144 if (!vectype_in)
4146 if (dump_enabled_p ())
4148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4149 "no vectype for scalar type ");
4150 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4151 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4154 return false;
4157 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4158 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4160 if (dump_enabled_p ())
4162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4163 "can't convert between boolean and non "
4164 "boolean vectors");
4165 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4166 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4169 return false;
4172 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4173 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4174 if (nunits_in < nunits_out)
4175 modifier = NARROW;
4176 else if (nunits_out == nunits_in)
4177 modifier = NONE;
4178 else
4179 modifier = WIDEN;
4181 /* Multiple types in SLP are handled by creating the appropriate number of
4182 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4183 case of SLP. */
4184 if (slp_node)
4185 ncopies = 1;
4186 else if (modifier == NARROW)
4187 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4188 else
4189 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4191 /* Sanity check: make sure that at least one copy of the vectorized stmt
4192 needs to be generated. */
4193 gcc_assert (ncopies >= 1);
4195 /* Supportable by target? */
4196 switch (modifier)
4198 case NONE:
4199 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4200 return false;
4201 if (supportable_convert_operation (code, vectype_out, vectype_in,
4202 &decl1, &code1))
4203 break;
4204 /* FALLTHRU */
4205 unsupported:
4206 if (dump_enabled_p ())
4207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4208 "conversion not supported by target.\n");
4209 return false;
4211 case WIDEN:
4212 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4213 &code1, &code2, &multi_step_cvt,
4214 &interm_types))
4216 /* Binary widening operation can only be supported directly by the
4217 architecture. */
4218 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4219 break;
4222 if (code != FLOAT_EXPR
4223 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4224 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4225 goto unsupported;
4227 rhs_mode = TYPE_MODE (rhs_type);
4228 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4229 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4230 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4231 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4233 cvt_type
4234 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4235 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4236 if (cvt_type == NULL_TREE)
4237 goto unsupported;
4239 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4241 if (!supportable_convert_operation (code, vectype_out,
4242 cvt_type, &decl1, &codecvt1))
4243 goto unsupported;
4245 else if (!supportable_widening_operation (code, stmt, vectype_out,
4246 cvt_type, &codecvt1,
4247 &codecvt2, &multi_step_cvt,
4248 &interm_types))
4249 continue;
4250 else
4251 gcc_assert (multi_step_cvt == 0);
4253 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4254 vectype_in, &code1, &code2,
4255 &multi_step_cvt, &interm_types))
4256 break;
4259 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4260 goto unsupported;
4262 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4263 codecvt2 = ERROR_MARK;
4264 else
4266 multi_step_cvt++;
4267 interm_types.safe_push (cvt_type);
4268 cvt_type = NULL_TREE;
4270 break;
4272 case NARROW:
4273 gcc_assert (op_type == unary_op);
4274 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4275 &code1, &multi_step_cvt,
4276 &interm_types))
4277 break;
4279 if (code != FIX_TRUNC_EXPR
4280 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4281 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4282 goto unsupported;
4284 rhs_mode = TYPE_MODE (rhs_type);
4285 cvt_type
4286 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4287 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4288 if (cvt_type == NULL_TREE)
4289 goto unsupported;
4290 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4291 &decl1, &codecvt1))
4292 goto unsupported;
4293 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4294 &code1, &multi_step_cvt,
4295 &interm_types))
4296 break;
4297 goto unsupported;
4299 default:
4300 gcc_unreachable ();
4303 if (!vec_stmt) /* transformation not required. */
4305 if (dump_enabled_p ())
4306 dump_printf_loc (MSG_NOTE, vect_location,
4307 "=== vectorizable_conversion ===\n");
4308 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4310 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4311 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4313 else if (modifier == NARROW)
4315 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4316 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4318 else
4320 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4321 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4323 interm_types.release ();
4324 return true;
4327 /** Transform. **/
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_NOTE, vect_location,
4330 "transform conversion. ncopies = %d.\n", ncopies);
4332 if (op_type == binary_op)
4334 if (CONSTANT_CLASS_P (op0))
4335 op0 = fold_convert (TREE_TYPE (op1), op0);
4336 else if (CONSTANT_CLASS_P (op1))
4337 op1 = fold_convert (TREE_TYPE (op0), op1);
4340 /* In case of multi-step conversion, we first generate conversion operations
4341 to the intermediate types, and then from those types to the final one.
4342 We create vector destinations for the intermediate types (TYPES) received
4343 from supportable_*_operation, and store them in the correct order
4344 for future use in vect_create_vectorized_*_stmts (). */
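/* For instance (a sketch, assuming a widening from a char vector to an
   int vector with one intermediate short vectype): VEC_DSTS receives
   first the destination of the final int vectype and then the
   destination of the intermediate short vectype, so the transform
   below can walk it from the intermediate type down to the final
   one.  */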
4345 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4346 vec_dest = vect_create_destination_var (scalar_dest,
4347 (cvt_type && modifier == WIDEN)
4348 ? cvt_type : vectype_out);
4349 vec_dsts.quick_push (vec_dest);
4351 if (multi_step_cvt)
4353 for (i = interm_types.length () - 1;
4354 interm_types.iterate (i, &intermediate_type); i--)
4356 vec_dest = vect_create_destination_var (scalar_dest,
4357 intermediate_type);
4358 vec_dsts.quick_push (vec_dest);
4362 if (cvt_type)
4363 vec_dest = vect_create_destination_var (scalar_dest,
4364 modifier == WIDEN
4365 ? vectype_out : cvt_type);
4367 if (!slp_node)
4369 if (modifier == WIDEN)
4371 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4372 if (op_type == binary_op)
4373 vec_oprnds1.create (1);
4375 else if (modifier == NARROW)
4376 vec_oprnds0.create (
4377 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4379 else if (code == WIDEN_LSHIFT_EXPR)
4380 vec_oprnds1.create (slp_node->vec_stmts_size);
4382 last_oprnd = op0;
4383 prev_stmt_info = NULL;
4384 switch (modifier)
4386 case NONE:
4387 for (j = 0; j < ncopies; j++)
4389 if (j == 0)
4390 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4391 -1);
4392 else
4393 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4395 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4397 /* Arguments are ready, create the new vector stmt. */
4398 if (code1 == CALL_EXPR)
4400 new_stmt = gimple_build_call (decl1, 1, vop0);
4401 new_temp = make_ssa_name (vec_dest, new_stmt);
4402 gimple_call_set_lhs (new_stmt, new_temp);
4404 else
4406 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4407 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4408 new_temp = make_ssa_name (vec_dest, new_stmt);
4409 gimple_assign_set_lhs (new_stmt, new_temp);
4412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4413 if (slp_node)
4414 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4415 else
4417 if (!prev_stmt_info)
4418 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4419 else
4420 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4421 prev_stmt_info = vinfo_for_stmt (new_stmt);
4425 break;
4427 case WIDEN:
4428 /* In case the vectorization factor (VF) is bigger than the number
4429 of elements that we can fit in a vectype (nunits), we have to
4430 generate more than one vector stmt - i.e., we need to "unroll"
4431 the vector stmt by a factor VF/nunits. */
4432 for (j = 0; j < ncopies; j++)
4434 /* Handle uses. */
4435 if (j == 0)
4437 if (slp_node)
4439 if (code == WIDEN_LSHIFT_EXPR)
4441 unsigned int k;
4443 vec_oprnd1 = op1;
4444 /* Store vec_oprnd1 for every vector stmt to be created
4445 for SLP_NODE. We check during the analysis that all
4446 the shift arguments are the same. */
4447 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4448 vec_oprnds1.quick_push (vec_oprnd1);
4450 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4451 slp_node, -1);
4453 else
4454 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4455 &vec_oprnds1, slp_node, -1);
4457 else
4459 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4460 vec_oprnds0.quick_push (vec_oprnd0);
4461 if (op_type == binary_op)
4463 if (code == WIDEN_LSHIFT_EXPR)
4464 vec_oprnd1 = op1;
4465 else
4466 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4467 vec_oprnds1.quick_push (vec_oprnd1);
4471 else
4473 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4474 vec_oprnds0.truncate (0);
4475 vec_oprnds0.quick_push (vec_oprnd0);
4476 if (op_type == binary_op)
4478 if (code == WIDEN_LSHIFT_EXPR)
4479 vec_oprnd1 = op1;
4480 else
4481 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4482 vec_oprnd1);
4483 vec_oprnds1.truncate (0);
4484 vec_oprnds1.quick_push (vec_oprnd1);
4488 /* Arguments are ready. Create the new vector stmts. */
4489 for (i = multi_step_cvt; i >= 0; i--)
4491 tree this_dest = vec_dsts[i];
4492 enum tree_code c1 = code1, c2 = code2;
4493 if (i == 0 && codecvt2 != ERROR_MARK)
4495 c1 = codecvt1;
4496 c2 = codecvt2;
4498 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4499 &vec_oprnds1,
4500 stmt, this_dest, gsi,
4501 c1, c2, decl1, decl2,
4502 op_type);
4505 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4507 if (cvt_type)
4509 if (codecvt1 == CALL_EXPR)
4511 new_stmt = gimple_build_call (decl1, 1, vop0);
4512 new_temp = make_ssa_name (vec_dest, new_stmt);
4513 gimple_call_set_lhs (new_stmt, new_temp);
4515 else
4517 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4518 new_temp = make_ssa_name (vec_dest);
4519 new_stmt = gimple_build_assign (new_temp, codecvt1,
4520 vop0);
4523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4525 else
4526 new_stmt = SSA_NAME_DEF_STMT (vop0);
4528 if (slp_node)
4529 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4530 else
4532 if (!prev_stmt_info)
4533 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4534 else
4535 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4536 prev_stmt_info = vinfo_for_stmt (new_stmt);
4541 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4542 break;
4544 case NARROW:
4545 /* In case the vectorization factor (VF) is bigger than the number
4546 of elements that we can fit in a vectype (nunits), we have to
4547 generate more than one vector stmt - i.e., we need to "unroll"
4548 the vector stmt by a factor VF/nunits. */
4549 for (j = 0; j < ncopies; j++)
4551 /* Handle uses. */
4552 if (slp_node)
4553 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4554 slp_node, -1);
4555 else
4557 vec_oprnds0.truncate (0);
4558 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4559 vect_pow2 (multi_step_cvt) - 1);
4562 /* Arguments are ready. Create the new vector stmts. */
4563 if (cvt_type)
4564 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4566 if (codecvt1 == CALL_EXPR)
4568 new_stmt = gimple_build_call (decl1, 1, vop0);
4569 new_temp = make_ssa_name (vec_dest, new_stmt);
4570 gimple_call_set_lhs (new_stmt, new_temp);
4572 else
4574 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4575 new_temp = make_ssa_name (vec_dest);
4576 new_stmt = gimple_build_assign (new_temp, codecvt1,
4577 vop0);
4580 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4581 vec_oprnds0[i] = new_temp;
4584 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4585 stmt, vec_dsts, gsi,
4586 slp_node, code1,
4587 &prev_stmt_info);
4590 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4591 break;
4594 vec_oprnds0.release ();
4595 vec_oprnds1.release ();
4596 interm_types.release ();
4598 return true;
4602 /* Function vectorizable_assignment.
4604 Check if STMT performs an assignment (copy) that can be vectorized.
4605 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4606 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4607 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
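/* A typical statement handled here (a sketch only):

     b_2 = (unsigned int) a_1;

   i.e. a plain copy or a conversion that changes neither the number of
   vector elements nor the vector size, so the vectorized form is a
   single whole-vector copy (through a VIEW_CONVERT_EXPR if the types
   differ).  */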
4609 static bool
4610 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4611 gimple **vec_stmt, slp_tree slp_node)
4613 tree vec_dest;
4614 tree scalar_dest;
4615 tree op;
4616 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4617 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4618 tree new_temp;
4619 gimple *def_stmt;
4620 enum vect_def_type dt[1] = {vect_unknown_def_type};
4621 int ndts = 1;
4622 int ncopies;
4623 int i, j;
4624 vec<tree> vec_oprnds = vNULL;
4625 tree vop;
4626 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4627 vec_info *vinfo = stmt_info->vinfo;
4628 gimple *new_stmt = NULL;
4629 stmt_vec_info prev_stmt_info = NULL;
4630 enum tree_code code;
4631 tree vectype_in;
4633 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4634 return false;
4636 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4637 && ! vec_stmt)
4638 return false;
4640 /* Is vectorizable assignment? */
4641 if (!is_gimple_assign (stmt))
4642 return false;
4644 scalar_dest = gimple_assign_lhs (stmt);
4645 if (TREE_CODE (scalar_dest) != SSA_NAME)
4646 return false;
4648 code = gimple_assign_rhs_code (stmt);
4649 if (gimple_assign_single_p (stmt)
4650 || code == PAREN_EXPR
4651 || CONVERT_EXPR_CODE_P (code))
4652 op = gimple_assign_rhs1 (stmt);
4653 else
4654 return false;
4656 if (code == VIEW_CONVERT_EXPR)
4657 op = TREE_OPERAND (op, 0);
4659 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4660 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4662 /* Multiple types in SLP are handled by creating the appropriate number of
4663 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4664 case of SLP. */
4665 if (slp_node)
4666 ncopies = 1;
4667 else
4668 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4670 gcc_assert (ncopies >= 1);
4672 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4674 if (dump_enabled_p ())
4675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4676 "use not simple.\n");
4677 return false;
4680 /* We can handle NOP_EXPR conversions that do not change the number
4681 of elements or the vector size. */
4682 if ((CONVERT_EXPR_CODE_P (code)
4683 || code == VIEW_CONVERT_EXPR)
4684 && (!vectype_in
4685 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4686 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4687 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4688 return false;
4690 /* We do not handle bit-precision changes. */
4691 if ((CONVERT_EXPR_CODE_P (code)
4692 || code == VIEW_CONVERT_EXPR)
4693 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4694 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4695 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4696 || ((TYPE_PRECISION (TREE_TYPE (op))
4697 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4698 /* But a conversion that does not change the bit-pattern is ok. */
4699 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4700 > TYPE_PRECISION (TREE_TYPE (op)))
4701 && TYPE_UNSIGNED (TREE_TYPE (op)))
4702 /* Conversion between boolean types of different sizes is
4703 a simple assignment in case their vectypes are the same
4704 boolean vectors. */
4705 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4706 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4708 if (dump_enabled_p ())
4709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4710 "type conversion to/from bit-precision "
4711 "unsupported.\n");
4712 return false;
4715 if (!vec_stmt) /* transformation not required. */
4717 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4718 if (dump_enabled_p ())
4719 dump_printf_loc (MSG_NOTE, vect_location,
4720 "=== vectorizable_assignment ===\n");
4721 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4722 return true;
4725 /** Transform. **/
4726 if (dump_enabled_p ())
4727 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4729 /* Handle def. */
4730 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4732 /* Handle use. */
4733 for (j = 0; j < ncopies; j++)
4735 /* Handle uses. */
4736 if (j == 0)
4737 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4738 else
4739 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4741 /* Arguments are ready. Create the new vector stmt. */
4742 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4744 if (CONVERT_EXPR_CODE_P (code)
4745 || code == VIEW_CONVERT_EXPR)
4746 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4747 new_stmt = gimple_build_assign (vec_dest, vop);
4748 new_temp = make_ssa_name (vec_dest, new_stmt);
4749 gimple_assign_set_lhs (new_stmt, new_temp);
4750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4751 if (slp_node)
4752 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4755 if (slp_node)
4756 continue;
4758 if (j == 0)
4759 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4760 else
4761 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4763 prev_stmt_info = vinfo_for_stmt (new_stmt);
4766 vec_oprnds.release ();
4767 return true;
4771 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4772 either as a shift by a scalar or by a vector. */
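/* A hypothetical caller sketch (e.g. a pattern recognizer checking
   that a shift it wants to emit can actually be vectorized):

     if (!vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       return NULL;
*/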
4774 bool
4775 vect_supportable_shift (enum tree_code code, tree scalar_type)
4778 machine_mode vec_mode;
4779 optab optab;
4780 int icode;
4781 tree vectype;
4783 vectype = get_vectype_for_scalar_type (scalar_type);
4784 if (!vectype)
4785 return false;
4787 optab = optab_for_tree_code (code, vectype, optab_scalar);
4788 if (!optab
4789 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4791 optab = optab_for_tree_code (code, vectype, optab_vector);
4792 if (!optab
4793 || (optab_handler (optab, TYPE_MODE (vectype))
4794 == CODE_FOR_nothing))
4795 return false;
4798 vec_mode = TYPE_MODE (vectype);
4799 icode = (int) optab_handler (optab, vec_mode);
4800 if (icode == CODE_FOR_nothing)
4801 return false;
4803 return true;
4807 /* Function vectorizable_shift.
4809 Check if STMT performs a shift operation that can be vectorized.
4810 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4811 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4812 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
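/* An example statement handled here (a sketch only):

     x_2 = y_1 << n_3;

   The analysis below decides whether the shift amount can stay a
   scalar (vector-by-scalar shift optab) or must be broadcast to a
   vector (vector-by-vector shift optab).  */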
4814 static bool
4815 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4816 gimple **vec_stmt, slp_tree slp_node)
4818 tree vec_dest;
4819 tree scalar_dest;
4820 tree op0, op1 = NULL;
4821 tree vec_oprnd1 = NULL_TREE;
4822 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4823 tree vectype;
4824 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4825 enum tree_code code;
4826 machine_mode vec_mode;
4827 tree new_temp;
4828 optab optab;
4829 int icode;
4830 machine_mode optab_op2_mode;
4831 gimple *def_stmt;
4832 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4833 int ndts = 2;
4834 gimple *new_stmt = NULL;
4835 stmt_vec_info prev_stmt_info;
4836 int nunits_in;
4837 int nunits_out;
4838 tree vectype_out;
4839 tree op1_vectype;
4840 int ncopies;
4841 int j, i;
4842 vec<tree> vec_oprnds0 = vNULL;
4843 vec<tree> vec_oprnds1 = vNULL;
4844 tree vop0, vop1;
4845 unsigned int k;
4846 bool scalar_shift_arg = true;
4847 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4848 vec_info *vinfo = stmt_info->vinfo;
4849 int vf;
4851 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4852 return false;
4854 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4855 && ! vec_stmt)
4856 return false;
4858 /* Is STMT a vectorizable binary/unary operation? */
4859 if (!is_gimple_assign (stmt))
4860 return false;
4862 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4863 return false;
4865 code = gimple_assign_rhs_code (stmt);
4867 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4868 || code == RROTATE_EXPR))
4869 return false;
4871 scalar_dest = gimple_assign_lhs (stmt);
4872 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4873 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4874 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4876 if (dump_enabled_p ())
4877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4878 "bit-precision shifts not supported.\n");
4879 return false;
4882 op0 = gimple_assign_rhs1 (stmt);
4883 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4885 if (dump_enabled_p ())
4886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4887 "use not simple.\n");
4888 return false;
4890 /* If op0 is an external or constant def use a vector type with
4891 the same size as the output vector type. */
4892 if (!vectype)
4893 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4894 if (vec_stmt)
4895 gcc_assert (vectype);
4896 if (!vectype)
4898 if (dump_enabled_p ())
4899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4900 "no vectype for scalar type\n");
4901 return false;
4904 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4905 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4906 if (nunits_out != nunits_in)
4907 return false;
4909 op1 = gimple_assign_rhs2 (stmt);
4910 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4912 if (dump_enabled_p ())
4913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4914 "use not simple.\n");
4915 return false;
4918 if (loop_vinfo)
4919 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4920 else
4921 vf = 1;
4923 /* Multiple types in SLP are handled by creating the appropriate number of
4924 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4925 case of SLP. */
4926 if (slp_node)
4927 ncopies = 1;
4928 else
4929 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4931 gcc_assert (ncopies >= 1);
4933 /* Determine whether the shift amount is a vector or a scalar. If the
4934 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4936 if ((dt[1] == vect_internal_def
4937 || dt[1] == vect_induction_def)
4938 && !slp_node)
4939 scalar_shift_arg = false;
4940 else if (dt[1] == vect_constant_def
4941 || dt[1] == vect_external_def
4942 || dt[1] == vect_internal_def)
4944 /* In SLP, we need to check whether the shift count is the same for
4945 all statements; in loops, if it is a constant or invariant, it is
4946 always a scalar shift.
4947 if (slp_node)
4949 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4950 gimple *slpstmt;
4952 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4953 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4954 scalar_shift_arg = false;
4957 /* If the shift amount is computed by a pattern stmt we cannot
4958 use the scalar amount directly, so give up and use a vector
4959 shift. */
4960 if (dt[1] == vect_internal_def)
4962 gimple *def = SSA_NAME_DEF_STMT (op1);
4963 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4964 scalar_shift_arg = false;
4967 else
4969 if (dump_enabled_p ())
4970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4971 "operand mode requires invariant argument.\n");
4972 return false;
4975 /* Vector shifted by vector. */
4976 if (!scalar_shift_arg)
4978 optab = optab_for_tree_code (code, vectype, optab_vector);
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_NOTE, vect_location,
4981 "vector/vector shift/rotate found.\n");
4983 if (!op1_vectype)
4984 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4985 if (op1_vectype == NULL_TREE
4986 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4988 if (dump_enabled_p ())
4989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4990 "unusable type for last operand in"
4991 " vector/vector shift/rotate.\n");
4992 return false;
4995 /* See if the machine has a vector shift-by-scalar insn and, if not,
4996 see whether it has a vector shift-by-vector insn. */
4997 else
4999 optab = optab_for_tree_code (code, vectype, optab_scalar);
5000 if (optab
5001 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5003 if (dump_enabled_p ())
5004 dump_printf_loc (MSG_NOTE, vect_location,
5005 "vector/scalar shift/rotate found.\n");
5007 else
5009 optab = optab_for_tree_code (code, vectype, optab_vector);
5010 if (optab
5011 && (optab_handler (optab, TYPE_MODE (vectype))
5012 != CODE_FOR_nothing))
5014 scalar_shift_arg = false;
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE, vect_location,
5018 "vector/vector shift/rotate found.\n");
5020 /* Unlike the other binary operators, shifts/rotates have
5021 an rhs of type int rather than the same type as the lhs,
5022 so make sure the scalar has the right type if we are
5023 dealing with vectors of long long/long/short/char.
5024 if (dt[1] == vect_constant_def)
5025 op1 = fold_convert (TREE_TYPE (vectype), op1);
5026 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5027 TREE_TYPE (op1)))
5029 if (slp_node
5030 && TYPE_MODE (TREE_TYPE (vectype))
5031 != TYPE_MODE (TREE_TYPE (op1)))
5033 if (dump_enabled_p ())
5034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5035 "unusable type for last operand in"
5036 " vector/vector shift/rotate.\n");
5037 return false;
5039 if (vec_stmt && !slp_node)
5041 op1 = fold_convert (TREE_TYPE (vectype), op1);
5042 op1 = vect_init_vector (stmt, op1,
5043 TREE_TYPE (vectype), NULL);
5050 /* Supportable by target? */
5051 if (!optab)
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5055 "no optab.\n");
5056 return false;
5058 vec_mode = TYPE_MODE (vectype);
5059 icode = (int) optab_handler (optab, vec_mode);
5060 if (icode == CODE_FOR_nothing)
5062 if (dump_enabled_p ())
5063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5064 "op not supported by target.\n");
5065 /* Check only during analysis. */
5066 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5067 || (vf < vect_min_worthwhile_factor (code)
5068 && !vec_stmt))
5069 return false;
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_NOTE, vect_location,
5072 "proceeding using word mode.\n");
5075 /* Worthwhile without SIMD support? Check only during analysis. */
5076 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5077 && vf < vect_min_worthwhile_factor (code)
5078 && !vec_stmt)
5080 if (dump_enabled_p ())
5081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5082 "not worthwhile without SIMD support.\n");
5083 return false;
5086 if (!vec_stmt) /* transformation not required. */
5088 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5089 if (dump_enabled_p ())
5090 dump_printf_loc (MSG_NOTE, vect_location,
5091 "=== vectorizable_shift ===\n");
5092 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5093 return true;
5096 /** Transform. **/
5098 if (dump_enabled_p ())
5099 dump_printf_loc (MSG_NOTE, vect_location,
5100 "transform binary/unary operation.\n");
5102 /* Handle def. */
5103 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5105 prev_stmt_info = NULL;
5106 for (j = 0; j < ncopies; j++)
5108 /* Handle uses. */
5109 if (j == 0)
5111 if (scalar_shift_arg)
5113 /* Vector shl and shr insn patterns can be defined with scalar
5114 operand 2 (shift operand). In this case, use constant or loop
5115 invariant op1 directly, without extending it to vector mode
5116 first. */
5117 optab_op2_mode = insn_data[icode].operand[2].mode;
5118 if (!VECTOR_MODE_P (optab_op2_mode))
5120 if (dump_enabled_p ())
5121 dump_printf_loc (MSG_NOTE, vect_location,
5122 "operand 1 using scalar mode.\n");
5123 vec_oprnd1 = op1;
5124 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5125 vec_oprnds1.quick_push (vec_oprnd1);
5126 if (slp_node)
5128 /* Store vec_oprnd1 for every vector stmt to be created
5129 for SLP_NODE. We check during the analysis that all
5130 the shift arguments are the same.
5131 TODO: Allow different constants for different vector
5132 stmts generated for an SLP instance. */
5133 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5134 vec_oprnds1.quick_push (vec_oprnd1);
5139 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5140 (a special case for certain kinds of vector shifts); otherwise,
5141 operand 1 should be of a vector type (the usual case). */
5142 if (vec_oprnd1)
5143 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5144 slp_node, -1);
5145 else
5146 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5147 slp_node, -1);
5149 else
5150 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5152 /* Arguments are ready. Create the new vector stmt. */
5153 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5155 vop1 = vec_oprnds1[i];
5156 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5157 new_temp = make_ssa_name (vec_dest, new_stmt);
5158 gimple_assign_set_lhs (new_stmt, new_temp);
5159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5160 if (slp_node)
5161 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5164 if (slp_node)
5165 continue;
5167 if (j == 0)
5168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5169 else
5170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5171 prev_stmt_info = vinfo_for_stmt (new_stmt);
5174 vec_oprnds0.release ();
5175 vec_oprnds1.release ();
5177 return true;
5181 /* Function vectorizable_operation.
5183 Check if STMT performs a binary, unary or ternary operation that can
5184 be vectorized.
5185 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5186 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5187 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
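/* Example statements handled here (a sketch only):

     z_3 = x_1 + y_2;     binary PLUS_EXPR
     n_5 = -m_4;          unary NEGATE_EXPR

   Ternary codes are accepted as well.  Shifts and rotates are rejected
   below and handled by vectorizable_shift instead.  */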
5189 static bool
5190 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5191 gimple **vec_stmt, slp_tree slp_node)
5193 tree vec_dest;
5194 tree scalar_dest;
5195 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5196 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5197 tree vectype;
5198 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5199 enum tree_code code;
5200 machine_mode vec_mode;
5201 tree new_temp;
5202 int op_type;
5203 optab optab;
5204 bool target_support_p;
5205 gimple *def_stmt;
5206 enum vect_def_type dt[3]
5207 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5208 int ndts = 3;
5209 gimple *new_stmt = NULL;
5210 stmt_vec_info prev_stmt_info;
5211 int nunits_in;
5212 int nunits_out;
5213 tree vectype_out;
5214 int ncopies;
5215 int j, i;
5216 vec<tree> vec_oprnds0 = vNULL;
5217 vec<tree> vec_oprnds1 = vNULL;
5218 vec<tree> vec_oprnds2 = vNULL;
5219 tree vop0, vop1, vop2;
5220 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5221 vec_info *vinfo = stmt_info->vinfo;
5222 int vf;
5224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5225 return false;
5227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5228 && ! vec_stmt)
5229 return false;
5231 /* Is STMT a vectorizable binary/unary operation? */
5232 if (!is_gimple_assign (stmt))
5233 return false;
5235 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5236 return false;
5238 code = gimple_assign_rhs_code (stmt);
5240 /* For pointer addition, we should use the normal plus for
5241 the vector addition. */
5242 if (code == POINTER_PLUS_EXPR)
5243 code = PLUS_EXPR;
5245 /* Support only unary, binary and ternary operations. */
5246 op_type = TREE_CODE_LENGTH (code);
5247 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5249 if (dump_enabled_p ())
5250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5251 "num. args = %d (not unary/binary/ternary op).\n",
5252 op_type);
5253 return false;
5256 scalar_dest = gimple_assign_lhs (stmt);
5257 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5259 /* Most operations cannot handle bit-precision types without extra
5260 truncations. */
5261 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5262 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5263 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5264 /* Exception are bitwise binary operations. */
5265 && code != BIT_IOR_EXPR
5266 && code != BIT_XOR_EXPR
5267 && code != BIT_AND_EXPR)
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5271 "bit-precision arithmetic not supported.\n");
5272 return false;
5275 op0 = gimple_assign_rhs1 (stmt);
5276 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5278 if (dump_enabled_p ())
5279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5280 "use not simple.\n");
5281 return false;
5283 /* If op0 is an external or constant def use a vector type with
5284 the same size as the output vector type. */
5285 if (!vectype)
5287 /* For a boolean type we cannot determine the vectype from an
5288 invariant value (we don't know whether it is a vector
5289 of booleans or a vector of integers). We use the output
5290 vectype because operations on booleans don't change the
5291 type. */
5292 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5294 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5296 if (dump_enabled_p ())
5297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5298 "not supported operation on bool value.\n");
5299 return false;
5301 vectype = vectype_out;
5303 else
5304 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5306 if (vec_stmt)
5307 gcc_assert (vectype);
5308 if (!vectype)
5310 if (dump_enabled_p ())
5312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5313 "no vectype for scalar type ");
5314 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5315 TREE_TYPE (op0));
5316 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5319 return false;
5322 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5323 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5324 if (nunits_out != nunits_in)
5325 return false;
5327 if (op_type == binary_op || op_type == ternary_op)
5329 op1 = gimple_assign_rhs2 (stmt);
5330 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334 "use not simple.\n");
5335 return false;
5338 if (op_type == ternary_op)
5340 op2 = gimple_assign_rhs3 (stmt);
5341 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5343 if (dump_enabled_p ())
5344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5345 "use not simple.\n");
5346 return false;
5350 if (loop_vinfo)
5351 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5352 else
5353 vf = 1;
5355 /* Multiple types in SLP are handled by creating the appropriate number of
5356 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5357 case of SLP. */
5358 if (slp_node)
5359 ncopies = 1;
5360 else
5361 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5363 gcc_assert (ncopies >= 1);
5365 /* Shifts are handled in vectorizable_shift (). */
5366 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5367 || code == RROTATE_EXPR)
5368 return false;
5370 /* Supportable by target? */
5372 vec_mode = TYPE_MODE (vectype);
5373 if (code == MULT_HIGHPART_EXPR)
5374 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5375 else
5377 optab = optab_for_tree_code (code, vectype, optab_default);
5378 if (!optab)
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "no optab.\n");
5383 return false;
5385 target_support_p = (optab_handler (optab, vec_mode)
5386 != CODE_FOR_nothing);
5389 if (!target_support_p)
5391 if (dump_enabled_p ())
5392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5393 "op not supported by target.\n");
5394 /* Check only during analysis. */
5395 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5396 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5397 return false;
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_NOTE, vect_location,
5400 "proceeding using word mode.\n");
5403 /* Worthwhile without SIMD support? Check only during analysis. */
5404 if (!VECTOR_MODE_P (vec_mode)
5405 && !vec_stmt
5406 && vf < vect_min_worthwhile_factor (code))
5408 if (dump_enabled_p ())
5409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5410 "not worthwhile without SIMD support.\n");
5411 return false;
5414 if (!vec_stmt) /* transformation not required. */
5416 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5417 if (dump_enabled_p ())
5418 dump_printf_loc (MSG_NOTE, vect_location,
5419 "=== vectorizable_operation ===\n");
5420 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5421 return true;
5424 /** Transform. **/
5426 if (dump_enabled_p ())
5427 dump_printf_loc (MSG_NOTE, vect_location,
5428 "transform binary/unary operation.\n");
5430 /* Handle def. */
5431 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5433 /* In case the vectorization factor (VF) is bigger than the number
5434 of elements that we can fit in a vectype (nunits), we have to generate
5435 more than one vector stmt - i.e., we need to "unroll" the
5436 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5437 from one copy of the vector stmt to the next, in the field
5438 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5439 stages to find the correct vector defs to be used when vectorizing
5440 stmts that use the defs of the current stmt. The example below
5441 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5442 we need to create 4 vectorized stmts):
5444 before vectorization:
5445 RELATED_STMT VEC_STMT
5446 S1: x = memref - -
5447 S2: z = x + 1 - -
5449 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5450 there):
5451 RELATED_STMT VEC_STMT
5452 VS1_0: vx0 = memref0 VS1_1 -
5453 VS1_1: vx1 = memref1 VS1_2 -
5454 VS1_2: vx2 = memref2 VS1_3 -
5455 VS1_3: vx3 = memref3 - -
5456 S1: x = load - VS1_0
5457 S2: z = x + 1 - -
5459 step2: vectorize stmt S2 (done here):
5460 To vectorize stmt S2 we first need to find the relevant vector
5461 def for the first operand 'x'. This is, as usual, obtained from
5462 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5463 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5464 relevant vector def 'vx0'. Having found 'vx0' we can generate
5465 the vector stmt VS2_0, and as usual, record it in the
5466 STMT_VINFO_VEC_STMT of stmt S2.
5467 When creating the second copy (VS2_1), we obtain the relevant vector
5468 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5469 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5470 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5471 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5472 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5473 chain of stmts and pointers:
5474 RELATED_STMT VEC_STMT
5475 VS1_0: vx0 = memref0 VS1_1 -
5476 VS1_1: vx1 = memref1 VS1_2 -
5477 VS1_2: vx2 = memref2 VS1_3 -
5478 VS1_3: vx3 = memref3 - -
5479 S1: x = load - VS1_0
5480 VS2_0: vz0 = vx0 + v1 VS2_1 -
5481 VS2_1: vz1 = vx1 + v1 VS2_2 -
5482 VS2_2: vz2 = vx2 + v1 VS2_3 -
5483 VS2_3: vz3 = vx3 + v1 - -
5484 S2: z = x + 1 - VS2_0 */
5486 prev_stmt_info = NULL;
5487 for (j = 0; j < ncopies; j++)
5489 /* Handle uses. */
5490 if (j == 0)
5492 if (op_type == binary_op || op_type == ternary_op)
5493 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5494 slp_node, -1);
5495 else
5496 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5497 slp_node, -1);
5498 if (op_type == ternary_op)
5499 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5500 slp_node, -1);
5502 else
5504 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5505 if (op_type == ternary_op)
5507 tree vec_oprnd = vec_oprnds2.pop ();
5508 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5509 vec_oprnd));
5513 /* Arguments are ready. Create the new vector stmt. */
5514 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5516 vop1 = ((op_type == binary_op || op_type == ternary_op)
5517 ? vec_oprnds1[i] : NULL_TREE);
5518 vop2 = ((op_type == ternary_op)
5519 ? vec_oprnds2[i] : NULL_TREE);
5520 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5521 new_temp = make_ssa_name (vec_dest, new_stmt);
5522 gimple_assign_set_lhs (new_stmt, new_temp);
5523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5524 if (slp_node)
5525 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5528 if (slp_node)
5529 continue;
5531 if (j == 0)
5532 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5533 else
5534 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5535 prev_stmt_info = vinfo_for_stmt (new_stmt);
5538 vec_oprnds0.release ();
5539 vec_oprnds1.release ();
5540 vec_oprnds2.release ();
5542 return true;
5545 /* A helper function to ensure data reference DR's base alignment
5546 for STMT_INFO. */
5548 static void
5549 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5551 if (!dr->aux)
5552 return;
5554 if (DR_VECT_AUX (dr)->base_misaligned)
5556 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5557 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5559 if (decl_in_symtab_p (base_decl))
5560 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5561 else
5563 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5564 DECL_USER_ALIGN (base_decl) = 1;
5566 DR_VECT_AUX (dr)->base_misaligned = false;
5571 /* Function get_group_alias_ptr_type.
5573 Return the alias type for the group starting at FIRST_STMT. */
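/* Illustration (hypothetical group, not taken from the code below): if one
   member of the group stores through an 'int' lvalue and another through a
   'float' lvalue, the two DR_REFs have different alias sets, so the function
   falls back to ptr_type_node, which may alias anything; if all members
   access the same type, the more precise alias pointer type of the first
   reference is returned instead.  */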
5575 static tree
5576 get_group_alias_ptr_type (gimple *first_stmt)
5578 struct data_reference *first_dr, *next_dr;
5579 gimple *next_stmt;
5581 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5582 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5583 while (next_stmt)
5585 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5586 if (get_alias_set (DR_REF (first_dr))
5587 != get_alias_set (DR_REF (next_dr)))
5589 if (dump_enabled_p ())
5590 dump_printf_loc (MSG_NOTE, vect_location,
5591 "conflicting alias set types.\n");
5592 return ptr_type_node;
5594 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5596 return reference_alias_ptr_type (DR_REF (first_dr));
5600 /* Function vectorizable_store.
 5602 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5603 can be vectorized.
5604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5605 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
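/* For example (illustrative only): a scalar statement such as
   'a[i_3] = x_5', whose lhs is an ARRAY_REF (or MEM_REF, COMPONENT_REF,
   etc.) and which has a data reference recorded in STMT_VINFO_DATA_REF,
   is a candidate here; a statement like 'x_5 = a[i_3]' is not, and is
   handled by vectorizable_load below instead.  */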
5608 static bool
5609 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5610 slp_tree slp_node)
5612 tree scalar_dest;
5613 tree data_ref;
5614 tree op;
5615 tree vec_oprnd = NULL_TREE;
5616 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5617 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5618 tree elem_type;
5619 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5620 struct loop *loop = NULL;
5621 machine_mode vec_mode;
5622 tree dummy;
5623 enum dr_alignment_support alignment_support_scheme;
5624 gimple *def_stmt;
5625 enum vect_def_type dt;
5626 stmt_vec_info prev_stmt_info = NULL;
5627 tree dataref_ptr = NULL_TREE;
5628 tree dataref_offset = NULL_TREE;
5629 gimple *ptr_incr = NULL;
5630 int ncopies;
5631 int j;
5632 gimple *next_stmt, *first_stmt;
5633 bool grouped_store;
5634 unsigned int group_size, i;
5635 vec<tree> oprnds = vNULL;
5636 vec<tree> result_chain = vNULL;
5637 bool inv_p;
5638 tree offset = NULL_TREE;
5639 vec<tree> vec_oprnds = vNULL;
5640 bool slp = (slp_node != NULL);
5641 unsigned int vec_num;
5642 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5643 vec_info *vinfo = stmt_info->vinfo;
5644 tree aggr_type;
5645 gather_scatter_info gs_info;
5646 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5647 gimple *new_stmt;
5648 int vf;
5649 vec_load_store_type vls_type;
5650 tree ref_type;
5652 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5653 return false;
5655 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5656 && ! vec_stmt)
5657 return false;
5659 /* Is vectorizable store? */
5661 if (!is_gimple_assign (stmt))
5662 return false;
5664 scalar_dest = gimple_assign_lhs (stmt);
5665 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5666 && is_pattern_stmt_p (stmt_info))
5667 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5668 if (TREE_CODE (scalar_dest) != ARRAY_REF
5669 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5670 && TREE_CODE (scalar_dest) != INDIRECT_REF
5671 && TREE_CODE (scalar_dest) != COMPONENT_REF
5672 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5673 && TREE_CODE (scalar_dest) != REALPART_EXPR
5674 && TREE_CODE (scalar_dest) != MEM_REF)
5675 return false;
5677 /* Cannot have hybrid store SLP -- that would mean storing to the
5678 same location twice. */
5679 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5681 gcc_assert (gimple_assign_single_p (stmt));
5683 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5684 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5686 if (loop_vinfo)
5688 loop = LOOP_VINFO_LOOP (loop_vinfo);
5689 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5691 else
5692 vf = 1;
5694 /* Multiple types in SLP are handled by creating the appropriate number of
5695 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5696 case of SLP. */
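/* Worked example (illustrative numbers): with LOOP_VINFO_VECT_FACTOR == 8
   and a 4-element vectype (nunits == 4), the non-SLP path below sets
   ncopies = 8 / 4 = 2, i.e. two vector stores are emitted for the scalar
   store; in the SLP case the required copies are instead reflected in
   SLP_TREE_NUMBER_OF_VEC_STMTS and ncopies stays 1.  */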
5697 if (slp)
5698 ncopies = 1;
5699 else
5700 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5702 gcc_assert (ncopies >= 1);
5704 /* FORNOW. This restriction should be relaxed. */
5705 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "multiple types in nested loop.\n");
5710 return false;
5713 op = gimple_assign_rhs1 (stmt);
5715 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5717 if (dump_enabled_p ())
5718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5719 "use not simple.\n");
5720 return false;
5723 if (dt == vect_constant_def || dt == vect_external_def)
5724 vls_type = VLS_STORE_INVARIANT;
5725 else
5726 vls_type = VLS_STORE;
5728 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5729 return false;
5731 elem_type = TREE_TYPE (vectype);
5732 vec_mode = TYPE_MODE (vectype);
 5734 /* FORNOW. In some cases we can vectorize even if the data type is not
 5735 supported (e.g. array initialization with 0). */
5736 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5737 return false;
5739 if (!STMT_VINFO_DATA_REF (stmt_info))
5740 return false;
5742 vect_memory_access_type memory_access_type;
5743 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5744 &memory_access_type, &gs_info))
5745 return false;
5747 if (!vec_stmt) /* transformation not required. */
5749 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5750 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5751 /* The SLP costs are calculated during SLP analysis. */
5752 if (!PURE_SLP_STMT (stmt_info))
5753 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5754 NULL, NULL, NULL);
5755 return true;
5757 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5759 /** Transform. **/
5761 ensure_base_align (stmt_info, dr);
5763 if (memory_access_type == VMAT_GATHER_SCATTER)
5765 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5766 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5767 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5768 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5769 edge pe = loop_preheader_edge (loop);
5770 gimple_seq seq;
5771 basic_block new_bb;
5772 enum { NARROW, NONE, WIDEN } modifier;
5773 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5775 if (nunits == (unsigned int) scatter_off_nunits)
5776 modifier = NONE;
5777 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5779 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5780 modifier = WIDEN;
5782 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5783 sel[i] = i | nunits;
5785 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5786 gcc_assert (perm_mask != NULL_TREE);
5788 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5790 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5791 modifier = NARROW;
5793 for (i = 0; i < (unsigned int) nunits; ++i)
5794 sel[i] = i | scatter_off_nunits;
5796 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5797 gcc_assert (perm_mask != NULL_TREE);
5798 ncopies *= 2;
5800 else
5801 gcc_unreachable ();
5803 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5804 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5805 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5806 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5807 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5808 scaletype = TREE_VALUE (arglist);
5810 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5811 && TREE_CODE (rettype) == VOID_TYPE);
5813 ptr = fold_convert (ptrtype, gs_info.base);
5814 if (!is_gimple_min_invariant (ptr))
5816 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5817 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5818 gcc_assert (!new_bb);
5821 /* Currently we support only unconditional scatter stores,
5822 so mask should be all ones. */
5823 mask = build_int_cst (masktype, -1);
5824 mask = vect_init_vector (stmt, mask, masktype, NULL);
5826 scale = build_int_cst (scaletype, gs_info.scale);
5828 prev_stmt_info = NULL;
5829 for (j = 0; j < ncopies; ++j)
5831 if (j == 0)
5833 src = vec_oprnd1
5834 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5835 op = vec_oprnd0
5836 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5838 else if (modifier != NONE && (j & 1))
5840 if (modifier == WIDEN)
5842 src = vec_oprnd1
5843 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5844 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5845 stmt, gsi);
5847 else if (modifier == NARROW)
5849 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5850 stmt, gsi);
5851 op = vec_oprnd0
5852 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5853 vec_oprnd0);
5855 else
5856 gcc_unreachable ();
5858 else
5860 src = vec_oprnd1
5861 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5862 op = vec_oprnd0
5863 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5864 vec_oprnd0);
5867 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5869 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5870 == TYPE_VECTOR_SUBPARTS (srctype));
5871 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5872 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5873 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5874 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5875 src = var;
5878 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5880 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5881 == TYPE_VECTOR_SUBPARTS (idxtype));
5882 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5883 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5884 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5885 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5886 op = var;
5889 new_stmt
5890 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5892 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5894 if (prev_stmt_info == NULL)
5895 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5896 else
5897 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5898 prev_stmt_info = vinfo_for_stmt (new_stmt);
5900 return true;
5903 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5904 if (grouped_store)
5906 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5907 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5908 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5910 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5912 /* FORNOW */
5913 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5915 /* We vectorize all the stmts of the interleaving group when we
5916 reach the last stmt in the group. */
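/* Illustration (hypothetical group): for the interleaved stores
   'p[2*i] = a; p[2*i+1] = b;' with GROUP_SIZE == 2, the call for the store
   of 'a' only bumps GROUP_STORE_COUNT and returns without emitting code via
   the check below; the vector code for the whole group is generated when
   the store of 'b', the last member of the chain, is reached.  */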
5917 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5918 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5919 && !slp)
5921 *vec_stmt = NULL;
5922 return true;
5925 if (slp)
5927 grouped_store = false;
5928 /* VEC_NUM is the number of vect stmts to be created for this
5929 group. */
5930 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5931 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5932 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5933 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5934 op = gimple_assign_rhs1 (first_stmt);
5936 else
5937 /* VEC_NUM is the number of vect stmts to be created for this
5938 group. */
5939 vec_num = group_size;
5941 ref_type = get_group_alias_ptr_type (first_stmt);
5943 else
5945 first_stmt = stmt;
5946 first_dr = dr;
5947 group_size = vec_num = 1;
5948 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5951 if (dump_enabled_p ())
5952 dump_printf_loc (MSG_NOTE, vect_location,
5953 "transform store. ncopies = %d\n", ncopies);
5955 if (memory_access_type == VMAT_ELEMENTWISE
5956 || memory_access_type == VMAT_STRIDED_SLP)
5958 gimple_stmt_iterator incr_gsi;
5959 bool insert_after;
5960 gimple *incr;
5961 tree offvar;
5962 tree ivstep;
5963 tree running_off;
5964 gimple_seq stmts = NULL;
5965 tree stride_base, stride_step, alias_off;
5966 tree vec_oprnd;
5967 unsigned int g;
5969 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5971 stride_base
5972 = fold_build_pointer_plus
5973 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5974 size_binop (PLUS_EXPR,
5975 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5976 convert_to_ptrofftype (DR_INIT (first_dr))));
5977 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
 5979 /* For a store with a loop-invariant stride that is not a power of 2
 5980 (i.e. not a grouped access) like so:
5982 for (i = 0; i < n; i += stride)
5983 array[i] = ...;
5985 we generate a new induction variable and new stores from
5986 the components of the (vectorized) rhs:
5988 for (j = 0; ; j += VF*stride)
5989 vectemp = ...;
5990 tmp1 = vectemp[0];
5991 array[j] = tmp1;
5992 tmp2 = vectemp[1];
5993 array[j + stride] = tmp2;
5997 unsigned nstores = nunits;
5998 unsigned lnel = 1;
5999 tree ltype = elem_type;
6000 if (slp)
6002 if (group_size < nunits
6003 && nunits % group_size == 0)
6005 nstores = nunits / group_size;
6006 lnel = group_size;
6007 ltype = build_vector_type (elem_type, group_size);
6009 else if (group_size >= nunits
6010 && group_size % nunits == 0)
6012 nstores = 1;
6013 lnel = nunits;
6014 ltype = vectype;
6016 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6017 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6020 ivstep = stride_step;
6021 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6022 build_int_cst (TREE_TYPE (ivstep), vf));
6024 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6026 create_iv (stride_base, ivstep, NULL,
6027 loop, &incr_gsi, insert_after,
6028 &offvar, NULL);
6029 incr = gsi_stmt (incr_gsi);
6030 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6032 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6033 if (stmts)
6034 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6036 prev_stmt_info = NULL;
6037 alias_off = build_int_cst (ref_type, 0);
6038 next_stmt = first_stmt;
6039 for (g = 0; g < group_size; g++)
6041 running_off = offvar;
6042 if (g)
6044 tree size = TYPE_SIZE_UNIT (ltype);
6045 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6046 size);
6047 tree newoff = copy_ssa_name (running_off, NULL);
6048 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6049 running_off, pos);
6050 vect_finish_stmt_generation (stmt, incr, gsi);
6051 running_off = newoff;
6053 unsigned int group_el = 0;
6054 unsigned HOST_WIDE_INT
6055 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6056 for (j = 0; j < ncopies; j++)
6058 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6059 and first_stmt == stmt. */
6060 if (j == 0)
6062 if (slp)
6064 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6065 slp_node, -1);
6066 vec_oprnd = vec_oprnds[0];
6068 else
6070 gcc_assert (gimple_assign_single_p (next_stmt));
6071 op = gimple_assign_rhs1 (next_stmt);
6072 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6075 else
6077 if (slp)
6078 vec_oprnd = vec_oprnds[j];
6079 else
6081 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6082 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6086 for (i = 0; i < nstores; i++)
6088 tree newref, newoff;
6089 gimple *incr, *assign;
6090 tree size = TYPE_SIZE (ltype);
6091 /* Extract the i'th component. */
6092 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6093 bitsize_int (i), size);
6094 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6095 size, pos);
6097 elem = force_gimple_operand_gsi (gsi, elem, true,
6098 NULL_TREE, true,
6099 GSI_SAME_STMT);
6101 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6102 group_el * elsz);
6103 newref = build2 (MEM_REF, ltype,
6104 running_off, this_off);
6106 /* And store it to *running_off. */
6107 assign = gimple_build_assign (newref, elem);
6108 vect_finish_stmt_generation (stmt, assign, gsi);
6110 group_el += lnel;
6111 if (! slp
6112 || group_el == group_size)
6114 newoff = copy_ssa_name (running_off, NULL);
6115 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6116 running_off, stride_step);
6117 vect_finish_stmt_generation (stmt, incr, gsi);
6119 running_off = newoff;
6120 group_el = 0;
6122 if (g == group_size - 1
6123 && !slp)
6125 if (j == 0 && i == 0)
6126 STMT_VINFO_VEC_STMT (stmt_info)
6127 = *vec_stmt = assign;
6128 else
6129 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6130 prev_stmt_info = vinfo_for_stmt (assign);
6134 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6135 if (slp)
6136 break;
6139 vec_oprnds.release ();
6140 return true;
6143 auto_vec<tree> dr_chain (group_size);
6144 oprnds.create (group_size);
6146 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6147 gcc_assert (alignment_support_scheme);
6148 /* Targets with store-lane instructions must not require explicit
6149 realignment. */
6150 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6151 || alignment_support_scheme == dr_aligned
6152 || alignment_support_scheme == dr_unaligned_supported);
6154 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6155 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6156 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6158 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6159 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6160 else
6161 aggr_type = vectype;
6163 /* In case the vectorization factor (VF) is bigger than the number
6164 of elements that we can fit in a vectype (nunits), we have to generate
 6165 more than one vector stmt, i.e. we need to "unroll" the
 6166 vector stmt by a factor VF/nunits. For more details see the documentation
 6167 of vect_get_vec_def_for_stmt_copy. */
6169 /* In case of interleaving (non-unit grouped access):
6171 S1: &base + 2 = x2
6172 S2: &base = x0
6173 S3: &base + 1 = x1
6174 S4: &base + 3 = x3
6176 We create vectorized stores starting from base address (the access of the
6177 first stmt in the chain (S2 in the above example), when the last store stmt
6178 of the chain (S4) is reached:
6180 VS1: &base = vx2
6181 VS2: &base + vec_size*1 = vx0
6182 VS3: &base + vec_size*2 = vx1
6183 VS4: &base + vec_size*3 = vx3
6185 Then permutation statements are generated:
6187 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6188 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6191 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6192 (the order of the data-refs in the output of vect_permute_store_chain
6193 corresponds to the order of scalar stmts in the interleaving chain - see
6194 the documentation of vect_permute_store_chain()).
6196 In case of both multiple types and interleaving, above vector stores and
6197 permutation stmts are created for every copy. The result vector stmts are
6198 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6199 STMT_VINFO_RELATED_STMT for the next copies.
6202 prev_stmt_info = NULL;
6203 for (j = 0; j < ncopies; j++)
6206 if (j == 0)
6208 if (slp)
6210 /* Get vectorized arguments for SLP_NODE. */
6211 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6212 NULL, slp_node, -1);
6214 vec_oprnd = vec_oprnds[0];
6216 else
6218 /* For interleaved stores we collect vectorized defs for all the
6219 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6220 used as an input to vect_permute_store_chain(), and OPRNDS as
6221 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6223 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6224 OPRNDS are of size 1. */
6225 next_stmt = first_stmt;
6226 for (i = 0; i < group_size; i++)
6228 /* Since gaps are not supported for interleaved stores,
6229 GROUP_SIZE is the exact number of stmts in the chain.
6230 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6231 there is no interleaving, GROUP_SIZE is 1, and only one
6232 iteration of the loop will be executed. */
6233 gcc_assert (next_stmt
6234 && gimple_assign_single_p (next_stmt));
6235 op = gimple_assign_rhs1 (next_stmt);
6237 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6238 dr_chain.quick_push (vec_oprnd);
6239 oprnds.quick_push (vec_oprnd);
6240 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
 6244 /* We should have caught mismatched types earlier. */
6245 gcc_assert (useless_type_conversion_p (vectype,
6246 TREE_TYPE (vec_oprnd)));
6247 bool simd_lane_access_p
6248 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6249 if (simd_lane_access_p
6250 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6251 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6252 && integer_zerop (DR_OFFSET (first_dr))
6253 && integer_zerop (DR_INIT (first_dr))
6254 && alias_sets_conflict_p (get_alias_set (aggr_type),
6255 get_alias_set (TREE_TYPE (ref_type))))
6257 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6258 dataref_offset = build_int_cst (ref_type, 0);
6259 inv_p = false;
6261 else
6262 dataref_ptr
6263 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6264 simd_lane_access_p ? loop : NULL,
6265 offset, &dummy, gsi, &ptr_incr,
6266 simd_lane_access_p, &inv_p);
6267 gcc_assert (bb_vinfo || !inv_p);
6269 else
6271 /* For interleaved stores we created vectorized defs for all the
6272 defs stored in OPRNDS in the previous iteration (previous copy).
6273 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6274 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6275 next copy.
6276 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6277 OPRNDS are of size 1. */
6278 for (i = 0; i < group_size; i++)
6280 op = oprnds[i];
6281 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6282 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6283 dr_chain[i] = vec_oprnd;
6284 oprnds[i] = vec_oprnd;
6286 if (dataref_offset)
6287 dataref_offset
6288 = int_const_binop (PLUS_EXPR, dataref_offset,
6289 TYPE_SIZE_UNIT (aggr_type));
6290 else
6291 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6292 TYPE_SIZE_UNIT (aggr_type));
6295 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6297 tree vec_array;
6299 /* Combine all the vectors into an array. */
6300 vec_array = create_vector_array (vectype, vec_num);
6301 for (i = 0; i < vec_num; i++)
6303 vec_oprnd = dr_chain[i];
6304 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6307 /* Emit:
6308 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6309 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6310 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6311 gimple_call_set_lhs (new_stmt, data_ref);
6312 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6314 else
6316 new_stmt = NULL;
6317 if (grouped_store)
6319 if (j == 0)
6320 result_chain.create (group_size);
6321 /* Permute. */
6322 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6323 &result_chain);
6326 next_stmt = first_stmt;
6327 for (i = 0; i < vec_num; i++)
6329 unsigned align, misalign;
6331 if (i > 0)
6332 /* Bump the vector pointer. */
6333 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6334 stmt, NULL_TREE);
6336 if (slp)
6337 vec_oprnd = vec_oprnds[i];
6338 else if (grouped_store)
6339 /* For grouped stores vectorized defs are interleaved in
6340 vect_permute_store_chain(). */
6341 vec_oprnd = result_chain[i];
6343 data_ref = fold_build2 (MEM_REF, vectype,
6344 dataref_ptr,
6345 dataref_offset
6346 ? dataref_offset
6347 : build_int_cst (ref_type, 0));
6348 align = TYPE_ALIGN_UNIT (vectype);
6349 if (aligned_access_p (first_dr))
6350 misalign = 0;
6351 else if (DR_MISALIGNMENT (first_dr) == -1)
6353 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6354 align = TYPE_ALIGN_UNIT (elem_type);
6355 else
6356 align = get_object_alignment (DR_REF (first_dr))
6357 / BITS_PER_UNIT;
6358 misalign = 0;
6359 TREE_TYPE (data_ref)
6360 = build_aligned_type (TREE_TYPE (data_ref),
6361 align * BITS_PER_UNIT);
6363 else
6365 TREE_TYPE (data_ref)
6366 = build_aligned_type (TREE_TYPE (data_ref),
6367 TYPE_ALIGN (elem_type));
6368 misalign = DR_MISALIGNMENT (first_dr);
6370 if (dataref_offset == NULL_TREE
6371 && TREE_CODE (dataref_ptr) == SSA_NAME)
6372 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6373 misalign);
6375 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6377 tree perm_mask = perm_mask_for_reverse (vectype);
6378 tree perm_dest
6379 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6380 vectype);
6381 tree new_temp = make_ssa_name (perm_dest);
6383 /* Generate the permute statement. */
6384 gimple *perm_stmt
6385 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6386 vec_oprnd, perm_mask);
6387 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6389 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6390 vec_oprnd = new_temp;
6393 /* Arguments are ready. Create the new vector stmt. */
6394 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6397 if (slp)
6398 continue;
6400 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6401 if (!next_stmt)
6402 break;
6405 if (!slp)
6407 if (j == 0)
6408 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6409 else
6410 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6411 prev_stmt_info = vinfo_for_stmt (new_stmt);
6415 oprnds.release ();
6416 result_chain.release ();
6417 vec_oprnds.release ();
6419 return true;
6422 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6423 VECTOR_CST mask. No checks are made that the target platform supports the
6424 mask, so callers may wish to test can_vec_perm_p separately, or use
6425 vect_gen_perm_mask_checked. */
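/* Usage sketch (illustrative): to reverse a 4-element vector one would pass
   sel = { 3, 2, 1, 0 }, obtaining the VECTOR_CST { 3, 2, 1, 0 } of the
   matching integer mask type; handing that mask to a VEC_PERM_EXPR with the
   same vector as both inputs yields the reversed vector, as is done for
   VMAT_CONTIGUOUS_REVERSE stores above.  */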
6427 tree
6428 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6430 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6431 int i, nunits;
6433 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6435 mask_elt_type = lang_hooks.types.type_for_mode
6436 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6437 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6439 mask_elts = XALLOCAVEC (tree, nunits);
6440 for (i = nunits - 1; i >= 0; i--)
6441 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6442 mask_vec = build_vector (mask_type, mask_elts);
6444 return mask_vec;
6447 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6448 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6450 tree
6451 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6453 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6454 return vect_gen_perm_mask_any (vectype, sel);
 6457 /* Given vector variables X and Y that were generated for the scalar
6458 STMT, generate instructions to permute the vector elements of X and Y
6459 using permutation mask MASK_VEC, insert them at *GSI and return the
6460 permuted vector variable. */
6462 static tree
6463 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6464 gimple_stmt_iterator *gsi)
6466 tree vectype = TREE_TYPE (x);
6467 tree perm_dest, data_ref;
6468 gimple *perm_stmt;
6470 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6471 data_ref = make_ssa_name (perm_dest);
6473 /* Generate the permute statement. */
6474 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6475 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6477 return data_ref;
6480 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 6481 inserting them on the loop's preheader edge. Returns true if we
 6482 were successful in doing so (and thus STMT can then be moved),
6483 otherwise returns false. */
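/* Illustration (hypothetical SSA names): if STMT is 'tmp_4 = a[i_1]' where
   'i_1 = n_2 + 1' is defined inside LOOP but n_2 is defined outside of it,
   the definition of i_1 is moved to the preheader edge and true is
   returned; if i_1 were instead defined by a PHI node, or by a statement
   whose own operands are again defined inside LOOP, the function returns
   false without moving anything.  */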
6485 static bool
6486 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6488 ssa_op_iter i;
6489 tree op;
6490 bool any = false;
6492 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6494 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6495 if (!gimple_nop_p (def_stmt)
6496 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6498 /* Make sure we don't need to recurse. While we could do
6499 so in simple cases when there are more complex use webs
6500 we don't have an easy way to preserve stmt order to fulfil
6501 dependencies within them. */
6502 tree op2;
6503 ssa_op_iter i2;
6504 if (gimple_code (def_stmt) == GIMPLE_PHI)
6505 return false;
6506 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6508 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6509 if (!gimple_nop_p (def_stmt2)
6510 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6511 return false;
6513 any = true;
6517 if (!any)
6518 return true;
6520 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6522 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6523 if (!gimple_nop_p (def_stmt)
6524 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6526 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6527 gsi_remove (&gsi, false);
6528 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6532 return true;
6535 /* vectorizable_load.
 6537 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6538 can be vectorized.
6539 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6540 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6541 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6543 static bool
6544 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6545 slp_tree slp_node, slp_instance slp_node_instance)
6547 tree scalar_dest;
6548 tree vec_dest = NULL;
6549 tree data_ref = NULL;
6550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6551 stmt_vec_info prev_stmt_info;
6552 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6553 struct loop *loop = NULL;
6554 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6555 bool nested_in_vect_loop = false;
6556 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6557 tree elem_type;
6558 tree new_temp;
6559 machine_mode mode;
6560 gimple *new_stmt = NULL;
6561 tree dummy;
6562 enum dr_alignment_support alignment_support_scheme;
6563 tree dataref_ptr = NULL_TREE;
6564 tree dataref_offset = NULL_TREE;
6565 gimple *ptr_incr = NULL;
6566 int ncopies;
6567 int i, j, group_size, group_gap_adj;
6568 tree msq = NULL_TREE, lsq;
6569 tree offset = NULL_TREE;
6570 tree byte_offset = NULL_TREE;
6571 tree realignment_token = NULL_TREE;
6572 gphi *phi = NULL;
6573 vec<tree> dr_chain = vNULL;
6574 bool grouped_load = false;
6575 gimple *first_stmt;
6576 gimple *first_stmt_for_drptr = NULL;
6577 bool inv_p;
6578 bool compute_in_loop = false;
6579 struct loop *at_loop;
6580 int vec_num;
6581 bool slp = (slp_node != NULL);
6582 bool slp_perm = false;
6583 enum tree_code code;
6584 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6585 int vf;
6586 tree aggr_type;
6587 gather_scatter_info gs_info;
6588 vec_info *vinfo = stmt_info->vinfo;
6589 tree ref_type;
6591 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6592 return false;
6594 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6595 && ! vec_stmt)
6596 return false;
6598 /* Is vectorizable load? */
6599 if (!is_gimple_assign (stmt))
6600 return false;
6602 scalar_dest = gimple_assign_lhs (stmt);
6603 if (TREE_CODE (scalar_dest) != SSA_NAME)
6604 return false;
6606 code = gimple_assign_rhs_code (stmt);
6607 if (code != ARRAY_REF
6608 && code != BIT_FIELD_REF
6609 && code != INDIRECT_REF
6610 && code != COMPONENT_REF
6611 && code != IMAGPART_EXPR
6612 && code != REALPART_EXPR
6613 && code != MEM_REF
6614 && TREE_CODE_CLASS (code) != tcc_declaration)
6615 return false;
6617 if (!STMT_VINFO_DATA_REF (stmt_info))
6618 return false;
6620 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6621 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6623 if (loop_vinfo)
6625 loop = LOOP_VINFO_LOOP (loop_vinfo);
6626 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6627 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6629 else
6630 vf = 1;
6632 /* Multiple types in SLP are handled by creating the appropriate number of
6633 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6634 case of SLP. */
6635 if (slp)
6636 ncopies = 1;
6637 else
6638 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6640 gcc_assert (ncopies >= 1);
6642 /* FORNOW. This restriction should be relaxed. */
6643 if (nested_in_vect_loop && ncopies > 1)
6645 if (dump_enabled_p ())
6646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6647 "multiple types in nested loop.\n");
6648 return false;
6651 /* Invalidate assumptions made by dependence analysis when vectorization
6652 on the unrolled body effectively re-orders stmts. */
6653 if (ncopies > 1
6654 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6655 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6656 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6658 if (dump_enabled_p ())
6659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6660 "cannot perform implicit CSE when unrolling "
6661 "with negative dependence distance\n");
6662 return false;
6665 elem_type = TREE_TYPE (vectype);
6666 mode = TYPE_MODE (vectype);
 6668 /* FORNOW. In some cases we can vectorize even if the data type is not
 6669 supported (e.g. data copies). */
6670 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6672 if (dump_enabled_p ())
6673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6674 "Aligned load, but unsupported type.\n");
6675 return false;
6678 /* Check if the load is a part of an interleaving chain. */
6679 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6681 grouped_load = true;
6682 /* FORNOW */
6683 gcc_assert (!nested_in_vect_loop);
6684 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6686 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6687 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6689 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6690 slp_perm = true;
6692 /* Invalidate assumptions made by dependence analysis when vectorization
6693 on the unrolled body effectively re-orders stmts. */
6694 if (!PURE_SLP_STMT (stmt_info)
6695 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6696 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6697 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6699 if (dump_enabled_p ())
6700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6701 "cannot perform implicit CSE when performing "
6702 "group loads with negative dependence distance\n");
6703 return false;
 6706 /* Similarly, we have to give up when the stmt is a load that is both
 6707 part of an SLP instance and a loop-vectorized stmt via the same-dr
 6708 mechanism. */
6709 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6710 && (STMT_SLP_TYPE (stmt_info)
6711 != STMT_SLP_TYPE (vinfo_for_stmt
6712 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6714 if (dump_enabled_p ())
6715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6716 "conflicting SLP types for CSEd load\n");
6717 return false;
6721 vect_memory_access_type memory_access_type;
6722 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6723 &memory_access_type, &gs_info))
6724 return false;
6726 if (!vec_stmt) /* transformation not required. */
6728 if (!slp)
6729 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6730 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6731 /* The SLP costs are calculated during SLP analysis. */
6732 if (!PURE_SLP_STMT (stmt_info))
6733 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6734 NULL, NULL, NULL);
6735 return true;
6738 if (!slp)
6739 gcc_assert (memory_access_type
6740 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6742 if (dump_enabled_p ())
6743 dump_printf_loc (MSG_NOTE, vect_location,
6744 "transform load. ncopies = %d\n", ncopies);
6746 /** Transform. **/
6748 ensure_base_align (stmt_info, dr);
6750 if (memory_access_type == VMAT_GATHER_SCATTER)
6752 tree vec_oprnd0 = NULL_TREE, op;
6753 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6754 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6755 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6756 edge pe = loop_preheader_edge (loop);
6757 gimple_seq seq;
6758 basic_block new_bb;
6759 enum { NARROW, NONE, WIDEN } modifier;
6760 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6762 if (nunits == gather_off_nunits)
6763 modifier = NONE;
6764 else if (nunits == gather_off_nunits / 2)
6766 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6767 modifier = WIDEN;
6769 for (i = 0; i < gather_off_nunits; ++i)
6770 sel[i] = i | nunits;
6772 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6774 else if (nunits == gather_off_nunits * 2)
6776 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6777 modifier = NARROW;
6779 for (i = 0; i < nunits; ++i)
6780 sel[i] = i < gather_off_nunits
6781 ? i : i + nunits - gather_off_nunits;
6783 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6784 ncopies *= 2;
6786 else
6787 gcc_unreachable ();
6789 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6790 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6791 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6792 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6793 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6794 scaletype = TREE_VALUE (arglist);
6795 gcc_checking_assert (types_compatible_p (srctype, rettype));
6797 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6799 ptr = fold_convert (ptrtype, gs_info.base);
6800 if (!is_gimple_min_invariant (ptr))
6802 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6803 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6804 gcc_assert (!new_bb);
6807 /* Currently we support only unconditional gather loads,
6808 so mask should be all ones. */
6809 if (TREE_CODE (masktype) == INTEGER_TYPE)
6810 mask = build_int_cst (masktype, -1);
6811 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6813 mask = build_int_cst (TREE_TYPE (masktype), -1);
6814 mask = build_vector_from_val (masktype, mask);
6815 mask = vect_init_vector (stmt, mask, masktype, NULL);
6817 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6819 REAL_VALUE_TYPE r;
6820 long tmp[6];
6821 for (j = 0; j < 6; ++j)
6822 tmp[j] = -1;
6823 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6824 mask = build_real (TREE_TYPE (masktype), r);
6825 mask = build_vector_from_val (masktype, mask);
6826 mask = vect_init_vector (stmt, mask, masktype, NULL);
6828 else
6829 gcc_unreachable ();
6831 scale = build_int_cst (scaletype, gs_info.scale);
6833 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6834 merge = build_int_cst (TREE_TYPE (rettype), 0);
6835 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6837 REAL_VALUE_TYPE r;
6838 long tmp[6];
6839 for (j = 0; j < 6; ++j)
6840 tmp[j] = 0;
6841 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6842 merge = build_real (TREE_TYPE (rettype), r);
6844 else
6845 gcc_unreachable ();
6846 merge = build_vector_from_val (rettype, merge);
6847 merge = vect_init_vector (stmt, merge, rettype, NULL);
6849 prev_stmt_info = NULL;
6850 for (j = 0; j < ncopies; ++j)
6852 if (modifier == WIDEN && (j & 1))
6853 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6854 perm_mask, stmt, gsi);
6855 else if (j == 0)
6856 op = vec_oprnd0
6857 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6858 else
6859 op = vec_oprnd0
6860 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6862 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6864 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6865 == TYPE_VECTOR_SUBPARTS (idxtype));
6866 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6867 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6868 new_stmt
6869 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6871 op = var;
6874 new_stmt
6875 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6877 if (!useless_type_conversion_p (vectype, rettype))
6879 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6880 == TYPE_VECTOR_SUBPARTS (rettype));
6881 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6882 gimple_call_set_lhs (new_stmt, op);
6883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6884 var = make_ssa_name (vec_dest);
6885 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6886 new_stmt
6887 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6889 else
6891 var = make_ssa_name (vec_dest, new_stmt);
6892 gimple_call_set_lhs (new_stmt, var);
6895 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6897 if (modifier == NARROW)
6899 if ((j & 1) == 0)
6901 prev_res = var;
6902 continue;
6904 var = permute_vec_elements (prev_res, var,
6905 perm_mask, stmt, gsi);
6906 new_stmt = SSA_NAME_DEF_STMT (var);
6909 if (prev_stmt_info == NULL)
6910 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6911 else
6912 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6913 prev_stmt_info = vinfo_for_stmt (new_stmt);
6915 return true;
6918 if (memory_access_type == VMAT_ELEMENTWISE
6919 || memory_access_type == VMAT_STRIDED_SLP)
6921 gimple_stmt_iterator incr_gsi;
6922 bool insert_after;
6923 gimple *incr;
6924 tree offvar;
6925 tree ivstep;
6926 tree running_off;
6927 vec<constructor_elt, va_gc> *v = NULL;
6928 gimple_seq stmts = NULL;
6929 tree stride_base, stride_step, alias_off;
6931 gcc_assert (!nested_in_vect_loop);
6933 if (slp && grouped_load)
6935 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6936 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6937 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6938 ref_type = get_group_alias_ptr_type (first_stmt);
6940 else
6942 first_stmt = stmt;
6943 first_dr = dr;
6944 group_size = 1;
6945 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6948 stride_base
6949 = fold_build_pointer_plus
6950 (DR_BASE_ADDRESS (first_dr),
6951 size_binop (PLUS_EXPR,
6952 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6953 convert_to_ptrofftype (DR_INIT (first_dr))));
6954 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
 6956 /* For a load with a loop-invariant stride that is not a power of 2
 6957 (i.e. not a grouped access) like so:
6959 for (i = 0; i < n; i += stride)
6960 ... = array[i];
6962 we generate a new induction variable and new accesses to
6963 form a new vector (or vectors, depending on ncopies):
6965 for (j = 0; ; j += VF*stride)
6966 tmp1 = array[j];
6967 tmp2 = array[j + stride];
6969 vectemp = {tmp1, tmp2, ...}
6972 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6973 build_int_cst (TREE_TYPE (stride_step), vf));
6975 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6977 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6978 loop, &incr_gsi, insert_after,
6979 &offvar, NULL);
6980 incr = gsi_stmt (incr_gsi);
6981 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6983 stride_step = force_gimple_operand (unshare_expr (stride_step),
6984 &stmts, true, NULL_TREE);
6985 if (stmts)
6986 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6988 prev_stmt_info = NULL;
6989 running_off = offvar;
6990 alias_off = build_int_cst (ref_type, 0);
6991 int nloads = nunits;
6992 int lnel = 1;
6993 tree ltype = TREE_TYPE (vectype);
6994 tree lvectype = vectype;
6995 auto_vec<tree> dr_chain;
6996 if (memory_access_type == VMAT_STRIDED_SLP)
6998 if (group_size < nunits)
7000 /* Avoid emitting a constructor of vector elements by performing
7001 the loads using an integer type of the same size,
7002 constructing a vector of those and then re-interpreting it
7003 as the original vector type. This works around the fact
7004 that the vec_init optab was only designed for scalar
7005 element modes and thus expansion goes through memory.
7006 This avoids a huge runtime penalty due to the general
7007 inability to perform store forwarding from smaller stores
7008 to a larger load. */
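/* Worked example (illustrative numbers): for a V4SF vectype (nunits == 4)
   and group_size == 2, lsize = 2 * 32 = 64, so each pair of floats is
   loaded as a single 64-bit integer; nloads = 2 such loads build a
   2-element integer vector which is then VIEW_CONVERTed back to V4SF,
   instead of building the V4SF from four scalar loads, provided the
   target supports initializing such an integer vector (checked below).  */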
7009 unsigned lsize
7010 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7011 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7012 enum machine_mode vmode = mode_for_vector (elmode,
7013 nunits / group_size);
7014 /* If we can't construct such a vector fall back to
7015 element loads of the original vector type. */
7016 if (VECTOR_MODE_P (vmode)
7017 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7019 nloads = nunits / group_size;
7020 lnel = group_size;
7021 ltype = build_nonstandard_integer_type (lsize, 1);
7022 lvectype = build_vector_type (ltype, nloads);
7025 else
7027 nloads = 1;
7028 lnel = nunits;
7029 ltype = vectype;
7031 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7033 if (slp)
7035 /* For SLP permutation support we need to load the whole group,
7036 not only the number of vector stmts the permutation result
7037 fits in. */
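/* Worked example (illustrative numbers): with group_size == 3, vf == 4 and
   a 4-element vectype, all 3 * 4 = 12 group elements are needed, so
   ncopies = (12 + 4 - 1) / 4 = 3 vectors are loaded and collected in
   DR_CHAIN for vect_transform_slp_perm_load to permute; without a load
   permutation only SLP_TREE_NUMBER_OF_VEC_STMTS copies are needed.  */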
7038 if (slp_perm)
7040 ncopies = (group_size * vf + nunits - 1) / nunits;
7041 dr_chain.create (ncopies);
7043 else
7044 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7046 int group_el = 0;
7047 unsigned HOST_WIDE_INT
7048 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7049 for (j = 0; j < ncopies; j++)
7051 if (nloads > 1)
7052 vec_alloc (v, nloads);
7053 for (i = 0; i < nloads; i++)
7055 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7056 group_el * elsz);
7057 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7058 build2 (MEM_REF, ltype,
7059 running_off, this_off));
7060 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7061 if (nloads > 1)
7062 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7063 gimple_assign_lhs (new_stmt));
7065 group_el += lnel;
7066 if (! slp
7067 || group_el == group_size)
7069 tree newoff = copy_ssa_name (running_off);
7070 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7071 running_off, stride_step);
7072 vect_finish_stmt_generation (stmt, incr, gsi);
7074 running_off = newoff;
7075 group_el = 0;
7078 if (nloads > 1)
7080 tree vec_inv = build_constructor (lvectype, v);
7081 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7082 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7083 if (lvectype != vectype)
7085 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7086 VIEW_CONVERT_EXPR,
7087 build1 (VIEW_CONVERT_EXPR,
7088 vectype, new_temp));
7089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7093 if (slp)
7095 if (slp_perm)
7096 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7097 else
7098 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7100 else
7102 if (j == 0)
7103 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7104 else
7105 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7106 prev_stmt_info = vinfo_for_stmt (new_stmt);
7109 if (slp_perm)
7111 unsigned n_perms;
7112 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7113 slp_node_instance, false, &n_perms);
7115 return true;
7118 if (grouped_load)
7120 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7121 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7122 /* For SLP vectorization we directly vectorize a subchain
7123 without permutation. */
7124 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7125 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7126 /* For BB vectorization always use the first stmt to base
7127 the data ref pointer on. */
7128 if (bb_vinfo)
7129 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7131 /* Check if the chain of loads is already vectorized. */
7132 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7133 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7134 ??? But we can only do so if there is exactly one
7135 as we have no way to get at the rest. Leave the CSE
7136 opportunity alone.
7137 ??? With the group load eventually participating
7138 in multiple different permutations (having multiple
7139 slp nodes which refer to the same group) the CSE
7140 is even wrong code. See PR56270. */
7141 && !slp)
7143 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7144 return true;
7146 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7147 group_gap_adj = 0;
7149 /* VEC_NUM is the number of vect stmts to be created for this group. */
7150 if (slp)
7152 grouped_load = false;
7153 /* For SLP permutation support we need to load the whole group,
7154 not only the number of vector stmts the permutation result
7155 fits in. */
7156 if (slp_perm)
7157 vec_num = (group_size * vf + nunits - 1) / nunits;
7158 else
7159 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7160 group_gap_adj = vf * group_size - nunits * vec_num;
7162 else
7163 vec_num = group_size;
7165 ref_type = get_group_alias_ptr_type (first_stmt);
7167 else
7169 first_stmt = stmt;
7170 first_dr = dr;
7171 group_size = vec_num = 1;
7172 group_gap_adj = 0;
7173 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7176 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7177 gcc_assert (alignment_support_scheme);
7178 /* Targets with load-lane instructions must not require explicit
7179 realignment. */
7180 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7181 || alignment_support_scheme == dr_aligned
7182 || alignment_support_scheme == dr_unaligned_supported);
7184 /* In case the vectorization factor (VF) is bigger than the number
7185 of elements that we can fit in a vectype (nunits), we have to generate
 7186 more than one vector stmt, i.e. we need to "unroll" the
7187 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7188 from one copy of the vector stmt to the next, in the field
7189 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7190 stages to find the correct vector defs to be used when vectorizing
7191 stmts that use the defs of the current stmt. The example below
7192 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7193 need to create 4 vectorized stmts):
7195 before vectorization:
7196 RELATED_STMT VEC_STMT
7197 S1: x = memref - -
7198 S2: z = x + 1 - -
7200 step 1: vectorize stmt S1:
7201 We first create the vector stmt VS1_0, and, as usual, record a
7202 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7203 Next, we create the vector stmt VS1_1, and record a pointer to
7204 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7205 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7206 stmts and pointers:
7207 RELATED_STMT VEC_STMT
7208 VS1_0: vx0 = memref0 VS1_1 -
7209 VS1_1: vx1 = memref1 VS1_2 -
7210 VS1_2: vx2 = memref2 VS1_3 -
7211 VS1_3: vx3 = memref3 - -
7212 S1: x = load - VS1_0
7213 S2: z = x + 1 - -
7215 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7216 information we recorded in RELATED_STMT field is used to vectorize
7217 stmt S2. */
7219 /* In case of interleaving (non-unit grouped access):
7221 S1: x2 = &base + 2
7222 S2: x0 = &base
7223 S3: x1 = &base + 1
7224 S4: x3 = &base + 3
7226 Vectorized loads are created in the order of memory accesses
7227 starting from the access of the first stmt of the chain:
7229 VS1: vx0 = &base
7230 VS2: vx1 = &base + vec_size*1
7231 VS3: vx3 = &base + vec_size*2
7232 VS4: vx4 = &base + vec_size*3
7234 Then permutation statements are generated:
7236 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7237 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7240 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7241 (the order of the data-refs in the output of vect_permute_load_chain
7242 corresponds to the order of scalar stmts in the interleaving chain - see
7243 the documentation of vect_permute_load_chain()).
7244 The generation of permutation stmts and recording them in
7245 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7247 In case of both multiple types and interleaving, the vector loads and
7248 permutation stmts above are created for every copy. The result vector
7249 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7250 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7252 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7253 on a target that supports unaligned accesses (dr_unaligned_supported)
7254 we generate the following code:
7255 p = initial_addr;
7256 indx = 0;
7257 loop {
7258 p = p + indx * vectype_size;
7259 vec_dest = *(p);
7260 indx = indx + 1;
7263 Otherwise, the data reference is potentially unaligned on a target that
7264 does not support unaligned accesses (dr_explicit_realign_optimized) -
7265 then generate the following code, in which the data in each iteration is
7266 obtained by two vector loads, one from the previous iteration, and one
7267 from the current iteration:
7268 p1 = initial_addr;
7269 msq_init = *(floor(p1))
7270 p2 = initial_addr + VS - 1;
7271 realignment_token = call target_builtin;
7272 indx = 0;
7273 loop {
7274 p2 = p2 + indx * vectype_size
7275 lsq = *(floor(p2))
7276 vec_dest = realign_load (msq, lsq, realignment_token)
7277 indx = indx + 1;
7278 msq = lsq;
7279 } */
7281 /* If the misalignment remains the same throughout the execution of the
7282 loop, we can create the init_addr and permutation mask at the loop
7283 preheader. Otherwise, it needs to be created inside the loop.
7284 This can only occur when vectorizing memory accesses in the inner-loop
7285 nested within an outer-loop that is being vectorized. */
7287 if (nested_in_vect_loop
7288 && (TREE_INT_CST_LOW (DR_STEP (dr))
7289 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7291 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7292 compute_in_loop = true;
7295 if ((alignment_support_scheme == dr_explicit_realign_optimized
7296 || alignment_support_scheme == dr_explicit_realign)
7297 && !compute_in_loop)
7299 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7300 alignment_support_scheme, NULL_TREE,
7301 &at_loop);
7302 if (alignment_support_scheme == dr_explicit_realign_optimized)
7304 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7305 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7306 size_one_node);
7309 else
7310 at_loop = loop;
7312 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7313 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7315 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7316 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7317 else
7318 aggr_type = vectype;
7320 prev_stmt_info = NULL;
7321 for (j = 0; j < ncopies; j++)
7323 /* 1. Create the vector or array pointer update chain. */
7324 if (j == 0)
7326 bool simd_lane_access_p
7327 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7328 if (simd_lane_access_p
7329 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7330 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7331 && integer_zerop (DR_OFFSET (first_dr))
7332 && integer_zerop (DR_INIT (first_dr))
7333 && alias_sets_conflict_p (get_alias_set (aggr_type),
7334 get_alias_set (TREE_TYPE (ref_type)))
7335 && (alignment_support_scheme == dr_aligned
7336 || alignment_support_scheme == dr_unaligned_supported))
7338 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7339 dataref_offset = build_int_cst (ref_type, 0);
7340 inv_p = false;
7342 else if (first_stmt_for_drptr
7343 && first_stmt != first_stmt_for_drptr)
7345 dataref_ptr
7346 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7347 at_loop, offset, &dummy, gsi,
7348 &ptr_incr, simd_lane_access_p,
7349 &inv_p, byte_offset);
7350 /* Adjust the pointer by the difference to first_stmt. */
7351 data_reference_p ptrdr
7352 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7353 tree diff = fold_convert (sizetype,
7354 size_binop (MINUS_EXPR,
7355 DR_INIT (first_dr),
7356 DR_INIT (ptrdr)));
7357 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7358 stmt, diff);
7360 else
7361 dataref_ptr
7362 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7363 offset, &dummy, gsi, &ptr_incr,
7364 simd_lane_access_p, &inv_p,
7365 byte_offset);
7367 else if (dataref_offset)
7368 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7369 TYPE_SIZE_UNIT (aggr_type));
7370 else
7371 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7372 TYPE_SIZE_UNIT (aggr_type));
7374 if (grouped_load || slp_perm)
7375 dr_chain.create (vec_num);
7377 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7379 tree vec_array;
7381 vec_array = create_vector_array (vectype, vec_num);
7383 /* Emit:
7384 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
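/* IFN_LOAD_LANES reads the whole interleaved group in one operation and
   de-interleaves it into an array of vectors; on some targets, e.g.
   AArch64 with LD2/LD3/LD4, this maps to a single structure-load
   instruction. */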
7385 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7386 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7387 gimple_call_set_lhs (new_stmt, vec_array);
7388 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7390 /* Extract each vector into an SSA_NAME. */
7391 for (i = 0; i < vec_num; i++)
7393 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7394 vec_array, i);
7395 dr_chain.quick_push (new_temp);
7398 /* Record the mapping between SSA_NAMEs and statements. */
7399 vect_record_grouped_load_vectors (stmt, dr_chain);
7401 else
7403 for (i = 0; i < vec_num; i++)
7405 if (i > 0)
7406 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7407 stmt, NULL_TREE);
7409 /* 2. Create the vector-load in the loop. */
7410 switch (alignment_support_scheme)
7412 case dr_aligned:
7413 case dr_unaligned_supported:
7415 unsigned int align, misalign;
7417 data_ref
7418 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7419 dataref_offset
7420 ? dataref_offset
7421 : build_int_cst (ref_type, 0));
7422 align = TYPE_ALIGN_UNIT (vectype);
7423 if (alignment_support_scheme == dr_aligned)
7425 gcc_assert (aligned_access_p (first_dr));
7426 misalign = 0;
7428 else if (DR_MISALIGNMENT (first_dr) == -1)
7430 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7431 align = TYPE_ALIGN_UNIT (elem_type);
7432 else
7433 align = (get_object_alignment (DR_REF (first_dr))
7434 / BITS_PER_UNIT);
7435 misalign = 0;
7436 TREE_TYPE (data_ref)
7437 = build_aligned_type (TREE_TYPE (data_ref),
7438 align * BITS_PER_UNIT);
7440 else
7442 TREE_TYPE (data_ref)
7443 = build_aligned_type (TREE_TYPE (data_ref),
7444 TYPE_ALIGN (elem_type));
7445 misalign = DR_MISALIGNMENT (first_dr);
7447 if (dataref_offset == NULL_TREE
7448 && TREE_CODE (dataref_ptr) == SSA_NAME)
7449 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7450 align, misalign);
7451 break;
7453 case dr_explicit_realign:
7455 tree ptr, bump;
7457 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7459 if (compute_in_loop)
7460 msq = vect_setup_realignment (first_stmt, gsi,
7461 &realignment_token,
7462 dr_explicit_realign,
7463 dataref_ptr, NULL);
7465 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7466 ptr = copy_ssa_name (dataref_ptr);
7467 else
7468 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7469 new_stmt = gimple_build_assign
7470 (ptr, BIT_AND_EXPR, dataref_ptr,
7471 build_int_cst
7472 (TREE_TYPE (dataref_ptr),
7473 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7474 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7475 data_ref
7476 = build2 (MEM_REF, vectype, ptr,
7477 build_int_cst (ref_type, 0));
7478 vec_dest = vect_create_destination_var (scalar_dest,
7479 vectype);
7480 new_stmt = gimple_build_assign (vec_dest, data_ref);
7481 new_temp = make_ssa_name (vec_dest, new_stmt);
7482 gimple_assign_set_lhs (new_stmt, new_temp);
7483 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7484 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7485 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7486 msq = new_temp;
7488 bump = size_binop (MULT_EXPR, vs,
7489 TYPE_SIZE_UNIT (elem_type));
7490 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7491 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7492 new_stmt = gimple_build_assign
7493 (NULL_TREE, BIT_AND_EXPR, ptr,
7494 build_int_cst
7495 (TREE_TYPE (ptr),
7496 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7497 ptr = copy_ssa_name (ptr, new_stmt);
7498 gimple_assign_set_lhs (new_stmt, ptr);
7499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7500 data_ref
7501 = build2 (MEM_REF, vectype, ptr,
7502 build_int_cst (ref_type, 0));
7503 break;
7505 case dr_explicit_realign_optimized:
7506 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7507 new_temp = copy_ssa_name (dataref_ptr);
7508 else
7509 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7510 new_stmt = gimple_build_assign
7511 (new_temp, BIT_AND_EXPR, dataref_ptr,
7512 build_int_cst
7513 (TREE_TYPE (dataref_ptr),
7514 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7516 data_ref
7517 = build2 (MEM_REF, vectype, new_temp,
7518 build_int_cst (ref_type, 0));
7519 break;
7520 default:
7521 gcc_unreachable ();
7523 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7524 new_stmt = gimple_build_assign (vec_dest, data_ref);
7525 new_temp = make_ssa_name (vec_dest, new_stmt);
7526 gimple_assign_set_lhs (new_stmt, new_temp);
7527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7529 /* 3. Handle explicit realignment if necessary/supported.
7530 Create in loop:
7531 vec_dest = realign_load (msq, lsq, realignment_token) */
7532 if (alignment_support_scheme == dr_explicit_realign_optimized
7533 || alignment_support_scheme == dr_explicit_realign)
7535 lsq = gimple_assign_lhs (new_stmt);
7536 if (!realignment_token)
7537 realignment_token = dataref_ptr;
7538 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7539 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7540 msq, lsq, realignment_token);
7541 new_temp = make_ssa_name (vec_dest, new_stmt);
7542 gimple_assign_set_lhs (new_stmt, new_temp);
7543 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7545 if (alignment_support_scheme == dr_explicit_realign_optimized)
7547 gcc_assert (phi);
7548 if (i == vec_num - 1 && j == ncopies - 1)
7549 add_phi_arg (phi, lsq,
7550 loop_latch_edge (containing_loop),
7551 UNKNOWN_LOCATION);
7552 msq = lsq;
7556 /* 4. Handle invariant-load. */
7557 if (inv_p && !bb_vinfo)
7559 gcc_assert (!grouped_load);
7560 /* If we have versioned for aliasing or the loop doesn't
7561 have any data dependencies that would preclude this,
7562 then we are sure this is a loop invariant load and
7563 thus we can insert it on the preheader edge. */
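/* For example, in
     for (i = 0; i < n; i++)
       a[i] = *p;
   with *p loop-invariant, the scalar load can be emitted once on the
   preheader edge and its value then broadcast into a vector by
   vect_init_vector. */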
7564 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7565 && !nested_in_vect_loop
7566 && hoist_defs_of_uses (stmt, loop))
7568 if (dump_enabled_p ())
7570 dump_printf_loc (MSG_NOTE, vect_location,
7571 "hoisting out of the vectorized "
7572 "loop: ");
7573 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7575 tree tem = copy_ssa_name (scalar_dest);
7576 gsi_insert_on_edge_immediate
7577 (loop_preheader_edge (loop),
7578 gimple_build_assign (tem,
7579 unshare_expr
7580 (gimple_assign_rhs1 (stmt))));
7581 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7582 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7583 set_vinfo_for_stmt (new_stmt,
7584 new_stmt_vec_info (new_stmt, vinfo));
7586 else
7588 gimple_stmt_iterator gsi2 = *gsi;
7589 gsi_next (&gsi2);
7590 new_temp = vect_init_vector (stmt, scalar_dest,
7591 vectype, &gsi2);
7592 new_stmt = SSA_NAME_DEF_STMT (new_temp);
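/* For a VMAT_CONTIGUOUS_REVERSE (negative-step) access the vector was
   loaded in memory order, so reverse its elements here with a permute,
   e.g. {a[3],a[2],a[1],a[0]} for a four-element vector. */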
7596 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7598 tree perm_mask = perm_mask_for_reverse (vectype);
7599 new_temp = permute_vec_elements (new_temp, new_temp,
7600 perm_mask, stmt, gsi);
7601 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7604 /* Collect vector loads and later create their permutation in
7605 vect_transform_grouped_load (). */
7606 if (grouped_load || slp_perm)
7607 dr_chain.quick_push (new_temp);
7609 /* Store vector loads in the corresponding SLP_NODE. */
7610 if (slp && !slp_perm)
7611 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7613 /* Bump the vector pointer to account for a gap or for excess
7614 elements loaded for a permuted SLP load. */
7615 if (group_gap_adj != 0)
7617 bool ovf;
7618 tree bump
7619 = wide_int_to_tree (sizetype,
7620 wi::smul (TYPE_SIZE_UNIT (elem_type),
7621 group_gap_adj, &ovf));
7622 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7623 stmt, bump);
7627 if (slp && !slp_perm)
7628 continue;
7630 if (slp_perm)
7632 unsigned n_perms;
7633 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7634 slp_node_instance, false,
7635 &n_perms))
7637 dr_chain.release ();
7638 return false;
7641 else
7643 if (grouped_load)
7645 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7646 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7647 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7649 else
7651 if (j == 0)
7652 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7653 else
7654 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7655 prev_stmt_info = vinfo_for_stmt (new_stmt);
7658 dr_chain.release ();
7661 return true;
7664 /* Function vect_is_simple_cond.
7666 Input:
7667 VINFO - the vect info of the loop or basic block that is being vectorized.
7668 COND - Condition that is checked for simple use.
7670 Output:
7671 *COMP_VECTYPE - the vector type for the comparison.
7672 *DTS - The def types for the arguments of the comparison.
7674 Returns whether a COND can be vectorized. Checks whether
7675 condition operands are supportable using vect_is_simple_use. */
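/* For example, a COND of the form x_1 < y_2 whose operands are SSA names
   or constants usable by the vectorizer is simple, as is a bare boolean
   SSA name used as a mask. */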
7677 static bool
7678 vect_is_simple_cond (tree cond, vec_info *vinfo,
7679 tree *comp_vectype, enum vect_def_type *dts)
7681 tree lhs, rhs;
7682 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7684 /* Mask case. */
7685 if (TREE_CODE (cond) == SSA_NAME
7686 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7688 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7689 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7690 &dts[0], comp_vectype)
7691 || !*comp_vectype
7692 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7693 return false;
7694 return true;
7697 if (!COMPARISON_CLASS_P (cond))
7698 return false;
7700 lhs = TREE_OPERAND (cond, 0);
7701 rhs = TREE_OPERAND (cond, 1);
7703 if (TREE_CODE (lhs) == SSA_NAME)
7705 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7706 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7707 return false;
7709 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7710 || TREE_CODE (lhs) == FIXED_CST)
7711 dts[0] = vect_constant_def;
7712 else
7713 return false;
7715 if (TREE_CODE (rhs) == SSA_NAME)
7717 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7718 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7719 return false;
7721 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7722 || TREE_CODE (rhs) == FIXED_CST)
7723 dts[1] = vect_constant_def;
7724 else
7725 return false;
7727 if (vectype1 && vectype2
7728 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7729 return false;
7731 *comp_vectype = vectype1 ? vectype1 : vectype2;
7732 return true;
7735 /* vectorizable_condition.
7737 Check if STMT is a conditional modify expression that can be vectorized.
7738 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7739 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7740 at GSI.
7742 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7743 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7744 the else clause if it is 2).
7746 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
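/* For example, a scalar statement such as
     x_5 = a_1 < b_2 ? c_3 : d_4;
   is replaced by a VEC_COND_EXPR operating on the corresponding vector
   (or mask) operands. */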
7748 bool
7749 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7750 gimple **vec_stmt, tree reduc_def, int reduc_index,
7751 slp_tree slp_node)
7753 tree scalar_dest = NULL_TREE;
7754 tree vec_dest = NULL_TREE;
7755 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7756 tree then_clause, else_clause;
7757 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7758 tree comp_vectype = NULL_TREE;
7759 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7760 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7761 tree vec_compare;
7762 tree new_temp;
7763 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7764 enum vect_def_type dts[4]
7765 = {vect_unknown_def_type, vect_unknown_def_type,
7766 vect_unknown_def_type, vect_unknown_def_type};
7767 int ndts = 4;
7768 int ncopies;
7769 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7770 stmt_vec_info prev_stmt_info = NULL;
7771 int i, j;
7772 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7773 vec<tree> vec_oprnds0 = vNULL;
7774 vec<tree> vec_oprnds1 = vNULL;
7775 vec<tree> vec_oprnds2 = vNULL;
7776 vec<tree> vec_oprnds3 = vNULL;
7777 tree vec_cmp_type;
7778 bool masked = false;
7780 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7781 return false;
7783 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7785 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7786 return false;
7788 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7789 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7790 && reduc_def))
7791 return false;
7793 /* FORNOW: not yet supported. */
7794 if (STMT_VINFO_LIVE_P (stmt_info))
7796 if (dump_enabled_p ())
7797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7798 "value used after loop.\n");
7799 return false;
7803 /* Is this a vectorizable conditional operation? */
7804 if (!is_gimple_assign (stmt))
7805 return false;
7807 code = gimple_assign_rhs_code (stmt);
7809 if (code != COND_EXPR)
7810 return false;
7812 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7813 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7814 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7816 if (slp_node)
7817 ncopies = 1;
7818 else
7819 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7821 gcc_assert (ncopies >= 1);
7822 if (reduc_index && ncopies > 1)
7823 return false; /* FORNOW */
7825 cond_expr = gimple_assign_rhs1 (stmt);
7826 then_clause = gimple_assign_rhs2 (stmt);
7827 else_clause = gimple_assign_rhs3 (stmt);
7829 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7830 &comp_vectype, &dts[0])
7831 || !comp_vectype)
7832 return false;
7834 gimple *def_stmt;
7835 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7836 &vectype1))
7837 return false;
7838 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7839 &vectype2))
7840 return false;
7842 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7843 return false;
7845 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7846 return false;
7848 masked = !COMPARISON_CLASS_P (cond_expr);
7849 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7851 if (vec_cmp_type == NULL_TREE)
7852 return false;
7854 cond_code = TREE_CODE (cond_expr);
7855 if (!masked)
7857 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7858 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7861 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7863 /* Boolean values may have another representation in vectors
7864 and therefore we prefer bit operations over comparison for
7865 them (which also works for scalar masks). We store opcodes
7866 to use in bitop1 and bitop2. The statement is vectorized as
7867 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7868 depending on bitop1 and bitop2 arity. */
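/* For instance, for boolean operands GT_EXPR becomes rhs1 & ~rhs2
   (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR) and EQ_EXPR becomes
   ~(rhs1 ^ rhs2). */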
7869 switch (cond_code)
7871 case GT_EXPR:
7872 bitop1 = BIT_NOT_EXPR;
7873 bitop2 = BIT_AND_EXPR;
7874 break;
7875 case GE_EXPR:
7876 bitop1 = BIT_NOT_EXPR;
7877 bitop2 = BIT_IOR_EXPR;
7878 break;
7879 case LT_EXPR:
7880 bitop1 = BIT_NOT_EXPR;
7881 bitop2 = BIT_AND_EXPR;
7882 std::swap (cond_expr0, cond_expr1);
7883 break;
7884 case LE_EXPR:
7885 bitop1 = BIT_NOT_EXPR;
7886 bitop2 = BIT_IOR_EXPR;
7887 std::swap (cond_expr0, cond_expr1);
7888 break;
7889 case NE_EXPR:
7890 bitop1 = BIT_XOR_EXPR;
7891 break;
7892 case EQ_EXPR:
7893 bitop1 = BIT_XOR_EXPR;
7894 bitop2 = BIT_NOT_EXPR;
7895 break;
7896 default:
7897 return false;
7899 cond_code = SSA_NAME;
7902 if (!vec_stmt)
7904 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7905 if (bitop1 != NOP_EXPR)
7907 machine_mode mode = TYPE_MODE (comp_vectype);
7908 optab optab;
7910 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7911 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7912 return false;
7914 if (bitop2 != NOP_EXPR)
7916 optab = optab_for_tree_code (bitop2, comp_vectype,
7917 optab_default);
7918 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7919 return false;
7922 if (expand_vec_cond_expr_p (vectype, comp_vectype,
7923 cond_code))
7925 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
7926 return true;
7928 return false;
7931 /* Transform. */
7933 if (!slp_node)
7935 vec_oprnds0.create (1);
7936 vec_oprnds1.create (1);
7937 vec_oprnds2.create (1);
7938 vec_oprnds3.create (1);
7941 /* Handle def. */
7942 scalar_dest = gimple_assign_lhs (stmt);
7943 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7945 /* Handle cond expr. */
7946 for (j = 0; j < ncopies; j++)
7948 gassign *new_stmt = NULL;
7949 if (j == 0)
7951 if (slp_node)
7953 auto_vec<tree, 4> ops;
7954 auto_vec<vec<tree>, 4> vec_defs;
7956 if (masked)
7957 ops.safe_push (cond_expr);
7958 else
7960 ops.safe_push (cond_expr0);
7961 ops.safe_push (cond_expr1);
7963 ops.safe_push (then_clause);
7964 ops.safe_push (else_clause);
7965 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7966 vec_oprnds3 = vec_defs.pop ();
7967 vec_oprnds2 = vec_defs.pop ();
7968 if (!masked)
7969 vec_oprnds1 = vec_defs.pop ();
7970 vec_oprnds0 = vec_defs.pop ();
7972 else
7974 gimple *gtemp;
7975 if (masked)
7977 vec_cond_lhs
7978 = vect_get_vec_def_for_operand (cond_expr, stmt,
7979 comp_vectype);
7980 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7981 &gtemp, &dts[0]);
7983 else
7985 vec_cond_lhs
7986 = vect_get_vec_def_for_operand (cond_expr0,
7987 stmt, comp_vectype);
7988 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
7990 vec_cond_rhs
7991 = vect_get_vec_def_for_operand (cond_expr1,
7992 stmt, comp_vectype);
7993 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
7995 if (reduc_index == 1)
7996 vec_then_clause = reduc_def;
7997 else
7999 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8000 stmt);
8001 vect_is_simple_use (then_clause, loop_vinfo,
8002 &gtemp, &dts[2]);
8004 if (reduc_index == 2)
8005 vec_else_clause = reduc_def;
8006 else
8008 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8009 stmt);
8010 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8014 else
8016 vec_cond_lhs
8017 = vect_get_vec_def_for_stmt_copy (dts[0],
8018 vec_oprnds0.pop ());
8019 if (!masked)
8020 vec_cond_rhs
8021 = vect_get_vec_def_for_stmt_copy (dts[1],
8022 vec_oprnds1.pop ());
8024 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8025 vec_oprnds2.pop ());
8026 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8027 vec_oprnds3.pop ());
8030 if (!slp_node)
8032 vec_oprnds0.quick_push (vec_cond_lhs);
8033 if (!masked)
8034 vec_oprnds1.quick_push (vec_cond_rhs);
8035 vec_oprnds2.quick_push (vec_then_clause);
8036 vec_oprnds3.quick_push (vec_else_clause);
8039 /* Arguments are ready. Create the new vector stmt. */
8040 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8042 vec_then_clause = vec_oprnds2[i];
8043 vec_else_clause = vec_oprnds3[i];
8045 if (masked)
8046 vec_compare = vec_cond_lhs;
8047 else
8049 vec_cond_rhs = vec_oprnds1[i];
8050 if (bitop1 == NOP_EXPR)
8051 vec_compare = build2 (cond_code, vec_cmp_type,
8052 vec_cond_lhs, vec_cond_rhs);
8053 else
8055 new_temp = make_ssa_name (vec_cmp_type);
8056 if (bitop1 == BIT_NOT_EXPR)
8057 new_stmt = gimple_build_assign (new_temp, bitop1,
8058 vec_cond_rhs);
8059 else
8060 new_stmt
8061 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8062 vec_cond_rhs);
8063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8064 if (bitop2 == NOP_EXPR)
8065 vec_compare = new_temp;
8066 else if (bitop2 == BIT_NOT_EXPR)
8068 /* Instead of doing ~x ? y : z do x ? z : y. */
8069 vec_compare = new_temp;
8070 std::swap (vec_then_clause, vec_else_clause);
8072 else
8074 vec_compare = make_ssa_name (vec_cmp_type);
8075 new_stmt
8076 = gimple_build_assign (vec_compare, bitop2,
8077 vec_cond_lhs, new_temp);
8078 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8082 new_temp = make_ssa_name (vec_dest);
8083 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8084 vec_compare, vec_then_clause,
8085 vec_else_clause);
8086 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8087 if (slp_node)
8088 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8091 if (slp_node)
8092 continue;
8094 if (j == 0)
8095 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8096 else
8097 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8099 prev_stmt_info = vinfo_for_stmt (new_stmt);
8102 vec_oprnds0.release ();
8103 vec_oprnds1.release ();
8104 vec_oprnds2.release ();
8105 vec_oprnds3.release ();
8107 return true;
8110 /* vectorizable_comparison.
8112 Check if STMT is a comparison expression that can be vectorized.
8113 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8114 comparison, put it in VEC_STMT, and insert it at GSI.
8116 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
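/* For example, a scalar statement such as
     m_3 = a_1 < b_2;
   is turned into a vector comparison that produces a boolean (mask)
   vector result. */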
8118 static bool
8119 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8120 gimple **vec_stmt, tree reduc_def,
8121 slp_tree slp_node)
8123 tree lhs, rhs1, rhs2;
8124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8125 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8126 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8127 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8128 tree new_temp;
8129 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8130 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8131 int ndts = 2;
8132 unsigned nunits;
8133 int ncopies;
8134 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8135 stmt_vec_info prev_stmt_info = NULL;
8136 int i, j;
8137 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8138 vec<tree> vec_oprnds0 = vNULL;
8139 vec<tree> vec_oprnds1 = vNULL;
8140 gimple *def_stmt;
8141 tree mask_type;
8142 tree mask;
8144 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8145 return false;
8147 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8148 return false;
8150 mask_type = vectype;
8151 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8153 if (slp_node)
8154 ncopies = 1;
8155 else
8156 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8158 gcc_assert (ncopies >= 1);
8159 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8160 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8161 && reduc_def))
8162 return false;
8164 if (STMT_VINFO_LIVE_P (stmt_info))
8166 if (dump_enabled_p ())
8167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8168 "value used after loop.\n");
8169 return false;
8172 if (!is_gimple_assign (stmt))
8173 return false;
8175 code = gimple_assign_rhs_code (stmt);
8177 if (TREE_CODE_CLASS (code) != tcc_comparison)
8178 return false;
8180 rhs1 = gimple_assign_rhs1 (stmt);
8181 rhs2 = gimple_assign_rhs2 (stmt);
8183 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8184 &dts[0], &vectype1))
8185 return false;
8187 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8188 &dts[1], &vectype2))
8189 return false;
8191 if (vectype1 && vectype2
8192 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8193 return false;
8195 vectype = vectype1 ? vectype1 : vectype2;
8197 /* Invariant comparison. */
8198 if (!vectype)
8200 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8201 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8202 return false;
8204 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8205 return false;
8207 /* Can't compare mask and non-mask types. */
8208 if (vectype1 && vectype2
8209 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8210 return false;
8212 /* Boolean values may have another representation in vectors
8213 and therefore we prefer bit operations over comparison for
8214 them (which also works for scalar masks). We store opcodes
8215 to use in bitop1 and bitop2. The statement is vectorized as
8216 BITOP2 (rhs1 BITOP1 rhs2) or
8217 rhs1 BITOP2 (BITOP1 rhs2)
8218 depending on bitop1 and bitop2 arity. */
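/* For instance, for boolean operands LE_EXPR is rewritten as
   rhs2 | ~rhs1 (the operands are swapped, with bitop1 = BIT_NOT_EXPR
   and bitop2 = BIT_IOR_EXPR), while NE_EXPR is simply rhs1 ^ rhs2. */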
8219 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8221 if (code == GT_EXPR)
8223 bitop1 = BIT_NOT_EXPR;
8224 bitop2 = BIT_AND_EXPR;
8226 else if (code == GE_EXPR)
8228 bitop1 = BIT_NOT_EXPR;
8229 bitop2 = BIT_IOR_EXPR;
8231 else if (code == LT_EXPR)
8233 bitop1 = BIT_NOT_EXPR;
8234 bitop2 = BIT_AND_EXPR;
8235 std::swap (rhs1, rhs2);
8236 std::swap (dts[0], dts[1]);
8238 else if (code == LE_EXPR)
8240 bitop1 = BIT_NOT_EXPR;
8241 bitop2 = BIT_IOR_EXPR;
8242 std::swap (rhs1, rhs2);
8243 std::swap (dts[0], dts[1]);
8245 else
8247 bitop1 = BIT_XOR_EXPR;
8248 if (code == EQ_EXPR)
8249 bitop2 = BIT_NOT_EXPR;
8253 if (!vec_stmt)
8255 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8256 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8257 dts, ndts, NULL, NULL);
8258 if (bitop1 == NOP_EXPR)
8259 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8260 else
8262 machine_mode mode = TYPE_MODE (vectype);
8263 optab optab;
8265 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8266 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8267 return false;
8269 if (bitop2 != NOP_EXPR)
8271 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8272 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8273 return false;
8275 return true;
8279 /* Transform. */
8280 if (!slp_node)
8282 vec_oprnds0.create (1);
8283 vec_oprnds1.create (1);
8286 /* Handle def. */
8287 lhs = gimple_assign_lhs (stmt);
8288 mask = vect_create_destination_var (lhs, mask_type);
8290 /* Handle cmp expr. */
8291 for (j = 0; j < ncopies; j++)
8293 gassign *new_stmt = NULL;
8294 if (j == 0)
8296 if (slp_node)
8298 auto_vec<tree, 2> ops;
8299 auto_vec<vec<tree>, 2> vec_defs;
8301 ops.safe_push (rhs1);
8302 ops.safe_push (rhs2);
8303 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8304 vec_oprnds1 = vec_defs.pop ();
8305 vec_oprnds0 = vec_defs.pop ();
8307 else
8309 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8310 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8313 else
8315 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8316 vec_oprnds0.pop ());
8317 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8318 vec_oprnds1.pop ());
8321 if (!slp_node)
8323 vec_oprnds0.quick_push (vec_rhs1);
8324 vec_oprnds1.quick_push (vec_rhs2);
8327 /* Arguments are ready. Create the new vector stmt. */
8328 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8330 vec_rhs2 = vec_oprnds1[i];
8332 new_temp = make_ssa_name (mask);
8333 if (bitop1 == NOP_EXPR)
8335 new_stmt = gimple_build_assign (new_temp, code,
8336 vec_rhs1, vec_rhs2);
8337 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8339 else
8341 if (bitop1 == BIT_NOT_EXPR)
8342 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8343 else
8344 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8345 vec_rhs2);
8346 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8347 if (bitop2 != NOP_EXPR)
8349 tree res = make_ssa_name (mask);
8350 if (bitop2 == BIT_NOT_EXPR)
8351 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8352 else
8353 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8354 new_temp);
8355 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8358 if (slp_node)
8359 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8362 if (slp_node)
8363 continue;
8365 if (j == 0)
8366 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8367 else
8368 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8370 prev_stmt_info = vinfo_for_stmt (new_stmt);
8373 vec_oprnds0.release ();
8374 vec_oprnds1.release ();
8376 return true;
8379 /* Make sure the statement is vectorizable. */
8381 bool
8382 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8384 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8385 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8386 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8387 bool ok;
8388 tree scalar_type, vectype;
8389 gimple *pattern_stmt;
8390 gimple_seq pattern_def_seq;
8392 if (dump_enabled_p ())
8394 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8395 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8398 if (gimple_has_volatile_ops (stmt))
8400 if (dump_enabled_p ())
8401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8402 "not vectorized: stmt has volatile operands\n");
8404 return false;
8407 /* Skip stmts that do not need to be vectorized. In loops this is expected
8408 to include:
8409 - the COND_EXPR which is the loop exit condition
8410 - any LABEL_EXPRs in the loop
8411 - computations that are used only for array indexing or loop control.
8412 In basic blocks we only analyze statements that are a part of some SLP
8413 instance, therefore, all the statements are relevant.
8415 A pattern statement needs to be analyzed instead of the original statement
8416 if the original statement is not relevant. Otherwise, we analyze both
8417 statements. In basic blocks we are called from some SLP instance
8418 traversal; don't analyze the pattern stmts instead, as the pattern stmts
8419 will already be part of an SLP instance. */
8421 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8422 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8423 && !STMT_VINFO_LIVE_P (stmt_info))
8425 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8426 && pattern_stmt
8427 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8428 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8430 /* Analyze PATTERN_STMT instead of the original stmt. */
8431 stmt = pattern_stmt;
8432 stmt_info = vinfo_for_stmt (pattern_stmt);
8433 if (dump_enabled_p ())
8435 dump_printf_loc (MSG_NOTE, vect_location,
8436 "==> examining pattern statement: ");
8437 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8440 else
8442 if (dump_enabled_p ())
8443 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8445 return true;
8448 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8449 && node == NULL
8450 && pattern_stmt
8451 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8452 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8454 /* Analyze PATTERN_STMT too. */
8455 if (dump_enabled_p ())
8457 dump_printf_loc (MSG_NOTE, vect_location,
8458 "==> examining pattern statement: ");
8459 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8462 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8463 return false;
8466 if (is_pattern_stmt_p (stmt_info)
8467 && node == NULL
8468 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8470 gimple_stmt_iterator si;
8472 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8474 gimple *pattern_def_stmt = gsi_stmt (si);
8475 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8476 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8478 /* Analyze def stmt of STMT if it's a pattern stmt. */
8479 if (dump_enabled_p ())
8481 dump_printf_loc (MSG_NOTE, vect_location,
8482 "==> examining pattern def statement: ");
8483 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8486 if (!vect_analyze_stmt (pattern_def_stmt,
8487 need_to_vectorize, node))
8488 return false;
8493 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8495 case vect_internal_def:
8496 break;
8498 case vect_reduction_def:
8499 case vect_nested_cycle:
8500 gcc_assert (!bb_vinfo
8501 && (relevance == vect_used_in_outer
8502 || relevance == vect_used_in_outer_by_reduction
8503 || relevance == vect_used_by_reduction
8504 || relevance == vect_unused_in_scope
8505 || relevance == vect_used_only_live));
8506 break;
8508 case vect_induction_def:
8509 case vect_constant_def:
8510 case vect_external_def:
8511 case vect_unknown_def_type:
8512 default:
8513 gcc_unreachable ();
8516 if (bb_vinfo)
8518 gcc_assert (PURE_SLP_STMT (stmt_info));
8520 /* Memory accesses already got their vector type assigned
8521 in vect_analyze_data_refs. */
8522 if (! STMT_VINFO_DATA_REF (stmt_info))
8524 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8525 if (dump_enabled_p ())
8527 dump_printf_loc (MSG_NOTE, vect_location,
8528 "get vectype for scalar type: ");
8529 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8530 dump_printf (MSG_NOTE, "\n");
8533 vectype = get_vectype_for_scalar_type (scalar_type);
8534 if (!vectype)
8536 if (dump_enabled_p ())
8538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8539 "not SLPed: unsupported data-type ");
8540 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8541 scalar_type);
8542 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8544 return false;
8547 if (dump_enabled_p ())
8549 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8550 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8551 dump_printf (MSG_NOTE, "\n");
8554 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8558 if (STMT_VINFO_RELEVANT_P (stmt_info))
8560 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8561 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8562 || (is_gimple_call (stmt)
8563 && gimple_call_lhs (stmt) == NULL_TREE));
8564 *need_to_vectorize = true;
8567 if (PURE_SLP_STMT (stmt_info) && !node)
8569 dump_printf_loc (MSG_NOTE, vect_location,
8570 "handled only by SLP analysis\n");
8571 return true;
8574 ok = true;
8575 if (!bb_vinfo
8576 && (STMT_VINFO_RELEVANT_P (stmt_info)
8577 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8578 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8579 || vectorizable_conversion (stmt, NULL, NULL, node)
8580 || vectorizable_shift (stmt, NULL, NULL, node)
8581 || vectorizable_operation (stmt, NULL, NULL, node)
8582 || vectorizable_assignment (stmt, NULL, NULL, node)
8583 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8584 || vectorizable_call (stmt, NULL, NULL, node)
8585 || vectorizable_store (stmt, NULL, NULL, node)
8586 || vectorizable_reduction (stmt, NULL, NULL, node)
8587 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8588 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8589 else
8591 if (bb_vinfo)
8592 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8593 || vectorizable_conversion (stmt, NULL, NULL, node)
8594 || vectorizable_shift (stmt, NULL, NULL, node)
8595 || vectorizable_operation (stmt, NULL, NULL, node)
8596 || vectorizable_assignment (stmt, NULL, NULL, node)
8597 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8598 || vectorizable_call (stmt, NULL, NULL, node)
8599 || vectorizable_store (stmt, NULL, NULL, node)
8600 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8601 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8604 if (!ok)
8606 if (dump_enabled_p ())
8608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8609 "not vectorized: relevant stmt not ");
8610 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8611 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8614 return false;
8617 if (bb_vinfo)
8618 return true;
8620 /* Stmts that are (also) "live" (i.e., used outside the loop)
8621 need extra handling, except for vectorizable reductions. */
8622 if (STMT_VINFO_LIVE_P (stmt_info)
8623 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8624 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8626 if (!ok)
8628 if (dump_enabled_p ())
8630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8631 "not vectorized: live stmt not ");
8632 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8633 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8636 return false;
8639 return true;
8643 /* Function vect_transform_stmt.
8645 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8647 bool
8648 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8649 bool *grouped_store, slp_tree slp_node,
8650 slp_instance slp_node_instance)
8652 bool is_store = false;
8653 gimple *vec_stmt = NULL;
8654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8655 bool done;
8657 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8658 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8660 switch (STMT_VINFO_TYPE (stmt_info))
8662 case type_demotion_vec_info_type:
8663 case type_promotion_vec_info_type:
8664 case type_conversion_vec_info_type:
8665 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8666 gcc_assert (done);
8667 break;
8669 case induc_vec_info_type:
8670 gcc_assert (!slp_node);
8671 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8672 gcc_assert (done);
8673 break;
8675 case shift_vec_info_type:
8676 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8677 gcc_assert (done);
8678 break;
8680 case op_vec_info_type:
8681 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8682 gcc_assert (done);
8683 break;
8685 case assignment_vec_info_type:
8686 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8687 gcc_assert (done);
8688 break;
8690 case load_vec_info_type:
8691 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8692 slp_node_instance);
8693 gcc_assert (done);
8694 break;
8696 case store_vec_info_type:
8697 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8698 gcc_assert (done);
8699 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8701 /* In case of interleaving, the whole chain is vectorized when the
8702 last store in the chain is reached. Store stmts before the last
8703 one are skipped, and their vec_stmt_info shouldn't be freed
8704 meanwhile. */
8705 *grouped_store = true;
8706 if (STMT_VINFO_VEC_STMT (stmt_info))
8707 is_store = true;
8709 else
8710 is_store = true;
8711 break;
8713 case condition_vec_info_type:
8714 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8715 gcc_assert (done);
8716 break;
8718 case comparison_vec_info_type:
8719 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8720 gcc_assert (done);
8721 break;
8723 case call_vec_info_type:
8724 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8725 stmt = gsi_stmt (*gsi);
8726 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8727 is_store = true;
8728 break;
8730 case call_simd_clone_vec_info_type:
8731 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8732 stmt = gsi_stmt (*gsi);
8733 break;
8735 case reduc_vec_info_type:
8736 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8737 gcc_assert (done);
8738 break;
8740 default:
8741 if (!STMT_VINFO_LIVE_P (stmt_info))
8743 if (dump_enabled_p ())
8744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8745 "stmt not supported.\n");
8746 gcc_unreachable ();
8750 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8751 This would break hybrid SLP vectorization. */
8752 if (slp_node)
8753 gcc_assert (!vec_stmt
8754 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8756 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8757 is being vectorized, but outside the immediately enclosing loop. */
8758 if (vec_stmt
8759 && STMT_VINFO_LOOP_VINFO (stmt_info)
8760 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8761 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8762 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8763 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8764 || STMT_VINFO_RELEVANT (stmt_info) ==
8765 vect_used_in_outer_by_reduction))
8767 struct loop *innerloop = LOOP_VINFO_LOOP (
8768 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8769 imm_use_iterator imm_iter;
8770 use_operand_p use_p;
8771 tree scalar_dest;
8772 gimple *exit_phi;
8774 if (dump_enabled_p ())
8775 dump_printf_loc (MSG_NOTE, vect_location,
8776 "Record the vdef for outer-loop vectorization.\n");
8778 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8779 (to be used when vectorizing outer-loop stmts that use the DEF of
8780 STMT). */
8781 if (gimple_code (stmt) == GIMPLE_PHI)
8782 scalar_dest = PHI_RESULT (stmt);
8783 else
8784 scalar_dest = gimple_assign_lhs (stmt);
8786 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8788 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8790 exit_phi = USE_STMT (use_p);
8791 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8796 /* Handle stmts whose DEF is used outside the loop-nest that is
8797 being vectorized. */
8798 if (slp_node)
8800 gimple *slp_stmt;
8801 int i;
8802 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8804 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8805 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8806 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8808 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8809 &vec_stmt);
8810 gcc_assert (done);
8814 else if (STMT_VINFO_LIVE_P (stmt_info)
8815 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8817 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8818 gcc_assert (done);
8821 if (vec_stmt)
8822 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8824 return is_store;
8828 /* Remove a group of stores (for SLP or interleaving), free their
8829 stmt_vec_info. */
8831 void
8832 vect_remove_stores (gimple *first_stmt)
8834 gimple *next = first_stmt;
8835 gimple *tmp;
8836 gimple_stmt_iterator next_si;
8838 while (next)
8840 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8842 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8843 if (is_pattern_stmt_p (stmt_info))
8844 next = STMT_VINFO_RELATED_STMT (stmt_info);
8845 /* Free the attached stmt_vec_info and remove the stmt. */
8846 next_si = gsi_for_stmt (next);
8847 unlink_stmt_vdef (next);
8848 gsi_remove (&next_si, true);
8849 release_defs (next);
8850 free_stmt_vec_info (next);
8851 next = tmp;
8856 /* Function new_stmt_vec_info.
8858 Create and initialize a new stmt_vec_info struct for STMT. */
8860 stmt_vec_info
8861 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8863 stmt_vec_info res;
8864 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8866 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8867 STMT_VINFO_STMT (res) = stmt;
8868 res->vinfo = vinfo;
8869 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8870 STMT_VINFO_LIVE_P (res) = false;
8871 STMT_VINFO_VECTYPE (res) = NULL;
8872 STMT_VINFO_VEC_STMT (res) = NULL;
8873 STMT_VINFO_VECTORIZABLE (res) = true;
8874 STMT_VINFO_IN_PATTERN_P (res) = false;
8875 STMT_VINFO_RELATED_STMT (res) = NULL;
8876 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8877 STMT_VINFO_DATA_REF (res) = NULL;
8878 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8879 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8881 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8882 STMT_VINFO_DR_OFFSET (res) = NULL;
8883 STMT_VINFO_DR_INIT (res) = NULL;
8884 STMT_VINFO_DR_STEP (res) = NULL;
8885 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8887 if (gimple_code (stmt) == GIMPLE_PHI
8888 && is_loop_header_bb_p (gimple_bb (stmt)))
8889 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8890 else
8891 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8893 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8894 STMT_SLP_TYPE (res) = loop_vect;
8895 STMT_VINFO_NUM_SLP_USES (res) = 0;
8897 GROUP_FIRST_ELEMENT (res) = NULL;
8898 GROUP_NEXT_ELEMENT (res) = NULL;
8899 GROUP_SIZE (res) = 0;
8900 GROUP_STORE_COUNT (res) = 0;
8901 GROUP_GAP (res) = 0;
8902 GROUP_SAME_DR_STMT (res) = NULL;
8904 return res;
8908 /* Create a vector for stmt_vec_info structs. */
8910 void
8911 init_stmt_vec_info_vec (void)
8913 gcc_assert (!stmt_vec_info_vec.exists ());
8914 stmt_vec_info_vec.create (50);
8918 /* Free the vector of stmt_vec_info structs. */
8920 void
8921 free_stmt_vec_info_vec (void)
8923 unsigned int i;
8924 stmt_vec_info info;
8925 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8926 if (info != NULL)
8927 free_stmt_vec_info (STMT_VINFO_STMT (info));
8928 gcc_assert (stmt_vec_info_vec.exists ());
8929 stmt_vec_info_vec.release ();
8933 /* Free stmt vectorization related info. */
8935 void
8936 free_stmt_vec_info (gimple *stmt)
8938 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8940 if (!stmt_info)
8941 return;
8943 /* Check if this statement has a related "pattern stmt"
8944 (introduced by the vectorizer during the pattern recognition
8945 pass). Free the pattern's stmt_vec_info and the def stmts'
8946 stmt_vec_infos too. */
8947 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8949 stmt_vec_info patt_info
8950 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8951 if (patt_info)
8953 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8954 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8955 gimple_set_bb (patt_stmt, NULL);
8956 tree lhs = gimple_get_lhs (patt_stmt);
8957 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8958 release_ssa_name (lhs);
8959 if (seq)
8961 gimple_stmt_iterator si;
8962 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8964 gimple *seq_stmt = gsi_stmt (si);
8965 gimple_set_bb (seq_stmt, NULL);
8966 lhs = gimple_get_lhs (seq_stmt);
8967 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8968 release_ssa_name (lhs);
8969 free_stmt_vec_info (seq_stmt);
8972 free_stmt_vec_info (patt_stmt);
8976 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8977 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8978 set_vinfo_for_stmt (stmt, NULL);
8979 free (stmt_info);
8983 /* Function get_vectype_for_scalar_type_and_size.
8985 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8986 by the target. */
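/* For example, with SCALAR_TYPE int (4 bytes on most targets) and
   SIZE 16 this would typically yield a four-element integer vector
   type, while SIZE 0 lets the target pick its preferred SIMD mode. */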
8988 static tree
8989 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8991 tree orig_scalar_type = scalar_type;
8992 machine_mode inner_mode = TYPE_MODE (scalar_type);
8993 machine_mode simd_mode;
8994 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8995 int nunits;
8996 tree vectype;
8998 if (nbytes == 0)
8999 return NULL_TREE;
9001 if (GET_MODE_CLASS (inner_mode) != MODE_INT
9002 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
9003 return NULL_TREE;
9005 /* For vector types of elements whose mode precision doesn't
9006 match their type's precision we use an element type of mode
9007 precision. The vectorization routines will have to make sure
9008 they support the proper result truncation/extension.
9009 We also make sure to build vector types with an INTEGER_TYPE
9010 component type only. */
9011 if (INTEGRAL_TYPE_P (scalar_type)
9012 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9013 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9014 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9015 TYPE_UNSIGNED (scalar_type));
9017 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9018 When the component mode passes the above test simply use a type
9019 corresponding to that mode. The theory is that any use that
9020 would cause problems with this will disable vectorization anyway. */
9021 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9022 && !INTEGRAL_TYPE_P (scalar_type))
9023 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9025 /* We can't build a vector type of elements with alignment bigger than
9026 their size. */
9027 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9028 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9029 TYPE_UNSIGNED (scalar_type));
9031 /* If we fell back to using the mode, fail if there was
9032 no scalar type for it. */
9033 if (scalar_type == NULL_TREE)
9034 return NULL_TREE;
9036 /* If no size was supplied, use the mode the target prefers. Otherwise
9037 look up a vector mode of the specified size. */
9038 if (size == 0)
9039 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9040 else
9041 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9042 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9043 if (nunits <= 1)
9044 return NULL_TREE;
9046 vectype = build_vector_type (scalar_type, nunits);
9048 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9049 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9050 return NULL_TREE;
9052 /* Re-attach the address-space qualifier if we canonicalized the scalar
9053 type. */
9054 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9055 return build_qualified_type
9056 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9058 return vectype;
9061 unsigned int current_vector_size;
9063 /* Function get_vectype_for_scalar_type.
9065 Returns the vector type corresponding to SCALAR_TYPE as supported
9066 by the target. */
9068 tree
9069 get_vectype_for_scalar_type (tree scalar_type)
9071 tree vectype;
9072 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9073 current_vector_size);
9074 if (vectype
9075 && current_vector_size == 0)
9076 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9077 return vectype;
9080 /* Function get_mask_type_for_scalar_type.
9082 Returns the mask type corresponding to the result of a comparison
9083 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9085 tree
9086 get_mask_type_for_scalar_type (tree scalar_type)
9088 tree vectype = get_vectype_for_scalar_type (scalar_type);
9090 if (!vectype)
9091 return NULL;
9093 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9094 current_vector_size);
9097 /* Function get_same_sized_vectype.
9099 Returns a vector type corresponding to SCALAR_TYPE with the same size
9100 as VECTOR_TYPE, if supported by the target. */
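/* For example, given SCALAR_TYPE float and a 16-byte VECTOR_TYPE this
   would return a 4 x float vector type, assuming the target supports
   such a mode. */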
9102 tree
9103 get_same_sized_vectype (tree scalar_type, tree vector_type)
9105 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9106 return build_same_sized_truth_vector_type (vector_type);
9108 return get_vectype_for_scalar_type_and_size
9109 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9112 /* Function vect_is_simple_use.
9114 Input:
9115 VINFO - the vect info of the loop or basic block that is being vectorized.
9116 OPERAND - operand in the loop or bb.
9117 Output:
9118 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9119 DT - the type of definition
9121 Returns whether a stmt with OPERAND can be vectorized.
9122 For loops, supportable operands are constants, loop invariants, and operands
9123 that are defined by the current iteration of the loop. Unsupportable
9124 operands are those that are defined by a previous iteration of the loop (as
9125 is the case in reduction/induction computations).
9126 For basic blocks, supportable operands are constants and bb invariants.
9127 For now, operands defined outside the basic block are not supported. */
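/* For example, for
     x_3 = y_2 + c_1;
   inside the loop, y_2 defined by another statement of the same
   iteration is a vect_internal_def, c_1 defined before the loop is a
   vect_external_def, and a literal constant operand would be a
   vect_constant_def. */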
9129 bool
9130 vect_is_simple_use (tree operand, vec_info *vinfo,
9131 gimple **def_stmt, enum vect_def_type *dt)
9133 *def_stmt = NULL;
9134 *dt = vect_unknown_def_type;
9136 if (dump_enabled_p ())
9138 dump_printf_loc (MSG_NOTE, vect_location,
9139 "vect_is_simple_use: operand ");
9140 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9141 dump_printf (MSG_NOTE, "\n");
9144 if (CONSTANT_CLASS_P (operand))
9146 *dt = vect_constant_def;
9147 return true;
9150 if (is_gimple_min_invariant (operand))
9152 *dt = vect_external_def;
9153 return true;
9156 if (TREE_CODE (operand) != SSA_NAME)
9158 if (dump_enabled_p ())
9159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9160 "not ssa-name.\n");
9161 return false;
9164 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9166 *dt = vect_external_def;
9167 return true;
9170 *def_stmt = SSA_NAME_DEF_STMT (operand);
9171 if (dump_enabled_p ())
9173 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9174 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9177 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9178 *dt = vect_external_def;
9179 else
9181 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9182 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9185 if (dump_enabled_p ())
9187 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9188 switch (*dt)
9190 case vect_uninitialized_def:
9191 dump_printf (MSG_NOTE, "uninitialized\n");
9192 break;
9193 case vect_constant_def:
9194 dump_printf (MSG_NOTE, "constant\n");
9195 break;
9196 case vect_external_def:
9197 dump_printf (MSG_NOTE, "external\n");
9198 break;
9199 case vect_internal_def:
9200 dump_printf (MSG_NOTE, "internal\n");
9201 break;
9202 case vect_induction_def:
9203 dump_printf (MSG_NOTE, "induction\n");
9204 break;
9205 case vect_reduction_def:
9206 dump_printf (MSG_NOTE, "reduction\n");
9207 break;
9208 case vect_double_reduction_def:
9209 dump_printf (MSG_NOTE, "double reduction\n");
9210 break;
9211 case vect_nested_cycle:
9212 dump_printf (MSG_NOTE, "nested cycle\n");
9213 break;
9214 case vect_unknown_def_type:
9215 dump_printf (MSG_NOTE, "unknown\n");
9216 break;
9220 if (*dt == vect_unknown_def_type)
9222 if (dump_enabled_p ())
9223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9224 "Unsupported pattern.\n");
9225 return false;
9228 switch (gimple_code (*def_stmt))
9230 case GIMPLE_PHI:
9231 case GIMPLE_ASSIGN:
9232 case GIMPLE_CALL:
9233 break;
9234 default:
9235 if (dump_enabled_p ())
9236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9237 "unsupported defining stmt:\n");
9238 return false;
9241 return true;
9244 /* Function vect_is_simple_use.
9246 Same as vect_is_simple_use but also determines the vector operand
9247 type of OPERAND and stores it in *VECTYPE. If the definition of
9248 OPERAND is vect_uninitialized_def, vect_constant_def or
9249 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9250 is responsible for computing the best-suited vector type for the
9251 scalar operand. */
9253 bool
9254 vect_is_simple_use (tree operand, vec_info *vinfo,
9255 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9257 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9258 return false;
9260 /* Now get a vector type if the def is internal, otherwise supply
9261 NULL_TREE and leave it up to the caller to figure out a proper
9262 type for the use stmt. */
9263 if (*dt == vect_internal_def
9264 || *dt == vect_induction_def
9265 || *dt == vect_reduction_def
9266 || *dt == vect_double_reduction_def
9267 || *dt == vect_nested_cycle)
9269 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9271 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9272 && !STMT_VINFO_RELEVANT (stmt_info)
9273 && !STMT_VINFO_LIVE_P (stmt_info))
9274 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9276 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9277 gcc_assert (*vectype != NULL_TREE);
9279 else if (*dt == vect_uninitialized_def
9280 || *dt == vect_constant_def
9281 || *dt == vect_external_def)
9282 *vectype = NULL_TREE;
9283 else
9284 gcc_unreachable ();
9286 return true;
9290 /* Function supportable_widening_operation
9292 Check whether an operation represented by the code CODE is a
9293 widening operation that is supported by the target platform in
9294 vector form (i.e., when operating on arguments of type VECTYPE_IN
9295 producing a result of type VECTYPE_OUT).
9297 Widening operations we currently support are NOP (CONVERT), FLOAT
9298 and WIDEN_MULT. This function checks if these operations are supported
9299 by the target platform either directly (via vector tree-codes), or via
9300 target builtins.
9302 Output:
9303 - CODE1 and CODE2 are codes of vector operations to be used when
9304 vectorizing the operation, if available.
9305 - MULTI_STEP_CVT determines the number of required intermediate steps in
9306 case of multi-step conversion (like char->short->int - in that case
9307 MULTI_STEP_CVT will be 1).
9308 - INTERM_TYPES contains the intermediate type required to perform the
9309 widening operation (short in the above example). */
9311 bool
9312 supportable_widening_operation (enum tree_code code, gimple *stmt,
9313 tree vectype_out, tree vectype_in,
9314 enum tree_code *code1, enum tree_code *code2,
9315 int *multi_step_cvt,
9316 vec<tree> *interm_types)
9318 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9319 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9320 struct loop *vect_loop = NULL;
9321 machine_mode vec_mode;
9322 enum insn_code icode1, icode2;
9323 optab optab1, optab2;
9324 tree vectype = vectype_in;
9325 tree wide_vectype = vectype_out;
9326 enum tree_code c1, c2;
9327 int i;
9328 tree prev_type, intermediate_type;
9329 machine_mode intermediate_mode, prev_mode;
9330 optab optab3, optab4;
9332 *multi_step_cvt = 0;
9333 if (loop_info)
9334 vect_loop = LOOP_VINFO_LOOP (loop_info);
9336 switch (code)
9338 case WIDEN_MULT_EXPR:
9339 /* The result of a vectorized widening operation usually requires
9340 two vectors (because the widened results do not fit into one vector).
9341 The generated vector results would normally be expected to be
9342 generated in the same order as in the original scalar computation,
9343 i.e. if 8 results are generated in each vector iteration, they are
9344 to be organized as follows:
9345 vect1: [res1,res2,res3,res4],
9346 vect2: [res5,res6,res7,res8].
9348 However, in the special case that the result of the widening
9349 operation is used in a reduction computation only, the order doesn't
9350 matter (because when vectorizing a reduction we change the order of
9351 the computation). Some targets can take advantage of this and
9352 generate more efficient code. For example, targets like Altivec,
9353 that support widen_mult using a sequence of {mult_even,mult_odd}
9354 generate the following vectors:
9355 vect1: [res1,res3,res5,res7],
9356 vect2: [res2,res4,res6,res8].
9358 When vectorizing outer-loops, we execute the inner-loop sequentially
9359 (each vectorized inner-loop iteration contributes to VF outer-loop
9360 iterations in parallel). We therefore don't allow changing the
9361 order of the computation in the inner-loop during outer-loop
9362 vectorization. */
9363 /* TODO: Another case in which order doesn't *really* matter is when we
9364 widen and then contract again, e.g. (short)((int)x * y >> 8).
9365 Normally, pack_trunc performs an even/odd permute, whereas the
9366 repack from an even/odd expansion would be an interleave, which
9367 would be significantly simpler for e.g. AVX2. */
9368 /* In any case, in order to avoid duplicating the code below, recurse
9369 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9370 are properly set up for the caller. If we fail, we'll continue with
9371 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9372 if (vect_loop
9373 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9374 && !nested_in_vect_loop_p (vect_loop, stmt)
9375 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9376 stmt, vectype_out, vectype_in,
9377 code1, code2, multi_step_cvt,
9378 interm_types))
9379 {
9380 /* Elements in a vector with vect_used_by_reduction property cannot
9381 be reordered if the use chain with this property does not have the
9382 same operation. One such example is s += a * b, where elements
9383 in a and b cannot be reordered. Here we check if the vector defined
9384 by STMT is only directly used in the reduction statement. */
9385 tree lhs = gimple_assign_lhs (stmt);
9386 use_operand_p dummy;
9387 gimple *use_stmt;
9388 stmt_vec_info use_stmt_info = NULL;
9389 if (single_imm_use (lhs, &dummy, &use_stmt)
9390 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9391 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9392 return true;
9393 }
9394 c1 = VEC_WIDEN_MULT_LO_EXPR;
9395 c2 = VEC_WIDEN_MULT_HI_EXPR;
9396 break;
9398 case DOT_PROD_EXPR:
9399 c1 = DOT_PROD_EXPR;
9400 c2 = DOT_PROD_EXPR;
9401 break;
9403 case SAD_EXPR:
9404 c1 = SAD_EXPR;
9405 c2 = SAD_EXPR;
9406 break;
9408 case VEC_WIDEN_MULT_EVEN_EXPR:
9409 /* Support the recursion induced just above. */
9410 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9411 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9412 break;
9414 case WIDEN_LSHIFT_EXPR:
9415 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9416 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9417 break;
9419 CASE_CONVERT:
9420 c1 = VEC_UNPACK_LO_EXPR;
9421 c2 = VEC_UNPACK_HI_EXPR;
9422 break;
9424 case FLOAT_EXPR:
9425 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9426 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9427 break;
9429 case FIX_TRUNC_EXPR:
9430 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9431 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9432 computing the operation. */
9433 return false;
9435 default:
9436 gcc_unreachable ();
9437 }
9439 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9440 std::swap (c1, c2);
9442 if (code == FIX_TRUNC_EXPR)
9443 {
9444 /* The signedness is determined from the output operand. */
9445 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9446 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9447 }
9448 else
9449 {
9450 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9451 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9452 }
9454 if (!optab1 || !optab2)
9455 return false;
9457 vec_mode = TYPE_MODE (vectype);
9458 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9459 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9460 return false;
9462 *code1 = c1;
9463 *code2 = c2;
9465 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9466 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9467 /* For scalar masks we may have different boolean
9468 vector types having the same QImode. Thus we
9469 also check the number of elements. */
9470 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9471 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9472 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9474 /* Check if it's a multi-step conversion that can be done using intermediate
9475 types. */
9477 prev_type = vectype;
9478 prev_mode = vec_mode;
9480 if (!CONVERT_EXPR_CODE_P (code))
9481 return false;
9483 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9484 intermediate steps in the promotion sequence. We try
9485 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9486 not. */
9487 interm_types->create (MAX_INTERM_CVT_STEPS);
9488 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9489 {
9490 intermediate_mode = insn_data[icode1].operand[0].mode;
9491 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9492 {
9493 intermediate_type
9494 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9495 current_vector_size);
9496 if (intermediate_mode != TYPE_MODE (intermediate_type))
9497 return false;
9498 }
9499 else
9500 intermediate_type
9501 = lang_hooks.types.type_for_mode (intermediate_mode,
9502 TYPE_UNSIGNED (prev_type));
9504 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9505 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9507 if (!optab3 || !optab4
9508 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9509 || insn_data[icode1].operand[0].mode != intermediate_mode
9510 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9511 || insn_data[icode2].operand[0].mode != intermediate_mode
9512 || ((icode1 = optab_handler (optab3, intermediate_mode))
9513 == CODE_FOR_nothing)
9514 || ((icode2 = optab_handler (optab4, intermediate_mode))
9515 == CODE_FOR_nothing))
9516 break;
9518 interm_types->quick_push (intermediate_type);
9519 (*multi_step_cvt)++;
9521 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9522 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9523 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9524 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9525 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9527 prev_type = intermediate_type;
9528 prev_mode = intermediate_mode;
9529 }
9531 interm_types->release ();
9532 return false;
9533 }
9536 /* Function supportable_narrowing_operation
9538 Check whether an operation represented by the code CODE is a
9539 narrowing operation that is supported by the target platform in
9540 vector form (i.e., when operating on arguments of type VECTYPE_IN
9541 and producing a result of type VECTYPE_OUT).
9543 Narrowing operations we currently support are NOP (CONVERT) and
9544 FIX_TRUNC. This function checks if these operations are supported by
9545 the target platform directly via vector tree-codes.
9547 Output:
9548 - CODE1 is the code of a vector operation to be used when
9549 vectorizing the operation, if available.
9550 - MULTI_STEP_CVT determines the number of required intermediate steps in
9551 case of multi-step conversion (like int->short->char - in that case
9552 MULTI_STEP_CVT will be 1).
9553 - INTERM_TYPES contains the intermediate type required to perform the
9554 narrowing operation (short in the above example). */
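/* A worked example, assuming 128-bit vectors: narrowing vector(4) int
   (VECTYPE_IN) to vector(16) char (VECTYPE_OUT) with NOP_EXPR needs an
   intermediate vector(8) short, since packing V4SI only reaches V8HI.
   On a target providing the pack_trunc optabs the function would be
   expected to return CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1 and
   INTERM_TYPES containing the short vector type.  */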
9556 bool
9557 supportable_narrowing_operation (enum tree_code code,
9558 tree vectype_out, tree vectype_in,
9559 enum tree_code *code1, int *multi_step_cvt,
9560 vec<tree> *interm_types)
9561 {
9562 machine_mode vec_mode;
9563 enum insn_code icode1;
9564 optab optab1, interm_optab;
9565 tree vectype = vectype_in;
9566 tree narrow_vectype = vectype_out;
9567 enum tree_code c1;
9568 tree intermediate_type, prev_type;
9569 machine_mode intermediate_mode, prev_mode;
9570 int i;
9571 bool uns;
9573 *multi_step_cvt = 0;
9574 switch (code)
9575 {
9576 CASE_CONVERT:
9577 c1 = VEC_PACK_TRUNC_EXPR;
9578 break;
9580 case FIX_TRUNC_EXPR:
9581 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9582 break;
9584 case FLOAT_EXPR:
9585 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9586 tree code and optabs used for computing the operation. */
9587 return false;
9589 default:
9590 gcc_unreachable ();
9591 }
9593 if (code == FIX_TRUNC_EXPR)
9594 /* The signedness is determined from the output operand. */
9595 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9596 else
9597 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9599 if (!optab1)
9600 return false;
9602 vec_mode = TYPE_MODE (vectype);
9603 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9604 return false;
9606 *code1 = c1;
9608 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9609 /* For scalar masks we may have different boolean
9610 vector types having the same QImode. Thus we
9611 also check the number of elements. */
9612 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9613 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9614 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9616 /* Check if it's a multi-step conversion that can be done using intermediate
9617 types. */
9618 prev_mode = vec_mode;
9619 prev_type = vectype;
9620 if (code == FIX_TRUNC_EXPR)
9621 uns = TYPE_UNSIGNED (vectype_out);
9622 else
9623 uns = TYPE_UNSIGNED (vectype);
9625 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9626 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9627 costly than signed. */
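/* The block below is only reached for multi-step conversions (the
   single-step case has already returned above).  It looks up the signed
   vector type with the same mode as VECTYPE_OUT and, if the corresponding
   pack optab handles the input mode with an identical result mode,
   switches the remaining steps to the signed conversion.  */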
9628 if (code == FIX_TRUNC_EXPR && uns)
9629 {
9630 enum insn_code icode2;
9632 intermediate_type
9633 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9634 interm_optab
9635 = optab_for_tree_code (c1, intermediate_type, optab_default);
9636 if (interm_optab != unknown_optab
9637 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
9638 && insn_data[icode1].operand[0].mode
9639 == insn_data[icode2].operand[0].mode)
9640 {
9641 uns = false;
9642 optab1 = interm_optab;
9643 icode1 = icode2;
9644 }
9645 }
9647 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9648 intermediate steps in the narrowing sequence. We try
9649 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9650 interm_types->create (MAX_INTERM_CVT_STEPS);
9651 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9652 {
9653 intermediate_mode = insn_data[icode1].operand[0].mode;
9654 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9655 {
9656 intermediate_type
9657 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9658 current_vector_size);
9659 if (intermediate_mode != TYPE_MODE (intermediate_type))
9660 return false;
9661 }
9662 else
9663 intermediate_type
9664 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9665 interm_optab
9666 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9667 optab_default);
9668 if (!interm_optab
9669 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9670 || insn_data[icode1].operand[0].mode != intermediate_mode
9671 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9672 == CODE_FOR_nothing))
9673 break;
9675 interm_types->quick_push (intermediate_type);
9676 (*multi_step_cvt)++;
9678 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9679 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9680 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9681 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9683 prev_mode = intermediate_mode;
9684 prev_type = intermediate_type;
9685 optab1 = interm_optab;
9686 }
9688 interm_types->release ();
9689 return false;