PR sanitizer/80403
[official-gcc.git] / gcc / tree-vect-stmts.c
blobbfb7185f5e0eb7333e1a6cb74e2f5eb95b5f4f8e
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern, in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - control stmts in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that its of one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT cause it had already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
583 vect_mark_relevant (worklist, def_stmt, relevant, false);
584 return true;
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
592 for i...
593 for j...
594 1. T0 = i + j
595 2. T1 = a[T0]
597 3. j = j + 1
599 Stmt 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
604 bool
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
609 unsigned int nbbs = loop->num_nodes;
610 gimple_stmt_iterator si;
611 gimple *stmt;
612 unsigned int i;
613 stmt_vec_info stmt_vinfo;
614 basic_block bb;
615 gimple *phi;
616 bool live_p;
617 enum vect_relevant relevant;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE, vect_location,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec<gimple *, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
628 bb = bbs[i];
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 phi = gsi_stmt (si);
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
638 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
639 vect_mark_relevant (&worklist, phi, relevant, live_p);
641 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
643 stmt = gsi_stmt (si);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
650 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
651 vect_mark_relevant (&worklist, stmt, relevant, live_p);
655 /* 2. Process_worklist */
656 while (worklist.length () > 0)
658 use_operand_p use_p;
659 ssa_op_iter iter;
661 stmt = worklist.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
670 of STMT. */
671 stmt_vinfo = vinfo_for_stmt (stmt);
672 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
687 case vect_reduction_def:
688 gcc_assert (relevant != vect_unused_in_scope);
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_scope
691 && relevant != vect_used_by_reduction
692 && relevant != vect_used_only_live)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of reduction.\n");
697 return false;
699 break;
701 case vect_nested_cycle:
702 if (relevant != vect_unused_in_scope
703 && relevant != vect_used_in_outer_by_reduction
704 && relevant != vect_used_in_outer)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "unsupported use of nested cycle.\n");
710 return false;
712 break;
714 case vect_double_reduction_def:
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_by_reduction
717 && relevant != vect_used_only_live)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of double reduction.\n");
723 return false;
725 break;
727 default:
728 break;
731 if (is_pattern_stmt_p (stmt_vinfo))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt))
738 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
739 tree op = gimple_assign_rhs1 (stmt);
741 i = 1;
742 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
744 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
745 relevant, &worklist, false)
746 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
747 relevant, &worklist, false))
748 return false;
749 i = 2;
751 for (; i < gimple_num_ops (stmt); i++)
753 op = gimple_op (stmt, i);
754 if (TREE_CODE (op) == SSA_NAME
755 && !process_use (stmt, op, loop_vinfo, relevant,
756 &worklist, false))
757 return false;
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, relevant,
766 &worklist, false))
767 return false;
771 else
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, relevant,
776 &worklist, false))
777 return false;
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
782 gather_scatter_info gs_info;
783 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
784 gcc_unreachable ();
785 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
786 &worklist, true))
787 return false;
789 } /* while worklist */
791 return true;
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
801 void
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 stmt_vector_for_cost *prologue_cost_vec,
805 stmt_vector_for_cost *body_cost_vec)
807 int i;
808 int inside_cost = 0, prologue_cost = 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info))
812 return;
814 /* FORNOW: Assuming maximum 2 args per stmts. */
815 for (i = 0; i < 2; i++)
816 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
817 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
818 stmt_info, 0, vect_prologue);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
822 stmt_info, 0, vect_body);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE, vect_location,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
836 static void
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
838 enum vect_def_type *dt, int pwr)
840 int i, tmp;
841 int inside_cost = 0, prologue_cost = 0;
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
844 void *target_cost_data;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info))
848 return;
850 if (loop_vinfo)
851 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
852 else
853 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
855 for (i = 0; i < pwr + 1; i++)
857 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
858 (i + 1) : i;
859 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
860 vec_promote_demote, stmt_info, 0,
861 vect_body);
864 /* FORNOW: Assuming maximum 2 args per stmts. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
881 void
882 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
883 vect_memory_access_type memory_access_type,
884 enum vect_def_type dt, slp_tree slp_node,
885 stmt_vector_for_cost *prologue_cost_vec,
886 stmt_vector_for_cost *body_cost_vec)
888 unsigned int inside_cost = 0, prologue_cost = 0;
889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
891 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
893 if (dt == vect_constant_def || dt == vect_external_def)
894 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
895 stmt_info, 0, vect_prologue);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node && grouped_access_p)
901 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
902 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
914 if (first_stmt_p
915 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
917 /* Uses a high and low interleave or shuffle operations for each
918 needed permute. */
919 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
920 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
921 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
922 stmt_info, 0, vect_body);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE, vect_location,
926 "vect_model_store_cost: strided group_size = %d .\n",
927 group_size);
930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
931 /* Costs of the stores. */
932 if (memory_access_type == VMAT_ELEMENTWISE)
933 /* N scalar stores plus extracting the elements. */
934 inside_cost += record_stmt_cost (body_cost_vec,
935 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
936 scalar_store, stmt_info, 0, vect_body);
937 else
938 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
940 if (memory_access_type == VMAT_ELEMENTWISE
941 || memory_access_type == VMAT_STRIDED_SLP)
942 inside_cost += record_stmt_cost (body_cost_vec,
943 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
944 vec_to_scalar, stmt_info, 0, vect_body);
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE, vect_location,
948 "vect_model_store_cost: inside_cost = %d, "
949 "prologue_cost = %d .\n", inside_cost, prologue_cost);
953 /* Calculate cost of DR's memory access. */
954 void
955 vect_get_store_cost (struct data_reference *dr, int ncopies,
956 unsigned int *inside_cost,
957 stmt_vector_for_cost *body_cost_vec)
959 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
960 gimple *stmt = DR_STMT (dr);
961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
963 switch (alignment_support_scheme)
965 case dr_aligned:
967 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
968 vector_store, stmt_info, 0,
969 vect_body);
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: aligned.\n");
974 break;
977 case dr_unaligned_supported:
979 /* Here, we assign an additional cost for the unaligned store. */
980 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
981 unaligned_store, stmt_info,
982 DR_MISALIGNMENT (dr), vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: unaligned supported by "
986 "hardware.\n");
987 break;
990 case dr_unaligned_unsupported:
992 *inside_cost = VECT_MAX_COST;
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
996 "vect_model_store_cost: unsupported access.\n");
997 break;
1000 default:
1001 gcc_unreachable ();
1006 /* Function vect_model_load_cost
1008 Models cost for loads. In the case of grouped accesses, one access has
1009 the overhead of the grouped access attributed to it. Since unaligned
1010 accesses are supported for loads, we also account for the costs of the
1011 access scheme chosen. */
1013 void
1014 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1015 vect_memory_access_type memory_access_type,
1016 slp_tree slp_node,
1017 stmt_vector_for_cost *prologue_cost_vec,
1018 stmt_vector_for_cost *body_cost_vec)
1020 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1021 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1022 unsigned int inside_cost = 0, prologue_cost = 0;
1023 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1025 /* Grouped loads read all elements in the group at once,
1026 so we want the DR for the first statement. */
1027 if (!slp_node && grouped_access_p)
1029 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1030 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1033 /* True if we should include any once-per-group costs as well as
1034 the cost of the statement itself. For SLP we only get called
1035 once per group anyhow. */
1036 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1038 /* We assume that the cost of a single load-lanes instruction is
1039 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1040 access is instead being provided by a load-and-permute operation,
1041 include the cost of the permutes. */
1042 if (first_stmt_p
1043 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1045 /* Uses an even and odd extract operations or shuffle operations
1046 for each needed permute. */
1047 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1048 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1049 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1050 stmt_info, 0, vect_body);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE, vect_location,
1054 "vect_model_load_cost: strided group_size = %d .\n",
1055 group_size);
1058 /* The loads themselves. */
1059 if (memory_access_type == VMAT_ELEMENTWISE)
1061 /* N scalar loads plus gathering them into a vector. */
1062 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1063 inside_cost += record_stmt_cost (body_cost_vec,
1064 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1065 scalar_load, stmt_info, 0, vect_body);
1067 else
1068 vect_get_load_cost (dr, ncopies, first_stmt_p,
1069 &inside_cost, &prologue_cost,
1070 prologue_cost_vec, body_cost_vec, true);
1071 if (memory_access_type == VMAT_ELEMENTWISE
1072 || memory_access_type == VMAT_STRIDED_SLP)
1073 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1074 stmt_info, 0, vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 /* Calculate cost of DR's memory access. */
1084 void
1085 vect_get_load_cost (struct data_reference *dr, int ncopies,
1086 bool add_realign_cost, unsigned int *inside_cost,
1087 unsigned int *prologue_cost,
1088 stmt_vector_for_cost *prologue_cost_vec,
1089 stmt_vector_for_cost *body_cost_vec,
1090 bool record_prologue_costs)
1092 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1093 gimple *stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 switch (alignment_support_scheme)
1098 case dr_aligned:
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: aligned.\n");
1107 break;
1109 case dr_unaligned_supported:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1113 unaligned_load, stmt_info,
1114 DR_MISALIGNMENT (dr), vect_body);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: unaligned supported by "
1119 "hardware.\n");
1121 break;
1123 case dr_explicit_realign:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1126 vector_load, stmt_info, 0, vect_body);
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 vec_perm, stmt_info, 0, vect_body);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1132 prologue costs. */
1133 if (targetm.vectorize.builtin_mask_for_load)
1134 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: explicit realign\n");
1141 break;
1143 case dr_explicit_realign_optimized:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned software "
1148 "pipelined.\n");
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost && record_prologue_costs)
1159 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1160 vector_stmt, stmt_info,
1161 0, vect_prologue);
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1164 vector_stmt, stmt_info,
1165 0, vect_prologue);
1168 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1169 stmt_info, 0, vect_body);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: explicit realign optimized"
1176 "\n");
1178 break;
1181 case dr_unaligned_unsupported:
1183 *inside_cost = VECT_MAX_COST;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1187 "vect_model_load_cost: unsupported access.\n");
1188 break;
1191 default:
1192 gcc_unreachable ();
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1199 static void
1200 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1202 if (gsi)
1203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1204 else
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1209 if (loop_vinfo)
1211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1212 basic_block new_bb;
1213 edge pe;
1215 if (nested_in_vect_loop_p (loop, stmt))
1216 loop = loop->inner;
1218 pe = loop_preheader_edge (loop);
1219 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1220 gcc_assert (!new_bb);
1222 else
1224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1225 basic_block bb;
1226 gimple_stmt_iterator gsi_bb_start;
1228 gcc_assert (bb_vinfo);
1229 bb = BB_VINFO_BB (bb_vinfo);
1230 gsi_bb_start = gsi_after_labels (bb);
1231 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 vector type a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
1253 tree
1254 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1256 gimple *init_stmt;
1257 tree new_temp;
1259 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1260 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1262 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1263 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1265 /* Scalar boolean value should be transformed into
1266 all zeros or all ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type))
1269 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1270 tree false_val = build_zero_cst (TREE_TYPE (type));
1272 if (CONSTANT_CLASS_P (val))
1273 val = integer_zerop (val) ? false_val : true_val;
1274 else
1276 new_temp = make_ssa_name (TREE_TYPE (type));
1277 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1278 val, true_val, false_val);
1279 vect_init_vector_1 (stmt, init_stmt, gsi);
1280 val = new_temp;
1283 else if (CONSTANT_CLASS_P (val))
1284 val = fold_convert (TREE_TYPE (type), val);
1285 else
1287 new_temp = make_ssa_name (TREE_TYPE (type));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1289 init_stmt = gimple_build_assign (new_temp,
1290 fold_build1 (VIEW_CONVERT_EXPR,
1291 TREE_TYPE (type),
1292 val));
1293 else
1294 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1295 vect_init_vector_1 (stmt, init_stmt, gsi);
1296 val = new_temp;
1299 val = build_vector_from_val (type, val);
1302 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1303 init_stmt = gimple_build_assign (new_temp, val);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
1305 return new_temp;
1308 /* Function vect_get_vec_def_for_operand_1.
1310 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1311 DT that will be used in the vectorized stmt. */
1313 tree
1314 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1316 tree vec_oprnd;
1317 gimple *vec_stmt;
1318 stmt_vec_info def_stmt_info = NULL;
1320 switch (dt)
1322 /* operand is a constant or a loop invariant. */
1323 case vect_constant_def:
1324 case vect_external_def:
1325 /* Code should use vect_get_vec_def_for_operand. */
1326 gcc_unreachable ();
1328 /* operand is defined inside the loop. */
1329 case vect_internal_def:
1331 /* Get the def from the vectorized stmt. */
1332 def_stmt_info = vinfo_for_stmt (def_stmt);
1334 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1335 /* Get vectorized pattern statement. */
1336 if (!vec_stmt
1337 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1338 && !STMT_VINFO_RELEVANT (def_stmt_info))
1339 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1340 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1341 gcc_assert (vec_stmt);
1342 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1343 vec_oprnd = PHI_RESULT (vec_stmt);
1344 else if (is_gimple_call (vec_stmt))
1345 vec_oprnd = gimple_call_lhs (vec_stmt);
1346 else
1347 vec_oprnd = gimple_assign_lhs (vec_stmt);
1348 return vec_oprnd;
1351 /* operand is defined by a loop header phi - reduction */
1352 case vect_reduction_def:
1353 case vect_double_reduction_def:
1354 case vect_nested_cycle:
1355 /* Code should use get_initial_def_for_reduction. */
1356 gcc_unreachable ();
1358 /* operand is defined by loop-header phi - induction. */
1359 case vect_induction_def:
1361 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1363 /* Get the def from the vectorized stmt. */
1364 def_stmt_info = vinfo_for_stmt (def_stmt);
1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1366 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1367 vec_oprnd = PHI_RESULT (vec_stmt);
1368 else
1369 vec_oprnd = gimple_get_lhs (vec_stmt);
1370 return vec_oprnd;
1373 default:
1374 gcc_unreachable ();
1379 /* Function vect_get_vec_def_for_operand.
1381 OP is an operand in STMT. This function returns a (vector) def that will be
1382 used in the vectorized stmt for STMT.
1384 In the case that OP is an SSA_NAME which is defined in the loop, then
1385 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1387 In case OP is an invariant or constant, a new stmt that creates a vector def
1388 needs to be introduced. VECTYPE may be used to specify a required type for
1389 vector invariant. */
1391 tree
1392 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1394 gimple *def_stmt;
1395 enum vect_def_type dt;
1396 bool is_simple_use;
1397 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1398 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1400 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location,
1403 "vect_get_vec_def_for_operand: ");
1404 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1405 dump_printf (MSG_NOTE, "\n");
1408 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1409 gcc_assert (is_simple_use);
1410 if (def_stmt && dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1416 if (dt == vect_constant_def || dt == vect_external_def)
1418 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1419 tree vector_type;
1421 if (vectype)
1422 vector_type = vectype;
1423 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1424 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1425 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1426 else
1427 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1429 gcc_assert (vector_type);
1430 return vect_init_vector (stmt, op, vector_type, NULL);
1432 else
1433 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1437 /* Function vect_get_vec_def_for_stmt_copy
1439 Return a vector-def for an operand. This function is used when the
1440 vectorized stmt to be created (by the caller to this function) is a "copy"
1441 created in case the vectorized result cannot fit in one vector, and several
1442 copies of the vector-stmt are required. In this case the vector-def is
1443 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1444 of the stmt that defines VEC_OPRND.
1445 DT is the type of the vector def VEC_OPRND.
1447 Context:
1448 In case the vectorization factor (VF) is bigger than the number
1449 of elements that can fit in a vectype (nunits), we have to generate
1450 more than one vector stmt to vectorize the scalar stmt. This situation
1451 arises when there are multiple data-types operated upon in the loop; the
1452 smallest data-type determines the VF, and as a result, when vectorizing
1453 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1454 vector stmt (each computing a vector of 'nunits' results, and together
1455 computing 'VF' results in each iteration). This function is called when
1456 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1457 which VF=16 and nunits=4, so the number of copies required is 4):
1459 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1461 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1462 VS1.1: vx.1 = memref1 VS1.2
1463 VS1.2: vx.2 = memref2 VS1.3
1464 VS1.3: vx.3 = memref3
1466 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1467 VSnew.1: vz1 = vx.1 + ... VSnew.2
1468 VSnew.2: vz2 = vx.2 + ... VSnew.3
1469 VSnew.3: vz3 = vx.3 + ...
1471 The vectorization of S1 is explained in vectorizable_load.
1472 The vectorization of S2:
1473 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1474 the function 'vect_get_vec_def_for_operand' is called to
1475 get the relevant vector-def for each operand of S2. For operand x it
1476 returns the vector-def 'vx.0'.
1478 To create the remaining copies of the vector-stmt (VSnew.j), this
1479 function is called to get the relevant vector-def for each operand. It is
1480 obtained from the respective VS1.j stmt, which is recorded in the
1481 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1483 For example, to obtain the vector-def 'vx.1' in order to create the
1484 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1485 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1486 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1487 and return its def ('vx.1').
1488 Overall, to create the above sequence this function will be called 3 times:
1489 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1490 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1491 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1493 tree
1494 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1496 gimple *vec_stmt_for_operand;
1497 stmt_vec_info def_stmt_info;
1499 /* Do nothing; can reuse same def. */
1500 if (dt == vect_external_def || dt == vect_constant_def )
1501 return vec_oprnd;
1503 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1504 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1505 gcc_assert (def_stmt_info);
1506 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1507 gcc_assert (vec_stmt_for_operand);
1508 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1509 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1510 else
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1512 return vec_oprnd;
1516 /* Get vectorized definitions for the operands to create a copy of an original
1517 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1519 static void
1520 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1521 vec<tree> *vec_oprnds0,
1522 vec<tree> *vec_oprnds1)
1524 tree vec_oprnd = vec_oprnds0->pop ();
1526 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1527 vec_oprnds0->quick_push (vec_oprnd);
1529 if (vec_oprnds1 && vec_oprnds1->length ())
1531 vec_oprnd = vec_oprnds1->pop ();
1532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1533 vec_oprnds1->quick_push (vec_oprnd);
1538 /* Get vectorized definitions for OP0 and OP1.
1539 REDUC_INDEX is the index of reduction operand in case of reduction,
1540 and -1 otherwise. */
1542 void
1543 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1544 vec<tree> *vec_oprnds0,
1545 vec<tree> *vec_oprnds1,
1546 slp_tree slp_node, int reduc_index)
1548 if (slp_node)
1550 int nops = (op1 == NULL_TREE) ? 1 : 2;
1551 auto_vec<tree> ops (nops);
1552 auto_vec<vec<tree> > vec_defs (nops);
1554 ops.quick_push (op0);
1555 if (op1)
1556 ops.quick_push (op1);
1558 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1560 *vec_oprnds0 = vec_defs[0];
1561 if (op1)
1562 *vec_oprnds1 = vec_defs[1];
1564 else
1566 tree vec_oprnd;
1568 vec_oprnds0->create (1);
1569 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1570 vec_oprnds0->quick_push (vec_oprnd);
1572 if (op1)
1574 vec_oprnds1->create (1);
1575 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1576 vec_oprnds1->quick_push (vec_oprnd);
1582 /* Function vect_finish_stmt_generation.
1584 Insert a new stmt. */
1586 void
1587 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1588 gimple_stmt_iterator *gsi)
1590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1591 vec_info *vinfo = stmt_info->vinfo;
1593 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1595 if (!gsi_end_p (*gsi)
1596 && gimple_has_mem_ops (vec_stmt))
1598 gimple *at_stmt = gsi_stmt (*gsi);
1599 tree vuse = gimple_vuse (at_stmt);
1600 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1602 tree vdef = gimple_vdef (at_stmt);
1603 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1604 /* If we have an SSA vuse and insert a store, update virtual
1605 SSA form to avoid triggering the renamer. Do so only
1606 if we can easily see all uses - which is what almost always
1607 happens with the way vectorized stmts are inserted. */
1608 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1609 && ((is_gimple_assign (vec_stmt)
1610 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1611 || (is_gimple_call (vec_stmt)
1612 && !(gimple_call_flags (vec_stmt)
1613 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1615 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1616 gimple_set_vdef (vec_stmt, new_vdef);
1617 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1621 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1623 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1625 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1631 gimple_set_location (vec_stmt, gimple_location (stmt));
1633 /* While EH edges will generally prevent vectorization, stmt might
1634 e.g. be in a must-not-throw region. Ensure newly created stmts
1635 that could throw are part of the same region. */
1636 int lp_nr = lookup_stmt_eh_lp (stmt);
1637 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1638 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1641 /* We want to vectorize a call to combined function CFN with function
1642 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1643 as the types of all inputs. Check whether this is possible using
1644 an internal function, returning its code if so or IFN_LAST if not. */
1646 static internal_fn
1647 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1648 tree vectype_out, tree vectype_in)
1650 internal_fn ifn;
1651 if (internal_fn_p (cfn))
1652 ifn = as_internal_fn (cfn);
1653 else
1654 ifn = associated_internal_fn (fndecl);
1655 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1657 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1658 if (info.vectorizable)
1660 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1661 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1662 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1663 OPTIMIZE_FOR_SPEED))
1664 return ifn;
1667 return IFN_LAST;
1671 static tree permute_vec_elements (tree, tree, tree, gimple *,
1672 gimple_stmt_iterator *);
1674 /* STMT is a non-strided load or store, meaning that it accesses
1675 elements with a known constant step. Return -1 if that step
1676 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1678 static int
1679 compare_step_with_zero (gimple *stmt)
1681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1682 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1683 tree step;
1684 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1685 step = STMT_VINFO_DR_STEP (stmt_info);
1686 else
1687 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1688 return tree_int_cst_compare (step, size_zero_node);
1691 /* If the target supports a permute mask that reverses the elements in
1692 a vector of type VECTYPE, return that mask, otherwise return null. */
1694 static tree
1695 perm_mask_for_reverse (tree vectype)
1697 int i, nunits;
1698 unsigned char *sel;
1700 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1701 sel = XALLOCAVEC (unsigned char, nunits);
1703 for (i = 0; i < nunits; ++i)
1704 sel[i] = nunits - 1 - i;
1706 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1707 return NULL_TREE;
1708 return vect_gen_perm_mask_checked (vectype, sel);
1711 /* A subroutine of get_load_store_type, with a subset of the same
1712 arguments. Handle the case where STMT is part of a grouped load
1713 or store.
1715 For stores, the statements in the group are all consecutive
1716 and there is no gap at the end. For loads, the statements in the
1717 group might not be consecutive; there can be gaps between statements
1718 as well as at the end. */
1720 static bool
1721 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1722 vec_load_store_type vls_type,
1723 vect_memory_access_type *memory_access_type)
1725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1726 vec_info *vinfo = stmt_info->vinfo;
1727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1728 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1729 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1730 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1731 bool single_element_p = (stmt == first_stmt
1732 && !GROUP_NEXT_ELEMENT (stmt_info));
1733 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1734 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1736 /* True if the vectorized statements would access beyond the last
1737 statement in the group. */
1738 bool overrun_p = false;
1740 /* True if we can cope with such overrun by peeling for gaps, so that
1741 there is at least one final scalar iteration after the vector loop. */
1742 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1744 /* There can only be a gap at the end of the group if the stride is
1745 known at compile time. */
1746 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1748 /* Stores can't yet have gaps. */
1749 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1751 if (slp)
1753 if (STMT_VINFO_STRIDED_P (stmt_info))
1755 /* Try to use consecutive accesses of GROUP_SIZE elements,
1756 separated by the stride, until we have a complete vector.
1757 Fall back to scalar accesses if that isn't possible. */
1758 if (nunits % group_size == 0)
1759 *memory_access_type = VMAT_STRIDED_SLP;
1760 else
1761 *memory_access_type = VMAT_ELEMENTWISE;
1763 else
1765 overrun_p = loop_vinfo && gap != 0;
1766 if (overrun_p && vls_type != VLS_LOAD)
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "Grouped store with gaps requires"
1770 " non-consecutive accesses\n");
1771 return false;
1773 /* If the access is aligned an overrun is fine. */
1774 if (overrun_p
1775 && aligned_access_p
1776 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1777 overrun_p = false;
1778 if (overrun_p && !can_overrun_p)
1780 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1782 "Peeling for outer loop is not supported\n");
1783 return false;
1785 *memory_access_type = VMAT_CONTIGUOUS;
1788 else
1790 /* We can always handle this case using elementwise accesses,
1791 but see if something more efficient is available. */
1792 *memory_access_type = VMAT_ELEMENTWISE;
1794 /* If there is a gap at the end of the group then these optimizations
1795 would access excess elements in the last iteration. */
1796 bool would_overrun_p = (gap != 0);
1797 /* If the access is aligned an overrun is fine, but only if the
1798 overrun is not inside an unused vector (if the gap is as large
1799 or larger than a vector). */
1800 if (would_overrun_p
1801 && gap < nunits
1802 && aligned_access_p
1803 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1804 would_overrun_p = false;
1805 if (!STMT_VINFO_STRIDED_P (stmt_info)
1806 && (can_overrun_p || !would_overrun_p)
1807 && compare_step_with_zero (stmt) > 0)
1809 /* First try using LOAD/STORE_LANES. */
1810 if (vls_type == VLS_LOAD
1811 ? vect_load_lanes_supported (vectype, group_size)
1812 : vect_store_lanes_supported (vectype, group_size))
1814 *memory_access_type = VMAT_LOAD_STORE_LANES;
1815 overrun_p = would_overrun_p;
1818 /* If that fails, try using permuting loads. */
1819 if (*memory_access_type == VMAT_ELEMENTWISE
1820 && (vls_type == VLS_LOAD
1821 ? vect_grouped_load_supported (vectype, single_element_p,
1822 group_size)
1823 : vect_grouped_store_supported (vectype, group_size)))
1825 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1826 overrun_p = would_overrun_p;
1831 if (vls_type != VLS_LOAD && first_stmt == stmt)
1833 /* STMT is the leader of the group. Check the operands of all the
1834 stmts of the group. */
1835 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1836 while (next_stmt)
1838 gcc_assert (gimple_assign_single_p (next_stmt));
1839 tree op = gimple_assign_rhs1 (next_stmt);
1840 gimple *def_stmt;
1841 enum vect_def_type dt;
1842 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "use not simple.\n");
1847 return false;
1849 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1853 if (overrun_p)
1855 gcc_assert (can_overrun_p);
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858 "Data access with gaps requires scalar "
1859 "epilogue loop\n");
1860 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1863 return true;
1866 /* A subroutine of get_load_store_type, with a subset of the same
1867 arguments. Handle the case where STMT is a load or store that
1868 accesses consecutive elements with a negative step. */
1870 static vect_memory_access_type
1871 get_negative_load_store_type (gimple *stmt, tree vectype,
1872 vec_load_store_type vls_type,
1873 unsigned int ncopies)
1875 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1876 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1877 dr_alignment_support alignment_support_scheme;
1879 if (ncopies > 1)
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1883 "multiple types with negative step.\n");
1884 return VMAT_ELEMENTWISE;
1887 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1888 if (alignment_support_scheme != dr_aligned
1889 && alignment_support_scheme != dr_unaligned_supported)
1891 if (dump_enabled_p ())
1892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1893 "negative step but alignment required.\n");
1894 return VMAT_ELEMENTWISE;
1897 if (vls_type == VLS_STORE_INVARIANT)
1899 if (dump_enabled_p ())
1900 dump_printf_loc (MSG_NOTE, vect_location,
1901 "negative step with invariant source;"
1902 " no permute needed.\n");
1903 return VMAT_CONTIGUOUS_DOWN;
1906 if (!perm_mask_for_reverse (vectype))
1908 if (dump_enabled_p ())
1909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1910 "negative step and reversing not supported.\n");
1911 return VMAT_ELEMENTWISE;
1914 return VMAT_CONTIGUOUS_REVERSE;
1917 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1918 if there is a memory access type that the vectorized form can use,
1919 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1920 or scatters, fill in GS_INFO accordingly.
1922 SLP says whether we're performing SLP rather than loop vectorization.
1923 VECTYPE is the vector type that the vectorized statements will use.
1924 NCOPIES is the number of vector statements that will be needed. */
1926 static bool
1927 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1928 vec_load_store_type vls_type, unsigned int ncopies,
1929 vect_memory_access_type *memory_access_type,
1930 gather_scatter_info *gs_info)
1932 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1933 vec_info *vinfo = stmt_info->vinfo;
1934 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1935 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1937 *memory_access_type = VMAT_GATHER_SCATTER;
1938 gimple *def_stmt;
1939 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1940 gcc_unreachable ();
1941 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1942 &gs_info->offset_dt,
1943 &gs_info->offset_vectype))
1945 if (dump_enabled_p ())
1946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1947 "%s index use not simple.\n",
1948 vls_type == VLS_LOAD ? "gather" : "scatter");
1949 return false;
1952 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1954 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1955 memory_access_type))
1956 return false;
1958 else if (STMT_VINFO_STRIDED_P (stmt_info))
1960 gcc_assert (!slp);
1961 *memory_access_type = VMAT_ELEMENTWISE;
1963 else
1965 int cmp = compare_step_with_zero (stmt);
1966 if (cmp < 0)
1967 *memory_access_type = get_negative_load_store_type
1968 (stmt, vectype, vls_type, ncopies);
1969 else if (cmp == 0)
1971 gcc_assert (vls_type == VLS_LOAD);
1972 *memory_access_type = VMAT_INVARIANT;
1974 else
1975 *memory_access_type = VMAT_CONTIGUOUS;
1978 /* FIXME: At the moment the cost model seems to underestimate the
1979 cost of using elementwise accesses. This check preserves the
1980 traditional behavior until that can be fixed. */
1981 if (*memory_access_type == VMAT_ELEMENTWISE
1982 && !STMT_VINFO_STRIDED_P (stmt_info))
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1986 "not falling back to elementwise accesses\n");
1987 return false;
1989 return true;
1992 /* Function vectorizable_mask_load_store.
1994 Check if STMT performs a conditional load or store that can be vectorized.
1995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1996 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1999 static bool
2000 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2001 gimple **vec_stmt, slp_tree slp_node)
2003 tree vec_dest = NULL;
2004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2005 stmt_vec_info prev_stmt_info;
2006 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2007 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2008 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2009 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2010 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2011 tree rhs_vectype = NULL_TREE;
2012 tree mask_vectype;
2013 tree elem_type;
2014 gimple *new_stmt;
2015 tree dummy;
2016 tree dataref_ptr = NULL_TREE;
2017 gimple *ptr_incr;
2018 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2019 int ncopies;
2020 int i, j;
2021 bool inv_p;
2022 gather_scatter_info gs_info;
2023 vec_load_store_type vls_type;
2024 tree mask;
2025 gimple *def_stmt;
2026 enum vect_def_type dt;
2028 if (slp_node != NULL)
2029 return false;
2031 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2032 gcc_assert (ncopies >= 1);
2034 mask = gimple_call_arg (stmt, 2);
2036 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2037 return false;
2039 /* FORNOW. This restriction should be relaxed. */
2040 if (nested_in_vect_loop && ncopies > 1)
2042 if (dump_enabled_p ())
2043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2044 "multiple types in nested loop.");
2045 return false;
2048 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2049 return false;
2051 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2052 && ! vec_stmt)
2053 return false;
2055 if (!STMT_VINFO_DATA_REF (stmt_info))
2056 return false;
2058 elem_type = TREE_TYPE (vectype);
2060 if (TREE_CODE (mask) != SSA_NAME)
2061 return false;
2063 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2064 return false;
2066 if (!mask_vectype)
2067 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2069 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2070 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2071 return false;
2073 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2075 tree rhs = gimple_call_arg (stmt, 3);
2076 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2077 return false;
2078 if (dt == vect_constant_def || dt == vect_external_def)
2079 vls_type = VLS_STORE_INVARIANT;
2080 else
2081 vls_type = VLS_STORE;
2083 else
2084 vls_type = VLS_LOAD;
2086 vect_memory_access_type memory_access_type;
2087 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2088 &memory_access_type, &gs_info))
2089 return false;
2091 if (memory_access_type == VMAT_GATHER_SCATTER)
2093 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2094 tree masktype
2095 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2096 if (TREE_CODE (masktype) == INTEGER_TYPE)
2098 if (dump_enabled_p ())
2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2100 "masked gather with integer mask not supported.");
2101 return false;
2104 else if (memory_access_type != VMAT_CONTIGUOUS)
2106 if (dump_enabled_p ())
2107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2108 "unsupported access type for masked %s.\n",
2109 vls_type == VLS_LOAD ? "load" : "store");
2110 return false;
2112 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2113 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2114 TYPE_MODE (mask_vectype),
2115 vls_type == VLS_LOAD)
2116 || (rhs_vectype
2117 && !useless_type_conversion_p (vectype, rhs_vectype)))
2118 return false;
2120 if (!vec_stmt) /* transformation not required. */
2122 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2123 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2124 if (vls_type == VLS_LOAD)
2125 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2126 NULL, NULL, NULL);
2127 else
2128 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2129 dt, NULL, NULL, NULL);
2130 return true;
2132 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2134 /** Transform. **/
2136 if (memory_access_type == VMAT_GATHER_SCATTER)
2138 tree vec_oprnd0 = NULL_TREE, op;
2139 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2140 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2141 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2142 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2143 tree mask_perm_mask = NULL_TREE;
2144 edge pe = loop_preheader_edge (loop);
2145 gimple_seq seq;
2146 basic_block new_bb;
2147 enum { NARROW, NONE, WIDEN } modifier;
2148 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2150 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2151 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2152 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2153 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2154 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2155 scaletype = TREE_VALUE (arglist);
2156 gcc_checking_assert (types_compatible_p (srctype, rettype)
2157 && types_compatible_p (srctype, masktype));
2159 if (nunits == gather_off_nunits)
2160 modifier = NONE;
2161 else if (nunits == gather_off_nunits / 2)
2163 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2164 modifier = WIDEN;
2166 for (i = 0; i < gather_off_nunits; ++i)
2167 sel[i] = i | nunits;
2169 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2171 else if (nunits == gather_off_nunits * 2)
2173 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2174 modifier = NARROW;
2176 for (i = 0; i < nunits; ++i)
2177 sel[i] = i < gather_off_nunits
2178 ? i : i + nunits - gather_off_nunits;
2180 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2181 ncopies *= 2;
2182 for (i = 0; i < nunits; ++i)
2183 sel[i] = i | gather_off_nunits;
2184 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2186 else
2187 gcc_unreachable ();
2189 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2191 ptr = fold_convert (ptrtype, gs_info.base);
2192 if (!is_gimple_min_invariant (ptr))
2194 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2195 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2196 gcc_assert (!new_bb);
2199 scale = build_int_cst (scaletype, gs_info.scale);
2201 prev_stmt_info = NULL;
2202 for (j = 0; j < ncopies; ++j)
2204 if (modifier == WIDEN && (j & 1))
2205 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2206 perm_mask, stmt, gsi);
2207 else if (j == 0)
2208 op = vec_oprnd0
2209 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2210 else
2211 op = vec_oprnd0
2212 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2214 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2216 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2217 == TYPE_VECTOR_SUBPARTS (idxtype));
2218 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2219 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2220 new_stmt
2221 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2223 op = var;
2226 if (mask_perm_mask && (j & 1))
2227 mask_op = permute_vec_elements (mask_op, mask_op,
2228 mask_perm_mask, stmt, gsi);
2229 else
2231 if (j == 0)
2232 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2233 else
2235 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2236 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2239 mask_op = vec_mask;
2240 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2242 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2243 == TYPE_VECTOR_SUBPARTS (masktype));
2244 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2245 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2246 new_stmt
2247 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2249 mask_op = var;
2253 new_stmt
2254 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2255 scale);
2257 if (!useless_type_conversion_p (vectype, rettype))
2259 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2260 == TYPE_VECTOR_SUBPARTS (rettype));
2261 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2262 gimple_call_set_lhs (new_stmt, op);
2263 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2264 var = make_ssa_name (vec_dest);
2265 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2266 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2268 else
2270 var = make_ssa_name (vec_dest, new_stmt);
2271 gimple_call_set_lhs (new_stmt, var);
2274 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2276 if (modifier == NARROW)
2278 if ((j & 1) == 0)
2280 prev_res = var;
2281 continue;
2283 var = permute_vec_elements (prev_res, var,
2284 perm_mask, stmt, gsi);
2285 new_stmt = SSA_NAME_DEF_STMT (var);
2288 if (prev_stmt_info == NULL)
2289 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2290 else
2291 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2292 prev_stmt_info = vinfo_for_stmt (new_stmt);
2295 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2296 from the IL. */
2297 if (STMT_VINFO_RELATED_STMT (stmt_info))
2299 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2300 stmt_info = vinfo_for_stmt (stmt);
2302 tree lhs = gimple_call_lhs (stmt);
2303 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2304 set_vinfo_for_stmt (new_stmt, stmt_info);
2305 set_vinfo_for_stmt (stmt, NULL);
2306 STMT_VINFO_STMT (stmt_info) = new_stmt;
2307 gsi_replace (gsi, new_stmt, true);
2308 return true;
2310 else if (vls_type != VLS_LOAD)
2312 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2313 prev_stmt_info = NULL;
2314 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2315 for (i = 0; i < ncopies; i++)
2317 unsigned align, misalign;
2319 if (i == 0)
2321 tree rhs = gimple_call_arg (stmt, 3);
2322 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2323 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2324 /* We should have catched mismatched types earlier. */
2325 gcc_assert (useless_type_conversion_p (vectype,
2326 TREE_TYPE (vec_rhs)));
2327 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2328 NULL_TREE, &dummy, gsi,
2329 &ptr_incr, false, &inv_p);
2330 gcc_assert (!inv_p);
2332 else
2334 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2335 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2336 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2337 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2338 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2339 TYPE_SIZE_UNIT (vectype));
2342 align = TYPE_ALIGN_UNIT (vectype);
2343 if (aligned_access_p (dr))
2344 misalign = 0;
2345 else if (DR_MISALIGNMENT (dr) == -1)
2347 align = TYPE_ALIGN_UNIT (elem_type);
2348 misalign = 0;
2350 else
2351 misalign = DR_MISALIGNMENT (dr);
2352 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2353 misalign);
2354 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2355 misalign ? least_bit_hwi (misalign) : align);
2356 new_stmt
2357 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2358 ptr, vec_mask, vec_rhs);
2359 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2360 if (i == 0)
2361 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2362 else
2363 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2364 prev_stmt_info = vinfo_for_stmt (new_stmt);
2367 else
2369 tree vec_mask = NULL_TREE;
2370 prev_stmt_info = NULL;
2371 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2372 for (i = 0; i < ncopies; i++)
2374 unsigned align, misalign;
2376 if (i == 0)
2378 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2379 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2380 NULL_TREE, &dummy, gsi,
2381 &ptr_incr, false, &inv_p);
2382 gcc_assert (!inv_p);
2384 else
2386 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2387 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2388 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2389 TYPE_SIZE_UNIT (vectype));
2392 align = TYPE_ALIGN_UNIT (vectype);
2393 if (aligned_access_p (dr))
2394 misalign = 0;
2395 else if (DR_MISALIGNMENT (dr) == -1)
2397 align = TYPE_ALIGN_UNIT (elem_type);
2398 misalign = 0;
2400 else
2401 misalign = DR_MISALIGNMENT (dr);
2402 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2403 misalign);
2404 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2405 misalign ? least_bit_hwi (misalign) : align);
2406 new_stmt
2407 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2408 ptr, vec_mask);
2409 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2410 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2411 if (i == 0)
2412 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2413 else
2414 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2415 prev_stmt_info = vinfo_for_stmt (new_stmt);
2419 if (vls_type == VLS_LOAD)
2421 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2422 from the IL. */
2423 if (STMT_VINFO_RELATED_STMT (stmt_info))
2425 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2426 stmt_info = vinfo_for_stmt (stmt);
2428 tree lhs = gimple_call_lhs (stmt);
2429 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2430 set_vinfo_for_stmt (new_stmt, stmt_info);
2431 set_vinfo_for_stmt (stmt, NULL);
2432 STMT_VINFO_STMT (stmt_info) = new_stmt;
2433 gsi_replace (gsi, new_stmt, true);
2436 return true;
2439 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2441 static bool
2442 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2443 gimple **vec_stmt, slp_tree slp_node,
2444 tree vectype_in, enum vect_def_type *dt)
2446 tree op, vectype;
2447 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2448 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2449 unsigned ncopies, nunits;
2451 op = gimple_call_arg (stmt, 0);
2452 vectype = STMT_VINFO_VECTYPE (stmt_info);
2453 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2455 /* Multiple types in SLP are handled by creating the appropriate number of
2456 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2457 case of SLP. */
2458 if (slp_node)
2459 ncopies = 1;
2460 else
2461 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2463 gcc_assert (ncopies >= 1);
2465 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2466 if (! char_vectype)
2467 return false;
2469 unsigned char *elts
2470 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2471 unsigned char *elt = elts;
2472 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2473 for (unsigned i = 0; i < nunits; ++i)
2474 for (unsigned j = 0; j < word_bytes; ++j)
2475 *elt++ = (i + 1) * word_bytes - j - 1;
2477 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2478 return false;
2480 if (! vec_stmt)
2482 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2485 "\n");
2486 if (! PURE_SLP_STMT (stmt_info))
2488 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2489 1, vector_stmt, stmt_info, 0, vect_prologue);
2490 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2491 ncopies, vec_perm, stmt_info, 0, vect_body);
2493 return true;
2496 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2497 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2498 telts[i] = build_int_cst (char_type_node, elts[i]);
2499 tree bswap_vconst = build_vector (char_vectype, telts);
2501 /* Transform. */
2502 vec<tree> vec_oprnds = vNULL;
2503 gimple *new_stmt = NULL;
2504 stmt_vec_info prev_stmt_info = NULL;
2505 for (unsigned j = 0; j < ncopies; j++)
2507 /* Handle uses. */
2508 if (j == 0)
2509 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2510 else
2511 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2513 /* Arguments are ready. create the new vector stmt. */
2514 unsigned i;
2515 tree vop;
2516 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2518 tree tem = make_ssa_name (char_vectype);
2519 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2520 char_vectype, vop));
2521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2522 tree tem2 = make_ssa_name (char_vectype);
2523 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2524 tem, tem, bswap_vconst);
2525 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2526 tem = make_ssa_name (vectype);
2527 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2528 vectype, tem2));
2529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2530 if (slp_node)
2531 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2534 if (slp_node)
2535 continue;
2537 if (j == 0)
2538 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2539 else
2540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2542 prev_stmt_info = vinfo_for_stmt (new_stmt);
2545 vec_oprnds.release ();
2546 return true;
2549 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2550 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2551 in a single step. On success, store the binary pack code in
2552 *CONVERT_CODE. */
2554 static bool
2555 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2556 tree_code *convert_code)
2558 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2559 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2560 return false;
2562 tree_code code;
2563 int multi_step_cvt = 0;
2564 auto_vec <tree, 8> interm_types;
2565 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2566 &code, &multi_step_cvt,
2567 &interm_types)
2568 || multi_step_cvt)
2569 return false;
2571 *convert_code = code;
2572 return true;
2575 /* Function vectorizable_call.
2577 Check if GS performs a function call that can be vectorized.
2578 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2579 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2580 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2582 static bool
2583 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2584 slp_tree slp_node)
2586 gcall *stmt;
2587 tree vec_dest;
2588 tree scalar_dest;
2589 tree op, type;
2590 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2591 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2592 tree vectype_out, vectype_in;
2593 int nunits_in;
2594 int nunits_out;
2595 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2596 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2597 vec_info *vinfo = stmt_info->vinfo;
2598 tree fndecl, new_temp, rhs_type;
2599 gimple *def_stmt;
2600 enum vect_def_type dt[3]
2601 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2602 gimple *new_stmt = NULL;
2603 int ncopies, j;
2604 vec<tree> vargs = vNULL;
2605 enum { NARROW, NONE, WIDEN } modifier;
2606 size_t i, nargs;
2607 tree lhs;
2609 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2610 return false;
2612 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2613 && ! vec_stmt)
2614 return false;
2616 /* Is GS a vectorizable call? */
2617 stmt = dyn_cast <gcall *> (gs);
2618 if (!stmt)
2619 return false;
2621 if (gimple_call_internal_p (stmt)
2622 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2623 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2624 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2625 slp_node);
2627 if (gimple_call_lhs (stmt) == NULL_TREE
2628 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2629 return false;
2631 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2633 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2635 /* Process function arguments. */
2636 rhs_type = NULL_TREE;
2637 vectype_in = NULL_TREE;
2638 nargs = gimple_call_num_args (stmt);
2640 /* Bail out if the function has more than three arguments, we do not have
2641 interesting builtin functions to vectorize with more than two arguments
2642 except for fma. No arguments is also not good. */
2643 if (nargs == 0 || nargs > 3)
2644 return false;
2646 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2647 if (gimple_call_internal_p (stmt)
2648 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2650 nargs = 0;
2651 rhs_type = unsigned_type_node;
2654 for (i = 0; i < nargs; i++)
2656 tree opvectype;
2658 op = gimple_call_arg (stmt, i);
2660 /* We can only handle calls with arguments of the same type. */
2661 if (rhs_type
2662 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2664 if (dump_enabled_p ())
2665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2666 "argument types differ.\n");
2667 return false;
2669 if (!rhs_type)
2670 rhs_type = TREE_TYPE (op);
2672 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2674 if (dump_enabled_p ())
2675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2676 "use not simple.\n");
2677 return false;
2680 if (!vectype_in)
2681 vectype_in = opvectype;
2682 else if (opvectype
2683 && opvectype != vectype_in)
2685 if (dump_enabled_p ())
2686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2687 "argument vector types differ.\n");
2688 return false;
2691 /* If all arguments are external or constant defs use a vector type with
2692 the same size as the output vector type. */
2693 if (!vectype_in)
2694 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2695 if (vec_stmt)
2696 gcc_assert (vectype_in);
2697 if (!vectype_in)
2699 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2702 "no vectype for scalar type ");
2703 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2704 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2707 return false;
2710 /* FORNOW */
2711 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2712 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2713 if (nunits_in == nunits_out / 2)
2714 modifier = NARROW;
2715 else if (nunits_out == nunits_in)
2716 modifier = NONE;
2717 else if (nunits_out == nunits_in / 2)
2718 modifier = WIDEN;
2719 else
2720 return false;
2722 /* We only handle functions that do not read or clobber memory. */
2723 if (gimple_vuse (stmt))
2725 if (dump_enabled_p ())
2726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2727 "function reads from or writes to memory.\n");
2728 return false;
2731 /* For now, we only vectorize functions if a target specific builtin
2732 is available. TODO -- in some cases, it might be profitable to
2733 insert the calls for pieces of the vector, in order to be able
2734 to vectorize other operations in the loop. */
2735 fndecl = NULL_TREE;
2736 internal_fn ifn = IFN_LAST;
2737 combined_fn cfn = gimple_call_combined_fn (stmt);
2738 tree callee = gimple_call_fndecl (stmt);
2740 /* First try using an internal function. */
2741 tree_code convert_code = ERROR_MARK;
2742 if (cfn != CFN_LAST
2743 && (modifier == NONE
2744 || (modifier == NARROW
2745 && simple_integer_narrowing (vectype_out, vectype_in,
2746 &convert_code))))
2747 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2748 vectype_in);
2750 /* If that fails, try asking for a target-specific built-in function. */
2751 if (ifn == IFN_LAST)
2753 if (cfn != CFN_LAST)
2754 fndecl = targetm.vectorize.builtin_vectorized_function
2755 (cfn, vectype_out, vectype_in);
2756 else
2757 fndecl = targetm.vectorize.builtin_md_vectorized_function
2758 (callee, vectype_out, vectype_in);
2761 if (ifn == IFN_LAST && !fndecl)
2763 if (cfn == CFN_GOMP_SIMD_LANE
2764 && !slp_node
2765 && loop_vinfo
2766 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2767 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2768 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2769 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2771 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2772 { 0, 1, 2, ... vf - 1 } vector. */
2773 gcc_assert (nargs == 0);
2775 else if (modifier == NONE
2776 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2777 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2778 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2779 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2780 vectype_in, dt);
2781 else
2783 if (dump_enabled_p ())
2784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2785 "function is not vectorizable.\n");
2786 return false;
2790 if (slp_node)
2791 ncopies = 1;
2792 else if (modifier == NARROW && ifn == IFN_LAST)
2793 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2794 else
2795 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2797 /* Sanity check: make sure that at least one copy of the vectorized stmt
2798 needs to be generated. */
2799 gcc_assert (ncopies >= 1);
2801 if (!vec_stmt) /* transformation not required. */
2803 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2804 if (dump_enabled_p ())
2805 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2806 "\n");
2807 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2808 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2809 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2810 vec_promote_demote, stmt_info, 0, vect_body);
2812 return true;
2815 /** Transform. **/
2817 if (dump_enabled_p ())
2818 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2820 /* Handle def. */
2821 scalar_dest = gimple_call_lhs (stmt);
2822 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2824 prev_stmt_info = NULL;
2825 if (modifier == NONE || ifn != IFN_LAST)
2827 tree prev_res = NULL_TREE;
2828 for (j = 0; j < ncopies; ++j)
2830 /* Build argument list for the vectorized call. */
2831 if (j == 0)
2832 vargs.create (nargs);
2833 else
2834 vargs.truncate (0);
2836 if (slp_node)
2838 auto_vec<vec<tree> > vec_defs (nargs);
2839 vec<tree> vec_oprnds0;
2841 for (i = 0; i < nargs; i++)
2842 vargs.quick_push (gimple_call_arg (stmt, i));
2843 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2844 vec_oprnds0 = vec_defs[0];
2846 /* Arguments are ready. Create the new vector stmt. */
2847 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2849 size_t k;
2850 for (k = 0; k < nargs; k++)
2852 vec<tree> vec_oprndsk = vec_defs[k];
2853 vargs[k] = vec_oprndsk[i];
2855 if (modifier == NARROW)
2857 tree half_res = make_ssa_name (vectype_in);
2858 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2859 gimple_call_set_lhs (new_stmt, half_res);
2860 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2861 if ((i & 1) == 0)
2863 prev_res = half_res;
2864 continue;
2866 new_temp = make_ssa_name (vec_dest);
2867 new_stmt = gimple_build_assign (new_temp, convert_code,
2868 prev_res, half_res);
2870 else
2872 if (ifn != IFN_LAST)
2873 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2874 else
2875 new_stmt = gimple_build_call_vec (fndecl, vargs);
2876 new_temp = make_ssa_name (vec_dest, new_stmt);
2877 gimple_call_set_lhs (new_stmt, new_temp);
2879 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2880 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2883 for (i = 0; i < nargs; i++)
2885 vec<tree> vec_oprndsi = vec_defs[i];
2886 vec_oprndsi.release ();
2888 continue;
2891 for (i = 0; i < nargs; i++)
2893 op = gimple_call_arg (stmt, i);
2894 if (j == 0)
2895 vec_oprnd0
2896 = vect_get_vec_def_for_operand (op, stmt);
2897 else
2899 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2900 vec_oprnd0
2901 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2904 vargs.quick_push (vec_oprnd0);
2907 if (gimple_call_internal_p (stmt)
2908 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2910 tree *v = XALLOCAVEC (tree, nunits_out);
2911 int k;
2912 for (k = 0; k < nunits_out; ++k)
2913 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2914 tree cst = build_vector (vectype_out, v);
2915 tree new_var
2916 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2917 gimple *init_stmt = gimple_build_assign (new_var, cst);
2918 vect_init_vector_1 (stmt, init_stmt, NULL);
2919 new_temp = make_ssa_name (vec_dest);
2920 new_stmt = gimple_build_assign (new_temp, new_var);
2922 else if (modifier == NARROW)
2924 tree half_res = make_ssa_name (vectype_in);
2925 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2926 gimple_call_set_lhs (new_stmt, half_res);
2927 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2928 if ((j & 1) == 0)
2930 prev_res = half_res;
2931 continue;
2933 new_temp = make_ssa_name (vec_dest);
2934 new_stmt = gimple_build_assign (new_temp, convert_code,
2935 prev_res, half_res);
2937 else
2939 if (ifn != IFN_LAST)
2940 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2941 else
2942 new_stmt = gimple_build_call_vec (fndecl, vargs);
2943 new_temp = make_ssa_name (vec_dest, new_stmt);
2944 gimple_call_set_lhs (new_stmt, new_temp);
2946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2948 if (j == (modifier == NARROW ? 1 : 0))
2949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2950 else
2951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2953 prev_stmt_info = vinfo_for_stmt (new_stmt);
2956 else if (modifier == NARROW)
2958 for (j = 0; j < ncopies; ++j)
2960 /* Build argument list for the vectorized call. */
2961 if (j == 0)
2962 vargs.create (nargs * 2);
2963 else
2964 vargs.truncate (0);
2966 if (slp_node)
2968 auto_vec<vec<tree> > vec_defs (nargs);
2969 vec<tree> vec_oprnds0;
2971 for (i = 0; i < nargs; i++)
2972 vargs.quick_push (gimple_call_arg (stmt, i));
2973 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2974 vec_oprnds0 = vec_defs[0];
2976 /* Arguments are ready. Create the new vector stmt. */
2977 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2979 size_t k;
2980 vargs.truncate (0);
2981 for (k = 0; k < nargs; k++)
2983 vec<tree> vec_oprndsk = vec_defs[k];
2984 vargs.quick_push (vec_oprndsk[i]);
2985 vargs.quick_push (vec_oprndsk[i + 1]);
2987 if (ifn != IFN_LAST)
2988 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2989 else
2990 new_stmt = gimple_build_call_vec (fndecl, vargs);
2991 new_temp = make_ssa_name (vec_dest, new_stmt);
2992 gimple_call_set_lhs (new_stmt, new_temp);
2993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2994 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2997 for (i = 0; i < nargs; i++)
2999 vec<tree> vec_oprndsi = vec_defs[i];
3000 vec_oprndsi.release ();
3002 continue;
3005 for (i = 0; i < nargs; i++)
3007 op = gimple_call_arg (stmt, i);
3008 if (j == 0)
3010 vec_oprnd0
3011 = vect_get_vec_def_for_operand (op, stmt);
3012 vec_oprnd1
3013 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3015 else
3017 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3018 vec_oprnd0
3019 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3020 vec_oprnd1
3021 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3024 vargs.quick_push (vec_oprnd0);
3025 vargs.quick_push (vec_oprnd1);
3028 new_stmt = gimple_build_call_vec (fndecl, vargs);
3029 new_temp = make_ssa_name (vec_dest, new_stmt);
3030 gimple_call_set_lhs (new_stmt, new_temp);
3031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3033 if (j == 0)
3034 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3035 else
3036 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3038 prev_stmt_info = vinfo_for_stmt (new_stmt);
3041 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3043 else
3044 /* No current target implements this case. */
3045 return false;
3047 vargs.release ();
3049 /* The call in STMT might prevent it from being removed in dce.
3050 We however cannot remove it here, due to the way the ssa name
3051 it defines is mapped to the new definition. So just replace
3052 rhs of the statement with something harmless. */
3054 if (slp_node)
3055 return true;
3057 type = TREE_TYPE (scalar_dest);
3058 if (is_pattern_stmt_p (stmt_info))
3059 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3060 else
3061 lhs = gimple_call_lhs (stmt);
3063 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3064 set_vinfo_for_stmt (new_stmt, stmt_info);
3065 set_vinfo_for_stmt (stmt, NULL);
3066 STMT_VINFO_STMT (stmt_info) = new_stmt;
3067 gsi_replace (gsi, new_stmt, false);
3069 return true;
3073 struct simd_call_arg_info
3075 tree vectype;
3076 tree op;
3077 HOST_WIDE_INT linear_step;
3078 enum vect_def_type dt;
3079 unsigned int align;
3080 bool simd_lane_linear;
3083 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3084 is linear within simd lane (but not within whole loop), note it in
3085 *ARGINFO. */
3087 static void
3088 vect_simd_lane_linear (tree op, struct loop *loop,
3089 struct simd_call_arg_info *arginfo)
3091 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3093 if (!is_gimple_assign (def_stmt)
3094 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3095 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3096 return;
3098 tree base = gimple_assign_rhs1 (def_stmt);
3099 HOST_WIDE_INT linear_step = 0;
3100 tree v = gimple_assign_rhs2 (def_stmt);
3101 while (TREE_CODE (v) == SSA_NAME)
3103 tree t;
3104 def_stmt = SSA_NAME_DEF_STMT (v);
3105 if (is_gimple_assign (def_stmt))
3106 switch (gimple_assign_rhs_code (def_stmt))
3108 case PLUS_EXPR:
3109 t = gimple_assign_rhs2 (def_stmt);
3110 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3111 return;
3112 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3113 v = gimple_assign_rhs1 (def_stmt);
3114 continue;
3115 case MULT_EXPR:
3116 t = gimple_assign_rhs2 (def_stmt);
3117 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3118 return;
3119 linear_step = tree_to_shwi (t);
3120 v = gimple_assign_rhs1 (def_stmt);
3121 continue;
3122 CASE_CONVERT:
3123 t = gimple_assign_rhs1 (def_stmt);
3124 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3125 || (TYPE_PRECISION (TREE_TYPE (v))
3126 < TYPE_PRECISION (TREE_TYPE (t))))
3127 return;
3128 if (!linear_step)
3129 linear_step = 1;
3130 v = t;
3131 continue;
3132 default:
3133 return;
3135 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3136 && loop->simduid
3137 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3138 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3139 == loop->simduid))
3141 if (!linear_step)
3142 linear_step = 1;
3143 arginfo->linear_step = linear_step;
3144 arginfo->op = base;
3145 arginfo->simd_lane_linear = true;
3146 return;
3151 /* Function vectorizable_simd_clone_call.
3153 Check if STMT performs a function call that can be vectorized
3154 by calling a simd clone of the function.
3155 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3156 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3157 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3159 static bool
3160 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3161 gimple **vec_stmt, slp_tree slp_node)
3163 tree vec_dest;
3164 tree scalar_dest;
3165 tree op, type;
3166 tree vec_oprnd0 = NULL_TREE;
3167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3168 tree vectype;
3169 unsigned int nunits;
3170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3171 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3172 vec_info *vinfo = stmt_info->vinfo;
3173 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3174 tree fndecl, new_temp;
3175 gimple *def_stmt;
3176 gimple *new_stmt = NULL;
3177 int ncopies, j;
3178 auto_vec<simd_call_arg_info> arginfo;
3179 vec<tree> vargs = vNULL;
3180 size_t i, nargs;
3181 tree lhs, rtype, ratype;
3182 vec<constructor_elt, va_gc> *ret_ctor_elts;
3184 /* Is STMT a vectorizable call? */
3185 if (!is_gimple_call (stmt))
3186 return false;
3188 fndecl = gimple_call_fndecl (stmt);
3189 if (fndecl == NULL_TREE)
3190 return false;
3192 struct cgraph_node *node = cgraph_node::get (fndecl);
3193 if (node == NULL || node->simd_clones == NULL)
3194 return false;
3196 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3197 return false;
3199 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3200 && ! vec_stmt)
3201 return false;
3203 if (gimple_call_lhs (stmt)
3204 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3205 return false;
3207 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3209 vectype = STMT_VINFO_VECTYPE (stmt_info);
3211 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3212 return false;
3214 /* FORNOW */
3215 if (slp_node)
3216 return false;
3218 /* Process function arguments. */
3219 nargs = gimple_call_num_args (stmt);
3221 /* Bail out if the function has zero arguments. */
3222 if (nargs == 0)
3223 return false;
3225 arginfo.reserve (nargs, true);
3227 for (i = 0; i < nargs; i++)
3229 simd_call_arg_info thisarginfo;
3230 affine_iv iv;
3232 thisarginfo.linear_step = 0;
3233 thisarginfo.align = 0;
3234 thisarginfo.op = NULL_TREE;
3235 thisarginfo.simd_lane_linear = false;
3237 op = gimple_call_arg (stmt, i);
3238 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3239 &thisarginfo.vectype)
3240 || thisarginfo.dt == vect_uninitialized_def)
3242 if (dump_enabled_p ())
3243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3244 "use not simple.\n");
3245 return false;
3248 if (thisarginfo.dt == vect_constant_def
3249 || thisarginfo.dt == vect_external_def)
3250 gcc_assert (thisarginfo.vectype == NULL_TREE);
3251 else
3252 gcc_assert (thisarginfo.vectype != NULL_TREE);
3254 /* For linear arguments, the analyze phase should have saved
3255 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3256 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3257 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3259 gcc_assert (vec_stmt);
3260 thisarginfo.linear_step
3261 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3262 thisarginfo.op
3263 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3264 thisarginfo.simd_lane_linear
3265 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3266 == boolean_true_node);
3267 /* If loop has been peeled for alignment, we need to adjust it. */
3268 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3269 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3270 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3272 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3273 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3274 tree opt = TREE_TYPE (thisarginfo.op);
3275 bias = fold_convert (TREE_TYPE (step), bias);
3276 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3277 thisarginfo.op
3278 = fold_build2 (POINTER_TYPE_P (opt)
3279 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3280 thisarginfo.op, bias);
3283 else if (!vec_stmt
3284 && thisarginfo.dt != vect_constant_def
3285 && thisarginfo.dt != vect_external_def
3286 && loop_vinfo
3287 && TREE_CODE (op) == SSA_NAME
3288 && simple_iv (loop, loop_containing_stmt (stmt), op,
3289 &iv, false)
3290 && tree_fits_shwi_p (iv.step))
3292 thisarginfo.linear_step = tree_to_shwi (iv.step);
3293 thisarginfo.op = iv.base;
3295 else if ((thisarginfo.dt == vect_constant_def
3296 || thisarginfo.dt == vect_external_def)
3297 && POINTER_TYPE_P (TREE_TYPE (op)))
3298 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3299 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3300 linear too. */
3301 if (POINTER_TYPE_P (TREE_TYPE (op))
3302 && !thisarginfo.linear_step
3303 && !vec_stmt
3304 && thisarginfo.dt != vect_constant_def
3305 && thisarginfo.dt != vect_external_def
3306 && loop_vinfo
3307 && !slp_node
3308 && TREE_CODE (op) == SSA_NAME)
3309 vect_simd_lane_linear (op, loop, &thisarginfo);
3311 arginfo.quick_push (thisarginfo);
3314 unsigned int badness = 0;
3315 struct cgraph_node *bestn = NULL;
3316 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3317 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3318 else
3319 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3320 n = n->simdclone->next_clone)
3322 unsigned int this_badness = 0;
3323 if (n->simdclone->simdlen
3324 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3325 || n->simdclone->nargs != nargs)
3326 continue;
3327 if (n->simdclone->simdlen
3328 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3329 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3330 - exact_log2 (n->simdclone->simdlen)) * 1024;
3331 if (n->simdclone->inbranch)
3332 this_badness += 2048;
3333 int target_badness = targetm.simd_clone.usable (n);
3334 if (target_badness < 0)
3335 continue;
3336 this_badness += target_badness * 512;
3337 /* FORNOW: Have to add code to add the mask argument. */
3338 if (n->simdclone->inbranch)
3339 continue;
3340 for (i = 0; i < nargs; i++)
3342 switch (n->simdclone->args[i].arg_type)
3344 case SIMD_CLONE_ARG_TYPE_VECTOR:
3345 if (!useless_type_conversion_p
3346 (n->simdclone->args[i].orig_type,
3347 TREE_TYPE (gimple_call_arg (stmt, i))))
3348 i = -1;
3349 else if (arginfo[i].dt == vect_constant_def
3350 || arginfo[i].dt == vect_external_def
3351 || arginfo[i].linear_step)
3352 this_badness += 64;
3353 break;
3354 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3355 if (arginfo[i].dt != vect_constant_def
3356 && arginfo[i].dt != vect_external_def)
3357 i = -1;
3358 break;
3359 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3360 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3361 if (arginfo[i].dt == vect_constant_def
3362 || arginfo[i].dt == vect_external_def
3363 || (arginfo[i].linear_step
3364 != n->simdclone->args[i].linear_step))
3365 i = -1;
3366 break;
3367 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3368 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3369 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3370 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3371 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3372 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3373 /* FORNOW */
3374 i = -1;
3375 break;
3376 case SIMD_CLONE_ARG_TYPE_MASK:
3377 gcc_unreachable ();
3379 if (i == (size_t) -1)
3380 break;
3381 if (n->simdclone->args[i].alignment > arginfo[i].align)
3383 i = -1;
3384 break;
3386 if (arginfo[i].align)
3387 this_badness += (exact_log2 (arginfo[i].align)
3388 - exact_log2 (n->simdclone->args[i].alignment));
3390 if (i == (size_t) -1)
3391 continue;
3392 if (bestn == NULL || this_badness < badness)
3394 bestn = n;
3395 badness = this_badness;
3399 if (bestn == NULL)
3400 return false;
3402 for (i = 0; i < nargs; i++)
3403 if ((arginfo[i].dt == vect_constant_def
3404 || arginfo[i].dt == vect_external_def)
3405 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3407 arginfo[i].vectype
3408 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3409 i)));
3410 if (arginfo[i].vectype == NULL
3411 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3412 > bestn->simdclone->simdlen))
3413 return false;
3416 fndecl = bestn->decl;
3417 nunits = bestn->simdclone->simdlen;
3418 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3420 /* If the function isn't const, only allow it in simd loops where user
3421 has asserted that at least nunits consecutive iterations can be
3422 performed using SIMD instructions. */
3423 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3424 && gimple_vuse (stmt))
3425 return false;
3427 /* Sanity check: make sure that at least one copy of the vectorized stmt
3428 needs to be generated. */
3429 gcc_assert (ncopies >= 1);
3431 if (!vec_stmt) /* transformation not required. */
3433 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3434 for (i = 0; i < nargs; i++)
3435 if ((bestn->simdclone->args[i].arg_type
3436 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3437 || (bestn->simdclone->args[i].arg_type
3438 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3440 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3441 + 1);
3442 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3443 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3444 ? size_type_node : TREE_TYPE (arginfo[i].op);
3445 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3446 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3447 tree sll = arginfo[i].simd_lane_linear
3448 ? boolean_true_node : boolean_false_node;
3449 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3451 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3452 if (dump_enabled_p ())
3453 dump_printf_loc (MSG_NOTE, vect_location,
3454 "=== vectorizable_simd_clone_call ===\n");
3455 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3456 return true;
3459 /** Transform. **/
3461 if (dump_enabled_p ())
3462 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3464 /* Handle def. */
3465 scalar_dest = gimple_call_lhs (stmt);
3466 vec_dest = NULL_TREE;
3467 rtype = NULL_TREE;
3468 ratype = NULL_TREE;
3469 if (scalar_dest)
3471 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3472 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3473 if (TREE_CODE (rtype) == ARRAY_TYPE)
3475 ratype = rtype;
3476 rtype = TREE_TYPE (ratype);
3480 prev_stmt_info = NULL;
3481 for (j = 0; j < ncopies; ++j)
3483 /* Build argument list for the vectorized call. */
3484 if (j == 0)
3485 vargs.create (nargs);
3486 else
3487 vargs.truncate (0);
3489 for (i = 0; i < nargs; i++)
3491 unsigned int k, l, m, o;
3492 tree atype;
3493 op = gimple_call_arg (stmt, i);
3494 switch (bestn->simdclone->args[i].arg_type)
3496 case SIMD_CLONE_ARG_TYPE_VECTOR:
3497 atype = bestn->simdclone->args[i].vector_type;
3498 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3499 for (m = j * o; m < (j + 1) * o; m++)
3501 if (TYPE_VECTOR_SUBPARTS (atype)
3502 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3504 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3505 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3506 / TYPE_VECTOR_SUBPARTS (atype));
3507 gcc_assert ((k & (k - 1)) == 0);
3508 if (m == 0)
3509 vec_oprnd0
3510 = vect_get_vec_def_for_operand (op, stmt);
3511 else
3513 vec_oprnd0 = arginfo[i].op;
3514 if ((m & (k - 1)) == 0)
3515 vec_oprnd0
3516 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3517 vec_oprnd0);
3519 arginfo[i].op = vec_oprnd0;
3520 vec_oprnd0
3521 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3522 size_int (prec),
3523 bitsize_int ((m & (k - 1)) * prec));
3524 new_stmt
3525 = gimple_build_assign (make_ssa_name (atype),
3526 vec_oprnd0);
3527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3528 vargs.safe_push (gimple_assign_lhs (new_stmt));
3530 else
3532 k = (TYPE_VECTOR_SUBPARTS (atype)
3533 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3534 gcc_assert ((k & (k - 1)) == 0);
3535 vec<constructor_elt, va_gc> *ctor_elts;
3536 if (k != 1)
3537 vec_alloc (ctor_elts, k);
3538 else
3539 ctor_elts = NULL;
3540 for (l = 0; l < k; l++)
3542 if (m == 0 && l == 0)
3543 vec_oprnd0
3544 = vect_get_vec_def_for_operand (op, stmt);
3545 else
3546 vec_oprnd0
3547 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3548 arginfo[i].op);
3549 arginfo[i].op = vec_oprnd0;
3550 if (k == 1)
3551 break;
3552 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3553 vec_oprnd0);
3555 if (k == 1)
3556 vargs.safe_push (vec_oprnd0);
3557 else
3559 vec_oprnd0 = build_constructor (atype, ctor_elts);
3560 new_stmt
3561 = gimple_build_assign (make_ssa_name (atype),
3562 vec_oprnd0);
3563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3564 vargs.safe_push (gimple_assign_lhs (new_stmt));
3568 break;
3569 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3570 vargs.safe_push (op);
3571 break;
3572 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3573 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3574 if (j == 0)
3576 gimple_seq stmts;
3577 arginfo[i].op
3578 = force_gimple_operand (arginfo[i].op, &stmts, true,
3579 NULL_TREE);
3580 if (stmts != NULL)
3582 basic_block new_bb;
3583 edge pe = loop_preheader_edge (loop);
3584 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3585 gcc_assert (!new_bb);
3587 if (arginfo[i].simd_lane_linear)
3589 vargs.safe_push (arginfo[i].op);
3590 break;
3592 tree phi_res = copy_ssa_name (op);
3593 gphi *new_phi = create_phi_node (phi_res, loop->header);
3594 set_vinfo_for_stmt (new_phi,
3595 new_stmt_vec_info (new_phi, loop_vinfo));
3596 add_phi_arg (new_phi, arginfo[i].op,
3597 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3598 enum tree_code code
3599 = POINTER_TYPE_P (TREE_TYPE (op))
3600 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3601 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3602 ? sizetype : TREE_TYPE (op);
3603 widest_int cst
3604 = wi::mul (bestn->simdclone->args[i].linear_step,
3605 ncopies * nunits);
3606 tree tcst = wide_int_to_tree (type, cst);
3607 tree phi_arg = copy_ssa_name (op);
3608 new_stmt
3609 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3610 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3611 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3612 set_vinfo_for_stmt (new_stmt,
3613 new_stmt_vec_info (new_stmt, loop_vinfo));
3614 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3615 UNKNOWN_LOCATION);
3616 arginfo[i].op = phi_res;
3617 vargs.safe_push (phi_res);
3619 else
3621 enum tree_code code
3622 = POINTER_TYPE_P (TREE_TYPE (op))
3623 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3624 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3625 ? sizetype : TREE_TYPE (op);
3626 widest_int cst
3627 = wi::mul (bestn->simdclone->args[i].linear_step,
3628 j * nunits);
3629 tree tcst = wide_int_to_tree (type, cst);
3630 new_temp = make_ssa_name (TREE_TYPE (op));
3631 new_stmt = gimple_build_assign (new_temp, code,
3632 arginfo[i].op, tcst);
3633 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3634 vargs.safe_push (new_temp);
3636 break;
3637 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3638 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3639 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3640 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3641 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3642 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3643 default:
3644 gcc_unreachable ();
3648 new_stmt = gimple_build_call_vec (fndecl, vargs);
3649 if (vec_dest)
3651 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3652 if (ratype)
3653 new_temp = create_tmp_var (ratype);
3654 else if (TYPE_VECTOR_SUBPARTS (vectype)
3655 == TYPE_VECTOR_SUBPARTS (rtype))
3656 new_temp = make_ssa_name (vec_dest, new_stmt);
3657 else
3658 new_temp = make_ssa_name (rtype, new_stmt);
3659 gimple_call_set_lhs (new_stmt, new_temp);
3661 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3663 if (vec_dest)
3665 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3667 unsigned int k, l;
3668 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3669 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3670 gcc_assert ((k & (k - 1)) == 0);
3671 for (l = 0; l < k; l++)
3673 tree t;
3674 if (ratype)
3676 t = build_fold_addr_expr (new_temp);
3677 t = build2 (MEM_REF, vectype, t,
3678 build_int_cst (TREE_TYPE (t),
3679 l * prec / BITS_PER_UNIT));
3681 else
3682 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3683 size_int (prec), bitsize_int (l * prec));
3684 new_stmt
3685 = gimple_build_assign (make_ssa_name (vectype), t);
3686 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3687 if (j == 0 && l == 0)
3688 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3689 else
3690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3692 prev_stmt_info = vinfo_for_stmt (new_stmt);
3695 if (ratype)
3697 tree clobber = build_constructor (ratype, NULL);
3698 TREE_THIS_VOLATILE (clobber) = 1;
3699 new_stmt = gimple_build_assign (new_temp, clobber);
3700 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3702 continue;
3704 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3706 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3707 / TYPE_VECTOR_SUBPARTS (rtype));
3708 gcc_assert ((k & (k - 1)) == 0);
3709 if ((j & (k - 1)) == 0)
3710 vec_alloc (ret_ctor_elts, k);
3711 if (ratype)
3713 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3714 for (m = 0; m < o; m++)
3716 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3717 size_int (m), NULL_TREE, NULL_TREE);
3718 new_stmt
3719 = gimple_build_assign (make_ssa_name (rtype), tem);
3720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3721 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3722 gimple_assign_lhs (new_stmt));
3724 tree clobber = build_constructor (ratype, NULL);
3725 TREE_THIS_VOLATILE (clobber) = 1;
3726 new_stmt = gimple_build_assign (new_temp, clobber);
3727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3729 else
3730 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3731 if ((j & (k - 1)) != k - 1)
3732 continue;
3733 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3734 new_stmt
3735 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3738 if ((unsigned) j == k - 1)
3739 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3740 else
3741 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3743 prev_stmt_info = vinfo_for_stmt (new_stmt);
3744 continue;
3746 else if (ratype)
3748 tree t = build_fold_addr_expr (new_temp);
3749 t = build2 (MEM_REF, vectype, t,
3750 build_int_cst (TREE_TYPE (t), 0));
3751 new_stmt
3752 = gimple_build_assign (make_ssa_name (vec_dest), t);
3753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3754 tree clobber = build_constructor (ratype, NULL);
3755 TREE_THIS_VOLATILE (clobber) = 1;
3756 vect_finish_stmt_generation (stmt,
3757 gimple_build_assign (new_temp,
3758 clobber), gsi);
3762 if (j == 0)
3763 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3764 else
3765 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3767 prev_stmt_info = vinfo_for_stmt (new_stmt);
3770 vargs.release ();
3772 /* The call in STMT might prevent it from being removed in dce.
3773 We however cannot remove it here, due to the way the ssa name
3774 it defines is mapped to the new definition. So just replace
3775 rhs of the statement with something harmless. */
3777 if (slp_node)
3778 return true;
3780 if (scalar_dest)
3782 type = TREE_TYPE (scalar_dest);
3783 if (is_pattern_stmt_p (stmt_info))
3784 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3785 else
3786 lhs = gimple_call_lhs (stmt);
3787 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3789 else
3790 new_stmt = gimple_build_nop ();
3791 set_vinfo_for_stmt (new_stmt, stmt_info);
3792 set_vinfo_for_stmt (stmt, NULL);
3793 STMT_VINFO_STMT (stmt_info) = new_stmt;
3794 gsi_replace (gsi, new_stmt, true);
3795 unlink_stmt_vdef (stmt);
3797 return true;
3801 /* Function vect_gen_widened_results_half
3803 Create a vector stmt whose code, type, number of arguments, and result
3804 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3805 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3806 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3807 needs to be created (DECL is a function-decl of a target-builtin).
3808 STMT is the original scalar stmt that we are vectorizing. */
3810 static gimple *
3811 vect_gen_widened_results_half (enum tree_code code,
3812 tree decl,
3813 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3814 tree vec_dest, gimple_stmt_iterator *gsi,
3815 gimple *stmt)
3817 gimple *new_stmt;
3818 tree new_temp;
3820 /* Generate half of the widened result: */
3821 if (code == CALL_EXPR)
3823 /* Target specific support */
3824 if (op_type == binary_op)
3825 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3826 else
3827 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3828 new_temp = make_ssa_name (vec_dest, new_stmt);
3829 gimple_call_set_lhs (new_stmt, new_temp);
3831 else
3833 /* Generic support */
3834 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3835 if (op_type != binary_op)
3836 vec_oprnd1 = NULL;
3837 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3838 new_temp = make_ssa_name (vec_dest, new_stmt);
3839 gimple_assign_set_lhs (new_stmt, new_temp);
3841 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3843 return new_stmt;
3847 /* Get vectorized definitions for loop-based vectorization. For the first
3848 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3849 scalar operand), and for the rest we get a copy with
3850 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3851 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3852 The vectors are collected into VEC_OPRNDS. */
3854 static void
3855 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3856 vec<tree> *vec_oprnds, int multi_step_cvt)
3858 tree vec_oprnd;
3860 /* Get first vector operand. */
3861 /* All the vector operands except the very first one (that is scalar oprnd)
3862 are stmt copies. */
3863 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3864 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3865 else
3866 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3868 vec_oprnds->quick_push (vec_oprnd);
3870 /* Get second vector operand. */
3871 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3872 vec_oprnds->quick_push (vec_oprnd);
3874 *oprnd = vec_oprnd;
3876 /* For conversion in multiple steps, continue to get operands
3877 recursively. */
3878 if (multi_step_cvt)
3879 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3883 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3884 For multi-step conversions store the resulting vectors and call the function
3885 recursively. */
3887 static void
3888 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3889 int multi_step_cvt, gimple *stmt,
3890 vec<tree> vec_dsts,
3891 gimple_stmt_iterator *gsi,
3892 slp_tree slp_node, enum tree_code code,
3893 stmt_vec_info *prev_stmt_info)
3895 unsigned int i;
3896 tree vop0, vop1, new_tmp, vec_dest;
3897 gimple *new_stmt;
3898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3900 vec_dest = vec_dsts.pop ();
3902 for (i = 0; i < vec_oprnds->length (); i += 2)
3904 /* Create demotion operation. */
3905 vop0 = (*vec_oprnds)[i];
3906 vop1 = (*vec_oprnds)[i + 1];
3907 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3908 new_tmp = make_ssa_name (vec_dest, new_stmt);
3909 gimple_assign_set_lhs (new_stmt, new_tmp);
3910 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3912 if (multi_step_cvt)
3913 /* Store the resulting vector for next recursive call. */
3914 (*vec_oprnds)[i/2] = new_tmp;
3915 else
3917 /* This is the last step of the conversion sequence. Store the
3918 vectors in SLP_NODE or in vector info of the scalar statement
3919 (or in STMT_VINFO_RELATED_STMT chain). */
3920 if (slp_node)
3921 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3922 else
3924 if (!*prev_stmt_info)
3925 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3926 else
3927 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3929 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3934 /* For multi-step demotion operations we first generate demotion operations
3935 from the source type to the intermediate types, and then combine the
3936 results (stored in VEC_OPRNDS) in demotion operation to the destination
3937 type. */
3938 if (multi_step_cvt)
3940 /* At each level of recursion we have half of the operands we had at the
3941 previous level. */
3942 vec_oprnds->truncate ((i+1)/2);
3943 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3944 stmt, vec_dsts, gsi, slp_node,
3945 VEC_PACK_TRUNC_EXPR,
3946 prev_stmt_info);
3949 vec_dsts.quick_push (vec_dest);
3953 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3954 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3955 the resulting vectors and call the function recursively. */
3957 static void
3958 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3959 vec<tree> *vec_oprnds1,
3960 gimple *stmt, tree vec_dest,
3961 gimple_stmt_iterator *gsi,
3962 enum tree_code code1,
3963 enum tree_code code2, tree decl1,
3964 tree decl2, int op_type)
3966 int i;
3967 tree vop0, vop1, new_tmp1, new_tmp2;
3968 gimple *new_stmt1, *new_stmt2;
3969 vec<tree> vec_tmp = vNULL;
3971 vec_tmp.create (vec_oprnds0->length () * 2);
3972 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3974 if (op_type == binary_op)
3975 vop1 = (*vec_oprnds1)[i];
3976 else
3977 vop1 = NULL_TREE;
3979 /* Generate the two halves of promotion operation. */
3980 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3981 op_type, vec_dest, gsi, stmt);
3982 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3983 op_type, vec_dest, gsi, stmt);
3984 if (is_gimple_call (new_stmt1))
3986 new_tmp1 = gimple_call_lhs (new_stmt1);
3987 new_tmp2 = gimple_call_lhs (new_stmt2);
3989 else
3991 new_tmp1 = gimple_assign_lhs (new_stmt1);
3992 new_tmp2 = gimple_assign_lhs (new_stmt2);
3995 /* Store the results for the next step. */
3996 vec_tmp.quick_push (new_tmp1);
3997 vec_tmp.quick_push (new_tmp2);
4000 vec_oprnds0->release ();
4001 *vec_oprnds0 = vec_tmp;
4005 /* Check if STMT performs a conversion operation, that can be vectorized.
4006 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4007 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4010 static bool
4011 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4012 gimple **vec_stmt, slp_tree slp_node)
4014 tree vec_dest;
4015 tree scalar_dest;
4016 tree op0, op1 = NULL_TREE;
4017 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4020 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4021 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4022 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4023 tree new_temp;
4024 gimple *def_stmt;
4025 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4026 gimple *new_stmt = NULL;
4027 stmt_vec_info prev_stmt_info;
4028 int nunits_in;
4029 int nunits_out;
4030 tree vectype_out, vectype_in;
4031 int ncopies, i, j;
4032 tree lhs_type, rhs_type;
4033 enum { NARROW, NONE, WIDEN } modifier;
4034 vec<tree> vec_oprnds0 = vNULL;
4035 vec<tree> vec_oprnds1 = vNULL;
4036 tree vop0;
4037 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4038 vec_info *vinfo = stmt_info->vinfo;
4039 int multi_step_cvt = 0;
4040 vec<tree> interm_types = vNULL;
4041 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4042 int op_type;
4043 machine_mode rhs_mode;
4044 unsigned short fltsz;
4046 /* Is STMT a vectorizable conversion? */
4048 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4049 return false;
4051 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4052 && ! vec_stmt)
4053 return false;
4055 if (!is_gimple_assign (stmt))
4056 return false;
4058 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4059 return false;
4061 code = gimple_assign_rhs_code (stmt);
4062 if (!CONVERT_EXPR_CODE_P (code)
4063 && code != FIX_TRUNC_EXPR
4064 && code != FLOAT_EXPR
4065 && code != WIDEN_MULT_EXPR
4066 && code != WIDEN_LSHIFT_EXPR)
4067 return false;
4069 op_type = TREE_CODE_LENGTH (code);
4071 /* Check types of lhs and rhs. */
4072 scalar_dest = gimple_assign_lhs (stmt);
4073 lhs_type = TREE_TYPE (scalar_dest);
4074 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4076 op0 = gimple_assign_rhs1 (stmt);
4077 rhs_type = TREE_TYPE (op0);
4079 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4080 && !((INTEGRAL_TYPE_P (lhs_type)
4081 && INTEGRAL_TYPE_P (rhs_type))
4082 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4083 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4084 return false;
4086 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4087 && ((INTEGRAL_TYPE_P (lhs_type)
4088 && (TYPE_PRECISION (lhs_type)
4089 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4090 || (INTEGRAL_TYPE_P (rhs_type)
4091 && (TYPE_PRECISION (rhs_type)
4092 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4094 if (dump_enabled_p ())
4095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4096 "type conversion to/from bit-precision unsupported."
4097 "\n");
4098 return false;
4101 /* Check the operands of the operation. */
4102 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4104 if (dump_enabled_p ())
4105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4106 "use not simple.\n");
4107 return false;
4109 if (op_type == binary_op)
4111 bool ok;
4113 op1 = gimple_assign_rhs2 (stmt);
4114 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4115 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4116 OP1. */
4117 if (CONSTANT_CLASS_P (op0))
4118 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4119 else
4120 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4122 if (!ok)
4124 if (dump_enabled_p ())
4125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4126 "use not simple.\n");
4127 return false;
4131 /* If op0 is an external or constant defs use a vector type of
4132 the same size as the output vector type. */
4133 if (!vectype_in)
4134 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4135 if (vec_stmt)
4136 gcc_assert (vectype_in);
4137 if (!vectype_in)
4139 if (dump_enabled_p ())
4141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4142 "no vectype for scalar type ");
4143 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4144 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4147 return false;
4150 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4151 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4153 if (dump_enabled_p ())
4155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4156 "can't convert between boolean and non "
4157 "boolean vectors");
4158 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4159 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4162 return false;
4165 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4166 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4167 if (nunits_in < nunits_out)
4168 modifier = NARROW;
4169 else if (nunits_out == nunits_in)
4170 modifier = NONE;
4171 else
4172 modifier = WIDEN;
4174 /* Multiple types in SLP are handled by creating the appropriate number of
4175 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4176 case of SLP. */
4177 if (slp_node)
4178 ncopies = 1;
4179 else if (modifier == NARROW)
4180 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4181 else
4182 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4184 /* Sanity check: make sure that at least one copy of the vectorized stmt
4185 needs to be generated. */
4186 gcc_assert (ncopies >= 1);
4188 /* Supportable by target? */
4189 switch (modifier)
4191 case NONE:
4192 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4193 return false;
4194 if (supportable_convert_operation (code, vectype_out, vectype_in,
4195 &decl1, &code1))
4196 break;
4197 /* FALLTHRU */
4198 unsupported:
4199 if (dump_enabled_p ())
4200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4201 "conversion not supported by target.\n");
4202 return false;
4204 case WIDEN:
4205 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4206 &code1, &code2, &multi_step_cvt,
4207 &interm_types))
4209 /* Binary widening operation can only be supported directly by the
4210 architecture. */
4211 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4212 break;
4215 if (code != FLOAT_EXPR
4216 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4217 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4218 goto unsupported;
4220 rhs_mode = TYPE_MODE (rhs_type);
4221 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4222 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4223 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4224 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4226 cvt_type
4227 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4228 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4229 if (cvt_type == NULL_TREE)
4230 goto unsupported;
4232 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4234 if (!supportable_convert_operation (code, vectype_out,
4235 cvt_type, &decl1, &codecvt1))
4236 goto unsupported;
4238 else if (!supportable_widening_operation (code, stmt, vectype_out,
4239 cvt_type, &codecvt1,
4240 &codecvt2, &multi_step_cvt,
4241 &interm_types))
4242 continue;
4243 else
4244 gcc_assert (multi_step_cvt == 0);
4246 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4247 vectype_in, &code1, &code2,
4248 &multi_step_cvt, &interm_types))
4249 break;
4252 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4253 goto unsupported;
4255 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4256 codecvt2 = ERROR_MARK;
4257 else
4259 multi_step_cvt++;
4260 interm_types.safe_push (cvt_type);
4261 cvt_type = NULL_TREE;
4263 break;
4265 case NARROW:
4266 gcc_assert (op_type == unary_op);
4267 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4268 &code1, &multi_step_cvt,
4269 &interm_types))
4270 break;
4272 if (code != FIX_TRUNC_EXPR
4273 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4274 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4275 goto unsupported;
4277 rhs_mode = TYPE_MODE (rhs_type);
4278 cvt_type
4279 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4280 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4281 if (cvt_type == NULL_TREE)
4282 goto unsupported;
4283 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4284 &decl1, &codecvt1))
4285 goto unsupported;
4286 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4287 &code1, &multi_step_cvt,
4288 &interm_types))
4289 break;
4290 goto unsupported;
4292 default:
4293 gcc_unreachable ();
4296 if (!vec_stmt) /* transformation not required. */
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_NOTE, vect_location,
4300 "=== vectorizable_conversion ===\n");
4301 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4303 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4304 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4306 else if (modifier == NARROW)
4308 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4309 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4311 else
4313 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4314 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4316 interm_types.release ();
4317 return true;
4320 /** Transform. **/
4321 if (dump_enabled_p ())
4322 dump_printf_loc (MSG_NOTE, vect_location,
4323 "transform conversion. ncopies = %d.\n", ncopies);
4325 if (op_type == binary_op)
4327 if (CONSTANT_CLASS_P (op0))
4328 op0 = fold_convert (TREE_TYPE (op1), op0);
4329 else if (CONSTANT_CLASS_P (op1))
4330 op1 = fold_convert (TREE_TYPE (op0), op1);
4333 /* In case of multi-step conversion, we first generate conversion operations
4334 to the intermediate types, and then from that types to the final one.
4335 We create vector destinations for the intermediate type (TYPES) received
4336 from supportable_*_operation, and store them in the correct order
4337 for future use in vect_create_vectorized_*_stmts (). */
4338 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4339 vec_dest = vect_create_destination_var (scalar_dest,
4340 (cvt_type && modifier == WIDEN)
4341 ? cvt_type : vectype_out);
4342 vec_dsts.quick_push (vec_dest);
4344 if (multi_step_cvt)
4346 for (i = interm_types.length () - 1;
4347 interm_types.iterate (i, &intermediate_type); i--)
4349 vec_dest = vect_create_destination_var (scalar_dest,
4350 intermediate_type);
4351 vec_dsts.quick_push (vec_dest);
4355 if (cvt_type)
4356 vec_dest = vect_create_destination_var (scalar_dest,
4357 modifier == WIDEN
4358 ? vectype_out : cvt_type);
4360 if (!slp_node)
4362 if (modifier == WIDEN)
4364 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4365 if (op_type == binary_op)
4366 vec_oprnds1.create (1);
4368 else if (modifier == NARROW)
4369 vec_oprnds0.create (
4370 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4372 else if (code == WIDEN_LSHIFT_EXPR)
4373 vec_oprnds1.create (slp_node->vec_stmts_size);
4375 last_oprnd = op0;
4376 prev_stmt_info = NULL;
4377 switch (modifier)
4379 case NONE:
4380 for (j = 0; j < ncopies; j++)
4382 if (j == 0)
4383 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4384 -1);
4385 else
4386 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4388 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4390 /* Arguments are ready, create the new vector stmt. */
4391 if (code1 == CALL_EXPR)
4393 new_stmt = gimple_build_call (decl1, 1, vop0);
4394 new_temp = make_ssa_name (vec_dest, new_stmt);
4395 gimple_call_set_lhs (new_stmt, new_temp);
4397 else
4399 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4400 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4401 new_temp = make_ssa_name (vec_dest, new_stmt);
4402 gimple_assign_set_lhs (new_stmt, new_temp);
4405 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4406 if (slp_node)
4407 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4408 else
4410 if (!prev_stmt_info)
4411 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4412 else
4413 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4414 prev_stmt_info = vinfo_for_stmt (new_stmt);
4418 break;
4420 case WIDEN:
4421 /* In case the vectorization factor (VF) is bigger than the number
4422 of elements that we can fit in a vectype (nunits), we have to
4423 generate more than one vector stmt - i.e - we need to "unroll"
4424 the vector stmt by a factor VF/nunits. */
4425 for (j = 0; j < ncopies; j++)
4427 /* Handle uses. */
4428 if (j == 0)
4430 if (slp_node)
4432 if (code == WIDEN_LSHIFT_EXPR)
4434 unsigned int k;
4436 vec_oprnd1 = op1;
4437 /* Store vec_oprnd1 for every vector stmt to be created
4438 for SLP_NODE. We check during the analysis that all
4439 the shift arguments are the same. */
4440 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4441 vec_oprnds1.quick_push (vec_oprnd1);
4443 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4444 slp_node, -1);
4446 else
4447 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4448 &vec_oprnds1, slp_node, -1);
4450 else
4452 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4453 vec_oprnds0.quick_push (vec_oprnd0);
4454 if (op_type == binary_op)
4456 if (code == WIDEN_LSHIFT_EXPR)
4457 vec_oprnd1 = op1;
4458 else
4459 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4460 vec_oprnds1.quick_push (vec_oprnd1);
4464 else
4466 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4467 vec_oprnds0.truncate (0);
4468 vec_oprnds0.quick_push (vec_oprnd0);
4469 if (op_type == binary_op)
4471 if (code == WIDEN_LSHIFT_EXPR)
4472 vec_oprnd1 = op1;
4473 else
4474 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4475 vec_oprnd1);
4476 vec_oprnds1.truncate (0);
4477 vec_oprnds1.quick_push (vec_oprnd1);
4481 /* Arguments are ready. Create the new vector stmts. */
4482 for (i = multi_step_cvt; i >= 0; i--)
4484 tree this_dest = vec_dsts[i];
4485 enum tree_code c1 = code1, c2 = code2;
4486 if (i == 0 && codecvt2 != ERROR_MARK)
4488 c1 = codecvt1;
4489 c2 = codecvt2;
4491 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4492 &vec_oprnds1,
4493 stmt, this_dest, gsi,
4494 c1, c2, decl1, decl2,
4495 op_type);
4498 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4500 if (cvt_type)
4502 if (codecvt1 == CALL_EXPR)
4504 new_stmt = gimple_build_call (decl1, 1, vop0);
4505 new_temp = make_ssa_name (vec_dest, new_stmt);
4506 gimple_call_set_lhs (new_stmt, new_temp);
4508 else
4510 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4511 new_temp = make_ssa_name (vec_dest);
4512 new_stmt = gimple_build_assign (new_temp, codecvt1,
4513 vop0);
4516 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4518 else
4519 new_stmt = SSA_NAME_DEF_STMT (vop0);
4521 if (slp_node)
4522 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4523 else
4525 if (!prev_stmt_info)
4526 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4527 else
4528 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4529 prev_stmt_info = vinfo_for_stmt (new_stmt);
4534 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4535 break;
4537 case NARROW:
4538 /* In case the vectorization factor (VF) is bigger than the number
4539 of elements that we can fit in a vectype (nunits), we have to
4540 generate more than one vector stmt - i.e - we need to "unroll"
4541 the vector stmt by a factor VF/nunits. */
4542 for (j = 0; j < ncopies; j++)
4544 /* Handle uses. */
4545 if (slp_node)
4546 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4547 slp_node, -1);
4548 else
4550 vec_oprnds0.truncate (0);
4551 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4552 vect_pow2 (multi_step_cvt) - 1);
4555 /* Arguments are ready. Create the new vector stmts. */
4556 if (cvt_type)
4557 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4559 if (codecvt1 == CALL_EXPR)
4561 new_stmt = gimple_build_call (decl1, 1, vop0);
4562 new_temp = make_ssa_name (vec_dest, new_stmt);
4563 gimple_call_set_lhs (new_stmt, new_temp);
4565 else
4567 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4568 new_temp = make_ssa_name (vec_dest);
4569 new_stmt = gimple_build_assign (new_temp, codecvt1,
4570 vop0);
4573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4574 vec_oprnds0[i] = new_temp;
4577 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4578 stmt, vec_dsts, gsi,
4579 slp_node, code1,
4580 &prev_stmt_info);
4583 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4584 break;
4587 vec_oprnds0.release ();
4588 vec_oprnds1.release ();
4589 interm_types.release ();
4591 return true;
4595 /* Function vectorizable_assignment.
4597 Check if STMT performs an assignment (copy) that can be vectorized.
4598 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4599 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4600 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4602 static bool
4603 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4604 gimple **vec_stmt, slp_tree slp_node)
4606 tree vec_dest;
4607 tree scalar_dest;
4608 tree op;
4609 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4610 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4611 tree new_temp;
4612 gimple *def_stmt;
4613 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4614 int ncopies;
4615 int i, j;
4616 vec<tree> vec_oprnds = vNULL;
4617 tree vop;
4618 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4619 vec_info *vinfo = stmt_info->vinfo;
4620 gimple *new_stmt = NULL;
4621 stmt_vec_info prev_stmt_info = NULL;
4622 enum tree_code code;
4623 tree vectype_in;
4625 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4626 return false;
4628 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4629 && ! vec_stmt)
4630 return false;
4632 /* Is vectorizable assignment? */
4633 if (!is_gimple_assign (stmt))
4634 return false;
4636 scalar_dest = gimple_assign_lhs (stmt);
4637 if (TREE_CODE (scalar_dest) != SSA_NAME)
4638 return false;
4640 code = gimple_assign_rhs_code (stmt);
4641 if (gimple_assign_single_p (stmt)
4642 || code == PAREN_EXPR
4643 || CONVERT_EXPR_CODE_P (code))
4644 op = gimple_assign_rhs1 (stmt);
4645 else
4646 return false;
4648 if (code == VIEW_CONVERT_EXPR)
4649 op = TREE_OPERAND (op, 0);
4651 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4652 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4654 /* Multiple types in SLP are handled by creating the appropriate number of
4655 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4656 case of SLP. */
4657 if (slp_node)
4658 ncopies = 1;
4659 else
4660 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4662 gcc_assert (ncopies >= 1);
4664 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4668 "use not simple.\n");
4669 return false;
4672 /* We can handle NOP_EXPR conversions that do not change the number
4673 of elements or the vector size. */
4674 if ((CONVERT_EXPR_CODE_P (code)
4675 || code == VIEW_CONVERT_EXPR)
4676 && (!vectype_in
4677 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4678 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4679 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4680 return false;
4682 /* We do not handle bit-precision changes. */
4683 if ((CONVERT_EXPR_CODE_P (code)
4684 || code == VIEW_CONVERT_EXPR)
4685 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4686 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4687 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4688 || ((TYPE_PRECISION (TREE_TYPE (op))
4689 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4690 /* But a conversion that does not change the bit-pattern is ok. */
4691 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4692 > TYPE_PRECISION (TREE_TYPE (op)))
4693 && TYPE_UNSIGNED (TREE_TYPE (op)))
4694 /* Conversion between boolean types of different sizes is
4695 a simple assignment in case their vectypes are same
4696 boolean vectors. */
4697 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4698 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4700 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4702 "type conversion to/from bit-precision "
4703 "unsupported.\n");
4704 return false;
4707 if (!vec_stmt) /* transformation not required. */
4709 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4710 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_NOTE, vect_location,
4712 "=== vectorizable_assignment ===\n");
4713 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4714 return true;
4717 /** Transform. **/
4718 if (dump_enabled_p ())
4719 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4721 /* Handle def. */
4722 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4724 /* Handle use. */
4725 for (j = 0; j < ncopies; j++)
4727 /* Handle uses. */
4728 if (j == 0)
4729 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4730 else
4731 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4733 /* Arguments are ready. create the new vector stmt. */
4734 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4736 if (CONVERT_EXPR_CODE_P (code)
4737 || code == VIEW_CONVERT_EXPR)
4738 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4739 new_stmt = gimple_build_assign (vec_dest, vop);
4740 new_temp = make_ssa_name (vec_dest, new_stmt);
4741 gimple_assign_set_lhs (new_stmt, new_temp);
4742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4743 if (slp_node)
4744 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4747 if (slp_node)
4748 continue;
4750 if (j == 0)
4751 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4752 else
4753 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4755 prev_stmt_info = vinfo_for_stmt (new_stmt);
4758 vec_oprnds.release ();
4759 return true;
4763 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4764 either as shift by a scalar or by a vector. */
4766 bool
4767 vect_supportable_shift (enum tree_code code, tree scalar_type)
4770 machine_mode vec_mode;
4771 optab optab;
4772 int icode;
4773 tree vectype;
4775 vectype = get_vectype_for_scalar_type (scalar_type);
4776 if (!vectype)
4777 return false;
4779 optab = optab_for_tree_code (code, vectype, optab_scalar);
4780 if (!optab
4781 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4783 optab = optab_for_tree_code (code, vectype, optab_vector);
4784 if (!optab
4785 || (optab_handler (optab, TYPE_MODE (vectype))
4786 == CODE_FOR_nothing))
4787 return false;
4790 vec_mode = TYPE_MODE (vectype);
4791 icode = (int) optab_handler (optab, vec_mode);
4792 if (icode == CODE_FOR_nothing)
4793 return false;
4795 return true;
4799 /* Function vectorizable_shift.
4801 Check if STMT performs a shift operation that can be vectorized.
4802 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4803 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4804 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4806 static bool
4807 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4808 gimple **vec_stmt, slp_tree slp_node)
4810 tree vec_dest;
4811 tree scalar_dest;
4812 tree op0, op1 = NULL;
4813 tree vec_oprnd1 = NULL_TREE;
4814 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4815 tree vectype;
4816 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4817 enum tree_code code;
4818 machine_mode vec_mode;
4819 tree new_temp;
4820 optab optab;
4821 int icode;
4822 machine_mode optab_op2_mode;
4823 gimple *def_stmt;
4824 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4825 gimple *new_stmt = NULL;
4826 stmt_vec_info prev_stmt_info;
4827 int nunits_in;
4828 int nunits_out;
4829 tree vectype_out;
4830 tree op1_vectype;
4831 int ncopies;
4832 int j, i;
4833 vec<tree> vec_oprnds0 = vNULL;
4834 vec<tree> vec_oprnds1 = vNULL;
4835 tree vop0, vop1;
4836 unsigned int k;
4837 bool scalar_shift_arg = true;
4838 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4839 vec_info *vinfo = stmt_info->vinfo;
4840 int vf;
4842 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4843 return false;
4845 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4846 && ! vec_stmt)
4847 return false;
4849 /* Is STMT a vectorizable binary/unary operation? */
4850 if (!is_gimple_assign (stmt))
4851 return false;
4853 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4854 return false;
4856 code = gimple_assign_rhs_code (stmt);
4858 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4859 || code == RROTATE_EXPR))
4860 return false;
4862 scalar_dest = gimple_assign_lhs (stmt);
4863 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4864 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4865 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4869 "bit-precision shifts not supported.\n");
4870 return false;
4873 op0 = gimple_assign_rhs1 (stmt);
4874 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4876 if (dump_enabled_p ())
4877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4878 "use not simple.\n");
4879 return false;
4881 /* If op0 is an external or constant def use a vector type with
4882 the same size as the output vector type. */
4883 if (!vectype)
4884 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4885 if (vec_stmt)
4886 gcc_assert (vectype);
4887 if (!vectype)
4889 if (dump_enabled_p ())
4890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4891 "no vectype for scalar type\n");
4892 return false;
4895 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4896 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4897 if (nunits_out != nunits_in)
4898 return false;
4900 op1 = gimple_assign_rhs2 (stmt);
4901 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4903 if (dump_enabled_p ())
4904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4905 "use not simple.\n");
4906 return false;
4909 if (loop_vinfo)
4910 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4911 else
4912 vf = 1;
4914 /* Multiple types in SLP are handled by creating the appropriate number of
4915 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4916 case of SLP. */
4917 if (slp_node)
4918 ncopies = 1;
4919 else
4920 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4922 gcc_assert (ncopies >= 1);
4924 /* Determine whether the shift amount is a vector, or scalar. If the
4925 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4927 if ((dt[1] == vect_internal_def
4928 || dt[1] == vect_induction_def)
4929 && !slp_node)
4930 scalar_shift_arg = false;
4931 else if (dt[1] == vect_constant_def
4932 || dt[1] == vect_external_def
4933 || dt[1] == vect_internal_def)
4935 /* In SLP, need to check whether the shift count is the same,
4936 in loops if it is a constant or invariant, it is always
4937 a scalar shift. */
4938 if (slp_node)
4940 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4941 gimple *slpstmt;
4943 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4944 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4945 scalar_shift_arg = false;
4948 /* If the shift amount is computed by a pattern stmt we cannot
4949 use the scalar amount directly thus give up and use a vector
4950 shift. */
4951 if (dt[1] == vect_internal_def)
4953 gimple *def = SSA_NAME_DEF_STMT (op1);
4954 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4955 scalar_shift_arg = false;
4958 else
4960 if (dump_enabled_p ())
4961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4962 "operand mode requires invariant argument.\n");
4963 return false;
4966 /* Vector shifted by vector. */
4967 if (!scalar_shift_arg)
4969 optab = optab_for_tree_code (code, vectype, optab_vector);
4970 if (dump_enabled_p ())
4971 dump_printf_loc (MSG_NOTE, vect_location,
4972 "vector/vector shift/rotate found.\n");
4974 if (!op1_vectype)
4975 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4976 if (op1_vectype == NULL_TREE
4977 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4981 "unusable type for last operand in"
4982 " vector/vector shift/rotate.\n");
4983 return false;
4986 /* See if the machine has a vector shifted by scalar insn and if not
4987 then see if it has a vector shifted by vector insn. */
4988 else
4990 optab = optab_for_tree_code (code, vectype, optab_scalar);
4991 if (optab
4992 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4994 if (dump_enabled_p ())
4995 dump_printf_loc (MSG_NOTE, vect_location,
4996 "vector/scalar shift/rotate found.\n");
4998 else
5000 optab = optab_for_tree_code (code, vectype, optab_vector);
5001 if (optab
5002 && (optab_handler (optab, TYPE_MODE (vectype))
5003 != CODE_FOR_nothing))
5005 scalar_shift_arg = false;
5007 if (dump_enabled_p ())
5008 dump_printf_loc (MSG_NOTE, vect_location,
5009 "vector/vector shift/rotate found.\n");
5011 /* Unlike the other binary operators, shifts/rotates have
5012 the rhs being int, instead of the same type as the lhs,
5013 so make sure the scalar is the right type if we are
5014 dealing with vectors of long long/long/short/char. */
5015 if (dt[1] == vect_constant_def)
5016 op1 = fold_convert (TREE_TYPE (vectype), op1);
5017 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5018 TREE_TYPE (op1)))
5020 if (slp_node
5021 && TYPE_MODE (TREE_TYPE (vectype))
5022 != TYPE_MODE (TREE_TYPE (op1)))
5024 if (dump_enabled_p ())
5025 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5026 "unusable type for last operand in"
5027 " vector/vector shift/rotate.\n");
5028 return false;
5030 if (vec_stmt && !slp_node)
5032 op1 = fold_convert (TREE_TYPE (vectype), op1);
5033 op1 = vect_init_vector (stmt, op1,
5034 TREE_TYPE (vectype), NULL);
5041 /* Supportable by target? */
5042 if (!optab)
5044 if (dump_enabled_p ())
5045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5046 "no optab.\n");
5047 return false;
5049 vec_mode = TYPE_MODE (vectype);
5050 icode = (int) optab_handler (optab, vec_mode);
5051 if (icode == CODE_FOR_nothing)
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5055 "op not supported by target.\n");
5056 /* Check only during analysis. */
5057 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5058 || (vf < vect_min_worthwhile_factor (code)
5059 && !vec_stmt))
5060 return false;
5061 if (dump_enabled_p ())
5062 dump_printf_loc (MSG_NOTE, vect_location,
5063 "proceeding using word mode.\n");
5066 /* Worthwhile without SIMD support? Check only during analysis. */
5067 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5068 && vf < vect_min_worthwhile_factor (code)
5069 && !vec_stmt)
5071 if (dump_enabled_p ())
5072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5073 "not worthwhile without SIMD support.\n");
5074 return false;
5077 if (!vec_stmt) /* transformation not required. */
5079 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5080 if (dump_enabled_p ())
5081 dump_printf_loc (MSG_NOTE, vect_location,
5082 "=== vectorizable_shift ===\n");
5083 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5084 return true;
5087 /** Transform. **/
5089 if (dump_enabled_p ())
5090 dump_printf_loc (MSG_NOTE, vect_location,
5091 "transform binary/unary operation.\n");
5093 /* Handle def. */
5094 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5096 prev_stmt_info = NULL;
5097 for (j = 0; j < ncopies; j++)
5099 /* Handle uses. */
5100 if (j == 0)
5102 if (scalar_shift_arg)
5104 /* Vector shl and shr insn patterns can be defined with scalar
5105 operand 2 (shift operand). In this case, use constant or loop
5106 invariant op1 directly, without extending it to vector mode
5107 first. */
5108 optab_op2_mode = insn_data[icode].operand[2].mode;
5109 if (!VECTOR_MODE_P (optab_op2_mode))
5111 if (dump_enabled_p ())
5112 dump_printf_loc (MSG_NOTE, vect_location,
5113 "operand 1 using scalar mode.\n");
5114 vec_oprnd1 = op1;
5115 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5116 vec_oprnds1.quick_push (vec_oprnd1);
5117 if (slp_node)
5119 /* Store vec_oprnd1 for every vector stmt to be created
5120 for SLP_NODE. We check during the analysis that all
5121 the shift arguments are the same.
5122 TODO: Allow different constants for different vector
5123 stmts generated for an SLP instance. */
5124 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5125 vec_oprnds1.quick_push (vec_oprnd1);
5130 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5131 (a special case for certain kind of vector shifts); otherwise,
5132 operand 1 should be of a vector type (the usual case). */
5133 if (vec_oprnd1)
5134 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5135 slp_node, -1);
5136 else
5137 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5138 slp_node, -1);
5140 else
5141 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5143 /* Arguments are ready. Create the new vector stmt. */
5144 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5146 vop1 = vec_oprnds1[i];
5147 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5148 new_temp = make_ssa_name (vec_dest, new_stmt);
5149 gimple_assign_set_lhs (new_stmt, new_temp);
5150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5151 if (slp_node)
5152 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5155 if (slp_node)
5156 continue;
5158 if (j == 0)
5159 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5160 else
5161 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5162 prev_stmt_info = vinfo_for_stmt (new_stmt);
5165 vec_oprnds0.release ();
5166 vec_oprnds1.release ();
5168 return true;
5172 /* Function vectorizable_operation.
5174 Check if STMT performs a binary, unary or ternary operation that can
5175 be vectorized.
5176 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5177 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5178 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5180 static bool
5181 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5182 gimple **vec_stmt, slp_tree slp_node)
5184 tree vec_dest;
5185 tree scalar_dest;
5186 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5188 tree vectype;
5189 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5190 enum tree_code code;
5191 machine_mode vec_mode;
5192 tree new_temp;
5193 int op_type;
5194 optab optab;
5195 bool target_support_p;
5196 gimple *def_stmt;
5197 enum vect_def_type dt[3]
5198 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5199 gimple *new_stmt = NULL;
5200 stmt_vec_info prev_stmt_info;
5201 int nunits_in;
5202 int nunits_out;
5203 tree vectype_out;
5204 int ncopies;
5205 int j, i;
5206 vec<tree> vec_oprnds0 = vNULL;
5207 vec<tree> vec_oprnds1 = vNULL;
5208 vec<tree> vec_oprnds2 = vNULL;
5209 tree vop0, vop1, vop2;
5210 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5211 vec_info *vinfo = stmt_info->vinfo;
5212 int vf;
5214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5215 return false;
5217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5218 && ! vec_stmt)
5219 return false;
5221 /* Is STMT a vectorizable binary/unary operation? */
5222 if (!is_gimple_assign (stmt))
5223 return false;
5225 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5226 return false;
5228 code = gimple_assign_rhs_code (stmt);
5230 /* For pointer addition, we should use the normal plus for
5231 the vector addition. */
5232 if (code == POINTER_PLUS_EXPR)
5233 code = PLUS_EXPR;
5235 /* Support only unary or binary operations. */
5236 op_type = TREE_CODE_LENGTH (code);
5237 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5239 if (dump_enabled_p ())
5240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5241 "num. args = %d (not unary/binary/ternary op).\n",
5242 op_type);
5243 return false;
5246 scalar_dest = gimple_assign_lhs (stmt);
5247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5249 /* Most operations cannot handle bit-precision types without extra
5250 truncations. */
5251 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5252 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5253 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5254 /* Exception are bitwise binary operations. */
5255 && code != BIT_IOR_EXPR
5256 && code != BIT_XOR_EXPR
5257 && code != BIT_AND_EXPR)
5259 if (dump_enabled_p ())
5260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5261 "bit-precision arithmetic not supported.\n");
5262 return false;
5265 op0 = gimple_assign_rhs1 (stmt);
5266 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5268 if (dump_enabled_p ())
5269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5270 "use not simple.\n");
5271 return false;
5273 /* If op0 is an external or constant def use a vector type with
5274 the same size as the output vector type. */
5275 if (!vectype)
5277 /* For boolean type we cannot determine vectype by
5278 invariant value (don't know whether it is a vector
5279 of booleans or vector of integers). We use output
5280 vectype because operations on boolean don't change
5281 type. */
5282 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5284 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5286 if (dump_enabled_p ())
5287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5288 "not supported operation on bool value.\n");
5289 return false;
5291 vectype = vectype_out;
5293 else
5294 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5296 if (vec_stmt)
5297 gcc_assert (vectype);
5298 if (!vectype)
5300 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "no vectype for scalar type ");
5304 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5305 TREE_TYPE (op0));
5306 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5309 return false;
5312 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5313 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5314 if (nunits_out != nunits_in)
5315 return false;
5317 if (op_type == binary_op || op_type == ternary_op)
5319 op1 = gimple_assign_rhs2 (stmt);
5320 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "use not simple.\n");
5325 return false;
5328 if (op_type == ternary_op)
5330 op2 = gimple_assign_rhs3 (stmt);
5331 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "use not simple.\n");
5336 return false;
5340 if (loop_vinfo)
5341 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5342 else
5343 vf = 1;
5345 /* Multiple types in SLP are handled by creating the appropriate number of
5346 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5347 case of SLP. */
5348 if (slp_node)
5349 ncopies = 1;
5350 else
5351 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5353 gcc_assert (ncopies >= 1);
5355 /* Shifts are handled in vectorizable_shift (). */
5356 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5357 || code == RROTATE_EXPR)
5358 return false;
5360 /* Supportable by target? */
5362 vec_mode = TYPE_MODE (vectype);
5363 if (code == MULT_HIGHPART_EXPR)
5364 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5365 else
5367 optab = optab_for_tree_code (code, vectype, optab_default);
5368 if (!optab)
5370 if (dump_enabled_p ())
5371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5372 "no optab.\n");
5373 return false;
5375 target_support_p = (optab_handler (optab, vec_mode)
5376 != CODE_FOR_nothing);
5379 if (!target_support_p)
5381 if (dump_enabled_p ())
5382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5383 "op not supported by target.\n");
5384 /* Check only during analysis. */
5385 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5386 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5387 return false;
5388 if (dump_enabled_p ())
5389 dump_printf_loc (MSG_NOTE, vect_location,
5390 "proceeding using word mode.\n");
5393 /* Worthwhile without SIMD support? Check only during analysis. */
5394 if (!VECTOR_MODE_P (vec_mode)
5395 && !vec_stmt
5396 && vf < vect_min_worthwhile_factor (code))
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400 "not worthwhile without SIMD support.\n");
5401 return false;
5404 if (!vec_stmt) /* transformation not required. */
5406 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_NOTE, vect_location,
5409 "=== vectorizable_operation ===\n");
5410 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5411 return true;
5414 /** Transform. **/
5416 if (dump_enabled_p ())
5417 dump_printf_loc (MSG_NOTE, vect_location,
5418 "transform binary/unary operation.\n");
5420 /* Handle def. */
5421 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5423 /* In case the vectorization factor (VF) is bigger than the number
5424 of elements that we can fit in a vectype (nunits), we have to generate
5425 more than one vector stmt - i.e - we need to "unroll" the
5426 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5427 from one copy of the vector stmt to the next, in the field
5428 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5429 stages to find the correct vector defs to be used when vectorizing
5430 stmts that use the defs of the current stmt. The example below
5431 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5432 we need to create 4 vectorized stmts):
5434 before vectorization:
5435 RELATED_STMT VEC_STMT
5436 S1: x = memref - -
5437 S2: z = x + 1 - -
5439 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5440 there):
5441 RELATED_STMT VEC_STMT
5442 VS1_0: vx0 = memref0 VS1_1 -
5443 VS1_1: vx1 = memref1 VS1_2 -
5444 VS1_2: vx2 = memref2 VS1_3 -
5445 VS1_3: vx3 = memref3 - -
5446 S1: x = load - VS1_0
5447 S2: z = x + 1 - -
5449 step2: vectorize stmt S2 (done here):
5450 To vectorize stmt S2 we first need to find the relevant vector
5451 def for the first operand 'x'. This is, as usual, obtained from
5452 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5453 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5454 relevant vector def 'vx0'. Having found 'vx0' we can generate
5455 the vector stmt VS2_0, and as usual, record it in the
5456 STMT_VINFO_VEC_STMT of stmt S2.
5457 When creating the second copy (VS2_1), we obtain the relevant vector
5458 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5459 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5460 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5461 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5462 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5463 chain of stmts and pointers:
5464 RELATED_STMT VEC_STMT
5465 VS1_0: vx0 = memref0 VS1_1 -
5466 VS1_1: vx1 = memref1 VS1_2 -
5467 VS1_2: vx2 = memref2 VS1_3 -
5468 VS1_3: vx3 = memref3 - -
5469 S1: x = load - VS1_0
5470 VS2_0: vz0 = vx0 + v1 VS2_1 -
5471 VS2_1: vz1 = vx1 + v1 VS2_2 -
5472 VS2_2: vz2 = vx2 + v1 VS2_3 -
5473 VS2_3: vz3 = vx3 + v1 - -
5474 S2: z = x + 1 - VS2_0 */
5476 prev_stmt_info = NULL;
5477 for (j = 0; j < ncopies; j++)
5479 /* Handle uses. */
5480 if (j == 0)
5482 if (op_type == binary_op || op_type == ternary_op)
5483 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5484 slp_node, -1);
5485 else
5486 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5487 slp_node, -1);
5488 if (op_type == ternary_op)
5489 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5490 slp_node, -1);
5492 else
5494 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5495 if (op_type == ternary_op)
5497 tree vec_oprnd = vec_oprnds2.pop ();
5498 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5499 vec_oprnd));
5503 /* Arguments are ready. Create the new vector stmt. */
5504 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5506 vop1 = ((op_type == binary_op || op_type == ternary_op)
5507 ? vec_oprnds1[i] : NULL_TREE);
5508 vop2 = ((op_type == ternary_op)
5509 ? vec_oprnds2[i] : NULL_TREE);
5510 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5511 new_temp = make_ssa_name (vec_dest, new_stmt);
5512 gimple_assign_set_lhs (new_stmt, new_temp);
5513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5514 if (slp_node)
5515 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5518 if (slp_node)
5519 continue;
5521 if (j == 0)
5522 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5523 else
5524 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5525 prev_stmt_info = vinfo_for_stmt (new_stmt);
5528 vec_oprnds0.release ();
5529 vec_oprnds1.release ();
5530 vec_oprnds2.release ();
5532 return true;
5535 /* A helper function to ensure data reference DR's base alignment
5536 for STMT_INFO. */
5538 static void
5539 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5541 if (!dr->aux)
5542 return;
5544 if (DR_VECT_AUX (dr)->base_misaligned)
5546 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5547 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5549 if (decl_in_symtab_p (base_decl))
5550 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5551 else
5553 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5554 DECL_USER_ALIGN (base_decl) = 1;
5556 DR_VECT_AUX (dr)->base_misaligned = false;
5561 /* Function get_group_alias_ptr_type.
5563 Return the alias type for the group starting at FIRST_STMT. */
5565 static tree
5566 get_group_alias_ptr_type (gimple *first_stmt)
5568 struct data_reference *first_dr, *next_dr;
5569 gimple *next_stmt;
5571 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5572 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5573 while (next_stmt)
5575 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5576 if (get_alias_set (DR_REF (first_dr))
5577 != get_alias_set (DR_REF (next_dr)))
5579 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_NOTE, vect_location,
5581 "conflicting alias set types.\n");
5582 return ptr_type_node;
5584 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5586 return reference_alias_ptr_type (DR_REF (first_dr));
5590 /* Function vectorizable_store.
5592 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5593 can be vectorized.
5594 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5595 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5596 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5598 static bool
5599 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5600 slp_tree slp_node)
5602 tree scalar_dest;
5603 tree data_ref;
5604 tree op;
5605 tree vec_oprnd = NULL_TREE;
5606 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5607 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5608 tree elem_type;
5609 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5610 struct loop *loop = NULL;
5611 machine_mode vec_mode;
5612 tree dummy;
5613 enum dr_alignment_support alignment_support_scheme;
5614 gimple *def_stmt;
5615 enum vect_def_type dt;
5616 stmt_vec_info prev_stmt_info = NULL;
5617 tree dataref_ptr = NULL_TREE;
5618 tree dataref_offset = NULL_TREE;
5619 gimple *ptr_incr = NULL;
5620 int ncopies;
5621 int j;
5622 gimple *next_stmt, *first_stmt;
5623 bool grouped_store;
5624 unsigned int group_size, i;
5625 vec<tree> oprnds = vNULL;
5626 vec<tree> result_chain = vNULL;
5627 bool inv_p;
5628 tree offset = NULL_TREE;
5629 vec<tree> vec_oprnds = vNULL;
5630 bool slp = (slp_node != NULL);
5631 unsigned int vec_num;
5632 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5633 vec_info *vinfo = stmt_info->vinfo;
5634 tree aggr_type;
5635 gather_scatter_info gs_info;
5636 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5637 gimple *new_stmt;
5638 int vf;
5639 vec_load_store_type vls_type;
5640 tree ref_type;
5642 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5643 return false;
5645 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5646 && ! vec_stmt)
5647 return false;
5649 /* Is vectorizable store? */
5651 if (!is_gimple_assign (stmt))
5652 return false;
5654 scalar_dest = gimple_assign_lhs (stmt);
5655 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5656 && is_pattern_stmt_p (stmt_info))
5657 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5658 if (TREE_CODE (scalar_dest) != ARRAY_REF
5659 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5660 && TREE_CODE (scalar_dest) != INDIRECT_REF
5661 && TREE_CODE (scalar_dest) != COMPONENT_REF
5662 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5663 && TREE_CODE (scalar_dest) != REALPART_EXPR
5664 && TREE_CODE (scalar_dest) != MEM_REF)
5665 return false;
5667 /* Cannot have hybrid store SLP -- that would mean storing to the
5668 same location twice. */
5669 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5671 gcc_assert (gimple_assign_single_p (stmt));
5673 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5674 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5676 if (loop_vinfo)
5678 loop = LOOP_VINFO_LOOP (loop_vinfo);
5679 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5681 else
5682 vf = 1;
5684 /* Multiple types in SLP are handled by creating the appropriate number of
5685 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5686 case of SLP. */
5687 if (slp)
5688 ncopies = 1;
5689 else
5690 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5692 gcc_assert (ncopies >= 1);
5694 /* FORNOW. This restriction should be relaxed. */
5695 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5699 "multiple types in nested loop.\n");
5700 return false;
5703 op = gimple_assign_rhs1 (stmt);
5705 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "use not simple.\n");
5710 return false;
5713 if (dt == vect_constant_def || dt == vect_external_def)
5714 vls_type = VLS_STORE_INVARIANT;
5715 else
5716 vls_type = VLS_STORE;
5718 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5719 return false;
5721 elem_type = TREE_TYPE (vectype);
5722 vec_mode = TYPE_MODE (vectype);
5724 /* FORNOW. In some cases can vectorize even if data-type not supported
5725 (e.g. - array initialization with 0). */
5726 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5727 return false;
5729 if (!STMT_VINFO_DATA_REF (stmt_info))
5730 return false;
5732 vect_memory_access_type memory_access_type;
5733 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5734 &memory_access_type, &gs_info))
5735 return false;
5737 if (!vec_stmt) /* transformation not required. */
5739 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5740 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5741 /* The SLP costs are calculated during SLP analysis. */
5742 if (!PURE_SLP_STMT (stmt_info))
5743 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5744 NULL, NULL, NULL);
5745 return true;
5747 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5749 /** Transform. **/
5751 ensure_base_align (stmt_info, dr);
5753 if (memory_access_type == VMAT_GATHER_SCATTER)
5755 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5756 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5757 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5758 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5759 edge pe = loop_preheader_edge (loop);
5760 gimple_seq seq;
5761 basic_block new_bb;
5762 enum { NARROW, NONE, WIDEN } modifier;
5763 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5765 if (nunits == (unsigned int) scatter_off_nunits)
5766 modifier = NONE;
5767 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5769 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5770 modifier = WIDEN;
5772 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5773 sel[i] = i | nunits;
5775 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5776 gcc_assert (perm_mask != NULL_TREE);
5778 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5780 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5781 modifier = NARROW;
5783 for (i = 0; i < (unsigned int) nunits; ++i)
5784 sel[i] = i | scatter_off_nunits;
5786 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5787 gcc_assert (perm_mask != NULL_TREE);
5788 ncopies *= 2;
5790 else
5791 gcc_unreachable ();
5793 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5794 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5795 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5796 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5797 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5798 scaletype = TREE_VALUE (arglist);
5800 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5801 && TREE_CODE (rettype) == VOID_TYPE);
5803 ptr = fold_convert (ptrtype, gs_info.base);
5804 if (!is_gimple_min_invariant (ptr))
5806 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5807 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5808 gcc_assert (!new_bb);
5811 /* Currently we support only unconditional scatter stores,
5812 so mask should be all ones. */
5813 mask = build_int_cst (masktype, -1);
5814 mask = vect_init_vector (stmt, mask, masktype, NULL);
5816 scale = build_int_cst (scaletype, gs_info.scale);
5818 prev_stmt_info = NULL;
5819 for (j = 0; j < ncopies; ++j)
5821 if (j == 0)
5823 src = vec_oprnd1
5824 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5825 op = vec_oprnd0
5826 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5828 else if (modifier != NONE && (j & 1))
5830 if (modifier == WIDEN)
5832 src = vec_oprnd1
5833 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5834 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5835 stmt, gsi);
5837 else if (modifier == NARROW)
5839 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5840 stmt, gsi);
5841 op = vec_oprnd0
5842 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5843 vec_oprnd0);
5845 else
5846 gcc_unreachable ();
5848 else
5850 src = vec_oprnd1
5851 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5852 op = vec_oprnd0
5853 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5854 vec_oprnd0);
5857 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5859 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5860 == TYPE_VECTOR_SUBPARTS (srctype));
5861 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5862 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5863 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5864 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5865 src = var;
5868 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5870 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5871 == TYPE_VECTOR_SUBPARTS (idxtype));
5872 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5873 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5874 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5875 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5876 op = var;
5879 new_stmt
5880 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5884 if (prev_stmt_info == NULL)
5885 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5886 else
5887 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5888 prev_stmt_info = vinfo_for_stmt (new_stmt);
5890 return true;
5893 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5894 if (grouped_store)
5896 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5897 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5898 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5900 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5902 /* FORNOW */
5903 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5905 /* We vectorize all the stmts of the interleaving group when we
5906 reach the last stmt in the group. */
5907 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5908 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5909 && !slp)
5911 *vec_stmt = NULL;
5912 return true;
5915 if (slp)
5917 grouped_store = false;
5918 /* VEC_NUM is the number of vect stmts to be created for this
5919 group. */
5920 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5921 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5922 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5923 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5924 op = gimple_assign_rhs1 (first_stmt);
5926 else
5927 /* VEC_NUM is the number of vect stmts to be created for this
5928 group. */
5929 vec_num = group_size;
5931 ref_type = get_group_alias_ptr_type (first_stmt);
5933 else
5935 first_stmt = stmt;
5936 first_dr = dr;
5937 group_size = vec_num = 1;
5938 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5941 if (dump_enabled_p ())
5942 dump_printf_loc (MSG_NOTE, vect_location,
5943 "transform store. ncopies = %d\n", ncopies);
5945 if (memory_access_type == VMAT_ELEMENTWISE
5946 || memory_access_type == VMAT_STRIDED_SLP)
5948 gimple_stmt_iterator incr_gsi;
5949 bool insert_after;
5950 gimple *incr;
5951 tree offvar;
5952 tree ivstep;
5953 tree running_off;
5954 gimple_seq stmts = NULL;
5955 tree stride_base, stride_step, alias_off;
5956 tree vec_oprnd;
5957 unsigned int g;
5959 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5961 stride_base
5962 = fold_build_pointer_plus
5963 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5964 size_binop (PLUS_EXPR,
5965 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5966 convert_to_ptrofftype (DR_INIT (first_dr))));
5967 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5969 /* For a store with loop-invariant (but other than power-of-2)
5970 stride (i.e. not a grouped access) like so:
5972 for (i = 0; i < n; i += stride)
5973 array[i] = ...;
5975 we generate a new induction variable and new stores from
5976 the components of the (vectorized) rhs:
5978 for (j = 0; ; j += VF*stride)
5979 vectemp = ...;
5980 tmp1 = vectemp[0];
5981 array[j] = tmp1;
5982 tmp2 = vectemp[1];
5983 array[j + stride] = tmp2;
5987 unsigned nstores = nunits;
5988 unsigned lnel = 1;
5989 tree ltype = elem_type;
5990 if (slp)
5992 if (group_size < nunits
5993 && nunits % group_size == 0)
5995 nstores = nunits / group_size;
5996 lnel = group_size;
5997 ltype = build_vector_type (elem_type, group_size);
5999 else if (group_size >= nunits
6000 && group_size % nunits == 0)
6002 nstores = 1;
6003 lnel = nunits;
6004 ltype = vectype;
6006 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6007 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6010 ivstep = stride_step;
6011 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6012 build_int_cst (TREE_TYPE (ivstep), vf));
6014 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6016 create_iv (stride_base, ivstep, NULL,
6017 loop, &incr_gsi, insert_after,
6018 &offvar, NULL);
6019 incr = gsi_stmt (incr_gsi);
6020 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6022 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6023 if (stmts)
6024 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6026 prev_stmt_info = NULL;
6027 alias_off = build_int_cst (ref_type, 0);
6028 next_stmt = first_stmt;
6029 for (g = 0; g < group_size; g++)
6031 running_off = offvar;
6032 if (g)
6034 tree size = TYPE_SIZE_UNIT (ltype);
6035 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6036 size);
6037 tree newoff = copy_ssa_name (running_off, NULL);
6038 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6039 running_off, pos);
6040 vect_finish_stmt_generation (stmt, incr, gsi);
6041 running_off = newoff;
6043 unsigned int group_el = 0;
6044 unsigned HOST_WIDE_INT
6045 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6046 for (j = 0; j < ncopies; j++)
6048 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6049 and first_stmt == stmt. */
6050 if (j == 0)
6052 if (slp)
6054 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6055 slp_node, -1);
6056 vec_oprnd = vec_oprnds[0];
6058 else
6060 gcc_assert (gimple_assign_single_p (next_stmt));
6061 op = gimple_assign_rhs1 (next_stmt);
6062 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6065 else
6067 if (slp)
6068 vec_oprnd = vec_oprnds[j];
6069 else
6071 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6072 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6076 for (i = 0; i < nstores; i++)
6078 tree newref, newoff;
6079 gimple *incr, *assign;
6080 tree size = TYPE_SIZE (ltype);
6081 /* Extract the i'th component. */
6082 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6083 bitsize_int (i), size);
6084 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6085 size, pos);
6087 elem = force_gimple_operand_gsi (gsi, elem, true,
6088 NULL_TREE, true,
6089 GSI_SAME_STMT);
6091 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6092 group_el * elsz);
6093 newref = build2 (MEM_REF, ltype,
6094 running_off, this_off);
6096 /* And store it to *running_off. */
6097 assign = gimple_build_assign (newref, elem);
6098 vect_finish_stmt_generation (stmt, assign, gsi);
6100 group_el += lnel;
6101 if (! slp
6102 || group_el == group_size)
6104 newoff = copy_ssa_name (running_off, NULL);
6105 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6106 running_off, stride_step);
6107 vect_finish_stmt_generation (stmt, incr, gsi);
6109 running_off = newoff;
6110 group_el = 0;
6112 if (g == group_size - 1
6113 && !slp)
6115 if (j == 0 && i == 0)
6116 STMT_VINFO_VEC_STMT (stmt_info)
6117 = *vec_stmt = assign;
6118 else
6119 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6120 prev_stmt_info = vinfo_for_stmt (assign);
6124 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6125 if (slp)
6126 break;
6129 vec_oprnds.release ();
6130 return true;
6133 auto_vec<tree> dr_chain (group_size);
6134 oprnds.create (group_size);
6136 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6137 gcc_assert (alignment_support_scheme);
6138 /* Targets with store-lane instructions must not require explicit
6139 realignment. */
6140 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6141 || alignment_support_scheme == dr_aligned
6142 || alignment_support_scheme == dr_unaligned_supported);
6144 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6145 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6146 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6148 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6149 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6150 else
6151 aggr_type = vectype;
6153 /* In case the vectorization factor (VF) is bigger than the number
6154 of elements that we can fit in a vectype (nunits), we have to generate
6155 more than one vector stmt - i.e - we need to "unroll" the
6156 vector stmt by a factor VF/nunits. For more details see documentation in
6157 vect_get_vec_def_for_copy_stmt. */
6159 /* In case of interleaving (non-unit grouped access):
6161 S1: &base + 2 = x2
6162 S2: &base = x0
6163 S3: &base + 1 = x1
6164 S4: &base + 3 = x3
6166 We create vectorized stores starting from base address (the access of the
6167 first stmt in the chain (S2 in the above example), when the last store stmt
6168 of the chain (S4) is reached:
6170 VS1: &base = vx2
6171 VS2: &base + vec_size*1 = vx0
6172 VS3: &base + vec_size*2 = vx1
6173 VS4: &base + vec_size*3 = vx3
6175 Then permutation statements are generated:
6177 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6178 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6181 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6182 (the order of the data-refs in the output of vect_permute_store_chain
6183 corresponds to the order of scalar stmts in the interleaving chain - see
6184 the documentation of vect_permute_store_chain()).
6186 In case of both multiple types and interleaving, above vector stores and
6187 permutation stmts are created for every copy. The result vector stmts are
6188 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6189 STMT_VINFO_RELATED_STMT for the next copies.
6192 prev_stmt_info = NULL;
6193 for (j = 0; j < ncopies; j++)
6196 if (j == 0)
6198 if (slp)
6200 /* Get vectorized arguments for SLP_NODE. */
6201 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6202 NULL, slp_node, -1);
6204 vec_oprnd = vec_oprnds[0];
6206 else
6208 /* For interleaved stores we collect vectorized defs for all the
6209 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6210 used as an input to vect_permute_store_chain(), and OPRNDS as
6211 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6213 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6214 OPRNDS are of size 1. */
6215 next_stmt = first_stmt;
6216 for (i = 0; i < group_size; i++)
6218 /* Since gaps are not supported for interleaved stores,
6219 GROUP_SIZE is the exact number of stmts in the chain.
6220 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6221 there is no interleaving, GROUP_SIZE is 1, and only one
6222 iteration of the loop will be executed. */
6223 gcc_assert (next_stmt
6224 && gimple_assign_single_p (next_stmt));
6225 op = gimple_assign_rhs1 (next_stmt);
6227 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6228 dr_chain.quick_push (vec_oprnd);
6229 oprnds.quick_push (vec_oprnd);
6230 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6234 /* We should have catched mismatched types earlier. */
6235 gcc_assert (useless_type_conversion_p (vectype,
6236 TREE_TYPE (vec_oprnd)));
6237 bool simd_lane_access_p
6238 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6239 if (simd_lane_access_p
6240 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6241 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6242 && integer_zerop (DR_OFFSET (first_dr))
6243 && integer_zerop (DR_INIT (first_dr))
6244 && alias_sets_conflict_p (get_alias_set (aggr_type),
6245 get_alias_set (TREE_TYPE (ref_type))))
6247 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6248 dataref_offset = build_int_cst (ref_type, 0);
6249 inv_p = false;
6251 else
6252 dataref_ptr
6253 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6254 simd_lane_access_p ? loop : NULL,
6255 offset, &dummy, gsi, &ptr_incr,
6256 simd_lane_access_p, &inv_p);
6257 gcc_assert (bb_vinfo || !inv_p);
6259 else
6261 /* For interleaved stores we created vectorized defs for all the
6262 defs stored in OPRNDS in the previous iteration (previous copy).
6263 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6264 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6265 next copy.
6266 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6267 OPRNDS are of size 1. */
6268 for (i = 0; i < group_size; i++)
6270 op = oprnds[i];
6271 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6272 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6273 dr_chain[i] = vec_oprnd;
6274 oprnds[i] = vec_oprnd;
6276 if (dataref_offset)
6277 dataref_offset
6278 = int_const_binop (PLUS_EXPR, dataref_offset,
6279 TYPE_SIZE_UNIT (aggr_type));
6280 else
6281 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6282 TYPE_SIZE_UNIT (aggr_type));
6285 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6287 tree vec_array;
6289 /* Combine all the vectors into an array. */
6290 vec_array = create_vector_array (vectype, vec_num);
6291 for (i = 0; i < vec_num; i++)
6293 vec_oprnd = dr_chain[i];
6294 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6297 /* Emit:
6298 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6299 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6300 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6301 gimple_call_set_lhs (new_stmt, data_ref);
6302 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6304 else
6306 new_stmt = NULL;
6307 if (grouped_store)
6309 if (j == 0)
6310 result_chain.create (group_size);
6311 /* Permute. */
6312 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6313 &result_chain);
6316 next_stmt = first_stmt;
6317 for (i = 0; i < vec_num; i++)
6319 unsigned align, misalign;
6321 if (i > 0)
6322 /* Bump the vector pointer. */
6323 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6324 stmt, NULL_TREE);
6326 if (slp)
6327 vec_oprnd = vec_oprnds[i];
6328 else if (grouped_store)
6329 /* For grouped stores vectorized defs are interleaved in
6330 vect_permute_store_chain(). */
6331 vec_oprnd = result_chain[i];
6333 data_ref = fold_build2 (MEM_REF, vectype,
6334 dataref_ptr,
6335 dataref_offset
6336 ? dataref_offset
6337 : build_int_cst (ref_type, 0));
6338 align = TYPE_ALIGN_UNIT (vectype);
6339 if (aligned_access_p (first_dr))
6340 misalign = 0;
6341 else if (DR_MISALIGNMENT (first_dr) == -1)
6343 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6344 align = TYPE_ALIGN_UNIT (elem_type);
6345 else
6346 align = get_object_alignment (DR_REF (first_dr))
6347 / BITS_PER_UNIT;
6348 misalign = 0;
6349 TREE_TYPE (data_ref)
6350 = build_aligned_type (TREE_TYPE (data_ref),
6351 align * BITS_PER_UNIT);
6353 else
6355 TREE_TYPE (data_ref)
6356 = build_aligned_type (TREE_TYPE (data_ref),
6357 TYPE_ALIGN (elem_type));
6358 misalign = DR_MISALIGNMENT (first_dr);
6360 if (dataref_offset == NULL_TREE
6361 && TREE_CODE (dataref_ptr) == SSA_NAME)
6362 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6363 misalign);
6365 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6367 tree perm_mask = perm_mask_for_reverse (vectype);
6368 tree perm_dest
6369 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6370 vectype);
6371 tree new_temp = make_ssa_name (perm_dest);
6373 /* Generate the permute statement. */
6374 gimple *perm_stmt
6375 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6376 vec_oprnd, perm_mask);
6377 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6379 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6380 vec_oprnd = new_temp;
6383 /* Arguments are ready. Create the new vector stmt. */
6384 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6385 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6387 if (slp)
6388 continue;
6390 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6391 if (!next_stmt)
6392 break;
6395 if (!slp)
6397 if (j == 0)
6398 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6399 else
6400 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6401 prev_stmt_info = vinfo_for_stmt (new_stmt);
6405 oprnds.release ();
6406 result_chain.release ();
6407 vec_oprnds.release ();
6409 return true;
6412 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6413 VECTOR_CST mask. No checks are made that the target platform supports the
6414 mask, so callers may wish to test can_vec_perm_p separately, or use
6415 vect_gen_perm_mask_checked. */
6417 tree
6418 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6420 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6421 int i, nunits;
6423 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6425 mask_elt_type = lang_hooks.types.type_for_mode
6426 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6427 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6429 mask_elts = XALLOCAVEC (tree, nunits);
6430 for (i = nunits - 1; i >= 0; i--)
6431 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6432 mask_vec = build_vector (mask_type, mask_elts);
6434 return mask_vec;
6437 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6438 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6440 tree
6441 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6443 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6444 return vect_gen_perm_mask_any (vectype, sel);
6447 /* Given a vector variable X and Y, that was generated for the scalar
6448 STMT, generate instructions to permute the vector elements of X and Y
6449 using permutation mask MASK_VEC, insert them at *GSI and return the
6450 permuted vector variable. */
6452 static tree
6453 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6454 gimple_stmt_iterator *gsi)
6456 tree vectype = TREE_TYPE (x);
6457 tree perm_dest, data_ref;
6458 gimple *perm_stmt;
6460 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6461 data_ref = make_ssa_name (perm_dest);
6463 /* Generate the permute statement. */
6464 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6465 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6467 return data_ref;
6470 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6471 inserting them on the loops preheader edge. Returns true if we
6472 were successful in doing so (and thus STMT can be moved then),
6473 otherwise returns false. */
6475 static bool
6476 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6478 ssa_op_iter i;
6479 tree op;
6480 bool any = false;
6482 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6484 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6485 if (!gimple_nop_p (def_stmt)
6486 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6488 /* Make sure we don't need to recurse. While we could do
6489 so in simple cases when there are more complex use webs
6490 we don't have an easy way to preserve stmt order to fulfil
6491 dependencies within them. */
6492 tree op2;
6493 ssa_op_iter i2;
6494 if (gimple_code (def_stmt) == GIMPLE_PHI)
6495 return false;
6496 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6498 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6499 if (!gimple_nop_p (def_stmt2)
6500 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6501 return false;
6503 any = true;
6507 if (!any)
6508 return true;
6510 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6512 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6513 if (!gimple_nop_p (def_stmt)
6514 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6516 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6517 gsi_remove (&gsi, false);
6518 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6522 return true;
6525 /* vectorizable_load.
6527 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6528 can be vectorized.
6529 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6530 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6531 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6533 static bool
6534 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6535 slp_tree slp_node, slp_instance slp_node_instance)
6537 tree scalar_dest;
6538 tree vec_dest = NULL;
6539 tree data_ref = NULL;
6540 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6541 stmt_vec_info prev_stmt_info;
6542 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6543 struct loop *loop = NULL;
6544 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6545 bool nested_in_vect_loop = false;
6546 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6547 tree elem_type;
6548 tree new_temp;
6549 machine_mode mode;
6550 gimple *new_stmt = NULL;
6551 tree dummy;
6552 enum dr_alignment_support alignment_support_scheme;
6553 tree dataref_ptr = NULL_TREE;
6554 tree dataref_offset = NULL_TREE;
6555 gimple *ptr_incr = NULL;
6556 int ncopies;
6557 int i, j, group_size, group_gap_adj;
6558 tree msq = NULL_TREE, lsq;
6559 tree offset = NULL_TREE;
6560 tree byte_offset = NULL_TREE;
6561 tree realignment_token = NULL_TREE;
6562 gphi *phi = NULL;
6563 vec<tree> dr_chain = vNULL;
6564 bool grouped_load = false;
6565 gimple *first_stmt;
6566 gimple *first_stmt_for_drptr = NULL;
6567 bool inv_p;
6568 bool compute_in_loop = false;
6569 struct loop *at_loop;
6570 int vec_num;
6571 bool slp = (slp_node != NULL);
6572 bool slp_perm = false;
6573 enum tree_code code;
6574 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6575 int vf;
6576 tree aggr_type;
6577 gather_scatter_info gs_info;
6578 vec_info *vinfo = stmt_info->vinfo;
6579 tree ref_type;
6581 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6582 return false;
6584 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6585 && ! vec_stmt)
6586 return false;
6588 /* Is vectorizable load? */
6589 if (!is_gimple_assign (stmt))
6590 return false;
6592 scalar_dest = gimple_assign_lhs (stmt);
6593 if (TREE_CODE (scalar_dest) != SSA_NAME)
6594 return false;
6596 code = gimple_assign_rhs_code (stmt);
6597 if (code != ARRAY_REF
6598 && code != BIT_FIELD_REF
6599 && code != INDIRECT_REF
6600 && code != COMPONENT_REF
6601 && code != IMAGPART_EXPR
6602 && code != REALPART_EXPR
6603 && code != MEM_REF
6604 && TREE_CODE_CLASS (code) != tcc_declaration)
6605 return false;
6607 if (!STMT_VINFO_DATA_REF (stmt_info))
6608 return false;
6610 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6611 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6613 if (loop_vinfo)
6615 loop = LOOP_VINFO_LOOP (loop_vinfo);
6616 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6617 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6619 else
6620 vf = 1;
6622 /* Multiple types in SLP are handled by creating the appropriate number of
6623 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6624 case of SLP. */
6625 if (slp)
6626 ncopies = 1;
6627 else
6628 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6630 gcc_assert (ncopies >= 1);
6632 /* FORNOW. This restriction should be relaxed. */
6633 if (nested_in_vect_loop && ncopies > 1)
6635 if (dump_enabled_p ())
6636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6637 "multiple types in nested loop.\n");
6638 return false;
6641 /* Invalidate assumptions made by dependence analysis when vectorization
6642 on the unrolled body effectively re-orders stmts. */
6643 if (ncopies > 1
6644 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6645 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6646 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6648 if (dump_enabled_p ())
6649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6650 "cannot perform implicit CSE when unrolling "
6651 "with negative dependence distance\n");
6652 return false;
6655 elem_type = TREE_TYPE (vectype);
6656 mode = TYPE_MODE (vectype);
6658 /* FORNOW. In some cases can vectorize even if data-type not supported
6659 (e.g. - data copies). */
6660 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6662 if (dump_enabled_p ())
6663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6664 "Aligned load, but unsupported type.\n");
6665 return false;
6668 /* Check if the load is a part of an interleaving chain. */
6669 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6671 grouped_load = true;
6672 /* FORNOW */
6673 gcc_assert (!nested_in_vect_loop);
6674 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6676 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6677 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6679 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6680 slp_perm = true;
6682 /* Invalidate assumptions made by dependence analysis when vectorization
6683 on the unrolled body effectively re-orders stmts. */
6684 if (!PURE_SLP_STMT (stmt_info)
6685 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6686 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6687 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6689 if (dump_enabled_p ())
6690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6691 "cannot perform implicit CSE when performing "
6692 "group loads with negative dependence distance\n");
6693 return false;
6696 /* Similarly when the stmt is a load that is both part of a SLP
6697 instance and a loop vectorized stmt via the same-dr mechanism
6698 we have to give up. */
6699 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6700 && (STMT_SLP_TYPE (stmt_info)
6701 != STMT_SLP_TYPE (vinfo_for_stmt
6702 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6704 if (dump_enabled_p ())
6705 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6706 "conflicting SLP types for CSEd load\n");
6707 return false;
6711 vect_memory_access_type memory_access_type;
6712 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6713 &memory_access_type, &gs_info))
6714 return false;
6716 if (!vec_stmt) /* transformation not required. */
6718 if (!slp)
6719 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6720 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6721 /* The SLP costs are calculated during SLP analysis. */
6722 if (!PURE_SLP_STMT (stmt_info))
6723 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6724 NULL, NULL, NULL);
6725 return true;
6728 if (!slp)
6729 gcc_assert (memory_access_type
6730 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6732 if (dump_enabled_p ())
6733 dump_printf_loc (MSG_NOTE, vect_location,
6734 "transform load. ncopies = %d\n", ncopies);
6736 /** Transform. **/
6738 ensure_base_align (stmt_info, dr);
6740 if (memory_access_type == VMAT_GATHER_SCATTER)
6742 tree vec_oprnd0 = NULL_TREE, op;
6743 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6744 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6745 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6746 edge pe = loop_preheader_edge (loop);
6747 gimple_seq seq;
6748 basic_block new_bb;
6749 enum { NARROW, NONE, WIDEN } modifier;
6750 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6752 if (nunits == gather_off_nunits)
6753 modifier = NONE;
6754 else if (nunits == gather_off_nunits / 2)
6756 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6757 modifier = WIDEN;
6759 for (i = 0; i < gather_off_nunits; ++i)
6760 sel[i] = i | nunits;
6762 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6764 else if (nunits == gather_off_nunits * 2)
6766 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6767 modifier = NARROW;
6769 for (i = 0; i < nunits; ++i)
6770 sel[i] = i < gather_off_nunits
6771 ? i : i + nunits - gather_off_nunits;
6773 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6774 ncopies *= 2;
6776 else
6777 gcc_unreachable ();
6779 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6780 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6781 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6782 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6783 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6784 scaletype = TREE_VALUE (arglist);
6785 gcc_checking_assert (types_compatible_p (srctype, rettype));
6787 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6789 ptr = fold_convert (ptrtype, gs_info.base);
6790 if (!is_gimple_min_invariant (ptr))
6792 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6793 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6794 gcc_assert (!new_bb);
6797 /* Currently we support only unconditional gather loads,
6798 so mask should be all ones. */
6799 if (TREE_CODE (masktype) == INTEGER_TYPE)
6800 mask = build_int_cst (masktype, -1);
6801 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6803 mask = build_int_cst (TREE_TYPE (masktype), -1);
6804 mask = build_vector_from_val (masktype, mask);
6805 mask = vect_init_vector (stmt, mask, masktype, NULL);
6807 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6809 REAL_VALUE_TYPE r;
6810 long tmp[6];
6811 for (j = 0; j < 6; ++j)
6812 tmp[j] = -1;
6813 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6814 mask = build_real (TREE_TYPE (masktype), r);
6815 mask = build_vector_from_val (masktype, mask);
6816 mask = vect_init_vector (stmt, mask, masktype, NULL);
6818 else
6819 gcc_unreachable ();
6821 scale = build_int_cst (scaletype, gs_info.scale);
6823 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6824 merge = build_int_cst (TREE_TYPE (rettype), 0);
6825 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6827 REAL_VALUE_TYPE r;
6828 long tmp[6];
6829 for (j = 0; j < 6; ++j)
6830 tmp[j] = 0;
6831 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6832 merge = build_real (TREE_TYPE (rettype), r);
6834 else
6835 gcc_unreachable ();
6836 merge = build_vector_from_val (rettype, merge);
6837 merge = vect_init_vector (stmt, merge, rettype, NULL);
6839 prev_stmt_info = NULL;
6840 for (j = 0; j < ncopies; ++j)
6842 if (modifier == WIDEN && (j & 1))
6843 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6844 perm_mask, stmt, gsi);
6845 else if (j == 0)
6846 op = vec_oprnd0
6847 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6848 else
6849 op = vec_oprnd0
6850 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6852 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6854 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6855 == TYPE_VECTOR_SUBPARTS (idxtype));
6856 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6857 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6858 new_stmt
6859 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6860 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6861 op = var;
6864 new_stmt
6865 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6867 if (!useless_type_conversion_p (vectype, rettype))
6869 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6870 == TYPE_VECTOR_SUBPARTS (rettype));
6871 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6872 gimple_call_set_lhs (new_stmt, op);
6873 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6874 var = make_ssa_name (vec_dest);
6875 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6876 new_stmt
6877 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6879 else
6881 var = make_ssa_name (vec_dest, new_stmt);
6882 gimple_call_set_lhs (new_stmt, var);
6885 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6887 if (modifier == NARROW)
6889 if ((j & 1) == 0)
6891 prev_res = var;
6892 continue;
6894 var = permute_vec_elements (prev_res, var,
6895 perm_mask, stmt, gsi);
6896 new_stmt = SSA_NAME_DEF_STMT (var);
6899 if (prev_stmt_info == NULL)
6900 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6901 else
6902 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6903 prev_stmt_info = vinfo_for_stmt (new_stmt);
6905 return true;
6908 if (memory_access_type == VMAT_ELEMENTWISE
6909 || memory_access_type == VMAT_STRIDED_SLP)
6911 gimple_stmt_iterator incr_gsi;
6912 bool insert_after;
6913 gimple *incr;
6914 tree offvar;
6915 tree ivstep;
6916 tree running_off;
6917 vec<constructor_elt, va_gc> *v = NULL;
6918 gimple_seq stmts = NULL;
6919 tree stride_base, stride_step, alias_off;
6921 gcc_assert (!nested_in_vect_loop);
6923 if (slp && grouped_load)
6925 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6926 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6927 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6928 ref_type = get_group_alias_ptr_type (first_stmt);
6930 else
6932 first_stmt = stmt;
6933 first_dr = dr;
6934 group_size = 1;
6935 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6938 stride_base
6939 = fold_build_pointer_plus
6940 (DR_BASE_ADDRESS (first_dr),
6941 size_binop (PLUS_EXPR,
6942 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6943 convert_to_ptrofftype (DR_INIT (first_dr))));
6944 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6946 /* For a load with loop-invariant (but other than power-of-2)
6947 stride (i.e. not a grouped access) like so:
6949 for (i = 0; i < n; i += stride)
6950 ... = array[i];
6952 we generate a new induction variable and new accesses to
6953 form a new vector (or vectors, depending on ncopies):
6955 for (j = 0; ; j += VF*stride)
6956 tmp1 = array[j];
6957 tmp2 = array[j + stride];
6959 vectemp = {tmp1, tmp2, ...}
6962 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6963 build_int_cst (TREE_TYPE (stride_step), vf));
6965 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6967 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6968 loop, &incr_gsi, insert_after,
6969 &offvar, NULL);
6970 incr = gsi_stmt (incr_gsi);
6971 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6973 stride_step = force_gimple_operand (unshare_expr (stride_step),
6974 &stmts, true, NULL_TREE);
6975 if (stmts)
6976 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6978 prev_stmt_info = NULL;
6979 running_off = offvar;
6980 alias_off = build_int_cst (ref_type, 0);
6981 int nloads = nunits;
6982 int lnel = 1;
6983 tree ltype = TREE_TYPE (vectype);
6984 tree lvectype = vectype;
6985 auto_vec<tree> dr_chain;
6986 if (memory_access_type == VMAT_STRIDED_SLP)
6988 if (group_size < nunits)
6990 /* Avoid emitting a constructor of vector elements by performing
6991 the loads using an integer type of the same size,
6992 constructing a vector of those and then re-interpreting it
6993 as the original vector type. This works around the fact
6994 that the vec_init optab was only designed for scalar
6995 element modes and thus expansion goes through memory.
6996 This avoids a huge runtime penalty due to the general
6997 inability to perform store forwarding from smaller stores
6998 to a larger load. */
6999 unsigned lsize
7000 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7001 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7002 enum machine_mode vmode = mode_for_vector (elmode,
7003 nunits / group_size);
7004 /* If we can't construct such a vector fall back to
7005 element loads of the original vector type. */
7006 if (VECTOR_MODE_P (vmode)
7007 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7009 nloads = nunits / group_size;
7010 lnel = group_size;
7011 ltype = build_nonstandard_integer_type (lsize, 1);
7012 lvectype = build_vector_type (ltype, nloads);
7015 else
7017 nloads = 1;
7018 lnel = nunits;
7019 ltype = vectype;
7021 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7023 if (slp)
7025 /* For SLP permutation support we need to load the whole group,
7026 not only the number of vector stmts the permutation result
7027 fits in. */
7028 if (slp_perm)
7030 ncopies = (group_size * vf + nunits - 1) / nunits;
7031 dr_chain.create (ncopies);
7033 else
7034 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7036 int group_el = 0;
7037 unsigned HOST_WIDE_INT
7038 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7039 for (j = 0; j < ncopies; j++)
7041 if (nloads > 1)
7042 vec_alloc (v, nloads);
7043 for (i = 0; i < nloads; i++)
7045 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7046 group_el * elsz);
7047 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7048 build2 (MEM_REF, ltype,
7049 running_off, this_off));
7050 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7051 if (nloads > 1)
7052 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7053 gimple_assign_lhs (new_stmt));
7055 group_el += lnel;
7056 if (! slp
7057 || group_el == group_size)
7059 tree newoff = copy_ssa_name (running_off);
7060 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7061 running_off, stride_step);
7062 vect_finish_stmt_generation (stmt, incr, gsi);
7064 running_off = newoff;
7065 group_el = 0;
7068 if (nloads > 1)
7070 tree vec_inv = build_constructor (lvectype, v);
7071 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7072 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7073 if (lvectype != vectype)
7075 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7076 VIEW_CONVERT_EXPR,
7077 build1 (VIEW_CONVERT_EXPR,
7078 vectype, new_temp));
7079 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7083 if (slp)
7085 if (slp_perm)
7086 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7087 else
7088 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7090 else
7092 if (j == 0)
7093 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7094 else
7095 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7096 prev_stmt_info = vinfo_for_stmt (new_stmt);
7099 if (slp_perm)
7101 unsigned n_perms;
7102 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7103 slp_node_instance, false, &n_perms);
7105 return true;
7108 if (grouped_load)
7110 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7111 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7112 /* For SLP vectorization we directly vectorize a subchain
7113 without permutation. */
7114 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7115 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7116 /* For BB vectorization always use the first stmt to base
7117 the data ref pointer on. */
7118 if (bb_vinfo)
7119 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7121 /* Check if the chain of loads is already vectorized. */
7122 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7123 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7124 ??? But we can only do so if there is exactly one
7125 as we have no way to get at the rest. Leave the CSE
7126 opportunity alone.
7127 ??? With the group load eventually participating
7128 in multiple different permutations (having multiple
7129 slp nodes which refer to the same group) the CSE
7130 is even wrong code. See PR56270. */
7131 && !slp)
7133 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7134 return true;
7136 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7137 group_gap_adj = 0;
7139 /* VEC_NUM is the number of vect stmts to be created for this group. */
7140 if (slp)
7142 grouped_load = false;
7143 /* For SLP permutation support we need to load the whole group,
7144 not only the number of vector stmts the permutation result
7145 fits in. */
7146 if (slp_perm)
7147 vec_num = (group_size * vf + nunits - 1) / nunits;
7148 else
7149 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7150 group_gap_adj = vf * group_size - nunits * vec_num;
7152 else
7153 vec_num = group_size;
7155 ref_type = get_group_alias_ptr_type (first_stmt);
7157 else
7159 first_stmt = stmt;
7160 first_dr = dr;
7161 group_size = vec_num = 1;
7162 group_gap_adj = 0;
7163 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7166 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7167 gcc_assert (alignment_support_scheme);
7168 /* Targets with load-lane instructions must not require explicit
7169 realignment. */
7170 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7171 || alignment_support_scheme == dr_aligned
7172 || alignment_support_scheme == dr_unaligned_supported);
7174 /* In case the vectorization factor (VF) is bigger than the number
7175 of elements that we can fit in a vectype (nunits), we have to generate
7176 more than one vector stmt - i.e - we need to "unroll" the
7177 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7178 from one copy of the vector stmt to the next, in the field
7179 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7180 stages to find the correct vector defs to be used when vectorizing
7181 stmts that use the defs of the current stmt. The example below
7182 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7183 need to create 4 vectorized stmts):
7185 before vectorization:
7186 RELATED_STMT VEC_STMT
7187 S1: x = memref - -
7188 S2: z = x + 1 - -
7190 step 1: vectorize stmt S1:
7191 We first create the vector stmt VS1_0, and, as usual, record a
7192 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7193 Next, we create the vector stmt VS1_1, and record a pointer to
7194 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7195 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7196 stmts and pointers:
7197 RELATED_STMT VEC_STMT
7198 VS1_0: vx0 = memref0 VS1_1 -
7199 VS1_1: vx1 = memref1 VS1_2 -
7200 VS1_2: vx2 = memref2 VS1_3 -
7201 VS1_3: vx3 = memref3 - -
7202 S1: x = load - VS1_0
7203 S2: z = x + 1 - -
7205 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7206 information we recorded in RELATED_STMT field is used to vectorize
7207 stmt S2. */
7209 /* In case of interleaving (non-unit grouped access):
7211 S1: x2 = &base + 2
7212 S2: x0 = &base
7213 S3: x1 = &base + 1
7214 S4: x3 = &base + 3
7216 Vectorized loads are created in the order of memory accesses
7217 starting from the access of the first stmt of the chain:
7219 VS1: vx0 = &base
7220 VS2: vx1 = &base + vec_size*1
7221 VS3: vx3 = &base + vec_size*2
7222 VS4: vx4 = &base + vec_size*3
7224 Then permutation statements are generated:
7226 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7227 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7230 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7231 (the order of the data-refs in the output of vect_permute_load_chain
7232 corresponds to the order of scalar stmts in the interleaving chain - see
7233 the documentation of vect_permute_load_chain()).
7234 The generation of permutation stmts and recording them in
7235 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7237 In case of both multiple types and interleaving, the vector loads and
7238 permutation stmts above are created for every copy. The result vector
7239 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7240 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7242 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7243 on a target that supports unaligned accesses (dr_unaligned_supported)
7244 we generate the following code:
7245 p = initial_addr;
7246 indx = 0;
7247 loop {
7248 p = p + indx * vectype_size;
7249 vec_dest = *(p);
7250 indx = indx + 1;
7253 Otherwise, the data reference is potentially unaligned on a target that
7254 does not support unaligned accesses (dr_explicit_realign_optimized) -
7255 then generate the following code, in which the data in each iteration is
7256 obtained by two vector loads, one from the previous iteration, and one
7257 from the current iteration:
7258 p1 = initial_addr;
7259 msq_init = *(floor(p1))
7260 p2 = initial_addr + VS - 1;
7261 realignment_token = call target_builtin;
7262 indx = 0;
7263 loop {
7264 p2 = p2 + indx * vectype_size
7265 lsq = *(floor(p2))
7266 vec_dest = realign_load (msq, lsq, realignment_token)
7267 indx = indx + 1;
7268 msq = lsq;
7269 } */
7271 /* If the misalignment remains the same throughout the execution of the
7272 loop, we can create the init_addr and permutation mask at the loop
7273 preheader. Otherwise, it needs to be created inside the loop.
7274 This can only occur when vectorizing memory accesses in the inner-loop
7275 nested within an outer-loop that is being vectorized. */
7277 if (nested_in_vect_loop
7278 && (TREE_INT_CST_LOW (DR_STEP (dr))
7279 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7281 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7282 compute_in_loop = true;
7285 if ((alignment_support_scheme == dr_explicit_realign_optimized
7286 || alignment_support_scheme == dr_explicit_realign)
7287 && !compute_in_loop)
7289 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7290 alignment_support_scheme, NULL_TREE,
7291 &at_loop);
7292 if (alignment_support_scheme == dr_explicit_realign_optimized)
7294 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7295 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7296 size_one_node);
7299 else
7300 at_loop = loop;
7302 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7303 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7305 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7306 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7307 else
7308 aggr_type = vectype;
7310 prev_stmt_info = NULL;
7311 for (j = 0; j < ncopies; j++)
7313 /* 1. Create the vector or array pointer update chain. */
7314 if (j == 0)
7316 bool simd_lane_access_p
7317 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7318 if (simd_lane_access_p
7319 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7320 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7321 && integer_zerop (DR_OFFSET (first_dr))
7322 && integer_zerop (DR_INIT (first_dr))
7323 && alias_sets_conflict_p (get_alias_set (aggr_type),
7324 get_alias_set (TREE_TYPE (ref_type)))
7325 && (alignment_support_scheme == dr_aligned
7326 || alignment_support_scheme == dr_unaligned_supported))
7328 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7329 dataref_offset = build_int_cst (ref_type, 0);
7330 inv_p = false;
7332 else if (first_stmt_for_drptr
7333 && first_stmt != first_stmt_for_drptr)
7335 dataref_ptr
7336 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7337 at_loop, offset, &dummy, gsi,
7338 &ptr_incr, simd_lane_access_p,
7339 &inv_p, byte_offset);
7340 /* Adjust the pointer by the difference to first_stmt. */
7341 data_reference_p ptrdr
7342 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7343 tree diff = fold_convert (sizetype,
7344 size_binop (MINUS_EXPR,
7345 DR_INIT (first_dr),
7346 DR_INIT (ptrdr)));
7347 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7348 stmt, diff);
7350 else
7351 dataref_ptr
7352 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7353 offset, &dummy, gsi, &ptr_incr,
7354 simd_lane_access_p, &inv_p,
7355 byte_offset);
7357 else if (dataref_offset)
7358 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7359 TYPE_SIZE_UNIT (aggr_type));
7360 else
7361 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7362 TYPE_SIZE_UNIT (aggr_type));
7364 if (grouped_load || slp_perm)
7365 dr_chain.create (vec_num);
7367 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7369 tree vec_array;
7371 vec_array = create_vector_array (vectype, vec_num);
7373 /* Emit:
7374 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7375 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7376 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7377 gimple_call_set_lhs (new_stmt, vec_array);
7378 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7380 /* Extract each vector into an SSA_NAME. */
7381 for (i = 0; i < vec_num; i++)
7383 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7384 vec_array, i);
7385 dr_chain.quick_push (new_temp);
7388 /* Record the mapping between SSA_NAMEs and statements. */
7389 vect_record_grouped_load_vectors (stmt, dr_chain);
7391 else
7393 for (i = 0; i < vec_num; i++)
7395 if (i > 0)
7396 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7397 stmt, NULL_TREE);
7399 /* 2. Create the vector-load in the loop. */
7400 switch (alignment_support_scheme)
7402 case dr_aligned:
7403 case dr_unaligned_supported:
7405 unsigned int align, misalign;
7407 data_ref
7408 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7409 dataref_offset
7410 ? dataref_offset
7411 : build_int_cst (ref_type, 0));
7412 align = TYPE_ALIGN_UNIT (vectype);
7413 if (alignment_support_scheme == dr_aligned)
7415 gcc_assert (aligned_access_p (first_dr));
7416 misalign = 0;
7418 else if (DR_MISALIGNMENT (first_dr) == -1)
7420 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7421 align = TYPE_ALIGN_UNIT (elem_type);
7422 else
7423 align = (get_object_alignment (DR_REF (first_dr))
7424 / BITS_PER_UNIT);
7425 misalign = 0;
7426 TREE_TYPE (data_ref)
7427 = build_aligned_type (TREE_TYPE (data_ref),
7428 align * BITS_PER_UNIT);
7430 else
7432 TREE_TYPE (data_ref)
7433 = build_aligned_type (TREE_TYPE (data_ref),
7434 TYPE_ALIGN (elem_type));
7435 misalign = DR_MISALIGNMENT (first_dr);
7437 if (dataref_offset == NULL_TREE
7438 && TREE_CODE (dataref_ptr) == SSA_NAME)
7439 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7440 align, misalign);
7441 break;
7443 case dr_explicit_realign:
7445 tree ptr, bump;
7447 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7449 if (compute_in_loop)
7450 msq = vect_setup_realignment (first_stmt, gsi,
7451 &realignment_token,
7452 dr_explicit_realign,
7453 dataref_ptr, NULL);
7455 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7456 ptr = copy_ssa_name (dataref_ptr);
7457 else
7458 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7459 new_stmt = gimple_build_assign
7460 (ptr, BIT_AND_EXPR, dataref_ptr,
7461 build_int_cst
7462 (TREE_TYPE (dataref_ptr),
7463 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7464 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7465 data_ref
7466 = build2 (MEM_REF, vectype, ptr,
7467 build_int_cst (ref_type, 0));
7468 vec_dest = vect_create_destination_var (scalar_dest,
7469 vectype);
7470 new_stmt = gimple_build_assign (vec_dest, data_ref);
7471 new_temp = make_ssa_name (vec_dest, new_stmt);
7472 gimple_assign_set_lhs (new_stmt, new_temp);
7473 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7474 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7475 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7476 msq = new_temp;
7478 bump = size_binop (MULT_EXPR, vs,
7479 TYPE_SIZE_UNIT (elem_type));
7480 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7481 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7482 new_stmt = gimple_build_assign
7483 (NULL_TREE, BIT_AND_EXPR, ptr,
7484 build_int_cst
7485 (TREE_TYPE (ptr),
7486 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7487 ptr = copy_ssa_name (ptr, new_stmt);
7488 gimple_assign_set_lhs (new_stmt, ptr);
7489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7490 data_ref
7491 = build2 (MEM_REF, vectype, ptr,
7492 build_int_cst (ref_type, 0));
7493 break;
7495 case dr_explicit_realign_optimized:
7496 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7497 new_temp = copy_ssa_name (dataref_ptr);
7498 else
7499 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7500 new_stmt = gimple_build_assign
7501 (new_temp, BIT_AND_EXPR, dataref_ptr,
7502 build_int_cst
7503 (TREE_TYPE (dataref_ptr),
7504 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7505 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7506 data_ref
7507 = build2 (MEM_REF, vectype, new_temp,
7508 build_int_cst (ref_type, 0));
7509 break;
7510 default:
7511 gcc_unreachable ();
7513 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7514 new_stmt = gimple_build_assign (vec_dest, data_ref);
7515 new_temp = make_ssa_name (vec_dest, new_stmt);
7516 gimple_assign_set_lhs (new_stmt, new_temp);
7517 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7519 /* 3. Handle explicit realignment if necessary/supported.
7520 Create in loop:
7521 vec_dest = realign_load (msq, lsq, realignment_token) */
7522 if (alignment_support_scheme == dr_explicit_realign_optimized
7523 || alignment_support_scheme == dr_explicit_realign)
7525 lsq = gimple_assign_lhs (new_stmt);
7526 if (!realignment_token)
7527 realignment_token = dataref_ptr;
7528 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7529 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7530 msq, lsq, realignment_token);
7531 new_temp = make_ssa_name (vec_dest, new_stmt);
7532 gimple_assign_set_lhs (new_stmt, new_temp);
7533 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7535 if (alignment_support_scheme == dr_explicit_realign_optimized)
7537 gcc_assert (phi);
7538 if (i == vec_num - 1 && j == ncopies - 1)
7539 add_phi_arg (phi, lsq,
7540 loop_latch_edge (containing_loop),
7541 UNKNOWN_LOCATION);
7542 msq = lsq;
7546 /* 4. Handle invariant-load. */
7547 if (inv_p && !bb_vinfo)
7549 gcc_assert (!grouped_load);
7550 /* If we have versioned for aliasing or the loop doesn't
7551 have any data dependencies that would preclude this,
7552 then we are sure this is a loop invariant load and
7553 thus we can insert it on the preheader edge. */
7554 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7555 && !nested_in_vect_loop
7556 && hoist_defs_of_uses (stmt, loop))
7558 if (dump_enabled_p ())
7560 dump_printf_loc (MSG_NOTE, vect_location,
7561 "hoisting out of the vectorized "
7562 "loop: ");
7563 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7565 tree tem = copy_ssa_name (scalar_dest);
7566 gsi_insert_on_edge_immediate
7567 (loop_preheader_edge (loop),
7568 gimple_build_assign (tem,
7569 unshare_expr
7570 (gimple_assign_rhs1 (stmt))));
7571 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7572 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7573 set_vinfo_for_stmt (new_stmt,
7574 new_stmt_vec_info (new_stmt, vinfo));
7576 else
7578 gimple_stmt_iterator gsi2 = *gsi;
7579 gsi_next (&gsi2);
7580 new_temp = vect_init_vector (stmt, scalar_dest,
7581 vectype, &gsi2);
7582 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7586 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7588 tree perm_mask = perm_mask_for_reverse (vectype);
7589 new_temp = permute_vec_elements (new_temp, new_temp,
7590 perm_mask, stmt, gsi);
7591 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7594 /* Collect vector loads and later create their permutation in
7595 vect_transform_grouped_load (). */
7596 if (grouped_load || slp_perm)
7597 dr_chain.quick_push (new_temp);
7599 /* Store vector loads in the corresponding SLP_NODE. */
7600 if (slp && !slp_perm)
7601 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7603 /* Bump the vector pointer to account for a gap or for excess
7604 elements loaded for a permuted SLP load. */
7605 if (group_gap_adj != 0)
7607 bool ovf;
7608 tree bump
7609 = wide_int_to_tree (sizetype,
7610 wi::smul (TYPE_SIZE_UNIT (elem_type),
7611 group_gap_adj, &ovf));
7612 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7613 stmt, bump);
7617 if (slp && !slp_perm)
7618 continue;
7620 if (slp_perm)
7622 unsigned n_perms;
7623 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7624 slp_node_instance, false,
7625 &n_perms))
7627 dr_chain.release ();
7628 return false;
7631 else
7633 if (grouped_load)
7635 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7636 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7637 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7639 else
7641 if (j == 0)
7642 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7643 else
7644 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7645 prev_stmt_info = vinfo_for_stmt (new_stmt);
7648 dr_chain.release ();
7651 return true;
7654 /* Function vect_is_simple_cond.
7656 Input:
7657 LOOP - the loop that is being vectorized.
7658 COND - Condition that is checked for simple use.
7660 Output:
7661 *COMP_VECTYPE - the vector type for the comparison.
7663 Returns whether a COND can be vectorized. Checks whether
7664 condition operands are supportable using vec_is_simple_use. */
7666 static bool
7667 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7669 tree lhs, rhs;
7670 enum vect_def_type dt;
7671 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7673 /* Mask case. */
7674 if (TREE_CODE (cond) == SSA_NAME
7675 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7677 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7678 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7679 &dt, comp_vectype)
7680 || !*comp_vectype
7681 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7682 return false;
7683 return true;
7686 if (!COMPARISON_CLASS_P (cond))
7687 return false;
7689 lhs = TREE_OPERAND (cond, 0);
7690 rhs = TREE_OPERAND (cond, 1);
7692 if (TREE_CODE (lhs) == SSA_NAME)
7694 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7695 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7696 return false;
7698 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7699 && TREE_CODE (lhs) != FIXED_CST)
7700 return false;
7702 if (TREE_CODE (rhs) == SSA_NAME)
7704 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7705 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7706 return false;
7708 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7709 && TREE_CODE (rhs) != FIXED_CST)
7710 return false;
7712 if (vectype1 && vectype2
7713 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7714 return false;
7716 *comp_vectype = vectype1 ? vectype1 : vectype2;
7717 return true;
7720 /* vectorizable_condition.
7722 Check if STMT is conditional modify expression that can be vectorized.
7723 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7724 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7725 at GSI.
7727 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7728 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7729 else clause if it is 2).
7731 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7733 bool
7734 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7735 gimple **vec_stmt, tree reduc_def, int reduc_index,
7736 slp_tree slp_node)
7738 tree scalar_dest = NULL_TREE;
7739 tree vec_dest = NULL_TREE;
7740 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7741 tree then_clause, else_clause;
7742 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7743 tree comp_vectype = NULL_TREE;
7744 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7745 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7746 tree vec_compare;
7747 tree new_temp;
7748 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7749 enum vect_def_type dt, dts[4];
7750 int ncopies;
7751 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7752 stmt_vec_info prev_stmt_info = NULL;
7753 int i, j;
7754 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7755 vec<tree> vec_oprnds0 = vNULL;
7756 vec<tree> vec_oprnds1 = vNULL;
7757 vec<tree> vec_oprnds2 = vNULL;
7758 vec<tree> vec_oprnds3 = vNULL;
7759 tree vec_cmp_type;
7760 bool masked = false;
7762 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7763 return false;
7765 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7767 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7768 return false;
7770 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7771 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7772 && reduc_def))
7773 return false;
7775 /* FORNOW: not yet supported. */
7776 if (STMT_VINFO_LIVE_P (stmt_info))
7778 if (dump_enabled_p ())
7779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7780 "value used after loop.\n");
7781 return false;
7785 /* Is vectorizable conditional operation? */
7786 if (!is_gimple_assign (stmt))
7787 return false;
7789 code = gimple_assign_rhs_code (stmt);
7791 if (code != COND_EXPR)
7792 return false;
7794 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7795 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7796 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7798 if (slp_node)
7799 ncopies = 1;
7800 else
7801 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7803 gcc_assert (ncopies >= 1);
7804 if (reduc_index && ncopies > 1)
7805 return false; /* FORNOW */
7807 cond_expr = gimple_assign_rhs1 (stmt);
7808 then_clause = gimple_assign_rhs2 (stmt);
7809 else_clause = gimple_assign_rhs3 (stmt);
7811 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7812 || !comp_vectype)
7813 return false;
7815 gimple *def_stmt;
7816 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7817 &vectype1))
7818 return false;
7819 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7820 &vectype2))
7821 return false;
7823 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7824 return false;
7826 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7827 return false;
7829 masked = !COMPARISON_CLASS_P (cond_expr);
7830 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7832 if (vec_cmp_type == NULL_TREE)
7833 return false;
7835 cond_code = TREE_CODE (cond_expr);
7836 if (!masked)
7838 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7839 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7842 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7844 /* Boolean values may have another representation in vectors
7845 and therefore we prefer bit operations over comparison for
7846 them (which also works for scalar masks). We store opcodes
7847 to use in bitop1 and bitop2. Statement is vectorized as
7848 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7849 depending on bitop1 and bitop2 arity. */
7850 switch (cond_code)
7852 case GT_EXPR:
7853 bitop1 = BIT_NOT_EXPR;
7854 bitop2 = BIT_AND_EXPR;
7855 break;
7856 case GE_EXPR:
7857 bitop1 = BIT_NOT_EXPR;
7858 bitop2 = BIT_IOR_EXPR;
7859 break;
7860 case LT_EXPR:
7861 bitop1 = BIT_NOT_EXPR;
7862 bitop2 = BIT_AND_EXPR;
7863 std::swap (cond_expr0, cond_expr1);
7864 break;
7865 case LE_EXPR:
7866 bitop1 = BIT_NOT_EXPR;
7867 bitop2 = BIT_IOR_EXPR;
7868 std::swap (cond_expr0, cond_expr1);
7869 break;
7870 case NE_EXPR:
7871 bitop1 = BIT_XOR_EXPR;
7872 break;
7873 case EQ_EXPR:
7874 bitop1 = BIT_XOR_EXPR;
7875 bitop2 = BIT_NOT_EXPR;
7876 break;
7877 default:
7878 return false;
7880 cond_code = SSA_NAME;
7883 if (!vec_stmt)
7885 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7886 if (bitop1 != NOP_EXPR)
7888 machine_mode mode = TYPE_MODE (comp_vectype);
7889 optab optab;
7891 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7892 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7893 return false;
7895 if (bitop2 != NOP_EXPR)
7897 optab = optab_for_tree_code (bitop2, comp_vectype,
7898 optab_default);
7899 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7900 return false;
7903 return expand_vec_cond_expr_p (vectype, comp_vectype,
7904 cond_code);
7907 /* Transform. */
7909 if (!slp_node)
7911 vec_oprnds0.create (1);
7912 vec_oprnds1.create (1);
7913 vec_oprnds2.create (1);
7914 vec_oprnds3.create (1);
7917 /* Handle def. */
7918 scalar_dest = gimple_assign_lhs (stmt);
7919 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7921 /* Handle cond expr. */
7922 for (j = 0; j < ncopies; j++)
7924 gassign *new_stmt = NULL;
7925 if (j == 0)
7927 if (slp_node)
7929 auto_vec<tree, 4> ops;
7930 auto_vec<vec<tree>, 4> vec_defs;
7932 if (masked)
7933 ops.safe_push (cond_expr);
7934 else
7936 ops.safe_push (cond_expr0);
7937 ops.safe_push (cond_expr1);
7939 ops.safe_push (then_clause);
7940 ops.safe_push (else_clause);
7941 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7942 vec_oprnds3 = vec_defs.pop ();
7943 vec_oprnds2 = vec_defs.pop ();
7944 if (!masked)
7945 vec_oprnds1 = vec_defs.pop ();
7946 vec_oprnds0 = vec_defs.pop ();
7948 else
7950 gimple *gtemp;
7951 if (masked)
7953 vec_cond_lhs
7954 = vect_get_vec_def_for_operand (cond_expr, stmt,
7955 comp_vectype);
7956 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7957 &gtemp, &dts[0]);
7959 else
7961 vec_cond_lhs
7962 = vect_get_vec_def_for_operand (cond_expr0,
7963 stmt, comp_vectype);
7964 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
7966 vec_cond_rhs
7967 = vect_get_vec_def_for_operand (cond_expr1,
7968 stmt, comp_vectype);
7969 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
7971 if (reduc_index == 1)
7972 vec_then_clause = reduc_def;
7973 else
7975 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7976 stmt);
7977 vect_is_simple_use (then_clause, loop_vinfo,
7978 &gtemp, &dts[2]);
7980 if (reduc_index == 2)
7981 vec_else_clause = reduc_def;
7982 else
7984 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7985 stmt);
7986 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7990 else
7992 vec_cond_lhs
7993 = vect_get_vec_def_for_stmt_copy (dts[0],
7994 vec_oprnds0.pop ());
7995 if (!masked)
7996 vec_cond_rhs
7997 = vect_get_vec_def_for_stmt_copy (dts[1],
7998 vec_oprnds1.pop ());
8000 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8001 vec_oprnds2.pop ());
8002 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8003 vec_oprnds3.pop ());
8006 if (!slp_node)
8008 vec_oprnds0.quick_push (vec_cond_lhs);
8009 if (!masked)
8010 vec_oprnds1.quick_push (vec_cond_rhs);
8011 vec_oprnds2.quick_push (vec_then_clause);
8012 vec_oprnds3.quick_push (vec_else_clause);
8015 /* Arguments are ready. Create the new vector stmt. */
8016 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8018 vec_then_clause = vec_oprnds2[i];
8019 vec_else_clause = vec_oprnds3[i];
8021 if (masked)
8022 vec_compare = vec_cond_lhs;
8023 else
8025 vec_cond_rhs = vec_oprnds1[i];
8026 if (bitop1 == NOP_EXPR)
8027 vec_compare = build2 (cond_code, vec_cmp_type,
8028 vec_cond_lhs, vec_cond_rhs);
8029 else
8031 new_temp = make_ssa_name (vec_cmp_type);
8032 if (bitop1 == BIT_NOT_EXPR)
8033 new_stmt = gimple_build_assign (new_temp, bitop1,
8034 vec_cond_rhs);
8035 else
8036 new_stmt
8037 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8038 vec_cond_rhs);
8039 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8040 if (bitop2 == NOP_EXPR)
8041 vec_compare = new_temp;
8042 else if (bitop2 == BIT_NOT_EXPR)
8044 /* Instead of doing ~x ? y : z do x ? z : y. */
8045 vec_compare = new_temp;
8046 std::swap (vec_then_clause, vec_else_clause);
8048 else
8050 vec_compare = make_ssa_name (vec_cmp_type);
8051 new_stmt
8052 = gimple_build_assign (vec_compare, bitop2,
8053 vec_cond_lhs, new_temp);
8054 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8058 new_temp = make_ssa_name (vec_dest);
8059 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8060 vec_compare, vec_then_clause,
8061 vec_else_clause);
8062 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8063 if (slp_node)
8064 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8067 if (slp_node)
8068 continue;
8070 if (j == 0)
8071 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8072 else
8073 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8075 prev_stmt_info = vinfo_for_stmt (new_stmt);
8078 vec_oprnds0.release ();
8079 vec_oprnds1.release ();
8080 vec_oprnds2.release ();
8081 vec_oprnds3.release ();
8083 return true;
8086 /* vectorizable_comparison.
8088 Check if STMT is comparison expression that can be vectorized.
8089 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8090 comparison, put it in VEC_STMT, and insert it at GSI.
8092 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8094 static bool
8095 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8096 gimple **vec_stmt, tree reduc_def,
8097 slp_tree slp_node)
8099 tree lhs, rhs1, rhs2;
8100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8101 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8102 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8103 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8104 tree new_temp;
8105 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8106 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8107 unsigned nunits;
8108 int ncopies;
8109 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8110 stmt_vec_info prev_stmt_info = NULL;
8111 int i, j;
8112 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8113 vec<tree> vec_oprnds0 = vNULL;
8114 vec<tree> vec_oprnds1 = vNULL;
8115 gimple *def_stmt;
8116 tree mask_type;
8117 tree mask;
8119 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8120 return false;
8122 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8123 return false;
8125 mask_type = vectype;
8126 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8128 if (slp_node)
8129 ncopies = 1;
8130 else
8131 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8133 gcc_assert (ncopies >= 1);
8134 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8135 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8136 && reduc_def))
8137 return false;
8139 if (STMT_VINFO_LIVE_P (stmt_info))
8141 if (dump_enabled_p ())
8142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8143 "value used after loop.\n");
8144 return false;
8147 if (!is_gimple_assign (stmt))
8148 return false;
8150 code = gimple_assign_rhs_code (stmt);
8152 if (TREE_CODE_CLASS (code) != tcc_comparison)
8153 return false;
8155 rhs1 = gimple_assign_rhs1 (stmt);
8156 rhs2 = gimple_assign_rhs2 (stmt);
8158 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8159 &dts[0], &vectype1))
8160 return false;
8162 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8163 &dts[1], &vectype2))
8164 return false;
8166 if (vectype1 && vectype2
8167 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8168 return false;
8170 vectype = vectype1 ? vectype1 : vectype2;
8172 /* Invariant comparison. */
8173 if (!vectype)
8175 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8176 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8177 return false;
8179 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8180 return false;
8182 /* Can't compare mask and non-mask types. */
8183 if (vectype1 && vectype2
8184 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8185 return false;
8187 /* Boolean values may have another representation in vectors
8188 and therefore we prefer bit operations over comparison for
8189 them (which also works for scalar masks). We store opcodes
8190 to use in bitop1 and bitop2. Statement is vectorized as
8191 BITOP2 (rhs1 BITOP1 rhs2) or
8192 rhs1 BITOP2 (BITOP1 rhs2)
8193 depending on bitop1 and bitop2 arity. */
8194 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8196 if (code == GT_EXPR)
8198 bitop1 = BIT_NOT_EXPR;
8199 bitop2 = BIT_AND_EXPR;
8201 else if (code == GE_EXPR)
8203 bitop1 = BIT_NOT_EXPR;
8204 bitop2 = BIT_IOR_EXPR;
8206 else if (code == LT_EXPR)
8208 bitop1 = BIT_NOT_EXPR;
8209 bitop2 = BIT_AND_EXPR;
8210 std::swap (rhs1, rhs2);
8211 std::swap (dts[0], dts[1]);
8213 else if (code == LE_EXPR)
8215 bitop1 = BIT_NOT_EXPR;
8216 bitop2 = BIT_IOR_EXPR;
8217 std::swap (rhs1, rhs2);
8218 std::swap (dts[0], dts[1]);
8220 else
8222 bitop1 = BIT_XOR_EXPR;
8223 if (code == EQ_EXPR)
8224 bitop2 = BIT_NOT_EXPR;
8228 if (!vec_stmt)
8230 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8231 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8232 dts, NULL, NULL);
8233 if (bitop1 == NOP_EXPR)
8234 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8235 else
8237 machine_mode mode = TYPE_MODE (vectype);
8238 optab optab;
8240 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8241 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8242 return false;
8244 if (bitop2 != NOP_EXPR)
8246 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8247 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8248 return false;
8250 return true;
8254 /* Transform. */
8255 if (!slp_node)
8257 vec_oprnds0.create (1);
8258 vec_oprnds1.create (1);
8261 /* Handle def. */
8262 lhs = gimple_assign_lhs (stmt);
8263 mask = vect_create_destination_var (lhs, mask_type);
8265 /* Handle cmp expr. */
8266 for (j = 0; j < ncopies; j++)
8268 gassign *new_stmt = NULL;
8269 if (j == 0)
8271 if (slp_node)
8273 auto_vec<tree, 2> ops;
8274 auto_vec<vec<tree>, 2> vec_defs;
8276 ops.safe_push (rhs1);
8277 ops.safe_push (rhs2);
8278 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8279 vec_oprnds1 = vec_defs.pop ();
8280 vec_oprnds0 = vec_defs.pop ();
8282 else
8284 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8285 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8288 else
8290 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8291 vec_oprnds0.pop ());
8292 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8293 vec_oprnds1.pop ());
8296 if (!slp_node)
8298 vec_oprnds0.quick_push (vec_rhs1);
8299 vec_oprnds1.quick_push (vec_rhs2);
8302 /* Arguments are ready. Create the new vector stmt. */
8303 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8305 vec_rhs2 = vec_oprnds1[i];
8307 new_temp = make_ssa_name (mask);
8308 if (bitop1 == NOP_EXPR)
8310 new_stmt = gimple_build_assign (new_temp, code,
8311 vec_rhs1, vec_rhs2);
8312 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8314 else
8316 if (bitop1 == BIT_NOT_EXPR)
8317 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8318 else
8319 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8320 vec_rhs2);
8321 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8322 if (bitop2 != NOP_EXPR)
8324 tree res = make_ssa_name (mask);
8325 if (bitop2 == BIT_NOT_EXPR)
8326 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8327 else
8328 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8329 new_temp);
8330 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8333 if (slp_node)
8334 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8337 if (slp_node)
8338 continue;
8340 if (j == 0)
8341 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8342 else
8343 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8345 prev_stmt_info = vinfo_for_stmt (new_stmt);
8348 vec_oprnds0.release ();
8349 vec_oprnds1.release ();
8351 return true;
8354 /* Make sure the statement is vectorizable. */
8356 bool
8357 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8360 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8361 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8362 bool ok;
8363 tree scalar_type, vectype;
8364 gimple *pattern_stmt;
8365 gimple_seq pattern_def_seq;
8367 if (dump_enabled_p ())
8369 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8370 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8373 if (gimple_has_volatile_ops (stmt))
8375 if (dump_enabled_p ())
8376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8377 "not vectorized: stmt has volatile operands\n");
8379 return false;
8382 /* Skip stmts that do not need to be vectorized. In loops this is expected
8383 to include:
8384 - the COND_EXPR which is the loop exit condition
8385 - any LABEL_EXPRs in the loop
8386 - computations that are used only for array indexing or loop control.
8387 In basic blocks we only analyze statements that are a part of some SLP
8388 instance, therefore, all the statements are relevant.
8390 Pattern statement needs to be analyzed instead of the original statement
8391 if the original statement is not relevant. Otherwise, we analyze both
8392 statements. In basic blocks we are called from some SLP instance
8393 traversal, don't analyze pattern stmts instead, the pattern stmts
8394 already will be part of SLP instance. */
8396 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8397 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8398 && !STMT_VINFO_LIVE_P (stmt_info))
8400 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8401 && pattern_stmt
8402 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8403 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8405 /* Analyze PATTERN_STMT instead of the original stmt. */
8406 stmt = pattern_stmt;
8407 stmt_info = vinfo_for_stmt (pattern_stmt);
8408 if (dump_enabled_p ())
8410 dump_printf_loc (MSG_NOTE, vect_location,
8411 "==> examining pattern statement: ");
8412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8415 else
8417 if (dump_enabled_p ())
8418 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8420 return true;
8423 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8424 && node == NULL
8425 && pattern_stmt
8426 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8427 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8429 /* Analyze PATTERN_STMT too. */
8430 if (dump_enabled_p ())
8432 dump_printf_loc (MSG_NOTE, vect_location,
8433 "==> examining pattern statement: ");
8434 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8437 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8438 return false;
8441 if (is_pattern_stmt_p (stmt_info)
8442 && node == NULL
8443 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8445 gimple_stmt_iterator si;
8447 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8449 gimple *pattern_def_stmt = gsi_stmt (si);
8450 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8451 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8453 /* Analyze def stmt of STMT if it's a pattern stmt. */
8454 if (dump_enabled_p ())
8456 dump_printf_loc (MSG_NOTE, vect_location,
8457 "==> examining pattern def statement: ");
8458 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8461 if (!vect_analyze_stmt (pattern_def_stmt,
8462 need_to_vectorize, node))
8463 return false;
8468 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8470 case vect_internal_def:
8471 break;
8473 case vect_reduction_def:
8474 case vect_nested_cycle:
8475 gcc_assert (!bb_vinfo
8476 && (relevance == vect_used_in_outer
8477 || relevance == vect_used_in_outer_by_reduction
8478 || relevance == vect_used_by_reduction
8479 || relevance == vect_unused_in_scope
8480 || relevance == vect_used_only_live));
8481 break;
8483 case vect_induction_def:
8484 case vect_constant_def:
8485 case vect_external_def:
8486 case vect_unknown_def_type:
8487 default:
8488 gcc_unreachable ();
8491 if (bb_vinfo)
8493 gcc_assert (PURE_SLP_STMT (stmt_info));
8495 /* Memory accesses already got their vector type assigned
8496 in vect_analyze_data_refs. */
8497 if (! STMT_VINFO_DATA_REF (stmt_info))
8499 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8500 if (dump_enabled_p ())
8502 dump_printf_loc (MSG_NOTE, vect_location,
8503 "get vectype for scalar type: ");
8504 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8505 dump_printf (MSG_NOTE, "\n");
8508 vectype = get_vectype_for_scalar_type (scalar_type);
8509 if (!vectype)
8511 if (dump_enabled_p ())
8513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8514 "not SLPed: unsupported data-type ");
8515 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8516 scalar_type);
8517 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8519 return false;
8522 if (dump_enabled_p ())
8524 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8525 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8526 dump_printf (MSG_NOTE, "\n");
8529 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8533 if (STMT_VINFO_RELEVANT_P (stmt_info))
8535 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8536 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8537 || (is_gimple_call (stmt)
8538 && gimple_call_lhs (stmt) == NULL_TREE));
8539 *need_to_vectorize = true;
8542 if (PURE_SLP_STMT (stmt_info) && !node)
8544 dump_printf_loc (MSG_NOTE, vect_location,
8545 "handled only by SLP analysis\n");
8546 return true;
8549 ok = true;
8550 if (!bb_vinfo
8551 && (STMT_VINFO_RELEVANT_P (stmt_info)
8552 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8553 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8554 || vectorizable_conversion (stmt, NULL, NULL, node)
8555 || vectorizable_shift (stmt, NULL, NULL, node)
8556 || vectorizable_operation (stmt, NULL, NULL, node)
8557 || vectorizable_assignment (stmt, NULL, NULL, node)
8558 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8559 || vectorizable_call (stmt, NULL, NULL, node)
8560 || vectorizable_store (stmt, NULL, NULL, node)
8561 || vectorizable_reduction (stmt, NULL, NULL, node)
8562 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8563 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8564 else
8566 if (bb_vinfo)
8567 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8568 || vectorizable_conversion (stmt, NULL, NULL, node)
8569 || vectorizable_shift (stmt, NULL, NULL, node)
8570 || vectorizable_operation (stmt, NULL, NULL, node)
8571 || vectorizable_assignment (stmt, NULL, NULL, node)
8572 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8573 || vectorizable_call (stmt, NULL, NULL, node)
8574 || vectorizable_store (stmt, NULL, NULL, node)
8575 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8576 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8579 if (!ok)
8581 if (dump_enabled_p ())
8583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8584 "not vectorized: relevant stmt not ");
8585 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8586 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8589 return false;
8592 if (bb_vinfo)
8593 return true;
8595 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8596 need extra handling, except for vectorizable reductions. */
8597 if (STMT_VINFO_LIVE_P (stmt_info)
8598 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8599 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8601 if (!ok)
8603 if (dump_enabled_p ())
8605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8606 "not vectorized: live stmt not ");
8607 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8608 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8611 return false;
8614 return true;
8618 /* Function vect_transform_stmt.
8620 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8622 bool
8623 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8624 bool *grouped_store, slp_tree slp_node,
8625 slp_instance slp_node_instance)
8627 bool is_store = false;
8628 gimple *vec_stmt = NULL;
8629 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8630 bool done;
8632 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8633 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8635 switch (STMT_VINFO_TYPE (stmt_info))
8637 case type_demotion_vec_info_type:
8638 case type_promotion_vec_info_type:
8639 case type_conversion_vec_info_type:
8640 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8641 gcc_assert (done);
8642 break;
8644 case induc_vec_info_type:
8645 gcc_assert (!slp_node);
8646 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8647 gcc_assert (done);
8648 break;
8650 case shift_vec_info_type:
8651 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8652 gcc_assert (done);
8653 break;
8655 case op_vec_info_type:
8656 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8657 gcc_assert (done);
8658 break;
8660 case assignment_vec_info_type:
8661 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8662 gcc_assert (done);
8663 break;
8665 case load_vec_info_type:
8666 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8667 slp_node_instance);
8668 gcc_assert (done);
8669 break;
8671 case store_vec_info_type:
8672 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8673 gcc_assert (done);
8674 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8676 /* In case of interleaving, the whole chain is vectorized when the
8677 last store in the chain is reached. Store stmts before the last
8678 one are skipped, and there vec_stmt_info shouldn't be freed
8679 meanwhile. */
8680 *grouped_store = true;
8681 if (STMT_VINFO_VEC_STMT (stmt_info))
8682 is_store = true;
8684 else
8685 is_store = true;
8686 break;
8688 case condition_vec_info_type:
8689 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8690 gcc_assert (done);
8691 break;
8693 case comparison_vec_info_type:
8694 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8695 gcc_assert (done);
8696 break;
8698 case call_vec_info_type:
8699 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8700 stmt = gsi_stmt (*gsi);
8701 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8702 is_store = true;
8703 break;
8705 case call_simd_clone_vec_info_type:
8706 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8707 stmt = gsi_stmt (*gsi);
8708 break;
8710 case reduc_vec_info_type:
8711 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8712 gcc_assert (done);
8713 break;
8715 default:
8716 if (!STMT_VINFO_LIVE_P (stmt_info))
8718 if (dump_enabled_p ())
8719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8720 "stmt not supported.\n");
8721 gcc_unreachable ();
8725 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8726 This would break hybrid SLP vectorization. */
8727 if (slp_node)
8728 gcc_assert (!vec_stmt
8729 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8731 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8732 is being vectorized, but outside the immediately enclosing loop. */
8733 if (vec_stmt
8734 && STMT_VINFO_LOOP_VINFO (stmt_info)
8735 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8736 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8737 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8738 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8739 || STMT_VINFO_RELEVANT (stmt_info) ==
8740 vect_used_in_outer_by_reduction))
8742 struct loop *innerloop = LOOP_VINFO_LOOP (
8743 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8744 imm_use_iterator imm_iter;
8745 use_operand_p use_p;
8746 tree scalar_dest;
8747 gimple *exit_phi;
8749 if (dump_enabled_p ())
8750 dump_printf_loc (MSG_NOTE, vect_location,
8751 "Record the vdef for outer-loop vectorization.\n");
8753 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8754 (to be used when vectorizing outer-loop stmts that use the DEF of
8755 STMT). */
8756 if (gimple_code (stmt) == GIMPLE_PHI)
8757 scalar_dest = PHI_RESULT (stmt);
8758 else
8759 scalar_dest = gimple_assign_lhs (stmt);
8761 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8763 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8765 exit_phi = USE_STMT (use_p);
8766 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8771 /* Handle stmts whose DEF is used outside the loop-nest that is
8772 being vectorized. */
8773 if (slp_node)
8775 gimple *slp_stmt;
8776 int i;
8777 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8779 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8780 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8781 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8783 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8784 &vec_stmt);
8785 gcc_assert (done);
8789 else if (STMT_VINFO_LIVE_P (stmt_info)
8790 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8792 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8793 gcc_assert (done);
8796 if (vec_stmt)
8797 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8799 return is_store;
8803 /* Remove a group of stores (for SLP or interleaving), free their
8804 stmt_vec_info. */
8806 void
8807 vect_remove_stores (gimple *first_stmt)
8809 gimple *next = first_stmt;
8810 gimple *tmp;
8811 gimple_stmt_iterator next_si;
8813 while (next)
8815 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8817 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8818 if (is_pattern_stmt_p (stmt_info))
8819 next = STMT_VINFO_RELATED_STMT (stmt_info);
8820 /* Free the attached stmt_vec_info and remove the stmt. */
8821 next_si = gsi_for_stmt (next);
8822 unlink_stmt_vdef (next);
8823 gsi_remove (&next_si, true);
8824 release_defs (next);
8825 free_stmt_vec_info (next);
8826 next = tmp;
8831 /* Function new_stmt_vec_info.
8833 Create and initialize a new stmt_vec_info struct for STMT. */
8835 stmt_vec_info
8836 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8838 stmt_vec_info res;
8839 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8841 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8842 STMT_VINFO_STMT (res) = stmt;
8843 res->vinfo = vinfo;
8844 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8845 STMT_VINFO_LIVE_P (res) = false;
8846 STMT_VINFO_VECTYPE (res) = NULL;
8847 STMT_VINFO_VEC_STMT (res) = NULL;
8848 STMT_VINFO_VECTORIZABLE (res) = true;
8849 STMT_VINFO_IN_PATTERN_P (res) = false;
8850 STMT_VINFO_RELATED_STMT (res) = NULL;
8851 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8852 STMT_VINFO_DATA_REF (res) = NULL;
8853 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8854 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8856 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8857 STMT_VINFO_DR_OFFSET (res) = NULL;
8858 STMT_VINFO_DR_INIT (res) = NULL;
8859 STMT_VINFO_DR_STEP (res) = NULL;
8860 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8862 if (gimple_code (stmt) == GIMPLE_PHI
8863 && is_loop_header_bb_p (gimple_bb (stmt)))
8864 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8865 else
8866 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8868 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8869 STMT_SLP_TYPE (res) = loop_vect;
8870 STMT_VINFO_NUM_SLP_USES (res) = 0;
8872 GROUP_FIRST_ELEMENT (res) = NULL;
8873 GROUP_NEXT_ELEMENT (res) = NULL;
8874 GROUP_SIZE (res) = 0;
8875 GROUP_STORE_COUNT (res) = 0;
8876 GROUP_GAP (res) = 0;
8877 GROUP_SAME_DR_STMT (res) = NULL;
8879 return res;
8883 /* Create a hash table for stmt_vec_info. */
8885 void
8886 init_stmt_vec_info_vec (void)
8888 gcc_assert (!stmt_vec_info_vec.exists ());
8889 stmt_vec_info_vec.create (50);
8893 /* Free hash table for stmt_vec_info. */
8895 void
8896 free_stmt_vec_info_vec (void)
8898 unsigned int i;
8899 stmt_vec_info info;
8900 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8901 if (info != NULL)
8902 free_stmt_vec_info (STMT_VINFO_STMT (info));
8903 gcc_assert (stmt_vec_info_vec.exists ());
8904 stmt_vec_info_vec.release ();
8908 /* Free stmt vectorization related info. */
8910 void
8911 free_stmt_vec_info (gimple *stmt)
8913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8915 if (!stmt_info)
8916 return;
8918 /* Check if this statement has a related "pattern stmt"
8919 (introduced by the vectorizer during the pattern recognition
8920 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8921 too. */
8922 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8924 stmt_vec_info patt_info
8925 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8926 if (patt_info)
8928 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8929 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8930 gimple_set_bb (patt_stmt, NULL);
8931 tree lhs = gimple_get_lhs (patt_stmt);
8932 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8933 release_ssa_name (lhs);
8934 if (seq)
8936 gimple_stmt_iterator si;
8937 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8939 gimple *seq_stmt = gsi_stmt (si);
8940 gimple_set_bb (seq_stmt, NULL);
8941 lhs = gimple_get_lhs (seq_stmt);
8942 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8943 release_ssa_name (lhs);
8944 free_stmt_vec_info (seq_stmt);
8947 free_stmt_vec_info (patt_stmt);
8951 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8952 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8953 set_vinfo_for_stmt (stmt, NULL);
8954 free (stmt_info);
8958 /* Function get_vectype_for_scalar_type_and_size.
8960 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8961 by the target. */
8963 static tree
8964 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8966 tree orig_scalar_type = scalar_type;
8967 machine_mode inner_mode = TYPE_MODE (scalar_type);
8968 machine_mode simd_mode;
8969 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8970 int nunits;
8971 tree vectype;
8973 if (nbytes == 0)
8974 return NULL_TREE;
8976 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8977 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8978 return NULL_TREE;
8980 /* For vector types of elements whose mode precision doesn't
8981 match their types precision we use a element type of mode
8982 precision. The vectorization routines will have to make sure
8983 they support the proper result truncation/extension.
8984 We also make sure to build vector types with INTEGER_TYPE
8985 component type only. */
8986 if (INTEGRAL_TYPE_P (scalar_type)
8987 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8988 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8989 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8990 TYPE_UNSIGNED (scalar_type));
8992 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8993 When the component mode passes the above test simply use a type
8994 corresponding to that mode. The theory is that any use that
8995 would cause problems with this will disable vectorization anyway. */
8996 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8997 && !INTEGRAL_TYPE_P (scalar_type))
8998 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9000 /* We can't build a vector type of elements with alignment bigger than
9001 their size. */
9002 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9003 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9004 TYPE_UNSIGNED (scalar_type));
9006 /* If we felt back to using the mode fail if there was
9007 no scalar type for it. */
9008 if (scalar_type == NULL_TREE)
9009 return NULL_TREE;
9011 /* If no size was supplied use the mode the target prefers. Otherwise
9012 lookup a vector mode of the specified size. */
9013 if (size == 0)
9014 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9015 else
9016 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9017 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9018 if (nunits <= 1)
9019 return NULL_TREE;
9021 vectype = build_vector_type (scalar_type, nunits);
9023 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9024 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9025 return NULL_TREE;
9027 /* Re-attach the address-space qualifier if we canonicalized the scalar
9028 type. */
9029 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9030 return build_qualified_type
9031 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9033 return vectype;
9036 unsigned int current_vector_size;
9038 /* Function get_vectype_for_scalar_type.
9040 Returns the vector type corresponding to SCALAR_TYPE as supported
9041 by the target. */
9043 tree
9044 get_vectype_for_scalar_type (tree scalar_type)
9046 tree vectype;
9047 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9048 current_vector_size);
9049 if (vectype
9050 && current_vector_size == 0)
9051 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9052 return vectype;
9055 /* Function get_mask_type_for_scalar_type.
9057 Returns the mask type corresponding to a result of comparison
9058 of vectors of specified SCALAR_TYPE as supported by target. */
9060 tree
9061 get_mask_type_for_scalar_type (tree scalar_type)
9063 tree vectype = get_vectype_for_scalar_type (scalar_type);
9065 if (!vectype)
9066 return NULL;
9068 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9069 current_vector_size);
9072 /* Function get_same_sized_vectype
9074 Returns a vector type corresponding to SCALAR_TYPE of size
9075 VECTOR_TYPE if supported by the target. */
9077 tree
9078 get_same_sized_vectype (tree scalar_type, tree vector_type)
9080 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9081 return build_same_sized_truth_vector_type (vector_type);
9083 return get_vectype_for_scalar_type_and_size
9084 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9087 /* Function vect_is_simple_use.
9089 Input:
9090 VINFO - the vect info of the loop or basic block that is being vectorized.
9091 OPERAND - operand in the loop or bb.
9092 Output:
9093 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9094 DT - the type of definition
9096 Returns whether a stmt with OPERAND can be vectorized.
9097 For loops, supportable operands are constants, loop invariants, and operands
9098 that are defined by the current iteration of the loop. Unsupportable
9099 operands are those that are defined by a previous iteration of the loop (as
9100 is the case in reduction/induction computations).
9101 For basic blocks, supportable operands are constants and bb invariants.
9102 For now, operands defined outside the basic block are not supported. */
9104 bool
9105 vect_is_simple_use (tree operand, vec_info *vinfo,
9106 gimple **def_stmt, enum vect_def_type *dt)
9108 *def_stmt = NULL;
9109 *dt = vect_unknown_def_type;
9111 if (dump_enabled_p ())
9113 dump_printf_loc (MSG_NOTE, vect_location,
9114 "vect_is_simple_use: operand ");
9115 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9116 dump_printf (MSG_NOTE, "\n");
9119 if (CONSTANT_CLASS_P (operand))
9121 *dt = vect_constant_def;
9122 return true;
9125 if (is_gimple_min_invariant (operand))
9127 *dt = vect_external_def;
9128 return true;
9131 if (TREE_CODE (operand) != SSA_NAME)
9133 if (dump_enabled_p ())
9134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9135 "not ssa-name.\n");
9136 return false;
9139 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9141 *dt = vect_external_def;
9142 return true;
9145 *def_stmt = SSA_NAME_DEF_STMT (operand);
9146 if (dump_enabled_p ())
9148 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9149 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9152 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9153 *dt = vect_external_def;
9154 else
9156 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9157 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9160 if (dump_enabled_p ())
9162 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9163 switch (*dt)
9165 case vect_uninitialized_def:
9166 dump_printf (MSG_NOTE, "uninitialized\n");
9167 break;
9168 case vect_constant_def:
9169 dump_printf (MSG_NOTE, "constant\n");
9170 break;
9171 case vect_external_def:
9172 dump_printf (MSG_NOTE, "external\n");
9173 break;
9174 case vect_internal_def:
9175 dump_printf (MSG_NOTE, "internal\n");
9176 break;
9177 case vect_induction_def:
9178 dump_printf (MSG_NOTE, "induction\n");
9179 break;
9180 case vect_reduction_def:
9181 dump_printf (MSG_NOTE, "reduction\n");
9182 break;
9183 case vect_double_reduction_def:
9184 dump_printf (MSG_NOTE, "double reduction\n");
9185 break;
9186 case vect_nested_cycle:
9187 dump_printf (MSG_NOTE, "nested cycle\n");
9188 break;
9189 case vect_unknown_def_type:
9190 dump_printf (MSG_NOTE, "unknown\n");
9191 break;
9195 if (*dt == vect_unknown_def_type)
9197 if (dump_enabled_p ())
9198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9199 "Unsupported pattern.\n");
9200 return false;
9203 switch (gimple_code (*def_stmt))
9205 case GIMPLE_PHI:
9206 case GIMPLE_ASSIGN:
9207 case GIMPLE_CALL:
9208 break;
9209 default:
9210 if (dump_enabled_p ())
9211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9212 "unsupported defining stmt:\n");
9213 return false;
9216 return true;
9219 /* Function vect_is_simple_use.
9221 Same as vect_is_simple_use but also determines the vector operand
9222 type of OPERAND and stores it to *VECTYPE. If the definition of
9223 OPERAND is vect_uninitialized_def, vect_constant_def or
9224 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9225 is responsible to compute the best suited vector type for the
9226 scalar operand. */
9228 bool
9229 vect_is_simple_use (tree operand, vec_info *vinfo,
9230 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9232 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9233 return false;
9235 /* Now get a vector type if the def is internal, otherwise supply
9236 NULL_TREE and leave it up to the caller to figure out a proper
9237 type for the use stmt. */
9238 if (*dt == vect_internal_def
9239 || *dt == vect_induction_def
9240 || *dt == vect_reduction_def
9241 || *dt == vect_double_reduction_def
9242 || *dt == vect_nested_cycle)
9244 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9246 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9247 && !STMT_VINFO_RELEVANT (stmt_info)
9248 && !STMT_VINFO_LIVE_P (stmt_info))
9249 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9251 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9252 gcc_assert (*vectype != NULL_TREE);
9254 else if (*dt == vect_uninitialized_def
9255 || *dt == vect_constant_def
9256 || *dt == vect_external_def)
9257 *vectype = NULL_TREE;
9258 else
9259 gcc_unreachable ();
9261 return true;
9265 /* Function supportable_widening_operation
9267 Check whether an operation represented by the code CODE is a
9268 widening operation that is supported by the target platform in
9269 vector form (i.e., when operating on arguments of type VECTYPE_IN
9270 producing a result of type VECTYPE_OUT).
9272 Widening operations we currently support are NOP (CONVERT), FLOAT
9273 and WIDEN_MULT. This function checks if these operations are supported
9274 by the target platform either directly (via vector tree-codes), or via
9275 target builtins.
9277 Output:
9278 - CODE1 and CODE2 are codes of vector operations to be used when
9279 vectorizing the operation, if available.
9280 - MULTI_STEP_CVT determines the number of required intermediate steps in
9281 case of multi-step conversion (like char->short->int - in that case
9282 MULTI_STEP_CVT will be 1).
9283 - INTERM_TYPES contains the intermediate type required to perform the
9284 widening operation (short in the above example). */
9286 bool
9287 supportable_widening_operation (enum tree_code code, gimple *stmt,
9288 tree vectype_out, tree vectype_in,
9289 enum tree_code *code1, enum tree_code *code2,
9290 int *multi_step_cvt,
9291 vec<tree> *interm_types)
9293 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9294 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9295 struct loop *vect_loop = NULL;
9296 machine_mode vec_mode;
9297 enum insn_code icode1, icode2;
9298 optab optab1, optab2;
9299 tree vectype = vectype_in;
9300 tree wide_vectype = vectype_out;
9301 enum tree_code c1, c2;
9302 int i;
9303 tree prev_type, intermediate_type;
9304 machine_mode intermediate_mode, prev_mode;
9305 optab optab3, optab4;
9307 *multi_step_cvt = 0;
9308 if (loop_info)
9309 vect_loop = LOOP_VINFO_LOOP (loop_info);
9311 switch (code)
9313 case WIDEN_MULT_EXPR:
9314 /* The result of a vectorized widening operation usually requires
9315 two vectors (because the widened results do not fit into one vector).
9316 The generated vector results would normally be expected to be
9317 generated in the same order as in the original scalar computation,
9318 i.e. if 8 results are generated in each vector iteration, they are
9319 to be organized as follows:
9320 vect1: [res1,res2,res3,res4],
9321 vect2: [res5,res6,res7,res8].
9323 However, in the special case that the result of the widening
9324 operation is used in a reduction computation only, the order doesn't
9325 matter (because when vectorizing a reduction we change the order of
9326 the computation). Some targets can take advantage of this and
9327 generate more efficient code. For example, targets like Altivec,
9328 that support widen_mult using a sequence of {mult_even,mult_odd}
9329 generate the following vectors:
9330 vect1: [res1,res3,res5,res7],
9331 vect2: [res2,res4,res6,res8].
9333 When vectorizing outer-loops, we execute the inner-loop sequentially
9334 (each vectorized inner-loop iteration contributes to VF outer-loop
9335 iterations in parallel). We therefore don't allow to change the
9336 order of the computation in the inner-loop during outer-loop
9337 vectorization. */
9338 /* TODO: Another case in which order doesn't *really* matter is when we
9339 widen and then contract again, e.g. (short)((int)x * y >> 8).
9340 Normally, pack_trunc performs an even/odd permute, whereas the
9341 repack from an even/odd expansion would be an interleave, which
9342 would be significantly simpler for e.g. AVX2. */
9343 /* In any case, in order to avoid duplicating the code below, recurse
9344 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9345 are properly set up for the caller. If we fail, we'll continue with
9346 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9347 if (vect_loop
9348 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9349 && !nested_in_vect_loop_p (vect_loop, stmt)
9350 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9351 stmt, vectype_out, vectype_in,
9352 code1, code2, multi_step_cvt,
9353 interm_types))
9355 /* Elements in a vector with vect_used_by_reduction property cannot
9356 be reordered if the use chain with this property does not have the
9357 same operation. One such an example is s += a * b, where elements
9358 in a and b cannot be reordered. Here we check if the vector defined
9359 by STMT is only directly used in the reduction statement. */
9360 tree lhs = gimple_assign_lhs (stmt);
9361 use_operand_p dummy;
9362 gimple *use_stmt;
9363 stmt_vec_info use_stmt_info = NULL;
9364 if (single_imm_use (lhs, &dummy, &use_stmt)
9365 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9366 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9367 return true;
9369 c1 = VEC_WIDEN_MULT_LO_EXPR;
9370 c2 = VEC_WIDEN_MULT_HI_EXPR;
9371 break;
9373 case DOT_PROD_EXPR:
9374 c1 = DOT_PROD_EXPR;
9375 c2 = DOT_PROD_EXPR;
9376 break;
9378 case SAD_EXPR:
9379 c1 = SAD_EXPR;
9380 c2 = SAD_EXPR;
9381 break;
9383 case VEC_WIDEN_MULT_EVEN_EXPR:
9384 /* Support the recursion induced just above. */
9385 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9386 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9387 break;
9389 case WIDEN_LSHIFT_EXPR:
9390 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9391 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9392 break;
9394 CASE_CONVERT:
9395 c1 = VEC_UNPACK_LO_EXPR;
9396 c2 = VEC_UNPACK_HI_EXPR;
9397 break;
9399 case FLOAT_EXPR:
9400 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9401 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9402 break;
9404 case FIX_TRUNC_EXPR:
9405 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9406 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9407 computing the operation. */
9408 return false;
9410 default:
9411 gcc_unreachable ();
9414 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9415 std::swap (c1, c2);
9417 if (code == FIX_TRUNC_EXPR)
9419 /* The signedness is determined from output operand. */
9420 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9421 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9423 else
9425 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9426 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9429 if (!optab1 || !optab2)
9430 return false;
9432 vec_mode = TYPE_MODE (vectype);
9433 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9434 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9435 return false;
9437 *code1 = c1;
9438 *code2 = c2;
9440 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9441 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9442 /* For scalar masks we may have different boolean
9443 vector types having the same QImode. Thus we
9444 add additional check for elements number. */
9445 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9446 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9447 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9449 /* Check if it's a multi-step conversion that can be done using intermediate
9450 types. */
9452 prev_type = vectype;
9453 prev_mode = vec_mode;
9455 if (!CONVERT_EXPR_CODE_P (code))
9456 return false;
9458 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9459 intermediate steps in promotion sequence. We try
9460 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9461 not. */
9462 interm_types->create (MAX_INTERM_CVT_STEPS);
9463 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9465 intermediate_mode = insn_data[icode1].operand[0].mode;
9466 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9468 intermediate_type
9469 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9470 current_vector_size);
9471 if (intermediate_mode != TYPE_MODE (intermediate_type))
9472 return false;
9474 else
9475 intermediate_type
9476 = lang_hooks.types.type_for_mode (intermediate_mode,
9477 TYPE_UNSIGNED (prev_type));
9479 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9480 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9482 if (!optab3 || !optab4
9483 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9484 || insn_data[icode1].operand[0].mode != intermediate_mode
9485 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9486 || insn_data[icode2].operand[0].mode != intermediate_mode
9487 || ((icode1 = optab_handler (optab3, intermediate_mode))
9488 == CODE_FOR_nothing)
9489 || ((icode2 = optab_handler (optab4, intermediate_mode))
9490 == CODE_FOR_nothing))
9491 break;
9493 interm_types->quick_push (intermediate_type);
9494 (*multi_step_cvt)++;
9496 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9497 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9498 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9499 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9500 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9502 prev_type = intermediate_type;
9503 prev_mode = intermediate_mode;
9506 interm_types->release ();
9507 return false;
9511 /* Function supportable_narrowing_operation
9513 Check whether an operation represented by the code CODE is a
9514 narrowing operation that is supported by the target platform in
9515 vector form (i.e., when operating on arguments of type VECTYPE_IN
9516 and producing a result of type VECTYPE_OUT).
9518 Narrowing operations we currently support are NOP (CONVERT) and
9519 FIX_TRUNC. This function checks if these operations are supported by
9520 the target platform directly via vector tree-codes.
9522 Output:
9523 - CODE1 is the code of a vector operation to be used when
9524 vectorizing the operation, if available.
9525 - MULTI_STEP_CVT determines the number of required intermediate steps in
9526 case of multi-step conversion (like int->short->char - in that case
9527 MULTI_STEP_CVT will be 1).
9528 - INTERM_TYPES contains the intermediate type required to perform the
9529 narrowing operation (short in the above example). */
9531 bool
9532 supportable_narrowing_operation (enum tree_code code,
9533 tree vectype_out, tree vectype_in,
9534 enum tree_code *code1, int *multi_step_cvt,
9535 vec<tree> *interm_types)
9537 machine_mode vec_mode;
9538 enum insn_code icode1;
9539 optab optab1, interm_optab;
9540 tree vectype = vectype_in;
9541 tree narrow_vectype = vectype_out;
9542 enum tree_code c1;
9543 tree intermediate_type, prev_type;
9544 machine_mode intermediate_mode, prev_mode;
9545 int i;
9546 bool uns;
9548 *multi_step_cvt = 0;
9549 switch (code)
9551 CASE_CONVERT:
9552 c1 = VEC_PACK_TRUNC_EXPR;
9553 break;
9555 case FIX_TRUNC_EXPR:
9556 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9557 break;
9559 case FLOAT_EXPR:
9560 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9561 tree code and optabs used for computing the operation. */
9562 return false;
9564 default:
9565 gcc_unreachable ();
9568 if (code == FIX_TRUNC_EXPR)
9569 /* The signedness is determined from output operand. */
9570 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9571 else
9572 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9574 if (!optab1)
9575 return false;
9577 vec_mode = TYPE_MODE (vectype);
9578 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9579 return false;
9581 *code1 = c1;
9583 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9584 /* For scalar masks we may have different boolean
9585 vector types having the same QImode. Thus we
9586 add additional check for elements number. */
9587 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9588 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9589 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9591 /* Check if it's a multi-step conversion that can be done using intermediate
9592 types. */
9593 prev_mode = vec_mode;
9594 prev_type = vectype;
9595 if (code == FIX_TRUNC_EXPR)
9596 uns = TYPE_UNSIGNED (vectype_out);
9597 else
9598 uns = TYPE_UNSIGNED (vectype);
9600 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9601 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9602 costly than signed. */
9603 if (code == FIX_TRUNC_EXPR && uns)
9605 enum insn_code icode2;
9607 intermediate_type
9608 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9609 interm_optab
9610 = optab_for_tree_code (c1, intermediate_type, optab_default);
9611 if (interm_optab != unknown_optab
9612 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9613 && insn_data[icode1].operand[0].mode
9614 == insn_data[icode2].operand[0].mode)
9616 uns = false;
9617 optab1 = interm_optab;
9618 icode1 = icode2;
9622 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9623 intermediate steps in promotion sequence. We try
9624 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9625 interm_types->create (MAX_INTERM_CVT_STEPS);
9626 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9628 intermediate_mode = insn_data[icode1].operand[0].mode;
9629 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9631 intermediate_type
9632 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9633 current_vector_size);
9634 if (intermediate_mode != TYPE_MODE (intermediate_type))
9635 return false;
9637 else
9638 intermediate_type
9639 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9640 interm_optab
9641 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9642 optab_default);
9643 if (!interm_optab
9644 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9645 || insn_data[icode1].operand[0].mode != intermediate_mode
9646 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9647 == CODE_FOR_nothing))
9648 break;
9650 interm_types->quick_push (intermediate_type);
9651 (*multi_step_cvt)++;
9653 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9654 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9655 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9656 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9658 prev_mode = intermediate_mode;
9659 prev_type = intermediate_type;
9660 optab1 = interm_optab;
9663 interm_types->release ();
9664 return false;