Allow single-element interleaving for non-power-of-2 strides
[official-gcc.git] / gcc / tree-vect-stmts.c
blob e4d20514c00db75a299f366bb77076b5e3b9d197
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
100 if (body_cost_vec)
102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103 stmt_info_for_cost si = { count, kind,
104 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 misalign };
106 body_cost_vec->safe_push (si);
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 else
111 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 count, kind, stmt_info, misalign, where);
115 /* Return a variable of type ELEM_TYPE[NELEMS]. */
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
120 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 "vect_array");
124 /* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 tree array, unsigned HOST_WIDE_INT n)
133 tree vect_type, vect, vect_name, array_ref;
134 gimple *new_stmt;
136 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137 vect_type = TREE_TYPE (TREE_TYPE (array));
138 vect = vect_create_destination_var (scalar_dest, vect_type);
139 array_ref = build4 (ARRAY_REF, vect_type, array,
140 build_int_cst (size_type_node, n),
141 NULL_TREE, NULL_TREE);
143 new_stmt = gimple_build_assign (vect, array_ref);
144 vect_name = make_ssa_name (vect, new_stmt);
145 gimple_assign_set_lhs (new_stmt, vect_name);
146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
148 return vect_name;
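/* Illustrative sketch (editorial note, not part of the original source):
   for a scalar destination "x" and N == 2, read_vector_array emits gimple
   along the lines of

     vect_x_7 = vect_array[2];

   where vect_x_7 is a fresh SSA name based on the destination variable
   created for "x"; the exact SSA names shown are hypothetical.  */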
151 /* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 tree array, unsigned HOST_WIDE_INT n)
159 tree array_ref;
160 gimple *new_stmt;
162 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
166 new_stmt = gimple_build_assign (array_ref, vect);
167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
170 /* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
172 (and its group). */
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
177 tree mem_ref;
179 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182 return mem_ref;
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
187 /* Function vect_mark_relevant.
189 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
191 static void
192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193 enum vect_relevant relevant, bool live_p)
195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198 gimple *pattern_stmt;
200 if (dump_enabled_p ())
202 dump_printf_loc (MSG_NOTE, vect_location,
203 "mark relevant %d, live %d: ", relevant, live_p);
204 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
207 /* If this stmt is an original stmt in a pattern, we might need to mark its
208 related pattern stmt instead of the original stmt. However, such stmts
 209 may have their own uses that are not in any pattern; in such cases the
210 stmt itself should be marked. */
211 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
213 /* This is the last stmt in a sequence that was detected as a
214 pattern that can potentially be vectorized. Don't mark the stmt
215 as relevant/live because it's not going to be vectorized.
216 Instead mark the pattern-stmt that replaces it. */
218 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_info = vinfo_for_stmt (pattern_stmt);
225 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228 stmt = pattern_stmt;
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
244 worklist->safe_push (stmt);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT is simple and all uses of it are invariant. */
252 bool
253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
255 tree op;
256 gimple *def_stmt;
257 ssa_op_iter iter;
259 if (!is_gimple_assign (stmt))
260 return false;
262 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
264 enum vect_def_type dt = vect_uninitialized_def;
266 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 "use not simple.\n");
271 return false;
274 if (dt != vect_external_def && dt != vect_constant_def)
275 return false;
277 return true;
280 /* Function vect_stmt_relevant_p.
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
 288 - it is a control stmt in the loop (except for the exit condition).
290 CHECKME: what other side effects would the vectorizer allow? */
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 enum vect_relevant *relevant, bool *live_p)
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 ssa_op_iter op_iter;
298 imm_use_iterator imm_iter;
299 use_operand_p use_p;
300 def_operand_p def_p;
302 *relevant = vect_unused_in_scope;
303 *live_p = false;
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308 != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt)
314 && !gimple_clobber_p (stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
356 return (*live_p || *relevant);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT. Check if USE is
363 used in STMT for anything other than indexing an array. */
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
368 tree operand;
369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
371 /* USE corresponds to some operand in STMT. If there is no data
372 reference in STMT, then any operand that corresponds to USE
373 is not indexing an array. */
374 if (!STMT_VINFO_DATA_REF (stmt_info))
375 return true;
 377 /* STMT has a data_ref. FORNOW this means that it is of one of
378 the following forms:
379 -1- ARRAY_REF = var
380 -2- var = ARRAY_REF
381 (This should have been verified in analyze_data_refs).
383 'var' in the second case corresponds to a def, not a use,
384 so USE cannot correspond to any operands that are not used
385 for array indexing.
387 Therefore, all we need to check is if STMT falls into the
388 first case, and whether var corresponds to USE. */
390 if (!gimple_assign_copy_p (stmt))
392 if (is_gimple_call (stmt)
393 && gimple_call_internal_p (stmt))
394 switch (gimple_call_internal_fn (stmt))
396 case IFN_MASK_STORE:
397 operand = gimple_call_arg (stmt, 3);
398 if (operand == use)
399 return true;
400 /* FALLTHRU */
401 case IFN_MASK_LOAD:
402 operand = gimple_call_arg (stmt, 2);
403 if (operand == use)
404 return true;
405 break;
406 default:
407 break;
409 return false;
412 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
413 return false;
414 operand = gimple_assign_rhs1 (stmt);
415 if (TREE_CODE (operand) != SSA_NAME)
416 return false;
418 if (operand == use)
419 return true;
421 return false;
426 Function process_use.
428 Inputs:
429 - a USE in STMT in a loop represented by LOOP_VINFO
430 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
431 that defined USE. This is done by calling mark_relevant and passing it
432 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
 433 - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
434 be performed.
436 Outputs:
437 Generally, LIVE_P and RELEVANT are used to define the liveness and
438 relevance info of the DEF_STMT of this USE:
439 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
440 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
441 Exceptions:
442 - case 1: If USE is used only for address computations (e.g. array indexing),
443 which does not need to be directly vectorized, then the liveness/relevance
444 of the respective DEF_STMT is left unchanged.
445 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 446 skip DEF_STMT because it has already been processed.
447 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
448 be modified accordingly.
450 Return true if everything is as expected. Return false otherwise. */
452 static bool
453 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
454 enum vect_relevant relevant, vec<gimple *> *worklist,
455 bool force)
457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
459 stmt_vec_info dstmt_vinfo;
460 basic_block bb, def_bb;
461 gimple *def_stmt;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
467 return true;
469 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
473 "not vectorized: unsupported use in stmt.\n");
474 return false;
477 if (!def_stmt || gimple_nop_p (def_stmt))
478 return true;
480 def_bb = gimple_bb (def_stmt);
481 if (!flow_bb_inside_loop_p (loop, def_bb))
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
485 return true;
488 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
489 DEF_STMT must have already been processed, because this should be the
490 only way that STMT, which is a reduction-phi, was put in the worklist,
491 as there should be no other uses for DEF_STMT in the loop. So we just
492 check that everything is as expected, and we are done. */
493 dstmt_vinfo = vinfo_for_stmt (def_stmt);
494 bb = gimple_bb (stmt);
495 if (gimple_code (stmt) == GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
497 && gimple_code (def_stmt) != GIMPLE_PHI
498 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
499 && bb->loop_father == def_bb->loop_father)
501 if (dump_enabled_p ())
502 dump_printf_loc (MSG_NOTE, vect_location,
503 "reduc-stmt defining reduc-phi in the same nest.\n");
504 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
505 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
506 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
507 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
508 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
509 return true;
512 /* case 3a: outer-loop stmt defining an inner-loop stmt:
513 outer-loop-header-bb:
514 d = def_stmt
515 inner-loop:
516 stmt # use (d)
517 outer-loop-tail-bb:
518 ... */
519 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
521 if (dump_enabled_p ())
522 dump_printf_loc (MSG_NOTE, vect_location,
523 "outer-loop def-stmt defining inner-loop stmt.\n");
525 switch (relevant)
527 case vect_unused_in_scope:
528 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
529 vect_used_in_scope : vect_unused_in_scope;
530 break;
532 case vect_used_in_outer_by_reduction:
533 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
534 relevant = vect_used_by_reduction;
535 break;
537 case vect_used_in_outer:
538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
539 relevant = vect_used_in_scope;
540 break;
542 case vect_used_in_scope:
543 break;
545 default:
546 gcc_unreachable ();
550 /* case 3b: inner-loop stmt defining an outer-loop stmt:
551 outer-loop-header-bb:
553 inner-loop:
554 d = def_stmt
555 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
556 stmt # use (d) */
557 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
559 if (dump_enabled_p ())
560 dump_printf_loc (MSG_NOTE, vect_location,
561 "inner-loop def-stmt defining outer-loop stmt.\n");
563 switch (relevant)
565 case vect_unused_in_scope:
566 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
567 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
568 vect_used_in_outer_by_reduction : vect_unused_in_scope;
569 break;
571 case vect_used_by_reduction:
572 case vect_used_only_live:
573 relevant = vect_used_in_outer_by_reduction;
574 break;
576 case vect_used_in_scope:
577 relevant = vect_used_in_outer;
578 break;
580 default:
581 gcc_unreachable ();
584 /* We are also not interested in uses on loop PHI backedges that are
585 inductions. Otherwise we'll needlessly vectorize the IV increment
586 and cause hybrid SLP for SLP inductions. Unless the PHI is live
587 of course. */
588 else if (gimple_code (stmt) == GIMPLE_PHI
589 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
590 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
591 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
592 == use))
594 if (dump_enabled_p ())
595 dump_printf_loc (MSG_NOTE, vect_location,
596 "induction value on backedge.\n");
597 return true;
601 vect_mark_relevant (worklist, def_stmt, relevant, false);
602 return true;
606 /* Function vect_mark_stmts_to_be_vectorized.
608 Not all stmts in the loop need to be vectorized. For example:
610 for i...
611 for j...
612 1. T0 = i + j
613 2. T1 = a[T0]
615 3. j = j + 1
 617 Stmts 1 and 3 do not need to be vectorized, because loop control and
618 addressing of vectorized data-refs are handled differently.
620 This pass detects such stmts. */
622 bool
623 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
625 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
626 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
627 unsigned int nbbs = loop->num_nodes;
628 gimple_stmt_iterator si;
629 gimple *stmt;
630 unsigned int i;
631 stmt_vec_info stmt_vinfo;
632 basic_block bb;
633 gimple *phi;
634 bool live_p;
635 enum vect_relevant relevant;
637 if (dump_enabled_p ())
638 dump_printf_loc (MSG_NOTE, vect_location,
639 "=== vect_mark_stmts_to_be_vectorized ===\n");
641 auto_vec<gimple *, 64> worklist;
643 /* 1. Init worklist. */
644 for (i = 0; i < nbbs; i++)
646 bb = bbs[i];
647 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
649 phi = gsi_stmt (si);
650 if (dump_enabled_p ())
652 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
656 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
657 vect_mark_relevant (&worklist, phi, relevant, live_p);
659 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
661 stmt = gsi_stmt (si);
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
669 vect_mark_relevant (&worklist, stmt, relevant, live_p);
673 /* 2. Process_worklist */
674 while (worklist.length () > 0)
676 use_operand_p use_p;
677 ssa_op_iter iter;
679 stmt = worklist.pop ();
680 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
683 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
686 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
687 (DEF_STMT) as relevant/irrelevant according to the relevance property
688 of STMT. */
689 stmt_vinfo = vinfo_for_stmt (stmt);
690 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
692 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
693 propagated as is to the DEF_STMTs of its USEs.
695 One exception is when STMT has been identified as defining a reduction
696 variable; in this case we set the relevance to vect_used_by_reduction.
697 This is because we distinguish between two kinds of relevant stmts -
698 those that are used by a reduction computation, and those that are
699 (also) used by a regular computation. This allows us later on to
700 identify stmts that are used solely by a reduction, and therefore the
701 order of the results that they produce does not have to be kept. */
703 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
705 case vect_reduction_def:
706 gcc_assert (relevant != vect_unused_in_scope);
707 if (relevant != vect_unused_in_scope
708 && relevant != vect_used_in_scope
709 && relevant != vect_used_by_reduction
710 && relevant != vect_used_only_live)
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
714 "unsupported use of reduction.\n");
715 return false;
717 break;
719 case vect_nested_cycle:
720 if (relevant != vect_unused_in_scope
721 && relevant != vect_used_in_outer_by_reduction
722 && relevant != vect_used_in_outer)
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
726 "unsupported use of nested cycle.\n");
728 return false;
730 break;
732 case vect_double_reduction_def:
733 if (relevant != vect_unused_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
737 if (dump_enabled_p ())
738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
739 "unsupported use of double reduction.\n");
741 return false;
743 break;
745 default:
746 break;
749 if (is_pattern_stmt_p (stmt_vinfo))
751 /* Pattern statements are not inserted into the code, so
752 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
753 have to scan the RHS or function arguments instead. */
754 if (is_gimple_assign (stmt))
756 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
757 tree op = gimple_assign_rhs1 (stmt);
759 i = 1;
760 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
762 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
763 relevant, &worklist, false)
764 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
765 relevant, &worklist, false))
766 return false;
767 i = 2;
769 for (; i < gimple_num_ops (stmt); i++)
771 op = gimple_op (stmt, i);
772 if (TREE_CODE (op) == SSA_NAME
773 && !process_use (stmt, op, loop_vinfo, relevant,
774 &worklist, false))
775 return false;
778 else if (is_gimple_call (stmt))
780 for (i = 0; i < gimple_call_num_args (stmt); i++)
782 tree arg = gimple_call_arg (stmt, i);
783 if (!process_use (stmt, arg, loop_vinfo, relevant,
784 &worklist, false))
785 return false;
789 else
790 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
792 tree op = USE_FROM_PTR (use_p);
793 if (!process_use (stmt, op, loop_vinfo, relevant,
794 &worklist, false))
795 return false;
798 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
800 gather_scatter_info gs_info;
801 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
802 gcc_unreachable ();
803 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
804 &worklist, true))
805 return false;
807 } /* while worklist */
809 return true;
813 /* Function vect_model_simple_cost.
815 Models cost for simple operations, i.e. those that only emit ncopies of a
816 single op. Right now, this does not account for multiple insns that could
817 be generated for the single vector op. We will handle that shortly. */
819 void
820 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
821 enum vect_def_type *dt,
822 int ndts,
823 stmt_vector_for_cost *prologue_cost_vec,
824 stmt_vector_for_cost *body_cost_vec)
826 int i;
827 int inside_cost = 0, prologue_cost = 0;
829 /* The SLP costs were already calculated during SLP tree build. */
830 if (PURE_SLP_STMT (stmt_info))
831 return;
 833 /* Cost the "broadcast" of a scalar operand into a vector operand.
834 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
835 cost model. */
836 for (i = 0; i < ndts; i++)
837 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
838 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
839 stmt_info, 0, vect_prologue);
841 /* Pass the inside-of-loop statements to the target-specific cost model. */
842 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
843 stmt_info, 0, vect_body);
845 if (dump_enabled_p ())
846 dump_printf_loc (MSG_NOTE, vect_location,
847 "vect_model_simple_cost: inside_cost = %d, "
848 "prologue_cost = %d .\n", inside_cost, prologue_cost);
852 /* Model cost for type demotion and promotion operations. PWR is normally
853 zero for single-step promotions and demotions. It will be one if
854 two-step promotion/demotion is required, and so on. Each additional
855 step doubles the number of instructions required. */
857 static void
858 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
859 enum vect_def_type *dt, int pwr)
861 int i, tmp;
862 int inside_cost = 0, prologue_cost = 0;
863 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
864 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
865 void *target_cost_data;
867 /* The SLP costs were already calculated during SLP tree build. */
868 if (PURE_SLP_STMT (stmt_info))
869 return;
871 if (loop_vinfo)
872 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
873 else
874 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
876 for (i = 0; i < pwr + 1; i++)
878 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
879 (i + 1) : i;
880 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
881 vec_promote_demote, stmt_info, 0,
882 vect_body);
 885 /* FORNOW: Assuming a maximum of 2 args per stmt. */
886 for (i = 0; i < 2; i++)
887 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
888 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
889 stmt_info, 0, vect_prologue);
891 if (dump_enabled_p ())
892 dump_printf_loc (MSG_NOTE, vect_location,
893 "vect_model_promotion_demotion_cost: inside_cost = %d, "
894 "prologue_cost = %d .\n", inside_cost, prologue_cost);
897 /* Function vect_model_store_cost
899 Models cost for stores. In the case of grouped accesses, one access
900 has the overhead of the grouped access attributed to it. */
902 void
903 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
904 vect_memory_access_type memory_access_type,
905 vec_load_store_type vls_type, slp_tree slp_node,
906 stmt_vector_for_cost *prologue_cost_vec,
907 stmt_vector_for_cost *body_cost_vec)
909 unsigned int inside_cost = 0, prologue_cost = 0;
910 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
911 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
912 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914 if (vls_type == VLS_STORE_INVARIANT)
915 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
916 stmt_info, 0, vect_prologue);
918 /* Grouped stores update all elements in the group at once,
919 so we want the DR for the first statement. */
920 if (!slp_node && grouped_access_p)
922 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
923 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
926 /* True if we should include any once-per-group costs as well as
927 the cost of the statement itself. For SLP we only get called
928 once per group anyhow. */
929 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
931 /* We assume that the cost of a single store-lanes instruction is
932 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
933 access is instead being provided by a permute-and-store operation,
934 include the cost of the permutes. */
935 if (first_stmt_p
936 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 938 /* Uses high and low interleave or shuffle operations for each
939 needed permute. */
940 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
941 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
942 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
943 stmt_info, 0, vect_body);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_store_cost: strided group_size = %d .\n",
948 group_size);
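/* Worked example (editorial note): with GROUP_SIZE == 4 and NCOPIES == 2
   the formula above gives ceil_log2 (4) == 2, so
   nstmts = 2 * 2 * 4 = 16 vec_perm statements are charged to the body.  */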
951 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
952 /* Costs of the stores. */
953 if (memory_access_type == VMAT_ELEMENTWISE
954 || memory_access_type == VMAT_GATHER_SCATTER)
956 /* N scalar stores plus extracting the elements. */
957 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
958 inside_cost += record_stmt_cost (body_cost_vec,
959 ncopies * assumed_nunits,
960 scalar_store, stmt_info, 0, vect_body);
962 else
963 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
965 if (memory_access_type == VMAT_ELEMENTWISE
966 || memory_access_type == VMAT_STRIDED_SLP)
968 /* N scalar stores plus extracting the elements. */
969 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
970 inside_cost += record_stmt_cost (body_cost_vec,
971 ncopies * assumed_nunits,
972 vec_to_scalar, stmt_info, 0, vect_body);
975 if (dump_enabled_p ())
976 dump_printf_loc (MSG_NOTE, vect_location,
977 "vect_model_store_cost: inside_cost = %d, "
978 "prologue_cost = %d .\n", inside_cost, prologue_cost);
982 /* Calculate cost of DR's memory access. */
983 void
984 vect_get_store_cost (struct data_reference *dr, int ncopies,
985 unsigned int *inside_cost,
986 stmt_vector_for_cost *body_cost_vec)
988 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
989 gimple *stmt = DR_STMT (dr);
990 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
992 switch (alignment_support_scheme)
994 case dr_aligned:
996 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
997 vector_store, stmt_info, 0,
998 vect_body);
1000 if (dump_enabled_p ())
1001 dump_printf_loc (MSG_NOTE, vect_location,
1002 "vect_model_store_cost: aligned.\n");
1003 break;
1006 case dr_unaligned_supported:
1008 /* Here, we assign an additional cost for the unaligned store. */
1009 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1010 unaligned_store, stmt_info,
1011 DR_MISALIGNMENT (dr), vect_body);
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE, vect_location,
1014 "vect_model_store_cost: unaligned supported by "
1015 "hardware.\n");
1016 break;
1019 case dr_unaligned_unsupported:
1021 *inside_cost = VECT_MAX_COST;
1023 if (dump_enabled_p ())
1024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1025 "vect_model_store_cost: unsupported access.\n");
1026 break;
1029 default:
1030 gcc_unreachable ();
1035 /* Function vect_model_load_cost
1037 Models cost for loads. In the case of grouped accesses, one access has
1038 the overhead of the grouped access attributed to it. Since unaligned
1039 accesses are supported for loads, we also account for the costs of the
1040 access scheme chosen. */
1042 void
1043 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1044 vect_memory_access_type memory_access_type,
1045 slp_tree slp_node,
1046 stmt_vector_for_cost *prologue_cost_vec,
1047 stmt_vector_for_cost *body_cost_vec)
1049 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1050 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1051 unsigned int inside_cost = 0, prologue_cost = 0;
1052 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1054 /* Grouped loads read all elements in the group at once,
1055 so we want the DR for the first statement. */
1056 if (!slp_node && grouped_access_p)
1058 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1059 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1062 /* True if we should include any once-per-group costs as well as
1063 the cost of the statement itself. For SLP we only get called
1064 once per group anyhow. */
1065 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1067 /* We assume that the cost of a single load-lanes instruction is
1068 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1069 access is instead being provided by a load-and-permute operation,
1070 include the cost of the permutes. */
1071 if (first_stmt_p
1072 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1074 /* Uses even and odd extract operations or shuffle operations
1075 for each needed permute. */
1076 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1077 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1078 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1079 stmt_info, 0, vect_body);
1081 if (dump_enabled_p ())
1082 dump_printf_loc (MSG_NOTE, vect_location,
1083 "vect_model_load_cost: strided group_size = %d .\n",
1084 group_size);
1087 /* The loads themselves. */
1088 if (memory_access_type == VMAT_ELEMENTWISE
1089 || memory_access_type == VMAT_GATHER_SCATTER)
1091 /* N scalar loads plus gathering them into a vector. */
1092 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1093 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1094 inside_cost += record_stmt_cost (body_cost_vec,
1095 ncopies * assumed_nunits,
1096 scalar_load, stmt_info, 0, vect_body);
1098 else
1099 vect_get_load_cost (dr, ncopies, first_stmt_p,
1100 &inside_cost, &prologue_cost,
1101 prologue_cost_vec, body_cost_vec, true);
1102 if (memory_access_type == VMAT_ELEMENTWISE
1103 || memory_access_type == VMAT_STRIDED_SLP)
1104 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1105 stmt_info, 0, vect_body);
1107 if (dump_enabled_p ())
1108 dump_printf_loc (MSG_NOTE, vect_location,
1109 "vect_model_load_cost: inside_cost = %d, "
1110 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1114 /* Calculate cost of DR's memory access. */
1115 void
1116 vect_get_load_cost (struct data_reference *dr, int ncopies,
1117 bool add_realign_cost, unsigned int *inside_cost,
1118 unsigned int *prologue_cost,
1119 stmt_vector_for_cost *prologue_cost_vec,
1120 stmt_vector_for_cost *body_cost_vec,
1121 bool record_prologue_costs)
1123 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1124 gimple *stmt = DR_STMT (dr);
1125 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1127 switch (alignment_support_scheme)
1129 case dr_aligned:
1131 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1132 stmt_info, 0, vect_body);
1134 if (dump_enabled_p ())
1135 dump_printf_loc (MSG_NOTE, vect_location,
1136 "vect_model_load_cost: aligned.\n");
1138 break;
1140 case dr_unaligned_supported:
1142 /* Here, we assign an additional cost for the unaligned load. */
1143 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1144 unaligned_load, stmt_info,
1145 DR_MISALIGNMENT (dr), vect_body);
1147 if (dump_enabled_p ())
1148 dump_printf_loc (MSG_NOTE, vect_location,
1149 "vect_model_load_cost: unaligned supported by "
1150 "hardware.\n");
1152 break;
1154 case dr_explicit_realign:
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1157 vector_load, stmt_info, 0, vect_body);
1158 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1159 vec_perm, stmt_info, 0, vect_body);
1161 /* FIXME: If the misalignment remains fixed across the iterations of
1162 the containing loop, the following cost should be added to the
1163 prologue costs. */
1164 if (targetm.vectorize.builtin_mask_for_load)
1165 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1166 stmt_info, 0, vect_body);
1168 if (dump_enabled_p ())
1169 dump_printf_loc (MSG_NOTE, vect_location,
1170 "vect_model_load_cost: explicit realign\n");
1172 break;
1174 case dr_explicit_realign_optimized:
1176 if (dump_enabled_p ())
1177 dump_printf_loc (MSG_NOTE, vect_location,
1178 "vect_model_load_cost: unaligned software "
1179 "pipelined.\n");
1181 /* Unaligned software pipeline has a load of an address, an initial
1182 load, and possibly a mask operation to "prime" the loop. However,
1183 if this is an access in a group of loads, which provide grouped
1184 access, then the above cost should only be considered for one
1185 access in the group. Inside the loop, there is a load op
1186 and a realignment op. */
1188 if (add_realign_cost && record_prologue_costs)
1190 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1191 vector_stmt, stmt_info,
1192 0, vect_prologue);
1193 if (targetm.vectorize.builtin_mask_for_load)
1194 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1195 vector_stmt, stmt_info,
1196 0, vect_prologue);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1200 stmt_info, 0, vect_body);
1201 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1202 stmt_info, 0, vect_body);
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE, vect_location,
1206 "vect_model_load_cost: explicit realign optimized"
1207 "\n");
1209 break;
1212 case dr_unaligned_unsupported:
1214 *inside_cost = VECT_MAX_COST;
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1218 "vect_model_load_cost: unsupported access.\n");
1219 break;
1222 default:
1223 gcc_unreachable ();
1227 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1228 the loop preheader for the vectorized stmt STMT. */
1230 static void
1231 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1233 if (gsi)
1234 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1235 else
1237 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1240 if (loop_vinfo)
1242 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1243 basic_block new_bb;
1244 edge pe;
1246 if (nested_in_vect_loop_p (loop, stmt))
1247 loop = loop->inner;
1249 pe = loop_preheader_edge (loop);
1250 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1251 gcc_assert (!new_bb);
1253 else
1255 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1256 basic_block bb;
1257 gimple_stmt_iterator gsi_bb_start;
1259 gcc_assert (bb_vinfo);
1260 bb = BB_VINFO_BB (bb_vinfo);
1261 gsi_bb_start = gsi_after_labels (bb);
1262 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1266 if (dump_enabled_p ())
1268 dump_printf_loc (MSG_NOTE, vect_location,
1269 "created new init_stmt: ");
1270 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1274 /* Function vect_init_vector.
1276 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1277 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1278 a vector type, a vector with all elements equal to VAL is created first.
 1279 Place the initialization at GSI if it is not NULL. Otherwise, place the
1280 initialization at the loop preheader.
1281 Return the DEF of INIT_STMT.
1282 It will be used in the vectorization of STMT. */
1284 tree
1285 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1287 gimple *init_stmt;
1288 tree new_temp;
 1290 /* We abuse this function to push something to an SSA name with initial 'val'. */
1291 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1293 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1294 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
 1296 /* A scalar boolean value should be transformed into an
 1297 all-zeros or all-ones value before building a vector. */
1298 if (VECTOR_BOOLEAN_TYPE_P (type))
1300 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1301 tree false_val = build_zero_cst (TREE_TYPE (type));
1303 if (CONSTANT_CLASS_P (val))
1304 val = integer_zerop (val) ? false_val : true_val;
1305 else
1307 new_temp = make_ssa_name (TREE_TYPE (type));
1308 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1309 val, true_val, false_val);
1310 vect_init_vector_1 (stmt, init_stmt, gsi);
1311 val = new_temp;
1314 else if (CONSTANT_CLASS_P (val))
1315 val = fold_convert (TREE_TYPE (type), val);
1316 else
1318 new_temp = make_ssa_name (TREE_TYPE (type));
1319 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1320 init_stmt = gimple_build_assign (new_temp,
1321 fold_build1 (VIEW_CONVERT_EXPR,
1322 TREE_TYPE (type),
1323 val));
1324 else
1325 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1326 vect_init_vector_1 (stmt, init_stmt, gsi);
1327 val = new_temp;
1330 val = build_vector_from_val (type, val);
1333 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1334 init_stmt = gimple_build_assign (new_temp, val);
1335 vect_init_vector_1 (stmt, init_stmt, gsi);
1336 return new_temp;
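/* Illustrative sketch (editorial note, not part of the original source):
   if VAL is the scalar constant 5 and TYPE is a four-element integer
   vector type, the constant is broadcast with build_vector_from_val
   (after conversion to the element type if needed), so the generated
   init_stmt is roughly

     cst_1 = { 5, 5, 5, 5 };

   inserted at GSI when it is non-null and in the loop preheader
   otherwise; the SSA name "cst_1" is hypothetical.  */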
1339 /* Function vect_get_vec_def_for_operand_1.
1341 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1342 DT that will be used in the vectorized stmt. */
1344 tree
1345 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1347 tree vec_oprnd;
1348 gimple *vec_stmt;
1349 stmt_vec_info def_stmt_info = NULL;
1351 switch (dt)
1353 /* operand is a constant or a loop invariant. */
1354 case vect_constant_def:
1355 case vect_external_def:
1356 /* Code should use vect_get_vec_def_for_operand. */
1357 gcc_unreachable ();
1359 /* operand is defined inside the loop. */
1360 case vect_internal_def:
1362 /* Get the def from the vectorized stmt. */
1363 def_stmt_info = vinfo_for_stmt (def_stmt);
1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1366 /* Get vectorized pattern statement. */
1367 if (!vec_stmt
1368 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1369 && !STMT_VINFO_RELEVANT (def_stmt_info))
1370 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1371 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1372 gcc_assert (vec_stmt);
1373 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1374 vec_oprnd = PHI_RESULT (vec_stmt);
1375 else if (is_gimple_call (vec_stmt))
1376 vec_oprnd = gimple_call_lhs (vec_stmt);
1377 else
1378 vec_oprnd = gimple_assign_lhs (vec_stmt);
1379 return vec_oprnd;
1382 /* operand is defined by a loop header phi. */
1383 case vect_reduction_def:
1384 case vect_double_reduction_def:
1385 case vect_nested_cycle:
1386 case vect_induction_def:
1388 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1390 /* Get the def from the vectorized stmt. */
1391 def_stmt_info = vinfo_for_stmt (def_stmt);
1392 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1393 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1394 vec_oprnd = PHI_RESULT (vec_stmt);
1395 else
1396 vec_oprnd = gimple_get_lhs (vec_stmt);
1397 return vec_oprnd;
1400 default:
1401 gcc_unreachable ();
1406 /* Function vect_get_vec_def_for_operand.
1408 OP is an operand in STMT. This function returns a (vector) def that will be
1409 used in the vectorized stmt for STMT.
1411 In the case that OP is an SSA_NAME which is defined in the loop, then
1412 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1414 In case OP is an invariant or constant, a new stmt that creates a vector def
1415 needs to be introduced. VECTYPE may be used to specify a required type for
1416 vector invariant. */
1418 tree
1419 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1421 gimple *def_stmt;
1422 enum vect_def_type dt;
1423 bool is_simple_use;
1424 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1427 if (dump_enabled_p ())
1429 dump_printf_loc (MSG_NOTE, vect_location,
1430 "vect_get_vec_def_for_operand: ");
1431 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1432 dump_printf (MSG_NOTE, "\n");
1435 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1436 gcc_assert (is_simple_use);
1437 if (def_stmt && dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1440 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1443 if (dt == vect_constant_def || dt == vect_external_def)
1445 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1446 tree vector_type;
1448 if (vectype)
1449 vector_type = vectype;
1450 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1451 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1452 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1453 else
1454 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1456 gcc_assert (vector_type);
1457 return vect_init_vector (stmt, op, vector_type, NULL);
1459 else
1460 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1464 /* Function vect_get_vec_def_for_stmt_copy
1466 Return a vector-def for an operand. This function is used when the
1467 vectorized stmt to be created (by the caller to this function) is a "copy"
1468 created in case the vectorized result cannot fit in one vector, and several
1469 copies of the vector-stmt are required. In this case the vector-def is
1470 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1471 of the stmt that defines VEC_OPRND.
1472 DT is the type of the vector def VEC_OPRND.
1474 Context:
1475 In case the vectorization factor (VF) is bigger than the number
1476 of elements that can fit in a vectype (nunits), we have to generate
1477 more than one vector stmt to vectorize the scalar stmt. This situation
1478 arises when there are multiple data-types operated upon in the loop; the
1479 smallest data-type determines the VF, and as a result, when vectorizing
1480 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1481 vector stmt (each computing a vector of 'nunits' results, and together
1482 computing 'VF' results in each iteration). This function is called when
1483 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1484 which VF=16 and nunits=4, so the number of copies required is 4):
1486 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1488 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1489 VS1.1: vx.1 = memref1 VS1.2
1490 VS1.2: vx.2 = memref2 VS1.3
1491 VS1.3: vx.3 = memref3
1493 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1494 VSnew.1: vz1 = vx.1 + ... VSnew.2
1495 VSnew.2: vz2 = vx.2 + ... VSnew.3
1496 VSnew.3: vz3 = vx.3 + ...
1498 The vectorization of S1 is explained in vectorizable_load.
1499 The vectorization of S2:
1500 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1501 the function 'vect_get_vec_def_for_operand' is called to
1502 get the relevant vector-def for each operand of S2. For operand x it
1503 returns the vector-def 'vx.0'.
1505 To create the remaining copies of the vector-stmt (VSnew.j), this
1506 function is called to get the relevant vector-def for each operand. It is
1507 obtained from the respective VS1.j stmt, which is recorded in the
1508 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1510 For example, to obtain the vector-def 'vx.1' in order to create the
1511 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1512 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1513 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1514 and return its def ('vx.1').
1515 Overall, to create the above sequence this function will be called 3 times:
1516 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1517 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1518 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1520 tree
1521 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1523 gimple *vec_stmt_for_operand;
1524 stmt_vec_info def_stmt_info;
1526 /* Do nothing; can reuse same def. */
1527 if (dt == vect_external_def || dt == vect_constant_def )
1528 return vec_oprnd;
1530 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1531 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1532 gcc_assert (def_stmt_info);
1533 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1534 gcc_assert (vec_stmt_for_operand);
1535 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1536 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1537 else
1538 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1539 return vec_oprnd;
1543 /* Get vectorized definitions for the operands to create a copy of an original
1544 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1546 void
1547 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1548 vec<tree> *vec_oprnds0,
1549 vec<tree> *vec_oprnds1)
1551 tree vec_oprnd = vec_oprnds0->pop ();
1553 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1554 vec_oprnds0->quick_push (vec_oprnd);
1556 if (vec_oprnds1 && vec_oprnds1->length ())
1558 vec_oprnd = vec_oprnds1->pop ();
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1560 vec_oprnds1->quick_push (vec_oprnd);
1565 /* Get vectorized definitions for OP0 and OP1. */
1567 void
1568 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1569 vec<tree> *vec_oprnds0,
1570 vec<tree> *vec_oprnds1,
1571 slp_tree slp_node)
1573 if (slp_node)
1575 int nops = (op1 == NULL_TREE) ? 1 : 2;
1576 auto_vec<tree> ops (nops);
1577 auto_vec<vec<tree> > vec_defs (nops);
1579 ops.quick_push (op0);
1580 if (op1)
1581 ops.quick_push (op1);
1583 vect_get_slp_defs (ops, slp_node, &vec_defs);
1585 *vec_oprnds0 = vec_defs[0];
1586 if (op1)
1587 *vec_oprnds1 = vec_defs[1];
1589 else
1591 tree vec_oprnd;
1593 vec_oprnds0->create (1);
1594 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1595 vec_oprnds0->quick_push (vec_oprnd);
1597 if (op1)
1599 vec_oprnds1->create (1);
1600 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1601 vec_oprnds1->quick_push (vec_oprnd);
1606 /* Helper function called by vect_finish_replace_stmt and
1607 vect_finish_stmt_generation. Set the location of the new
1608 statement and create a stmt_vec_info for it. */
1610 static void
1611 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1614 vec_info *vinfo = stmt_info->vinfo;
1616 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1618 if (dump_enabled_p ())
1620 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1621 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1624 gimple_set_location (vec_stmt, gimple_location (stmt));
1626 /* While EH edges will generally prevent vectorization, stmt might
1627 e.g. be in a must-not-throw region. Ensure newly created stmts
1628 that could throw are part of the same region. */
1629 int lp_nr = lookup_stmt_eh_lp (stmt);
1630 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1631 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1634 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1635 which sets the same scalar result as STMT did. */
1637 void
1638 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1640 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1642 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1643 gsi_replace (&gsi, vec_stmt, false);
1645 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1648 /* Function vect_finish_stmt_generation.
1650 Insert a new stmt. */
1652 void
1653 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1654 gimple_stmt_iterator *gsi)
1656 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1658 if (!gsi_end_p (*gsi)
1659 && gimple_has_mem_ops (vec_stmt))
1661 gimple *at_stmt = gsi_stmt (*gsi);
1662 tree vuse = gimple_vuse (at_stmt);
1663 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1665 tree vdef = gimple_vdef (at_stmt);
1666 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1667 /* If we have an SSA vuse and insert a store, update virtual
1668 SSA form to avoid triggering the renamer. Do so only
1669 if we can easily see all uses - which is what almost always
1670 happens with the way vectorized stmts are inserted. */
1671 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1672 && ((is_gimple_assign (vec_stmt)
1673 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1674 || (is_gimple_call (vec_stmt)
1675 && !(gimple_call_flags (vec_stmt)
1676 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1678 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1679 gimple_set_vdef (vec_stmt, new_vdef);
1680 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1684 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1685 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1688 /* We want to vectorize a call to combined function CFN with function
1689 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1690 as the types of all inputs. Check whether this is possible using
1691 an internal function, returning its code if so or IFN_LAST if not. */
1693 static internal_fn
1694 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1695 tree vectype_out, tree vectype_in)
1697 internal_fn ifn;
1698 if (internal_fn_p (cfn))
1699 ifn = as_internal_fn (cfn);
1700 else
1701 ifn = associated_internal_fn (fndecl);
1702 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1704 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1705 if (info.vectorizable)
1707 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1708 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1709 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1710 OPTIMIZE_FOR_SPEED))
1711 return ifn;
1714 return IFN_LAST;
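/* Illustrative sketch (editorial note): for a call to the sqrt built-in
   with, say, a two-element double vector as both VECTYPE_OUT and
   VECTYPE_IN, associated_internal_fn typically maps the call to IFN_SQRT;
   IFN_SQRT is then returned only if direct_internal_fn_supported_p reports
   that the target implements it for that vector mode, and otherwise the
   function falls back to IFN_LAST.  */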
1718 static tree permute_vec_elements (tree, tree, tree, gimple *,
1719 gimple_stmt_iterator *);
1721 /* Check whether a load or store statement in the loop described by
1722 LOOP_VINFO is possible in a fully-masked loop. This is testing
1723 whether the vectorizer pass has the appropriate support, as well as
1724 whether the target does.
1726 VLS_TYPE says whether the statement is a load or store and VECTYPE
1727 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1728 says how the load or store is going to be implemented and GROUP_SIZE
1729 is the number of load or store statements in the containing group.
1731 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1732 supported, otherwise record the required mask types. */
1734 static void
1735 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1736 vec_load_store_type vls_type, int group_size,
1737 vect_memory_access_type memory_access_type)
1739 /* Invariant loads need no special support. */
1740 if (memory_access_type == VMAT_INVARIANT)
1741 return;
1743 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744 machine_mode vecmode = TYPE_MODE (vectype);
1745 bool is_load = (vls_type == VLS_LOAD);
1746 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1748 if (is_load
1749 ? !vect_load_lanes_supported (vectype, group_size, true)
1750 : !vect_store_lanes_supported (vectype, group_size, true))
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754 "can't use a fully-masked loop because the"
1755 " target doesn't have an appropriate masked"
1756 " load/store-lanes instruction.\n");
1757 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758 return;
1760 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762 return;
1765 if (memory_access_type != VMAT_CONTIGUOUS
1766 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1768 /* Element X of the data must come from iteration i * VF + X of the
1769 scalar loop. We need more work to support other mappings. */
1770 if (dump_enabled_p ())
1771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1772 "can't use a fully-masked loop because an access"
1773 " isn't contiguous.\n");
1774 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1775 return;
1778 machine_mode mask_mode;
1779 if (!(targetm.vectorize.get_mask_mode
1780 (GET_MODE_NUNITS (vecmode),
1781 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1782 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1784 if (dump_enabled_p ())
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1786 "can't use a fully-masked loop because the target"
1787 " doesn't have the appropriate masked load or"
1788 " store.\n");
1789 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1790 return;
1792 /* We might load more scalars than we need for permuting SLP loads.
1793 We checked in get_group_load_store_type that the extra elements
1794 don't leak into a new vector. */
1795 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1796 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1797 unsigned int nvectors;
1798 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1799 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1800 else
1801 gcc_unreachable ();
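/* Illustrative numbers for the contiguous case above: with a group of 2
   scalar accesses, a vectorization factor of 8 and 4-element vectors,
   2 * 8 / 4 = 4 loop masks matching VECTYPE are recorded.  */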
1804 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1805 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1806 that needs to be applied to all loads and stores in a vectorized loop.
1807 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1809 MASK_TYPE is the type of both masks. If new statements are needed,
1810 insert them before GSI. */
1812 static tree
1813 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1814 gimple_stmt_iterator *gsi)
1816 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1817 if (!loop_mask)
1818 return vec_mask;
1820 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1821 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1822 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1823 vec_mask, loop_mask);
1824 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1825 return and_res;
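/* The generated statement has the form
     vec_mask_and_N = VEC_MASK & LOOP_MASK;
   so the access is active only in lanes enabled by both masks.  */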
1828 /* STMT is a non-strided load or store, meaning that it accesses
1829 elements with a known constant step. Return -1 if that step
1830 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1832 static int
1833 compare_step_with_zero (gimple *stmt)
1835 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1836 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1837 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1838 size_zero_node);
1841 /* If the target supports a permute mask that reverses the elements in
1842 a vector of type VECTYPE, return that mask, otherwise return null. */
1844 static tree
1845 perm_mask_for_reverse (tree vectype)
1847 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1849 /* The encoding has a single stepped pattern. */
1850 vec_perm_builder sel (nunits, 1, 3);
1851 for (int i = 0; i < 3; ++i)
1852 sel.quick_push (nunits - 1 - i);
1854 vec_perm_indices indices (sel, 1, nunits);
1855 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1856 return NULL_TREE;
1857 return vect_gen_perm_mask_checked (vectype, indices);
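/* For example, for a 4-element vector the encoded selector expands to
   { 3, 2, 1, 0 }: a single pattern starting at NUNITS - 1 and stepping
   down by 1.  */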
1860 /* STMT is either a masked or unconditional store. Return the value
1861 being stored. */
1863 static tree
1864 vect_get_store_rhs (gimple *stmt)
1866 if (gassign *assign = dyn_cast <gassign *> (stmt))
1868 gcc_assert (gimple_assign_single_p (assign));
1869 return gimple_assign_rhs1 (assign);
1871 if (gcall *call = dyn_cast <gcall *> (stmt))
1873 internal_fn ifn = gimple_call_internal_fn (call);
1874 gcc_assert (ifn == IFN_MASK_STORE);
1875 return gimple_call_arg (stmt, 3);
1877 gcc_unreachable ();
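/* For IFN_MASK_STORE the call arguments are (base address, alignment,
   mask, stored value), so the stored value is argument 3.  */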
1880 /* A subroutine of get_load_store_type, with a subset of the same
1881 arguments. Handle the case where STMT is part of a grouped load
1882 or store.
1884 For stores, the statements in the group are all consecutive
1885 and there is no gap at the end. For loads, the statements in the
1886 group might not be consecutive; there can be gaps between statements
1887 as well as at the end. */
1889 static bool
1890 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1891 bool masked_p, vec_load_store_type vls_type,
1892 vect_memory_access_type *memory_access_type)
1894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1895 vec_info *vinfo = stmt_info->vinfo;
1896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1897 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1898 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1899 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1900 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1901 bool single_element_p = (stmt == first_stmt
1902 && !GROUP_NEXT_ELEMENT (stmt_info));
1903 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1904 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1906 /* True if the vectorized statements would access beyond the last
1907 statement in the group. */
1908 bool overrun_p = false;
1910 /* True if we can cope with such overrun by peeling for gaps, so that
1911 there is at least one final scalar iteration after the vector loop. */
1912 bool can_overrun_p = (!masked_p
1913 && vls_type == VLS_LOAD
1914 && loop_vinfo
1915 && !loop->inner);
1917 /* There can only be a gap at the end of the group if the stride is
1918 known at compile time. */
1919 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1921 /* Stores can't yet have gaps. */
1922 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1924 if (slp)
1926 if (STMT_VINFO_STRIDED_P (stmt_info))
1928 /* Try to use consecutive accesses of GROUP_SIZE elements,
1929 separated by the stride, until we have a complete vector.
1930 Fall back to scalar accesses if that isn't possible. */
1931 if (multiple_p (nunits, group_size))
1932 *memory_access_type = VMAT_STRIDED_SLP;
1933 else
1934 *memory_access_type = VMAT_ELEMENTWISE;
1936 else
1938 overrun_p = loop_vinfo && gap != 0;
1939 if (overrun_p && vls_type != VLS_LOAD)
1941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1942 "Grouped store with gaps requires"
1943 " non-consecutive accesses\n");
1944 return false;
1946 /* An overrun is fine if the trailing elements are smaller
1947 than the alignment boundary B. Every vector access will
1948 be a multiple of B and so we are guaranteed to access a
1949 non-gap element in the same B-sized block. */
1950 if (overrun_p
1951 && gap < (vect_known_alignment_in_bytes (first_dr)
1952 / vect_get_scalar_dr_size (first_dr)))
1953 overrun_p = false;
1954 if (overrun_p && !can_overrun_p)
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1958 "Peeling for outer loop is not supported\n");
1959 return false;
1961 *memory_access_type = VMAT_CONTIGUOUS;
1964 else
1966 /* We can always handle this case using elementwise accesses,
1967 but see if something more efficient is available. */
1968 *memory_access_type = VMAT_ELEMENTWISE;
1970 /* If there is a gap at the end of the group then these optimizations
1971 would access excess elements in the last iteration. */
1972 bool would_overrun_p = (gap != 0);
1973 /* An overrun is fine if the trailing elements are smaller than the
1974 alignment boundary B. Every vector access will be a multiple of B
1975 and so we are guaranteed to access a non-gap element in the
1976 same B-sized block. */
1977 if (would_overrun_p
1978 && !masked_p
1979 && gap < (vect_known_alignment_in_bytes (first_dr)
1980 / vect_get_scalar_dr_size (first_dr)))
1981 would_overrun_p = false;
1983 if (!STMT_VINFO_STRIDED_P (stmt_info)
1984 && (can_overrun_p || !would_overrun_p)
1985 && compare_step_with_zero (stmt) > 0)
1987 /* First cope with the degenerate case of a single-element
1988 vector. */
1989 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
1990 *memory_access_type = VMAT_CONTIGUOUS;
1992 /* Otherwise try using LOAD/STORE_LANES. */
1993 if (*memory_access_type == VMAT_ELEMENTWISE
1994 && (vls_type == VLS_LOAD
1995 ? vect_load_lanes_supported (vectype, group_size, masked_p)
1996 : vect_store_lanes_supported (vectype, group_size,
1997 masked_p)))
1999 *memory_access_type = VMAT_LOAD_STORE_LANES;
2000 overrun_p = would_overrun_p;
2004 /* If that fails, try using permuting loads or stores. */
2004 if (*memory_access_type == VMAT_ELEMENTWISE
2005 && (vls_type == VLS_LOAD
2006 ? vect_grouped_load_supported (vectype, single_element_p,
2007 group_size)
2008 : vect_grouped_store_supported (vectype, group_size)))
2010 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2011 overrun_p = would_overrun_p;
2016 if (vls_type != VLS_LOAD && first_stmt == stmt)
2018 /* STMT is the leader of the group. Check the operands of all the
2019 stmts of the group. */
2020 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2021 while (next_stmt)
2023 tree op = vect_get_store_rhs (next_stmt);
2024 gimple *def_stmt;
2025 enum vect_def_type dt;
2026 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2030 "use not simple.\n");
2031 return false;
2033 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2037 if (overrun_p)
2039 gcc_assert (can_overrun_p);
2040 if (dump_enabled_p ())
2041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2042 "Data access with gaps requires scalar "
2043 "epilogue loop\n");
2044 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2047 return true;
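/* Worked example of the overrun rule above (illustrative): a load group
   with a gap of 1 int element whose first access is known to be 16-byte
   aligned gives gap (1) < 16 / 4, so every vector access stays within an
   aligned 16-byte block that also contains a non-gap element, and the
   overrun is harmless.  */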
2050 /* A subroutine of get_load_store_type, with a subset of the same
2051 arguments. Handle the case where STMT is a load or store that
2052 accesses consecutive elements with a negative step. */
2054 static vect_memory_access_type
2055 get_negative_load_store_type (gimple *stmt, tree vectype,
2056 vec_load_store_type vls_type,
2057 unsigned int ncopies)
2059 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2060 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2061 dr_alignment_support alignment_support_scheme;
2063 if (ncopies > 1)
2065 if (dump_enabled_p ())
2066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2067 "multiple types with negative step.\n");
2068 return VMAT_ELEMENTWISE;
2071 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2072 if (alignment_support_scheme != dr_aligned
2073 && alignment_support_scheme != dr_unaligned_supported)
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "negative step but alignment required.\n");
2078 return VMAT_ELEMENTWISE;
2081 if (vls_type == VLS_STORE_INVARIANT)
2083 if (dump_enabled_p ())
2084 dump_printf_loc (MSG_NOTE, vect_location,
2085 "negative step with invariant source;"
2086 " no permute needed.\n");
2087 return VMAT_CONTIGUOUS_DOWN;
2090 if (!perm_mask_for_reverse (vectype))
2092 if (dump_enabled_p ())
2093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2094 "negative step and reversing not supported.\n");
2095 return VMAT_ELEMENTWISE;
2098 return VMAT_CONTIGUOUS_REVERSE;
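/* A typical case (illustrative) is a loop such as
     for (i = n - 1; i >= 0; --i)
       a[i] = b[i] + 1;
   where both accesses step by -4 bytes for int elements: the vectorized
   accesses are contiguous and each vector is reversed using the mask from
   perm_mask_for_reverse.  */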
2101 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2102 if there is a memory access type that the vectorized form can use,
2103 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2104 or scatters, fill in GS_INFO accordingly.
2106 SLP says whether we're performing SLP rather than loop vectorization.
2107 MASKED_P is true if the statement is conditional on a vectorized mask.
2108 VECTYPE is the vector type that the vectorized statements will use.
2109 NCOPIES is the number of vector statements that will be needed. */
2111 static bool
2112 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2113 vec_load_store_type vls_type, unsigned int ncopies,
2114 vect_memory_access_type *memory_access_type,
2115 gather_scatter_info *gs_info)
2117 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2118 vec_info *vinfo = stmt_info->vinfo;
2119 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2120 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2121 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2123 *memory_access_type = VMAT_GATHER_SCATTER;
2124 gimple *def_stmt;
2125 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2126 gcc_unreachable ();
2127 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2128 &gs_info->offset_dt,
2129 &gs_info->offset_vectype))
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "%s index use not simple.\n",
2134 vls_type == VLS_LOAD ? "gather" : "scatter");
2135 return false;
2138 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2140 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2141 memory_access_type))
2142 return false;
2144 else if (STMT_VINFO_STRIDED_P (stmt_info))
2146 gcc_assert (!slp);
2147 *memory_access_type = VMAT_ELEMENTWISE;
2149 else
2151 int cmp = compare_step_with_zero (stmt);
2152 if (cmp < 0)
2153 *memory_access_type = get_negative_load_store_type
2154 (stmt, vectype, vls_type, ncopies);
2155 else if (cmp == 0)
2157 gcc_assert (vls_type == VLS_LOAD);
2158 *memory_access_type = VMAT_INVARIANT;
2160 else
2161 *memory_access_type = VMAT_CONTIGUOUS;
2164 if ((*memory_access_type == VMAT_ELEMENTWISE
2165 || *memory_access_type == VMAT_STRIDED_SLP)
2166 && !nunits.is_constant ())
2168 if (dump_enabled_p ())
2169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2170 "Not using elementwise accesses due to variable "
2171 "vectorization factor.\n");
2172 return false;
2175 /* FIXME: At the moment the cost model seems to underestimate the
2176 cost of using elementwise accesses. This check preserves the
2177 traditional behavior until that can be fixed. */
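/* The one exception in the test below is single-element interleaving with
   a non-power-of-two group size, for which neither load/store-lanes nor
   the interleaving permutes are available, so elementwise accesses are the
   only remaining option.  */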
2178 if (*memory_access_type == VMAT_ELEMENTWISE
2179 && !STMT_VINFO_STRIDED_P (stmt_info)
2180 && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2181 && !GROUP_NEXT_ELEMENT (stmt_info)
2182 && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2186 "not falling back to elementwise accesses\n");
2187 return false;
2189 return true;
2192 /* Return true if boolean argument MASK is suitable for vectorizing
2193 conditional load or store STMT. When returning true, store the
2194 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2196 static bool
2197 vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
2199 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2203 "mask argument is not a boolean.\n");
2204 return false;
2207 if (TREE_CODE (mask) != SSA_NAME)
2209 if (dump_enabled_p ())
2210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2211 "mask argument is not an SSA name.\n");
2212 return false;
2215 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2216 gimple *def_stmt;
2217 enum vect_def_type dt;
2218 tree mask_vectype;
2219 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
2220 &mask_vectype))
2222 if (dump_enabled_p ())
2223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2224 "mask use not simple.\n");
2225 return false;
2228 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2229 if (!mask_vectype)
2230 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2232 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2234 if (dump_enabled_p ())
2235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2236 "could not find an appropriate vector mask type.\n");
2237 return false;
2240 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2241 TYPE_VECTOR_SUBPARTS (vectype)))
2243 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "vector mask type ");
2247 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2248 dump_printf (MSG_MISSED_OPTIMIZATION,
2249 " does not match vector data type ");
2250 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2251 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2253 return false;
2256 *mask_vectype_out = mask_vectype;
2257 return true;
2260 /* Return true if stored value RHS is suitable for vectorizing store
2261 statement STMT. When returning true, store the type of the
2262 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2263 store in *VLS_TYPE_OUT. */
2265 static bool
2266 vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
2267 vec_load_store_type *vls_type_out)
2269 /* In the case that this is a store from a constant, make sure
2270 native_encode_expr can handle it. */
2271 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2273 if (dump_enabled_p ())
2274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2275 "cannot encode constant as a byte sequence.\n");
2276 return false;
2279 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2280 gimple *def_stmt;
2281 enum vect_def_type dt;
2282 tree rhs_vectype;
2283 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
2284 &rhs_vectype))
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2288 "use not simple.\n");
2289 return false;
2292 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2293 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2297 "incompatible vector types.\n");
2298 return false;
2301 *rhs_vectype_out = rhs_vectype;
2302 if (dt == vect_constant_def || dt == vect_external_def)
2303 *vls_type_out = VLS_STORE_INVARIANT;
2304 else
2305 *vls_type_out = VLS_STORE;
2306 return true;
2309 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2310 Note that we support masks with floating-point type, in which case the
2311 floats are interpreted as a bitmask. */
2313 static tree
2314 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2316 if (TREE_CODE (masktype) == INTEGER_TYPE)
2317 return build_int_cst (masktype, -1);
2318 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2320 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2321 mask = build_vector_from_val (masktype, mask);
2322 return vect_init_vector (stmt, mask, masktype, NULL);
2324 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2326 REAL_VALUE_TYPE r;
2327 long tmp[6];
2328 for (int j = 0; j < 6; ++j)
2329 tmp[j] = -1;
2330 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2331 tree mask = build_real (TREE_TYPE (masktype), r);
2332 mask = build_vector_from_val (masktype, mask);
2333 return vect_init_vector (stmt, mask, masktype, NULL);
2335 gcc_unreachable ();
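/* For a floating-point mask element the all-ones value is built from an
   all-ones bit pattern (for double this happens to be a NaN); such masks
   are consumed purely as bit patterns by the gather builtins.  */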
2338 /* Build an all-zero merge value of type VECTYPE while vectorizing
2339 STMT as a gather load. */
2341 static tree
2342 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2344 tree merge;
2345 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2346 merge = build_int_cst (TREE_TYPE (vectype), 0);
2347 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2349 REAL_VALUE_TYPE r;
2350 long tmp[6];
2351 for (int j = 0; j < 6; ++j)
2352 tmp[j] = 0;
2353 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2354 merge = build_real (TREE_TYPE (vectype), r);
2356 else
2357 gcc_unreachable ();
2358 merge = build_vector_from_val (vectype, merge);
2359 return vect_init_vector (stmt, merge, vectype, NULL);
2362 /* Build a gather load call while vectorizing STMT. Insert new instructions
2363 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2364 operation. If the load is conditional, MASK is the unvectorized
2365 condition, otherwise MASK is null. */
2367 static void
2368 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2369 gimple **vec_stmt, gather_scatter_info *gs_info,
2370 tree mask)
2372 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2373 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2374 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2375 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2376 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2377 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2378 edge pe = loop_preheader_edge (loop);
2379 enum { NARROW, NONE, WIDEN } modifier;
2380 poly_uint64 gather_off_nunits
2381 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2383 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2384 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2385 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2386 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2387 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2388 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2389 tree scaletype = TREE_VALUE (arglist);
2390 gcc_checking_assert (types_compatible_p (srctype, rettype)
2391 && (!mask || types_compatible_p (srctype, masktype)));
2393 tree perm_mask = NULL_TREE;
2394 tree mask_perm_mask = NULL_TREE;
2395 if (known_eq (nunits, gather_off_nunits))
2396 modifier = NONE;
2397 else if (known_eq (nunits * 2, gather_off_nunits))
2399 modifier = WIDEN;
2401 /* Currently widening gathers and scatters are only supported for
2402 fixed-length vectors. */
2403 int count = gather_off_nunits.to_constant ();
2404 vec_perm_builder sel (count, count, 1);
2405 for (int i = 0; i < count; ++i)
2406 sel.quick_push (i | (count / 2));
2408 vec_perm_indices indices (sel, 1, count);
2409 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2410 indices);
2412 else if (known_eq (nunits, gather_off_nunits * 2))
2414 modifier = NARROW;
2416 /* Currently narrowing gathers and scatters are only supported for
2417 fixed-length vectors. */
2418 int count = nunits.to_constant ();
2419 vec_perm_builder sel (count, count, 1);
2420 sel.quick_grow (count);
2421 for (int i = 0; i < count; ++i)
2422 sel[i] = i < count / 2 ? i : i + count / 2;
2423 vec_perm_indices indices (sel, 2, count);
2424 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2426 ncopies *= 2;
2428 if (mask)
2430 for (int i = 0; i < count; ++i)
2431 sel[i] = i | (count / 2);
2432 indices.new_vector (sel, 2, count);
2433 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2436 else
2437 gcc_unreachable ();
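/* Summary (illustrative): with matching element counts no permutation is
   needed.  For WIDEN one offset vector serves two data vectors, so
   PERM_MASK selects its upper half for the odd-numbered copies.  For
   NARROW two gather results are combined into one data vector with
   PERM_MASK, and MASK_PERM_MASK plays the same role for the mask.  */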
2439 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2440 vectype);
2442 tree ptr = fold_convert (ptrtype, gs_info->base);
2443 if (!is_gimple_min_invariant (ptr))
2445 gimple_seq seq;
2446 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2447 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2448 gcc_assert (!new_bb);
2451 tree scale = build_int_cst (scaletype, gs_info->scale);
2453 tree vec_oprnd0 = NULL_TREE;
2454 tree vec_mask = NULL_TREE;
2455 tree src_op = NULL_TREE;
2456 tree mask_op = NULL_TREE;
2457 tree prev_res = NULL_TREE;
2458 stmt_vec_info prev_stmt_info = NULL;
2460 if (!mask)
2462 src_op = vect_build_zero_merge_argument (stmt, rettype);
2463 mask_op = vect_build_all_ones_mask (stmt, masktype);
2466 for (int j = 0; j < ncopies; ++j)
2468 tree op, var;
2469 gimple *new_stmt;
2470 if (modifier == WIDEN && (j & 1))
2471 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2472 perm_mask, stmt, gsi);
2473 else if (j == 0)
2474 op = vec_oprnd0
2475 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2476 else
2477 op = vec_oprnd0
2478 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2480 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2482 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2483 TYPE_VECTOR_SUBPARTS (idxtype)));
2484 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2485 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2486 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2487 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2488 op = var;
2491 if (mask)
2493 if (mask_perm_mask && (j & 1))
2494 mask_op = permute_vec_elements (mask_op, mask_op,
2495 mask_perm_mask, stmt, gsi);
2496 else
2498 if (j == 0)
2499 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2500 else
2502 gimple *def_stmt;
2503 enum vect_def_type dt;
2504 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2505 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2508 mask_op = vec_mask;
2509 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2511 gcc_assert
2512 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2513 TYPE_VECTOR_SUBPARTS (masktype)));
2514 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2515 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2516 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2517 mask_op);
2518 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2519 mask_op = var;
2522 src_op = mask_op;
2525 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2526 mask_op, scale);
2528 if (!useless_type_conversion_p (vectype, rettype))
2530 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2531 TYPE_VECTOR_SUBPARTS (rettype)));
2532 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2533 gimple_call_set_lhs (new_stmt, op);
2534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2535 var = make_ssa_name (vec_dest);
2536 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2537 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2539 else
2541 var = make_ssa_name (vec_dest, new_stmt);
2542 gimple_call_set_lhs (new_stmt, var);
2545 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2547 if (modifier == NARROW)
2549 if ((j & 1) == 0)
2551 prev_res = var;
2552 continue;
2554 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2555 new_stmt = SSA_NAME_DEF_STMT (var);
2558 if (prev_stmt_info == NULL)
2559 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2560 else
2561 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2562 prev_stmt_info = vinfo_for_stmt (new_stmt);
2566 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2568 static bool
2569 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2570 gimple **vec_stmt, slp_tree slp_node,
2571 tree vectype_in, enum vect_def_type *dt)
2573 tree op, vectype;
2574 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2575 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2576 unsigned ncopies;
2577 unsigned HOST_WIDE_INT nunits, num_bytes;
2579 op = gimple_call_arg (stmt, 0);
2580 vectype = STMT_VINFO_VECTYPE (stmt_info);
2582 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2583 return false;
2585 /* Multiple types in SLP are handled by creating the appropriate number of
2586 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2587 case of SLP. */
2588 if (slp_node)
2589 ncopies = 1;
2590 else
2591 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2593 gcc_assert (ncopies >= 1);
2595 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2596 if (! char_vectype)
2597 return false;
2599 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2600 return false;
2602 unsigned word_bytes = num_bytes / nunits;
2604 /* The encoding uses one stepped pattern for each byte in the word. */
2605 vec_perm_builder elts (num_bytes, word_bytes, 3);
2606 for (unsigned i = 0; i < 3; ++i)
2607 for (unsigned j = 0; j < word_bytes; ++j)
2608 elts.quick_push ((i + 1) * word_bytes - j - 1);
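/* For example, a 32-bit bswap on a 16-byte vector has WORD_BYTES == 4 and
   the selector expands to { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
   15, 14, 13, 12 }, i.e. the bytes of each word are reversed in place.  */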
2610 vec_perm_indices indices (elts, 1, num_bytes);
2611 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2612 return false;
2614 if (! vec_stmt)
2616 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2617 if (dump_enabled_p ())
2618 dump_printf_loc (MSG_NOTE, vect_location,
2619 "=== vectorizable_bswap ===\n");
2620 if (! PURE_SLP_STMT (stmt_info))
2622 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2623 1, vector_stmt, stmt_info, 0, vect_prologue);
2624 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2625 ncopies, vec_perm, stmt_info, 0, vect_body);
2627 return true;
2630 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2632 /* Transform. */
2633 vec<tree> vec_oprnds = vNULL;
2634 gimple *new_stmt = NULL;
2635 stmt_vec_info prev_stmt_info = NULL;
2636 for (unsigned j = 0; j < ncopies; j++)
2638 /* Handle uses. */
2639 if (j == 0)
2640 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2641 else
2642 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2644 /* Arguments are ready. Create the new vector stmt. */
2645 unsigned i;
2646 tree vop;
2647 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2649 tree tem = make_ssa_name (char_vectype);
2650 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2651 char_vectype, vop));
2652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2653 tree tem2 = make_ssa_name (char_vectype);
2654 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2655 tem, tem, bswap_vconst);
2656 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2657 tem = make_ssa_name (vectype);
2658 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2659 vectype, tem2));
2660 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2661 if (slp_node)
2662 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2665 if (slp_node)
2666 continue;
2668 if (j == 0)
2669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2670 else
2671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2673 prev_stmt_info = vinfo_for_stmt (new_stmt);
2676 vec_oprnds.release ();
2677 return true;
2680 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2681 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2682 in a single step. On success, store the binary pack code in
2683 *CONVERT_CODE. */
2685 static bool
2686 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2687 tree_code *convert_code)
2689 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2690 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2691 return false;
2693 tree_code code;
2694 int multi_step_cvt = 0;
2695 auto_vec <tree, 8> interm_types;
2696 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2697 &code, &multi_step_cvt,
2698 &interm_types)
2699 || multi_step_cvt)
2700 return false;
2702 *convert_code = code;
2703 return true;
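/* For example (illustrative), narrowing V4SI to V8HI can typically be done
   with a single VEC_PACK_TRUNC step, so it qualifies; a narrowing that
   needs an intermediate type (MULTI_STEP_CVT != 0) does not.  */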
2706 /* Function vectorizable_call.
2708 Check if GS performs a function call that can be vectorized.
2709 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2710 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2711 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2713 static bool
2714 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2715 slp_tree slp_node)
2717 gcall *stmt;
2718 tree vec_dest;
2719 tree scalar_dest;
2720 tree op, type;
2721 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2722 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2723 tree vectype_out, vectype_in;
2724 poly_uint64 nunits_in;
2725 poly_uint64 nunits_out;
2726 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2727 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2728 vec_info *vinfo = stmt_info->vinfo;
2729 tree fndecl, new_temp, rhs_type;
2730 gimple *def_stmt;
2731 enum vect_def_type dt[3]
2732 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2733 int ndts = 3;
2734 gimple *new_stmt = NULL;
2735 int ncopies, j;
2736 vec<tree> vargs = vNULL;
2737 enum { NARROW, NONE, WIDEN } modifier;
2738 size_t i, nargs;
2739 tree lhs;
2741 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2742 return false;
2744 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2745 && ! vec_stmt)
2746 return false;
2748 /* Is GS a vectorizable call? */
2749 stmt = dyn_cast <gcall *> (gs);
2750 if (!stmt)
2751 return false;
2753 if (gimple_call_internal_p (stmt)
2754 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2755 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2756 /* Handled by vectorizable_load and vectorizable_store. */
2757 return false;
2759 if (gimple_call_lhs (stmt) == NULL_TREE
2760 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2761 return false;
2763 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2765 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2767 /* Process function arguments. */
2768 rhs_type = NULL_TREE;
2769 vectype_in = NULL_TREE;
2770 nargs = gimple_call_num_args (stmt);
2772 /* Bail out if the function has more than three arguments; we do not have
2773 interesting builtin functions to vectorize with more than two arguments
2774 except for fma. Having no arguments is not good either. */
2775 if (nargs == 0 || nargs > 3)
2776 return false;
2778 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2779 if (gimple_call_internal_p (stmt)
2780 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2782 nargs = 0;
2783 rhs_type = unsigned_type_node;
2786 for (i = 0; i < nargs; i++)
2788 tree opvectype;
2790 op = gimple_call_arg (stmt, i);
2792 /* We can only handle calls with arguments of the same type. */
2793 if (rhs_type
2794 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2796 if (dump_enabled_p ())
2797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2798 "argument types differ.\n");
2799 return false;
2801 if (!rhs_type)
2802 rhs_type = TREE_TYPE (op);
2804 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2806 if (dump_enabled_p ())
2807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2808 "use not simple.\n");
2809 return false;
2812 if (!vectype_in)
2813 vectype_in = opvectype;
2814 else if (opvectype
2815 && opvectype != vectype_in)
2817 if (dump_enabled_p ())
2818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2819 "argument vector types differ.\n");
2820 return false;
2823 /* If all arguments are external or constant defs, use a vector type with
2824 the same size as the output vector type. */
2825 if (!vectype_in)
2826 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2827 if (vec_stmt)
2828 gcc_assert (vectype_in);
2829 if (!vectype_in)
2831 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2834 "no vectype for scalar type ");
2835 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2836 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2839 return false;
2842 /* FORNOW */
2843 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2844 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2845 if (known_eq (nunits_in * 2, nunits_out))
2846 modifier = NARROW;
2847 else if (known_eq (nunits_out, nunits_in))
2848 modifier = NONE;
2849 else if (known_eq (nunits_out * 2, nunits_in))
2850 modifier = WIDEN;
2851 else
2852 return false;
2854 /* We only handle functions that do not read or clobber memory. */
2855 if (gimple_vuse (stmt))
2857 if (dump_enabled_p ())
2858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2859 "function reads from or writes to memory.\n");
2860 return false;
2863 /* For now, we only vectorize functions if a target specific builtin
2864 is available. TODO -- in some cases, it might be profitable to
2865 insert the calls for pieces of the vector, in order to be able
2866 to vectorize other operations in the loop. */
2867 fndecl = NULL_TREE;
2868 internal_fn ifn = IFN_LAST;
2869 combined_fn cfn = gimple_call_combined_fn (stmt);
2870 tree callee = gimple_call_fndecl (stmt);
2872 /* First try using an internal function. */
2873 tree_code convert_code = ERROR_MARK;
2874 if (cfn != CFN_LAST
2875 && (modifier == NONE
2876 || (modifier == NARROW
2877 && simple_integer_narrowing (vectype_out, vectype_in,
2878 &convert_code))))
2879 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2880 vectype_in);
2882 /* If that fails, try asking for a target-specific built-in function. */
2883 if (ifn == IFN_LAST)
2885 if (cfn != CFN_LAST)
2886 fndecl = targetm.vectorize.builtin_vectorized_function
2887 (cfn, vectype_out, vectype_in);
2888 else
2889 fndecl = targetm.vectorize.builtin_md_vectorized_function
2890 (callee, vectype_out, vectype_in);
2893 if (ifn == IFN_LAST && !fndecl)
2895 if (cfn == CFN_GOMP_SIMD_LANE
2896 && !slp_node
2897 && loop_vinfo
2898 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2899 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2900 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2901 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2903 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2904 { 0, 1, 2, ... vf - 1 } vector. */
2905 gcc_assert (nargs == 0);
2907 else if (modifier == NONE
2908 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2909 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2910 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2911 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2912 vectype_in, dt);
2913 else
2915 if (dump_enabled_p ())
2916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2917 "function is not vectorizable.\n");
2918 return false;
2922 if (slp_node)
2923 ncopies = 1;
2924 else if (modifier == NARROW && ifn == IFN_LAST)
2925 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2926 else
2927 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2929 /* Sanity check: make sure that at least one copy of the vectorized stmt
2930 needs to be generated. */
2931 gcc_assert (ncopies >= 1);
2933 if (!vec_stmt) /* transformation not required. */
2935 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2936 if (dump_enabled_p ())
2937 dump_printf_loc (MSG_NOTE, vect_location,
2938 "=== vectorizable_call ===\n");
2939 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2940 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2941 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2942 vec_promote_demote, stmt_info, 0, vect_body);
2944 return true;
2947 /* Transform. */
2949 if (dump_enabled_p ())
2950 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2952 /* Handle def. */
2953 scalar_dest = gimple_call_lhs (stmt);
2954 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2956 prev_stmt_info = NULL;
2957 if (modifier == NONE || ifn != IFN_LAST)
2959 tree prev_res = NULL_TREE;
2960 for (j = 0; j < ncopies; ++j)
2962 /* Build argument list for the vectorized call. */
2963 if (j == 0)
2964 vargs.create (nargs);
2965 else
2966 vargs.truncate (0);
2968 if (slp_node)
2970 auto_vec<vec<tree> > vec_defs (nargs);
2971 vec<tree> vec_oprnds0;
2973 for (i = 0; i < nargs; i++)
2974 vargs.quick_push (gimple_call_arg (stmt, i));
2975 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2976 vec_oprnds0 = vec_defs[0];
2978 /* Arguments are ready. Create the new vector stmt. */
2979 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2981 size_t k;
2982 for (k = 0; k < nargs; k++)
2984 vec<tree> vec_oprndsk = vec_defs[k];
2985 vargs[k] = vec_oprndsk[i];
2987 if (modifier == NARROW)
2989 tree half_res = make_ssa_name (vectype_in);
2990 gcall *call
2991 = gimple_build_call_internal_vec (ifn, vargs);
2992 gimple_call_set_lhs (call, half_res);
2993 gimple_call_set_nothrow (call, true);
2994 new_stmt = call;
2995 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2996 if ((i & 1) == 0)
2998 prev_res = half_res;
2999 continue;
3001 new_temp = make_ssa_name (vec_dest);
3002 new_stmt = gimple_build_assign (new_temp, convert_code,
3003 prev_res, half_res);
3005 else
3007 gcall *call;
3008 if (ifn != IFN_LAST)
3009 call = gimple_build_call_internal_vec (ifn, vargs);
3010 else
3011 call = gimple_build_call_vec (fndecl, vargs);
3012 new_temp = make_ssa_name (vec_dest, call);
3013 gimple_call_set_lhs (call, new_temp);
3014 gimple_call_set_nothrow (call, true);
3015 new_stmt = call;
3017 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3018 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3021 for (i = 0; i < nargs; i++)
3023 vec<tree> vec_oprndsi = vec_defs[i];
3024 vec_oprndsi.release ();
3026 continue;
3029 for (i = 0; i < nargs; i++)
3031 op = gimple_call_arg (stmt, i);
3032 if (j == 0)
3033 vec_oprnd0
3034 = vect_get_vec_def_for_operand (op, stmt);
3035 else
3037 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3038 vec_oprnd0
3039 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3042 vargs.quick_push (vec_oprnd0);
3045 if (gimple_call_internal_p (stmt)
3046 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3048 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3049 tree new_var
3050 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3051 gimple *init_stmt = gimple_build_assign (new_var, cst);
3052 vect_init_vector_1 (stmt, init_stmt, NULL);
3053 new_temp = make_ssa_name (vec_dest);
3054 new_stmt = gimple_build_assign (new_temp, new_var);
3056 else if (modifier == NARROW)
3058 tree half_res = make_ssa_name (vectype_in);
3059 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3060 gimple_call_set_lhs (call, half_res);
3061 gimple_call_set_nothrow (call, true);
3062 new_stmt = call;
3063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3064 if ((j & 1) == 0)
3066 prev_res = half_res;
3067 continue;
3069 new_temp = make_ssa_name (vec_dest);
3070 new_stmt = gimple_build_assign (new_temp, convert_code,
3071 prev_res, half_res);
3073 else
3075 gcall *call;
3076 if (ifn != IFN_LAST)
3077 call = gimple_build_call_internal_vec (ifn, vargs);
3078 else
3079 call = gimple_build_call_vec (fndecl, vargs);
3080 new_temp = make_ssa_name (vec_dest, new_stmt);
3081 gimple_call_set_lhs (call, new_temp);
3082 gimple_call_set_nothrow (call, true);
3083 new_stmt = call;
3085 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3087 if (j == (modifier == NARROW ? 1 : 0))
3088 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3089 else
3090 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3092 prev_stmt_info = vinfo_for_stmt (new_stmt);
3095 else if (modifier == NARROW)
3097 for (j = 0; j < ncopies; ++j)
3099 /* Build argument list for the vectorized call. */
3100 if (j == 0)
3101 vargs.create (nargs * 2);
3102 else
3103 vargs.truncate (0);
3105 if (slp_node)
3107 auto_vec<vec<tree> > vec_defs (nargs);
3108 vec<tree> vec_oprnds0;
3110 for (i = 0; i < nargs; i++)
3111 vargs.quick_push (gimple_call_arg (stmt, i));
3112 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3113 vec_oprnds0 = vec_defs[0];
3115 /* Arguments are ready. Create the new vector stmt. */
3116 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3118 size_t k;
3119 vargs.truncate (0);
3120 for (k = 0; k < nargs; k++)
3122 vec<tree> vec_oprndsk = vec_defs[k];
3123 vargs.quick_push (vec_oprndsk[i]);
3124 vargs.quick_push (vec_oprndsk[i + 1]);
3126 gcall *call;
3127 if (ifn != IFN_LAST)
3128 call = gimple_build_call_internal_vec (ifn, vargs);
3129 else
3130 call = gimple_build_call_vec (fndecl, vargs);
3131 new_temp = make_ssa_name (vec_dest, call);
3132 gimple_call_set_lhs (call, new_temp);
3133 gimple_call_set_nothrow (call, true);
3134 new_stmt = call;
3135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3136 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3139 for (i = 0; i < nargs; i++)
3141 vec<tree> vec_oprndsi = vec_defs[i];
3142 vec_oprndsi.release ();
3144 continue;
3147 for (i = 0; i < nargs; i++)
3149 op = gimple_call_arg (stmt, i);
3150 if (j == 0)
3152 vec_oprnd0
3153 = vect_get_vec_def_for_operand (op, stmt);
3154 vec_oprnd1
3155 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3157 else
3159 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3160 vec_oprnd0
3161 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3162 vec_oprnd1
3163 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3166 vargs.quick_push (vec_oprnd0);
3167 vargs.quick_push (vec_oprnd1);
3170 new_stmt = gimple_build_call_vec (fndecl, vargs);
3171 new_temp = make_ssa_name (vec_dest, new_stmt);
3172 gimple_call_set_lhs (new_stmt, new_temp);
3173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3175 if (j == 0)
3176 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3177 else
3178 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3180 prev_stmt_info = vinfo_for_stmt (new_stmt);
3183 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3185 else
3186 /* No current target implements this case. */
3187 return false;
3189 vargs.release ();
3191 /* The call in STMT might prevent it from being removed in dce.
3192 However, we cannot remove it here, due to the way the ssa name
3193 it defines is mapped to the new definition. So just replace the
3194 rhs of the statement with something harmless. */
3196 if (slp_node)
3197 return true;
3199 type = TREE_TYPE (scalar_dest);
3200 if (is_pattern_stmt_p (stmt_info))
3201 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3202 else
3203 lhs = gimple_call_lhs (stmt);
3205 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3206 set_vinfo_for_stmt (new_stmt, stmt_info);
3207 set_vinfo_for_stmt (stmt, NULL);
3208 STMT_VINFO_STMT (stmt_info) = new_stmt;
3209 gsi_replace (gsi, new_stmt, false);
3211 return true;
3215 struct simd_call_arg_info
3217 tree vectype;
3218 tree op;
3219 HOST_WIDE_INT linear_step;
3220 enum vect_def_type dt;
3221 unsigned int align;
3222 bool simd_lane_linear;
3225 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3226 is linear within a simd lane (but not within the whole loop), note it in
3227 *ARGINFO. */
3229 static void
3230 vect_simd_lane_linear (tree op, struct loop *loop,
3231 struct simd_call_arg_info *arginfo)
3233 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3235 if (!is_gimple_assign (def_stmt)
3236 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3237 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3238 return;
3240 tree base = gimple_assign_rhs1 (def_stmt);
3241 HOST_WIDE_INT linear_step = 0;
3242 tree v = gimple_assign_rhs2 (def_stmt);
3243 while (TREE_CODE (v) == SSA_NAME)
3245 tree t;
3246 def_stmt = SSA_NAME_DEF_STMT (v);
3247 if (is_gimple_assign (def_stmt))
3248 switch (gimple_assign_rhs_code (def_stmt))
3250 case PLUS_EXPR:
3251 t = gimple_assign_rhs2 (def_stmt);
3252 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3253 return;
3254 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3255 v = gimple_assign_rhs1 (def_stmt);
3256 continue;
3257 case MULT_EXPR:
3258 t = gimple_assign_rhs2 (def_stmt);
3259 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3260 return;
3261 linear_step = tree_to_shwi (t);
3262 v = gimple_assign_rhs1 (def_stmt);
3263 continue;
3264 CASE_CONVERT:
3265 t = gimple_assign_rhs1 (def_stmt);
3266 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3267 || (TYPE_PRECISION (TREE_TYPE (v))
3268 < TYPE_PRECISION (TREE_TYPE (t))))
3269 return;
3270 if (!linear_step)
3271 linear_step = 1;
3272 v = t;
3273 continue;
3274 default:
3275 return;
3277 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3278 && loop->simduid
3279 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3280 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3281 == loop->simduid))
3283 if (!linear_step)
3284 linear_step = 1;
3285 arginfo->linear_step = linear_step;
3286 arginfo->op = base;
3287 arginfo->simd_lane_linear = true;
3288 return;
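/* A typical case (illustrative SSA names): an address computed as
     p_1 = &base p+ (sizetype) (_simd_lane * 4 + _k);
   with _k constant is linear within a simd lane with linear_step 4, even
   though it is not a simple induction over the whole loop.  */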
3293 /* Return the number of elements in vector type VECTYPE, which is associated
3294 with a SIMD clone. At present these vectors always have a constant
3295 length. */
3297 static unsigned HOST_WIDE_INT
3298 simd_clone_subparts (tree vectype)
3300 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3303 /* Function vectorizable_simd_clone_call.
3305 Check if STMT performs a function call that can be vectorized
3306 by calling a simd clone of the function.
3307 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3308 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3309 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3311 static bool
3312 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3313 gimple **vec_stmt, slp_tree slp_node)
3315 tree vec_dest;
3316 tree scalar_dest;
3317 tree op, type;
3318 tree vec_oprnd0 = NULL_TREE;
3319 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3320 tree vectype;
3321 unsigned int nunits;
3322 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3323 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3324 vec_info *vinfo = stmt_info->vinfo;
3325 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3326 tree fndecl, new_temp;
3327 gimple *def_stmt;
3328 gimple *new_stmt = NULL;
3329 int ncopies, j;
3330 auto_vec<simd_call_arg_info> arginfo;
3331 vec<tree> vargs = vNULL;
3332 size_t i, nargs;
3333 tree lhs, rtype, ratype;
3334 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3336 /* Is STMT a vectorizable call? */
3337 if (!is_gimple_call (stmt))
3338 return false;
3340 fndecl = gimple_call_fndecl (stmt);
3341 if (fndecl == NULL_TREE)
3342 return false;
3344 struct cgraph_node *node = cgraph_node::get (fndecl);
3345 if (node == NULL || node->simd_clones == NULL)
3346 return false;
3348 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3349 return false;
3351 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3352 && ! vec_stmt)
3353 return false;
3355 if (gimple_call_lhs (stmt)
3356 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3357 return false;
3359 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3361 vectype = STMT_VINFO_VECTYPE (stmt_info);
3363 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3364 return false;
3366 /* FORNOW */
3367 if (slp_node)
3368 return false;
3370 /* Process function arguments. */
3371 nargs = gimple_call_num_args (stmt);
3373 /* Bail out if the function has zero arguments. */
3374 if (nargs == 0)
3375 return false;
3377 arginfo.reserve (nargs, true);
3379 for (i = 0; i < nargs; i++)
3381 simd_call_arg_info thisarginfo;
3382 affine_iv iv;
3384 thisarginfo.linear_step = 0;
3385 thisarginfo.align = 0;
3386 thisarginfo.op = NULL_TREE;
3387 thisarginfo.simd_lane_linear = false;
3389 op = gimple_call_arg (stmt, i);
3390 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3391 &thisarginfo.vectype)
3392 || thisarginfo.dt == vect_uninitialized_def)
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3396 "use not simple.\n");
3397 return false;
3400 if (thisarginfo.dt == vect_constant_def
3401 || thisarginfo.dt == vect_external_def)
3402 gcc_assert (thisarginfo.vectype == NULL_TREE);
3403 else
3404 gcc_assert (thisarginfo.vectype != NULL_TREE);
3406 /* For linear arguments, the analyze phase should have saved
3407 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3408 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3409 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3411 gcc_assert (vec_stmt);
3412 thisarginfo.linear_step
3413 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3414 thisarginfo.op
3415 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3416 thisarginfo.simd_lane_linear
3417 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3418 == boolean_true_node);
3419 /* If the loop has been peeled for alignment, we need to adjust it. */
3420 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3421 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3422 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3424 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3425 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3426 tree opt = TREE_TYPE (thisarginfo.op);
3427 bias = fold_convert (TREE_TYPE (step), bias);
3428 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3429 thisarginfo.op
3430 = fold_build2 (POINTER_TYPE_P (opt)
3431 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3432 thisarginfo.op, bias);
3435 else if (!vec_stmt
3436 && thisarginfo.dt != vect_constant_def
3437 && thisarginfo.dt != vect_external_def
3438 && loop_vinfo
3439 && TREE_CODE (op) == SSA_NAME
3440 && simple_iv (loop, loop_containing_stmt (stmt), op,
3441 &iv, false)
3442 && tree_fits_shwi_p (iv.step))
3444 thisarginfo.linear_step = tree_to_shwi (iv.step);
3445 thisarginfo.op = iv.base;
3447 else if ((thisarginfo.dt == vect_constant_def
3448 || thisarginfo.dt == vect_external_def)
3449 && POINTER_TYPE_P (TREE_TYPE (op)))
3450 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3451 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3452 linear too. */
3453 if (POINTER_TYPE_P (TREE_TYPE (op))
3454 && !thisarginfo.linear_step
3455 && !vec_stmt
3456 && thisarginfo.dt != vect_constant_def
3457 && thisarginfo.dt != vect_external_def
3458 && loop_vinfo
3459 && !slp_node
3460 && TREE_CODE (op) == SSA_NAME)
3461 vect_simd_lane_linear (op, loop, &thisarginfo);
3463 arginfo.quick_push (thisarginfo);
3466 unsigned HOST_WIDE_INT vf;
3467 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3469 if (dump_enabled_p ())
3470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3471 "not considering SIMD clones; not yet supported"
3472 " for variable-width vectors.\n");
3473 return false;
3476 unsigned int badness = 0;
3477 struct cgraph_node *bestn = NULL;
3478 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3479 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3480 else
3481 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3482 n = n->simdclone->next_clone)
3484 unsigned int this_badness = 0;
3485 if (n->simdclone->simdlen > vf
3486 || n->simdclone->nargs != nargs)
3487 continue;
3488 if (n->simdclone->simdlen < vf)
3489 this_badness += (exact_log2 (vf)
3490 - exact_log2 (n->simdclone->simdlen)) * 1024;
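/* Illustrative scoring: with vf == 8, a clone with simdlen 4 gets
   (log2 (8) - log2 (4)) * 1024 == 1024 added to its badness, so an
   exactly matching simdlen-8 clone is preferred when both are usable
   and otherwise score equally.  */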
3491 if (n->simdclone->inbranch)
3492 this_badness += 2048;
3493 int target_badness = targetm.simd_clone.usable (n);
3494 if (target_badness < 0)
3495 continue;
3496 this_badness += target_badness * 512;
3497 /* FORNOW: Have to add code to add the mask argument. */
3498 if (n->simdclone->inbranch)
3499 continue;
3500 for (i = 0; i < nargs; i++)
3502 switch (n->simdclone->args[i].arg_type)
3504 case SIMD_CLONE_ARG_TYPE_VECTOR:
3505 if (!useless_type_conversion_p
3506 (n->simdclone->args[i].orig_type,
3507 TREE_TYPE (gimple_call_arg (stmt, i))))
3508 i = -1;
3509 else if (arginfo[i].dt == vect_constant_def
3510 || arginfo[i].dt == vect_external_def
3511 || arginfo[i].linear_step)
3512 this_badness += 64;
3513 break;
3514 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3515 if (arginfo[i].dt != vect_constant_def
3516 && arginfo[i].dt != vect_external_def)
3517 i = -1;
3518 break;
3519 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3520 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3521 if (arginfo[i].dt == vect_constant_def
3522 || arginfo[i].dt == vect_external_def
3523 || (arginfo[i].linear_step
3524 != n->simdclone->args[i].linear_step))
3525 i = -1;
3526 break;
3527 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3528 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3529 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3530 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3531 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3532 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3533 /* FORNOW */
3534 i = -1;
3535 break;
3536 case SIMD_CLONE_ARG_TYPE_MASK:
3537 gcc_unreachable ();
3539 if (i == (size_t) -1)
3540 break;
3541 if (n->simdclone->args[i].alignment > arginfo[i].align)
3543 i = -1;
3544 break;
3546 if (arginfo[i].align)
3547 this_badness += (exact_log2 (arginfo[i].align)
3548 - exact_log2 (n->simdclone->args[i].alignment));
3550 if (i == (size_t) -1)
3551 continue;
3552 if (bestn == NULL || this_badness < badness)
3554 bestn = n;
3555 badness = this_badness;
3559 if (bestn == NULL)
3560 return false;
3562 for (i = 0; i < nargs; i++)
3563 if ((arginfo[i].dt == vect_constant_def
3564 || arginfo[i].dt == vect_external_def)
3565 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3567 arginfo[i].vectype
3568 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3569 i)));
3570 if (arginfo[i].vectype == NULL
3571 || (simd_clone_subparts (arginfo[i].vectype)
3572 > bestn->simdclone->simdlen))
3573 return false;
3576 fndecl = bestn->decl;
3577 nunits = bestn->simdclone->simdlen;
3578 ncopies = vf / nunits;
3580 /* If the function isn't const, only allow it in simd loops where the
3581 user has asserted that at least nunits consecutive iterations can be
3582 performed using SIMD instructions. */
3583 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3584 && gimple_vuse (stmt))
3585 return false;
3587 /* Sanity check: make sure that at least one copy of the vectorized stmt
3588 needs to be generated. */
3589 gcc_assert (ncopies >= 1);
3591 if (!vec_stmt) /* transformation not required. */
3593 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3594 for (i = 0; i < nargs; i++)
3595 if ((bestn->simdclone->args[i].arg_type
3596 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3597 || (bestn->simdclone->args[i].arg_type
3598 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3600 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3601 + 1);
3602 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3603 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3604 ? size_type_node : TREE_TYPE (arginfo[i].op);
3605 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3606 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3607 tree sll = arginfo[i].simd_lane_linear
3608 ? boolean_true_node : boolean_false_node;
3609 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
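/* Indices I*3+1 .. I*3+3 of the SIMD_CLONE_INFO vector (index 0 holds the
   chosen clone's decl) now record the original operand, its linear step and
   whether it is simd-lane linear, so this information is still available
   when the call is actually transformed.  */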
3611 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3612 if (dump_enabled_p ())
3613 dump_printf_loc (MSG_NOTE, vect_location,
3614 "=== vectorizable_simd_clone_call ===\n");
3615 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3616 return true;
3619 /* Transform. */
3621 if (dump_enabled_p ())
3622 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3624 /* Handle def. */
3625 scalar_dest = gimple_call_lhs (stmt);
3626 vec_dest = NULL_TREE;
3627 rtype = NULL_TREE;
3628 ratype = NULL_TREE;
3629 if (scalar_dest)
3631 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3632 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3633 if (TREE_CODE (rtype) == ARRAY_TYPE)
3635 ratype = rtype;
3636 rtype = TREE_TYPE (ratype);
3640 prev_stmt_info = NULL;
3641 for (j = 0; j < ncopies; ++j)
3643 /* Build argument list for the vectorized call. */
3644 if (j == 0)
3645 vargs.create (nargs);
3646 else
3647 vargs.truncate (0);
3649 for (i = 0; i < nargs; i++)
3651 unsigned int k, l, m, o;
3652 tree atype;
3653 op = gimple_call_arg (stmt, i);
3654 switch (bestn->simdclone->args[i].arg_type)
3656 case SIMD_CLONE_ARG_TYPE_VECTOR:
3657 atype = bestn->simdclone->args[i].vector_type;
3658 o = nunits / simd_clone_subparts (atype);
3659 for (m = j * o; m < (j + 1) * o; m++)
3661 if (simd_clone_subparts (atype)
3662 < simd_clone_subparts (arginfo[i].vectype))
3664 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3665 k = (simd_clone_subparts (arginfo[i].vectype)
3666 / simd_clone_subparts (atype));
3667 gcc_assert ((k & (k - 1)) == 0);
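/* The caller's vector ARGINFO[I].VECTYPE holds K of the clone's narrower
   argument vectors, with K a power of two; each piece is peeled off below
   with a BIT_FIELD_REF at bit offset (m % k) * prec.  */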
3668 if (m == 0)
3669 vec_oprnd0
3670 = vect_get_vec_def_for_operand (op, stmt);
3671 else
3673 vec_oprnd0 = arginfo[i].op;
3674 if ((m & (k - 1)) == 0)
3675 vec_oprnd0
3676 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3677 vec_oprnd0);
3679 arginfo[i].op = vec_oprnd0;
3680 vec_oprnd0
3681 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3682 bitsize_int (prec),
3683 bitsize_int ((m & (k - 1)) * prec));
3684 new_stmt
3685 = gimple_build_assign (make_ssa_name (atype),
3686 vec_oprnd0);
3687 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3688 vargs.safe_push (gimple_assign_lhs (new_stmt));
3690 else
3692 k = (simd_clone_subparts (atype)
3693 / simd_clone_subparts (arginfo[i].vectype));
3694 gcc_assert ((k & (k - 1)) == 0);
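/* Opposite case: the clone's argument vector ATYPE is wider than the
   caller's vectors, so K of them are collected into a CONSTRUCTOR of ATYPE
   (or the single vector is passed through unchanged when K == 1).  */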
3695 vec<constructor_elt, va_gc> *ctor_elts;
3696 if (k != 1)
3697 vec_alloc (ctor_elts, k);
3698 else
3699 ctor_elts = NULL;
3700 for (l = 0; l < k; l++)
3702 if (m == 0 && l == 0)
3703 vec_oprnd0
3704 = vect_get_vec_def_for_operand (op, stmt);
3705 else
3706 vec_oprnd0
3707 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3708 arginfo[i].op);
3709 arginfo[i].op = vec_oprnd0;
3710 if (k == 1)
3711 break;
3712 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3713 vec_oprnd0);
3715 if (k == 1)
3716 vargs.safe_push (vec_oprnd0);
3717 else
3719 vec_oprnd0 = build_constructor (atype, ctor_elts);
3720 new_stmt
3721 = gimple_build_assign (make_ssa_name (atype),
3722 vec_oprnd0);
3723 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3724 vargs.safe_push (gimple_assign_lhs (new_stmt));
3728 break;
3729 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3730 vargs.safe_push (op);
3731 break;
3732 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3733 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3734 if (j == 0)
3736 gimple_seq stmts;
3737 arginfo[i].op
3738 = force_gimple_operand (arginfo[i].op, &stmts, true,
3739 NULL_TREE);
3740 if (stmts != NULL)
3742 basic_block new_bb;
3743 edge pe = loop_preheader_edge (loop);
3744 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3745 gcc_assert (!new_bb);
3747 if (arginfo[i].simd_lane_linear)
3749 vargs.safe_push (arginfo[i].op);
3750 break;
3752 tree phi_res = copy_ssa_name (op);
3753 gphi *new_phi = create_phi_node (phi_res, loop->header);
3754 set_vinfo_for_stmt (new_phi,
3755 new_stmt_vec_info (new_phi, loop_vinfo));
3756 add_phi_arg (new_phi, arginfo[i].op,
3757 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3758 enum tree_code code
3759 = POINTER_TYPE_P (TREE_TYPE (op))
3760 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3761 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3762 ? sizetype : TREE_TYPE (op);
3763 widest_int cst
3764 = wi::mul (bestn->simdclone->args[i].linear_step,
3765 ncopies * nunits);
3766 tree tcst = wide_int_to_tree (type, cst);
3767 tree phi_arg = copy_ssa_name (op);
3768 new_stmt
3769 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3770 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3771 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3772 set_vinfo_for_stmt (new_stmt,
3773 new_stmt_vec_info (new_stmt, loop_vinfo));
3774 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3775 UNKNOWN_LOCATION);
3776 arginfo[i].op = phi_res;
3777 vargs.safe_push (phi_res);
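/* The PHI built above turns the linear argument into an induction
   variable: it starts at the original value and is bumped by
   LINEAR_STEP * ncopies * nunits (i.e. LINEAR_STEP * VF) once per loop
   iteration.  The copies for J > 0, handled below, simply add
   LINEAR_STEP * J * nunits to the current value.  */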
3779 else
3781 enum tree_code code
3782 = POINTER_TYPE_P (TREE_TYPE (op))
3783 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3784 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3785 ? sizetype : TREE_TYPE (op);
3786 widest_int cst
3787 = wi::mul (bestn->simdclone->args[i].linear_step,
3788 j * nunits);
3789 tree tcst = wide_int_to_tree (type, cst);
3790 new_temp = make_ssa_name (TREE_TYPE (op));
3791 new_stmt = gimple_build_assign (new_temp, code,
3792 arginfo[i].op, tcst);
3793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3794 vargs.safe_push (new_temp);
3796 break;
3797 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3798 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3799 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3800 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3801 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3802 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3803 default:
3804 gcc_unreachable ();
3808 new_stmt = gimple_build_call_vec (fndecl, vargs);
3809 if (vec_dest)
3811 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
3812 if (ratype)
3813 new_temp = create_tmp_var (ratype);
3814 else if (simd_clone_subparts (vectype)
3815 == simd_clone_subparts (rtype))
3816 new_temp = make_ssa_name (vec_dest, new_stmt);
3817 else
3818 new_temp = make_ssa_name (rtype, new_stmt);
3819 gimple_call_set_lhs (new_stmt, new_temp);
3821 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3823 if (vec_dest)
3825 if (simd_clone_subparts (vectype) < nunits)
3827 unsigned int k, l;
3828 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3829 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
3830 k = nunits / simd_clone_subparts (vectype);
3831 gcc_assert ((k & (k - 1)) == 0);
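/* The clone returned K vectors' worth of data in a single object (an array
   when RATYPE is set, otherwise one wide vector); split it into K vectors
   of VECTYPE using MEM_REFs resp. BIT_FIELD_REFs, each becoming one copy
   of the vectorized stmt.  */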
3832 for (l = 0; l < k; l++)
3834 tree t;
3835 if (ratype)
3837 t = build_fold_addr_expr (new_temp);
3838 t = build2 (MEM_REF, vectype, t,
3839 build_int_cst (TREE_TYPE (t), l * bytes));
3841 else
3842 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3843 bitsize_int (prec), bitsize_int (l * prec));
3844 new_stmt
3845 = gimple_build_assign (make_ssa_name (vectype), t);
3846 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3847 if (j == 0 && l == 0)
3848 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3849 else
3850 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3852 prev_stmt_info = vinfo_for_stmt (new_stmt);
3855 if (ratype)
3857 tree clobber = build_constructor (ratype, NULL);
3858 TREE_THIS_VOLATILE (clobber) = 1;
3859 new_stmt = gimple_build_assign (new_temp, clobber);
3860 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3862 continue;
3864 else if (simd_clone_subparts (vectype) > nunits)
3866 unsigned int k = (simd_clone_subparts (vectype)
3867 / simd_clone_subparts (rtype));
3868 gcc_assert ((k & (k - 1)) == 0);
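/* Here one vector of VECTYPE needs the results of K consecutive clone
   calls: collect them in RET_CTOR_ELTS and build the CONSTRUCTOR only on
   every K-th copy (see the (j & (k - 1)) != k - 1 check below).  */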
3869 if ((j & (k - 1)) == 0)
3870 vec_alloc (ret_ctor_elts, k);
3871 if (ratype)
3873 unsigned int m, o = nunits / simd_clone_subparts (rtype);
3874 for (m = 0; m < o; m++)
3876 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3877 size_int (m), NULL_TREE, NULL_TREE);
3878 new_stmt
3879 = gimple_build_assign (make_ssa_name (rtype), tem);
3880 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3881 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3882 gimple_assign_lhs (new_stmt));
3884 tree clobber = build_constructor (ratype, NULL);
3885 TREE_THIS_VOLATILE (clobber) = 1;
3886 new_stmt = gimple_build_assign (new_temp, clobber);
3887 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3889 else
3890 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3891 if ((j & (k - 1)) != k - 1)
3892 continue;
3893 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3894 new_stmt
3895 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3896 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3898 if ((unsigned) j == k - 1)
3899 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3900 else
3901 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3903 prev_stmt_info = vinfo_for_stmt (new_stmt);
3904 continue;
3906 else if (ratype)
3908 tree t = build_fold_addr_expr (new_temp);
3909 t = build2 (MEM_REF, vectype, t,
3910 build_int_cst (TREE_TYPE (t), 0));
3911 new_stmt
3912 = gimple_build_assign (make_ssa_name (vec_dest), t);
3913 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3914 tree clobber = build_constructor (ratype, NULL);
3915 TREE_THIS_VOLATILE (clobber) = 1;
3916 vect_finish_stmt_generation (stmt,
3917 gimple_build_assign (new_temp,
3918 clobber), gsi);
3922 if (j == 0)
3923 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3924 else
3925 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3927 prev_stmt_info = vinfo_for_stmt (new_stmt);
3930 vargs.release ();
3932 /* The call in STMT might prevent it from being removed in dce.
3933 We cannot remove it here, however, due to the way the ssa name
3934 it defines is mapped to the new definition. So just replace the
3935 rhs of the statement with something harmless. */
3937 if (slp_node)
3938 return true;
3940 if (scalar_dest)
3942 type = TREE_TYPE (scalar_dest);
3943 if (is_pattern_stmt_p (stmt_info))
3944 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3945 else
3946 lhs = gimple_call_lhs (stmt);
3947 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3949 else
3950 new_stmt = gimple_build_nop ();
3951 set_vinfo_for_stmt (new_stmt, stmt_info);
3952 set_vinfo_for_stmt (stmt, NULL);
3953 STMT_VINFO_STMT (stmt_info) = new_stmt;
3954 gsi_replace (gsi, new_stmt, true);
3955 unlink_stmt_vdef (stmt);
3957 return true;
3961 /* Function vect_gen_widened_results_half
3963 Create a vector stmt whose code is CODE, whose number of operands is
3964 OP_TYPE and whose result variable is VEC_DEST; its arguments are
3965 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3966 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3967 needs to be created (DECL is a function-decl of a target-builtin).
3968 STMT is the original scalar stmt that we are vectorizing. */
3970 static gimple *
3971 vect_gen_widened_results_half (enum tree_code code,
3972 tree decl,
3973 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3974 tree vec_dest, gimple_stmt_iterator *gsi,
3975 gimple *stmt)
3977 gimple *new_stmt;
3978 tree new_temp;
3980 /* Generate half of the widened result: */
3981 if (code == CALL_EXPR)
3983 /* Target specific support */
3984 if (op_type == binary_op)
3985 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3986 else
3987 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3988 new_temp = make_ssa_name (vec_dest, new_stmt);
3989 gimple_call_set_lhs (new_stmt, new_temp);
3991 else
3993 /* Generic support */
3994 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3995 if (op_type != binary_op)
3996 vec_oprnd1 = NULL;
3997 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3998 new_temp = make_ssa_name (vec_dest, new_stmt);
3999 gimple_assign_set_lhs (new_stmt, new_temp);
4001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4003 return new_stmt;
4007 /* Get vectorized definitions for loop-based vectorization. For the first
4008 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4009 the scalar operand), and for the rest we get a copy with
4010 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4011 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4012 The vectors are collected into VEC_OPRNDS. */
4014 static void
4015 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4016 vec<tree> *vec_oprnds, int multi_step_cvt)
4018 tree vec_oprnd;
4020 /* Get first vector operand. */
4021 /* All the vector operands except the very first one (which is the
4022 scalar operand) are stmt copies. */
4023 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4024 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4025 else
4026 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4028 vec_oprnds->quick_push (vec_oprnd);
4030 /* Get second vector operand. */
4031 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4032 vec_oprnds->quick_push (vec_oprnd);
4034 *oprnd = vec_oprnd;
4036 /* For conversion in multiple steps, continue to get operands
4037 recursively. */
4038 if (multi_step_cvt)
4039 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4043 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4044 For multi-step conversions store the resulting vectors and call the function
4045 recursively. */
4047 static void
4048 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4049 int multi_step_cvt, gimple *stmt,
4050 vec<tree> vec_dsts,
4051 gimple_stmt_iterator *gsi,
4052 slp_tree slp_node, enum tree_code code,
4053 stmt_vec_info *prev_stmt_info)
4055 unsigned int i;
4056 tree vop0, vop1, new_tmp, vec_dest;
4057 gimple *new_stmt;
4058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4060 vec_dest = vec_dsts.pop ();
4062 for (i = 0; i < vec_oprnds->length (); i += 2)
4064 /* Create demotion operation. */
4065 vop0 = (*vec_oprnds)[i];
4066 vop1 = (*vec_oprnds)[i + 1];
4067 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4068 new_tmp = make_ssa_name (vec_dest, new_stmt);
4069 gimple_assign_set_lhs (new_stmt, new_tmp);
4070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4072 if (multi_step_cvt)
4073 /* Store the resulting vector for next recursive call. */
4074 (*vec_oprnds)[i/2] = new_tmp;
4075 else
4077 /* This is the last step of the conversion sequence. Store the
4078 vectors in SLP_NODE or in vector info of the scalar statement
4079 (or in STMT_VINFO_RELATED_STMT chain). */
4080 if (slp_node)
4081 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4082 else
4084 if (!*prev_stmt_info)
4085 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4086 else
4087 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4089 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4094 /* For multi-step demotion operations we first generate demotion operations
4095 from the source type to the intermediate types, and then combine the
4096 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4097 type. */
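/* For instance, a multi-step int -> char demotion would typically pack
   pairs of int vectors into short vectors and then pairs of short vectors
   into char vectors, halving the number of vectors at each recursion level
   (the exact intermediate types depend on what the target supports).  */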
4098 if (multi_step_cvt)
4100 /* At each level of recursion we have half of the operands we had at the
4101 previous level. */
4102 vec_oprnds->truncate ((i+1)/2);
4103 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4104 stmt, vec_dsts, gsi, slp_node,
4105 VEC_PACK_TRUNC_EXPR,
4106 prev_stmt_info);
4109 vec_dsts.quick_push (vec_dest);
4113 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4114 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4115 the resulting vectors and call the function recursively. */
4117 static void
4118 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4119 vec<tree> *vec_oprnds1,
4120 gimple *stmt, tree vec_dest,
4121 gimple_stmt_iterator *gsi,
4122 enum tree_code code1,
4123 enum tree_code code2, tree decl1,
4124 tree decl2, int op_type)
4126 int i;
4127 tree vop0, vop1, new_tmp1, new_tmp2;
4128 gimple *new_stmt1, *new_stmt2;
4129 vec<tree> vec_tmp = vNULL;
4131 vec_tmp.create (vec_oprnds0->length () * 2);
4132 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4134 if (op_type == binary_op)
4135 vop1 = (*vec_oprnds1)[i];
4136 else
4137 vop1 = NULL_TREE;
4139 /* Generate the two halves of the promotion operation. */
4140 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4141 op_type, vec_dest, gsi, stmt);
4142 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4143 op_type, vec_dest, gsi, stmt);
4144 if (is_gimple_call (new_stmt1))
4146 new_tmp1 = gimple_call_lhs (new_stmt1);
4147 new_tmp2 = gimple_call_lhs (new_stmt2);
4149 else
4151 new_tmp1 = gimple_assign_lhs (new_stmt1);
4152 new_tmp2 = gimple_assign_lhs (new_stmt2);
4155 /* Store the results for the next step. */
4156 vec_tmp.quick_push (new_tmp1);
4157 vec_tmp.quick_push (new_tmp2);
4160 vec_oprnds0->release ();
4161 *vec_oprnds0 = vec_tmp;
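/* On return VEC_OPRNDS0 holds twice as many vectors as before: the two
   widened half-results produced for each input vector, ready to be fed to
   the next (possibly multi-step) promotion level.  */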
4165 /* Check if STMT performs a conversion operation that can be vectorized.
4166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4167 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4170 static bool
4171 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4172 gimple **vec_stmt, slp_tree slp_node)
4174 tree vec_dest;
4175 tree scalar_dest;
4176 tree op0, op1 = NULL_TREE;
4177 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4178 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4180 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4181 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4182 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4183 tree new_temp;
4184 gimple *def_stmt;
4185 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4186 int ndts = 2;
4187 gimple *new_stmt = NULL;
4188 stmt_vec_info prev_stmt_info;
4189 poly_uint64 nunits_in;
4190 poly_uint64 nunits_out;
4191 tree vectype_out, vectype_in;
4192 int ncopies, i, j;
4193 tree lhs_type, rhs_type;
4194 enum { NARROW, NONE, WIDEN } modifier;
4195 vec<tree> vec_oprnds0 = vNULL;
4196 vec<tree> vec_oprnds1 = vNULL;
4197 tree vop0;
4198 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4199 vec_info *vinfo = stmt_info->vinfo;
4200 int multi_step_cvt = 0;
4201 vec<tree> interm_types = vNULL;
4202 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4203 int op_type;
4204 unsigned short fltsz;
4206 /* Is STMT a vectorizable conversion? */
4208 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4209 return false;
4211 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4212 && ! vec_stmt)
4213 return false;
4215 if (!is_gimple_assign (stmt))
4216 return false;
4218 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4219 return false;
4221 code = gimple_assign_rhs_code (stmt);
4222 if (!CONVERT_EXPR_CODE_P (code)
4223 && code != FIX_TRUNC_EXPR
4224 && code != FLOAT_EXPR
4225 && code != WIDEN_MULT_EXPR
4226 && code != WIDEN_LSHIFT_EXPR)
4227 return false;
4229 op_type = TREE_CODE_LENGTH (code);
4231 /* Check types of lhs and rhs. */
4232 scalar_dest = gimple_assign_lhs (stmt);
4233 lhs_type = TREE_TYPE (scalar_dest);
4234 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4236 op0 = gimple_assign_rhs1 (stmt);
4237 rhs_type = TREE_TYPE (op0);
4239 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4240 && !((INTEGRAL_TYPE_P (lhs_type)
4241 && INTEGRAL_TYPE_P (rhs_type))
4242 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4243 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4244 return false;
4246 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4247 && ((INTEGRAL_TYPE_P (lhs_type)
4248 && !type_has_mode_precision_p (lhs_type))
4249 || (INTEGRAL_TYPE_P (rhs_type)
4250 && !type_has_mode_precision_p (rhs_type))))
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4254 "type conversion to/from bit-precision unsupported."
4255 "\n");
4256 return false;
4259 /* Check the operands of the operation. */
4260 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4262 if (dump_enabled_p ())
4263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4264 "use not simple.\n");
4265 return false;
4267 if (op_type == binary_op)
4269 bool ok;
4271 op1 = gimple_assign_rhs2 (stmt);
4272 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4273 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4274 OP1. */
4275 if (CONSTANT_CLASS_P (op0))
4276 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4277 else
4278 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4280 if (!ok)
4282 if (dump_enabled_p ())
4283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4284 "use not simple.\n");
4285 return false;
4289 /* If op0 is an external or constant def, use a vector type of
4290 the same size as the output vector type. */
4291 if (!vectype_in)
4292 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4293 if (vec_stmt)
4294 gcc_assert (vectype_in);
4295 if (!vectype_in)
4297 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "no vectype for scalar type ");
4301 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4302 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4305 return false;
4308 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4309 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4311 if (dump_enabled_p ())
4313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4314 "can't convert between boolean and non "
4315 "boolean vectors");
4316 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4317 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4320 return false;
4323 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4324 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4325 if (known_eq (nunits_out, nunits_in))
4326 modifier = NONE;
4327 else if (multiple_p (nunits_out, nunits_in))
4328 modifier = NARROW;
4329 else
4331 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4332 modifier = WIDEN;
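/* NONE: the element counts match and a single conversion per vector is
   enough.  NARROW: the output vector has more (narrower) elements than the
   input, so several input vectors are packed into each output vector.
   WIDEN: the output vector has fewer (wider) elements, so each input
   vector is unpacked into several output vectors.  */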
4335 /* Multiple types in SLP are handled by creating the appropriate number of
4336 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4337 case of SLP. */
4338 if (slp_node)
4339 ncopies = 1;
4340 else if (modifier == NARROW)
4341 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4342 else
4343 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4345 /* Sanity check: make sure that at least one copy of the vectorized stmt
4346 needs to be generated. */
4347 gcc_assert (ncopies >= 1);
4349 bool found_mode = false;
4350 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4351 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4352 opt_scalar_mode rhs_mode_iter;
4354 /* Supportable by target? */
4355 switch (modifier)
4357 case NONE:
4358 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4359 return false;
4360 if (supportable_convert_operation (code, vectype_out, vectype_in,
4361 &decl1, &code1))
4362 break;
4363 /* FALLTHRU */
4364 unsupported:
4365 if (dump_enabled_p ())
4366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4367 "conversion not supported by target.\n");
4368 return false;
4370 case WIDEN:
4371 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4372 &code1, &code2, &multi_step_cvt,
4373 &interm_types))
4375 /* A binary widening operation can only be supported directly by the
4376 architecture. */
4377 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4378 break;
4381 if (code != FLOAT_EXPR
4382 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4383 goto unsupported;
4385 fltsz = GET_MODE_SIZE (lhs_mode);
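/* There is no direct support, so try to split the integer -> float
   widening into two steps through a wider integer mode; e.g. a
   short -> double conversion may become short -> int (a widening step)
   followed by int -> double, provided both halves are supported.  */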
4386 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4388 rhs_mode = rhs_mode_iter.require ();
4389 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4390 break;
4392 cvt_type
4393 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4394 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4395 if (cvt_type == NULL_TREE)
4396 goto unsupported;
4398 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4400 if (!supportable_convert_operation (code, vectype_out,
4401 cvt_type, &decl1, &codecvt1))
4402 goto unsupported;
4404 else if (!supportable_widening_operation (code, stmt, vectype_out,
4405 cvt_type, &codecvt1,
4406 &codecvt2, &multi_step_cvt,
4407 &interm_types))
4408 continue;
4409 else
4410 gcc_assert (multi_step_cvt == 0);
4412 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4413 vectype_in, &code1, &code2,
4414 &multi_step_cvt, &interm_types))
4416 found_mode = true;
4417 break;
4421 if (!found_mode)
4422 goto unsupported;
4424 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4425 codecvt2 = ERROR_MARK;
4426 else
4428 multi_step_cvt++;
4429 interm_types.safe_push (cvt_type);
4430 cvt_type = NULL_TREE;
4432 break;
4434 case NARROW:
4435 gcc_assert (op_type == unary_op);
4436 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4437 &code1, &multi_step_cvt,
4438 &interm_types))
4439 break;
4441 if (code != FIX_TRUNC_EXPR
4442 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4443 goto unsupported;
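/* Try a two-step narrowing instead; e.g. a double -> short conversion may
   be done as a FIX_TRUNC from double to a same-width integer type followed
   by a (possibly multi-step) packing of that integer type down to short,
   when both pieces are supported.  */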
4445 cvt_type
4446 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4447 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4448 if (cvt_type == NULL_TREE)
4449 goto unsupported;
4450 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4451 &decl1, &codecvt1))
4452 goto unsupported;
4453 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4454 &code1, &multi_step_cvt,
4455 &interm_types))
4456 break;
4457 goto unsupported;
4459 default:
4460 gcc_unreachable ();
4463 if (!vec_stmt) /* transformation not required. */
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_NOTE, vect_location,
4467 "=== vectorizable_conversion ===\n");
4468 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4470 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4471 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4473 else if (modifier == NARROW)
4475 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4476 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4478 else
4480 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4481 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4483 interm_types.release ();
4484 return true;
4487 /* Transform. */
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_NOTE, vect_location,
4490 "transform conversion. ncopies = %d.\n", ncopies);
4492 if (op_type == binary_op)
4494 if (CONSTANT_CLASS_P (op0))
4495 op0 = fold_convert (TREE_TYPE (op1), op0);
4496 else if (CONSTANT_CLASS_P (op1))
4497 op1 = fold_convert (TREE_TYPE (op0), op1);
4500 /* In case of multi-step conversion, we first generate conversion operations
4501 to the intermediate types, and then from those types to the final one.
4502 We create vector destinations for the intermediate types (TYPES) received
4503 from supportable_*_operation, and store them in the correct order
4504 for future use in vect_create_vectorized_*_stmts (). */
4505 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4506 vec_dest = vect_create_destination_var (scalar_dest,
4507 (cvt_type && modifier == WIDEN)
4508 ? cvt_type : vectype_out);
4509 vec_dsts.quick_push (vec_dest);
4511 if (multi_step_cvt)
4513 for (i = interm_types.length () - 1;
4514 interm_types.iterate (i, &intermediate_type); i--)
4516 vec_dest = vect_create_destination_var (scalar_dest,
4517 intermediate_type);
4518 vec_dsts.quick_push (vec_dest);
4522 if (cvt_type)
4523 vec_dest = vect_create_destination_var (scalar_dest,
4524 modifier == WIDEN
4525 ? vectype_out : cvt_type);
4527 if (!slp_node)
4529 if (modifier == WIDEN)
4531 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4532 if (op_type == binary_op)
4533 vec_oprnds1.create (1);
4535 else if (modifier == NARROW)
4536 vec_oprnds0.create (
4537 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4539 else if (code == WIDEN_LSHIFT_EXPR)
4540 vec_oprnds1.create (slp_node->vec_stmts_size);
4542 last_oprnd = op0;
4543 prev_stmt_info = NULL;
4544 switch (modifier)
4546 case NONE:
4547 for (j = 0; j < ncopies; j++)
4549 if (j == 0)
4550 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4551 else
4552 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4554 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4556 /* Arguments are ready, create the new vector stmt. */
4557 if (code1 == CALL_EXPR)
4559 new_stmt = gimple_build_call (decl1, 1, vop0);
4560 new_temp = make_ssa_name (vec_dest, new_stmt);
4561 gimple_call_set_lhs (new_stmt, new_temp);
4563 else
4565 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4566 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4567 new_temp = make_ssa_name (vec_dest, new_stmt);
4568 gimple_assign_set_lhs (new_stmt, new_temp);
4571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4572 if (slp_node)
4573 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4574 else
4576 if (!prev_stmt_info)
4577 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4578 else
4579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4580 prev_stmt_info = vinfo_for_stmt (new_stmt);
4584 break;
4586 case WIDEN:
4587 /* In case the vectorization factor (VF) is bigger than the number
4588 of elements that we can fit in a vectype (nunits), we have to
4589 generate more than one vector stmt, i.e., we need to "unroll"
4590 the vector stmt by a factor VF/nunits. */
4591 for (j = 0; j < ncopies; j++)
4593 /* Handle uses. */
4594 if (j == 0)
4596 if (slp_node)
4598 if (code == WIDEN_LSHIFT_EXPR)
4600 unsigned int k;
4602 vec_oprnd1 = op1;
4603 /* Store vec_oprnd1 for every vector stmt to be created
4604 for SLP_NODE. We check during the analysis that all
4605 the shift arguments are the same. */
4606 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4607 vec_oprnds1.quick_push (vec_oprnd1);
4609 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4610 slp_node);
4612 else
4613 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4614 &vec_oprnds1, slp_node);
4616 else
4618 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4619 vec_oprnds0.quick_push (vec_oprnd0);
4620 if (op_type == binary_op)
4622 if (code == WIDEN_LSHIFT_EXPR)
4623 vec_oprnd1 = op1;
4624 else
4625 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4626 vec_oprnds1.quick_push (vec_oprnd1);
4630 else
4632 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4633 vec_oprnds0.truncate (0);
4634 vec_oprnds0.quick_push (vec_oprnd0);
4635 if (op_type == binary_op)
4637 if (code == WIDEN_LSHIFT_EXPR)
4638 vec_oprnd1 = op1;
4639 else
4640 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4641 vec_oprnd1);
4642 vec_oprnds1.truncate (0);
4643 vec_oprnds1.quick_push (vec_oprnd1);
4647 /* Arguments are ready. Create the new vector stmts. */
4648 for (i = multi_step_cvt; i >= 0; i--)
4650 tree this_dest = vec_dsts[i];
4651 enum tree_code c1 = code1, c2 = code2;
4652 if (i == 0 && codecvt2 != ERROR_MARK)
4654 c1 = codecvt1;
4655 c2 = codecvt2;
4657 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4658 &vec_oprnds1,
4659 stmt, this_dest, gsi,
4660 c1, c2, decl1, decl2,
4661 op_type);
4664 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4666 if (cvt_type)
4668 if (codecvt1 == CALL_EXPR)
4670 new_stmt = gimple_build_call (decl1, 1, vop0);
4671 new_temp = make_ssa_name (vec_dest, new_stmt);
4672 gimple_call_set_lhs (new_stmt, new_temp);
4674 else
4676 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4677 new_temp = make_ssa_name (vec_dest);
4678 new_stmt = gimple_build_assign (new_temp, codecvt1,
4679 vop0);
4682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4684 else
4685 new_stmt = SSA_NAME_DEF_STMT (vop0);
4687 if (slp_node)
4688 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4689 else
4691 if (!prev_stmt_info)
4692 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4693 else
4694 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4695 prev_stmt_info = vinfo_for_stmt (new_stmt);
4700 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4701 break;
4703 case NARROW:
4704 /* In case the vectorization factor (VF) is bigger than the number
4705 of elements that we can fit in a vectype (nunits), we have to
4706 generate more than one vector stmt, i.e., we need to "unroll"
4707 the vector stmt by a factor VF/nunits. */
4708 for (j = 0; j < ncopies; j++)
4710 /* Handle uses. */
4711 if (slp_node)
4712 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4713 slp_node);
4714 else
4716 vec_oprnds0.truncate (0);
4717 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4718 vect_pow2 (multi_step_cvt) - 1);
4721 /* Arguments are ready. Create the new vector stmts. */
4722 if (cvt_type)
4723 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4725 if (codecvt1 == CALL_EXPR)
4727 new_stmt = gimple_build_call (decl1, 1, vop0);
4728 new_temp = make_ssa_name (vec_dest, new_stmt);
4729 gimple_call_set_lhs (new_stmt, new_temp);
4731 else
4733 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4734 new_temp = make_ssa_name (vec_dest);
4735 new_stmt = gimple_build_assign (new_temp, codecvt1,
4736 vop0);
4739 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4740 vec_oprnds0[i] = new_temp;
4743 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4744 stmt, vec_dsts, gsi,
4745 slp_node, code1,
4746 &prev_stmt_info);
4749 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4750 break;
4753 vec_oprnds0.release ();
4754 vec_oprnds1.release ();
4755 interm_types.release ();
4757 return true;
4761 /* Function vectorizable_assignment.
4763 Check if STMT performs an assignment (copy) that can be vectorized.
4764 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4765 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4766 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4768 static bool
4769 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4770 gimple **vec_stmt, slp_tree slp_node)
4772 tree vec_dest;
4773 tree scalar_dest;
4774 tree op;
4775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4776 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4777 tree new_temp;
4778 gimple *def_stmt;
4779 enum vect_def_type dt[1] = {vect_unknown_def_type};
4780 int ndts = 1;
4781 int ncopies;
4782 int i, j;
4783 vec<tree> vec_oprnds = vNULL;
4784 tree vop;
4785 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4786 vec_info *vinfo = stmt_info->vinfo;
4787 gimple *new_stmt = NULL;
4788 stmt_vec_info prev_stmt_info = NULL;
4789 enum tree_code code;
4790 tree vectype_in;
4792 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4793 return false;
4795 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4796 && ! vec_stmt)
4797 return false;
4799 /* Is vectorizable assignment? */
4800 if (!is_gimple_assign (stmt))
4801 return false;
4803 scalar_dest = gimple_assign_lhs (stmt);
4804 if (TREE_CODE (scalar_dest) != SSA_NAME)
4805 return false;
4807 code = gimple_assign_rhs_code (stmt);
4808 if (gimple_assign_single_p (stmt)
4809 || code == PAREN_EXPR
4810 || CONVERT_EXPR_CODE_P (code))
4811 op = gimple_assign_rhs1 (stmt);
4812 else
4813 return false;
4815 if (code == VIEW_CONVERT_EXPR)
4816 op = TREE_OPERAND (op, 0);
4818 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4819 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4821 /* Multiple types in SLP are handled by creating the appropriate number of
4822 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4823 case of SLP. */
4824 if (slp_node)
4825 ncopies = 1;
4826 else
4827 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4829 gcc_assert (ncopies >= 1);
4831 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4833 if (dump_enabled_p ())
4834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4835 "use not simple.\n");
4836 return false;
4839 /* We can handle NOP_EXPR conversions that do not change the number
4840 of elements or the vector size. */
4841 if ((CONVERT_EXPR_CODE_P (code)
4842 || code == VIEW_CONVERT_EXPR)
4843 && (!vectype_in
4844 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
4845 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
4846 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4847 return false;
4849 /* We do not handle bit-precision changes. */
4850 if ((CONVERT_EXPR_CODE_P (code)
4851 || code == VIEW_CONVERT_EXPR)
4852 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4853 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4854 || !type_has_mode_precision_p (TREE_TYPE (op)))
4855 /* But a conversion that does not change the bit-pattern is ok. */
4856 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4857 > TYPE_PRECISION (TREE_TYPE (op)))
4858 && TYPE_UNSIGNED (TREE_TYPE (op)))
4859 /* Conversion between boolean types of different sizes is
4860 a simple assignment in case their vectypes are the same
4861 boolean vectors. */
4862 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4863 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4865 if (dump_enabled_p ())
4866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4867 "type conversion to/from bit-precision "
4868 "unsupported.\n");
4869 return false;
4872 if (!vec_stmt) /* transformation not required. */
4874 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4875 if (dump_enabled_p ())
4876 dump_printf_loc (MSG_NOTE, vect_location,
4877 "=== vectorizable_assignment ===\n");
4878 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4879 return true;
4882 /* Transform. */
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4886 /* Handle def. */
4887 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4889 /* Handle use. */
4890 for (j = 0; j < ncopies; j++)
4892 /* Handle uses. */
4893 if (j == 0)
4894 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4895 else
4896 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4898 /* Arguments are ready. Create the new vector stmt. */
4899 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4901 if (CONVERT_EXPR_CODE_P (code)
4902 || code == VIEW_CONVERT_EXPR)
4903 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4904 new_stmt = gimple_build_assign (vec_dest, vop);
4905 new_temp = make_ssa_name (vec_dest, new_stmt);
4906 gimple_assign_set_lhs (new_stmt, new_temp);
4907 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4908 if (slp_node)
4909 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4912 if (slp_node)
4913 continue;
4915 if (j == 0)
4916 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4917 else
4918 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4920 prev_stmt_info = vinfo_for_stmt (new_stmt);
4923 vec_oprnds.release ();
4924 return true;
4928 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4929 either as shift by a scalar or by a vector. */
4931 bool
4932 vect_supportable_shift (enum tree_code code, tree scalar_type)
4935 machine_mode vec_mode;
4936 optab optab;
4937 int icode;
4938 tree vectype;
4940 vectype = get_vectype_for_scalar_type (scalar_type);
4941 if (!vectype)
4942 return false;
4944 optab = optab_for_tree_code (code, vectype, optab_scalar);
4945 if (!optab
4946 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4948 optab = optab_for_tree_code (code, vectype, optab_vector);
4949 if (!optab
4950 || (optab_handler (optab, TYPE_MODE (vectype))
4951 == CODE_FOR_nothing))
4952 return false;
4955 vec_mode = TYPE_MODE (vectype);
4956 icode = (int) optab_handler (optab, vec_mode);
4957 if (icode == CODE_FOR_nothing)
4958 return false;
4960 return true;
4964 /* Function vectorizable_shift.
4966 Check if STMT performs a shift operation that can be vectorized.
4967 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4968 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4969 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4971 static bool
4972 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4973 gimple **vec_stmt, slp_tree slp_node)
4975 tree vec_dest;
4976 tree scalar_dest;
4977 tree op0, op1 = NULL;
4978 tree vec_oprnd1 = NULL_TREE;
4979 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4980 tree vectype;
4981 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4982 enum tree_code code;
4983 machine_mode vec_mode;
4984 tree new_temp;
4985 optab optab;
4986 int icode;
4987 machine_mode optab_op2_mode;
4988 gimple *def_stmt;
4989 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4990 int ndts = 2;
4991 gimple *new_stmt = NULL;
4992 stmt_vec_info prev_stmt_info;
4993 poly_uint64 nunits_in;
4994 poly_uint64 nunits_out;
4995 tree vectype_out;
4996 tree op1_vectype;
4997 int ncopies;
4998 int j, i;
4999 vec<tree> vec_oprnds0 = vNULL;
5000 vec<tree> vec_oprnds1 = vNULL;
5001 tree vop0, vop1;
5002 unsigned int k;
5003 bool scalar_shift_arg = true;
5004 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5005 vec_info *vinfo = stmt_info->vinfo;
5007 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5008 return false;
5010 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5011 && ! vec_stmt)
5012 return false;
5014 /* Is STMT a vectorizable binary/unary operation? */
5015 if (!is_gimple_assign (stmt))
5016 return false;
5018 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5019 return false;
5021 code = gimple_assign_rhs_code (stmt);
5023 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5024 || code == RROTATE_EXPR))
5025 return false;
5027 scalar_dest = gimple_assign_lhs (stmt);
5028 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5029 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5031 if (dump_enabled_p ())
5032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5033 "bit-precision shifts not supported.\n");
5034 return false;
5037 op0 = gimple_assign_rhs1 (stmt);
5038 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5042 "use not simple.\n");
5043 return false;
5045 /* If op0 is an external or constant def, use a vector type with
5046 the same size as the output vector type. */
5047 if (!vectype)
5048 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5049 if (vec_stmt)
5050 gcc_assert (vectype);
5051 if (!vectype)
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5055 "no vectype for scalar type\n");
5056 return false;
5059 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5060 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5061 if (maybe_ne (nunits_out, nunits_in))
5062 return false;
5064 op1 = gimple_assign_rhs2 (stmt);
5065 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "use not simple.\n");
5070 return false;
5073 /* Multiple types in SLP are handled by creating the appropriate number of
5074 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5075 case of SLP. */
5076 if (slp_node)
5077 ncopies = 1;
5078 else
5079 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5081 gcc_assert (ncopies >= 1);
5083 /* Determine whether the shift amount is a vector or a scalar. If the
5084 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5086 if ((dt[1] == vect_internal_def
5087 || dt[1] == vect_induction_def)
5088 && !slp_node)
5089 scalar_shift_arg = false;
5090 else if (dt[1] == vect_constant_def
5091 || dt[1] == vect_external_def
5092 || dt[1] == vect_internal_def)
5094 /* In SLP we need to check whether the shift count is the same
5095 in all the stmts; in loops, if it is a constant or invariant,
5096 it is always a scalar shift. */
5097 if (slp_node)
5099 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5100 gimple *slpstmt;
5102 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5103 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5104 scalar_shift_arg = false;
5107 /* If the shift amount is computed by a pattern stmt we cannot
5108 use the scalar amount directly, so give up and use a vector
5109 shift. */
5110 if (dt[1] == vect_internal_def)
5112 gimple *def = SSA_NAME_DEF_STMT (op1);
5113 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5114 scalar_shift_arg = false;
5117 else
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5121 "operand mode requires invariant argument.\n");
5122 return false;
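/* At this point SCALAR_SHIFT_ARG says whether a single scalar shift amount
   can be shared by all vector lanes, in which case the vector-by-scalar
   optab is tried first below, or whether each lane needs its own amount,
   in which case the vector-by-vector optab must be used.  */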
5125 /* Vector shifted by vector. */
5126 if (!scalar_shift_arg)
5128 optab = optab_for_tree_code (code, vectype, optab_vector);
5129 if (dump_enabled_p ())
5130 dump_printf_loc (MSG_NOTE, vect_location,
5131 "vector/vector shift/rotate found.\n");
5133 if (!op1_vectype)
5134 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5135 if (op1_vectype == NULL_TREE
5136 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5138 if (dump_enabled_p ())
5139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5140 "unusable type for last operand in"
5141 " vector/vector shift/rotate.\n");
5142 return false;
5145 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5146 whether it has a vector-shifted-by-vector insn. */
5147 else
5149 optab = optab_for_tree_code (code, vectype, optab_scalar);
5150 if (optab
5151 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5153 if (dump_enabled_p ())
5154 dump_printf_loc (MSG_NOTE, vect_location,
5155 "vector/scalar shift/rotate found.\n");
5157 else
5159 optab = optab_for_tree_code (code, vectype, optab_vector);
5160 if (optab
5161 && (optab_handler (optab, TYPE_MODE (vectype))
5162 != CODE_FOR_nothing))
5164 scalar_shift_arg = false;
5166 if (dump_enabled_p ())
5167 dump_printf_loc (MSG_NOTE, vect_location,
5168 "vector/vector shift/rotate found.\n");
5170 /* Unlike the other binary operators, shifts/rotates have an
5171 int rhs rather than one of the same type as the lhs, so
5172 make sure the scalar is of the right type when we are
5173 dealing with vectors of long long/long/short/char. */
5174 if (dt[1] == vect_constant_def)
5175 op1 = fold_convert (TREE_TYPE (vectype), op1);
5176 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5177 TREE_TYPE (op1)))
5179 if (slp_node
5180 && TYPE_MODE (TREE_TYPE (vectype))
5181 != TYPE_MODE (TREE_TYPE (op1)))
5183 if (dump_enabled_p ())
5184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5185 "unusable type for last operand in"
5186 " vector/vector shift/rotate.\n");
5187 return false;
5189 if (vec_stmt && !slp_node)
5191 op1 = fold_convert (TREE_TYPE (vectype), op1);
5192 op1 = vect_init_vector (stmt, op1,
5193 TREE_TYPE (vectype), NULL);
5200 /* Supportable by target? */
5201 if (!optab)
5203 if (dump_enabled_p ())
5204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5205 "no optab.\n");
5206 return false;
5208 vec_mode = TYPE_MODE (vectype);
5209 icode = (int) optab_handler (optab, vec_mode);
5210 if (icode == CODE_FOR_nothing)
5212 if (dump_enabled_p ())
5213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5214 "op not supported by target.\n");
5215 /* Check only during analysis. */
5216 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5217 || (!vec_stmt
5218 && !vect_worthwhile_without_simd_p (vinfo, code)))
5219 return false;
5220 if (dump_enabled_p ())
5221 dump_printf_loc (MSG_NOTE, vect_location,
5222 "proceeding using word mode.\n");
5225 /* Worthwhile without SIMD support? Check only during analysis. */
5226 if (!vec_stmt
5227 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5228 && !vect_worthwhile_without_simd_p (vinfo, code))
5230 if (dump_enabled_p ())
5231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5232 "not worthwhile without SIMD support.\n");
5233 return false;
5236 if (!vec_stmt) /* transformation not required. */
5238 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5239 if (dump_enabled_p ())
5240 dump_printf_loc (MSG_NOTE, vect_location,
5241 "=== vectorizable_shift ===\n");
5242 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5243 return true;
5246 /* Transform. */
5248 if (dump_enabled_p ())
5249 dump_printf_loc (MSG_NOTE, vect_location,
5250 "transform binary/unary operation.\n");
5252 /* Handle def. */
5253 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5255 prev_stmt_info = NULL;
5256 for (j = 0; j < ncopies; j++)
5258 /* Handle uses. */
5259 if (j == 0)
5261 if (scalar_shift_arg)
5263 /* Vector shl and shr insn patterns can be defined with scalar
5264 operand 2 (shift operand). In this case, use constant or loop
5265 invariant op1 directly, without extending it to vector mode
5266 first. */
5267 optab_op2_mode = insn_data[icode].operand[2].mode;
5268 if (!VECTOR_MODE_P (optab_op2_mode))
5270 if (dump_enabled_p ())
5271 dump_printf_loc (MSG_NOTE, vect_location,
5272 "operand 1 using scalar mode.\n");
5273 vec_oprnd1 = op1;
5274 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5275 vec_oprnds1.quick_push (vec_oprnd1);
5276 if (slp_node)
5278 /* Store vec_oprnd1 for every vector stmt to be created
5279 for SLP_NODE. We check during the analysis that all
5280 the shift arguments are the same.
5281 TODO: Allow different constants for different vector
5282 stmts generated for an SLP instance. */
5283 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5284 vec_oprnds1.quick_push (vec_oprnd1);
5289 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5290 (a special case for certain kinds of vector shifts); otherwise,
5291 operand 1 should be of a vector type (the usual case). */
5292 if (vec_oprnd1)
5293 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5294 slp_node);
5295 else
5296 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5297 slp_node);
5299 else
5300 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5302 /* Arguments are ready. Create the new vector stmt. */
5303 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5305 vop1 = vec_oprnds1[i];
5306 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5307 new_temp = make_ssa_name (vec_dest, new_stmt);
5308 gimple_assign_set_lhs (new_stmt, new_temp);
5309 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5310 if (slp_node)
5311 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5314 if (slp_node)
5315 continue;
5317 if (j == 0)
5318 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5319 else
5320 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5321 prev_stmt_info = vinfo_for_stmt (new_stmt);
5324 vec_oprnds0.release ();
5325 vec_oprnds1.release ();
5327 return true;
5331 /* Function vectorizable_operation.
5333 Check if STMT performs a binary, unary or ternary operation that can
5334 be vectorized.
5335 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5336 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5337 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5339 static bool
5340 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5341 gimple **vec_stmt, slp_tree slp_node)
5343 tree vec_dest;
5344 tree scalar_dest;
5345 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5346 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5347 tree vectype;
5348 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5349 enum tree_code code, orig_code;
5350 machine_mode vec_mode;
5351 tree new_temp;
5352 int op_type;
5353 optab optab;
5354 bool target_support_p;
5355 gimple *def_stmt;
5356 enum vect_def_type dt[3]
5357 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5358 int ndts = 3;
5359 gimple *new_stmt = NULL;
5360 stmt_vec_info prev_stmt_info;
5361 poly_uint64 nunits_in;
5362 poly_uint64 nunits_out;
5363 tree vectype_out;
5364 int ncopies;
5365 int j, i;
5366 vec<tree> vec_oprnds0 = vNULL;
5367 vec<tree> vec_oprnds1 = vNULL;
5368 vec<tree> vec_oprnds2 = vNULL;
5369 tree vop0, vop1, vop2;
5370 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5371 vec_info *vinfo = stmt_info->vinfo;
5373 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5374 return false;
5376 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5377 && ! vec_stmt)
5378 return false;
5380 /* Is STMT a vectorizable binary/unary operation? */
5381 if (!is_gimple_assign (stmt))
5382 return false;
5384 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5385 return false;
5387 orig_code = code = gimple_assign_rhs_code (stmt);
5389 /* For pointer addition and subtraction, we should use the normal
5390 plus and minus for the vector operation. */
5391 if (code == POINTER_PLUS_EXPR)
5392 code = PLUS_EXPR;
5393 if (code == POINTER_DIFF_EXPR)
5394 code = MINUS_EXPR;
5396 /* Support only unary, binary or ternary operations. */
5397 op_type = TREE_CODE_LENGTH (code);
5398 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5400 if (dump_enabled_p ())
5401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5402 "num. args = %d (not unary/binary/ternary op).\n",
5403 op_type);
5404 return false;
5407 scalar_dest = gimple_assign_lhs (stmt);
5408 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5410 /* Most operations cannot handle bit-precision types without extra
5411 truncations. */
5412 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5413 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5414 /* The exceptions are bitwise binary operations. */
5415 && code != BIT_IOR_EXPR
5416 && code != BIT_XOR_EXPR
5417 && code != BIT_AND_EXPR)
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5421 "bit-precision arithmetic not supported.\n");
5422 return false;
5425 op0 = gimple_assign_rhs1 (stmt);
5426 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5430 "use not simple.\n");
5431 return false;
5433 /* If op0 is an external or constant def, use a vector type with
5434 the same size as the output vector type. */
5435 if (!vectype)
5437 /* For a boolean type we cannot determine the vectype from an
5438 invariant value (we don't know whether it is a vector
5439 of booleans or a vector of integers). We use the output
5440 vectype because operations on booleans don't change the
5441 type. */
5442 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5444 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5446 if (dump_enabled_p ())
5447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5448 "not supported operation on bool value.\n");
5449 return false;
5451 vectype = vectype_out;
5453 else
5454 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5456 if (vec_stmt)
5457 gcc_assert (vectype);
5458 if (!vectype)
5460 if (dump_enabled_p ())
5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5463 "no vectype for scalar type ");
5464 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5465 TREE_TYPE (op0));
5466 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5469 return false;
5472 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5473 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5474 if (maybe_ne (nunits_out, nunits_in))
5475 return false;
5477 if (op_type == binary_op || op_type == ternary_op)
5479 op1 = gimple_assign_rhs2 (stmt);
5480 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5482 if (dump_enabled_p ())
5483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5484 "use not simple.\n");
5485 return false;
5488 if (op_type == ternary_op)
5490 op2 = gimple_assign_rhs3 (stmt);
5491 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5493 if (dump_enabled_p ())
5494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5495 "use not simple.\n");
5496 return false;
5500 /* Multiple types in SLP are handled by creating the appropriate number of
5501 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5502 case of SLP. */
5503 if (slp_node)
5504 ncopies = 1;
5505 else
5506 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5508 gcc_assert (ncopies >= 1);
5510 /* Shifts are handled in vectorizable_shift (). */
5511 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5512 || code == RROTATE_EXPR)
5513 return false;
5515 /* Supportable by target? */
5517 vec_mode = TYPE_MODE (vectype);
5518 if (code == MULT_HIGHPART_EXPR)
5519 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5520 else
5522 optab = optab_for_tree_code (code, vectype, optab_default);
5523 if (!optab)
5525 if (dump_enabled_p ())
5526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5527 "no optab.\n");
5528 return false;
5530 target_support_p = (optab_handler (optab, vec_mode)
5531 != CODE_FOR_nothing);
5534 if (!target_support_p)
5536 if (dump_enabled_p ())
5537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5538 "op not supported by target.\n");
5539 /* Check only during analysis. */
5540 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5541 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5542 return false;
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_NOTE, vect_location,
5545 "proceeding using word mode.\n");
5548 /* Worthwhile without SIMD support? Check only during analysis. */
5549 if (!VECTOR_MODE_P (vec_mode)
5550 && !vec_stmt
5551 && !vect_worthwhile_without_simd_p (vinfo, code))
5553 if (dump_enabled_p ())
5554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5555 "not worthwhile without SIMD support.\n");
5556 return false;
5559 if (!vec_stmt) /* transformation not required. */
5561 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5562 if (dump_enabled_p ())
5563 dump_printf_loc (MSG_NOTE, vect_location,
5564 "=== vectorizable_operation ===\n");
5565 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5566 return true;
5569 /* Transform. */
5571 if (dump_enabled_p ())
5572 dump_printf_loc (MSG_NOTE, vect_location,
5573 "transform binary/unary operation.\n");
5575 /* Handle def. */
5576 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5578 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5579 vectors with unsigned elements, but the result is signed. So, we
5580 need to compute the MINUS_EXPR into a VECTYPE temporary and
5581 VIEW_CONVERT_EXPR it into the final VECTYPE_OUT result. */
5582 tree vec_cvt_dest = NULL_TREE;
5583 if (orig_code == POINTER_DIFF_EXPR)
5584 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
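/* Illustrative sketch (not generated verbatim): for a ptrdiff_t result of
   "p - q" with the pointer operands vectorized as vectors of unsigned
   elements, the transform loop below ends up emitting roughly

     vect_tmp_N  = vect_p - vect_q;                          MINUS_EXPR in VECTYPE
     vect_dest_M = VIEW_CONVERT_EXPR<vectype_out> (vect_tmp_N);

   where vect_tmp_N and vect_dest_M are hypothetical SSA names used only
   for this example.  */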
5586 /* In case the vectorization factor (VF) is bigger than the number
5587 of elements that we can fit in a vectype (nunits), we have to generate
5588 more than one vector stmt, i.e. we need to "unroll" the
5589 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5590 from one copy of the vector stmt to the next, in the field
5591 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5592 stages to find the correct vector defs to be used when vectorizing
5593 stmts that use the defs of the current stmt. The example below
5594 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5595 we need to create 4 vectorized stmts):
5597 before vectorization:
5598 RELATED_STMT VEC_STMT
5599 S1: x = memref - -
5600 S2: z = x + 1 - -
5602 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5603 there):
5604 RELATED_STMT VEC_STMT
5605 VS1_0: vx0 = memref0 VS1_1 -
5606 VS1_1: vx1 = memref1 VS1_2 -
5607 VS1_2: vx2 = memref2 VS1_3 -
5608 VS1_3: vx3 = memref3 - -
5609 S1: x = load - VS1_0
5610 S2: z = x + 1 - -
5612 step2: vectorize stmt S2 (done here):
5613 To vectorize stmt S2 we first need to find the relevant vector
5614 def for the first operand 'x'. This is, as usual, obtained from
5615 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5616 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5617 relevant vector def 'vx0'. Having found 'vx0' we can generate
5618 the vector stmt VS2_0, and as usual, record it in the
5619 STMT_VINFO_VEC_STMT of stmt S2.
5620 When creating the second copy (VS2_1), we obtain the relevant vector
5621 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5622 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5623 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5624 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5625 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5626 chain of stmts and pointers:
5627 RELATED_STMT VEC_STMT
5628 VS1_0: vx0 = memref0 VS1_1 -
5629 VS1_1: vx1 = memref1 VS1_2 -
5630 VS1_2: vx2 = memref2 VS1_3 -
5631 VS1_3: vx3 = memref3 - -
5632 S1: x = load - VS1_0
5633 VS2_0: vz0 = vx0 + v1 VS2_1 -
5634 VS2_1: vz1 = vx1 + v1 VS2_2 -
5635 VS2_2: vz2 = vx2 + v1 VS2_3 -
5636 VS2_3: vz3 = vx3 + v1 - -
5637 S2: z = x + 1 - VS2_0 */
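/* As a concrete (illustrative) instance of the numbers above: VF=16 with
   nunits=4 arises e.g. when a loop mixing 8-bit and 32-bit operations is
   vectorized with 128-bit vectors -- the char statements fix VF at 16, so
   a V4SI statement such as S2 must be replicated ncopies = VF/nunits = 4
   times and chained through STMT_VINFO_RELATED_STMT exactly as shown.  */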
5639 prev_stmt_info = NULL;
5640 for (j = 0; j < ncopies; j++)
5642 /* Handle uses. */
5643 if (j == 0)
5645 if (op_type == binary_op || op_type == ternary_op)
5646 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5647 slp_node);
5648 else
5649 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5650 slp_node);
5651 if (op_type == ternary_op)
5652 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5653 slp_node);
5655 else
5657 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5658 if (op_type == ternary_op)
5660 tree vec_oprnd = vec_oprnds2.pop ();
5661 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5662 vec_oprnd));
5666 /* Arguments are ready. Create the new vector stmt. */
5667 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5669 vop1 = ((op_type == binary_op || op_type == ternary_op)
5670 ? vec_oprnds1[i] : NULL_TREE);
5671 vop2 = ((op_type == ternary_op)
5672 ? vec_oprnds2[i] : NULL_TREE);
5673 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5674 new_temp = make_ssa_name (vec_dest, new_stmt);
5675 gimple_assign_set_lhs (new_stmt, new_temp);
5676 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5677 if (vec_cvt_dest)
5679 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5680 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5681 new_temp);
5682 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5683 gimple_assign_set_lhs (new_stmt, new_temp);
5684 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5686 if (slp_node)
5687 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5690 if (slp_node)
5691 continue;
5693 if (j == 0)
5694 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5695 else
5696 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5697 prev_stmt_info = vinfo_for_stmt (new_stmt);
5700 vec_oprnds0.release ();
5701 vec_oprnds1.release ();
5702 vec_oprnds2.release ();
5704 return true;
5707 /* A helper function to ensure data reference DR's base alignment. */
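/* Illustrative example (the target-dependent numbers are assumptions): if
   DR_TARGET_ALIGNMENT is 16 bytes but the underlying base declaration is
   only 8-byte aligned, the code below raises DECL_ALIGN (or the symtab
   alignment for declarations with symtab nodes) to 128 bits so that later
   aligned vector accesses to DR are valid.  */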
5709 static void
5710 ensure_base_align (struct data_reference *dr)
5712 if (!dr->aux)
5713 return;
5715 if (DR_VECT_AUX (dr)->base_misaligned)
5717 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5719 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5721 if (decl_in_symtab_p (base_decl))
5722 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5723 else
5725 SET_DECL_ALIGN (base_decl, align_base_to);
5726 DECL_USER_ALIGN (base_decl) = 1;
5728 DR_VECT_AUX (dr)->base_misaligned = false;
5733 /* Function get_group_alias_ptr_type.
5735 Return the alias type for the group starting at FIRST_STMT. */
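/* Illustrative example: for a group that accesses both the 'int' and the
   'float' member of an array of 'struct { int a; float b; }', the alias
   sets of the member references differ, so the conservative ptr_type_node
   (alias set 0) is returned; otherwise the alias pointer type of the first
   reference is reused for the whole group.  */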
5737 static tree
5738 get_group_alias_ptr_type (gimple *first_stmt)
5740 struct data_reference *first_dr, *next_dr;
5741 gimple *next_stmt;
5743 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5744 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5745 while (next_stmt)
5747 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5748 if (get_alias_set (DR_REF (first_dr))
5749 != get_alias_set (DR_REF (next_dr)))
5751 if (dump_enabled_p ())
5752 dump_printf_loc (MSG_NOTE, vect_location,
5753 "conflicting alias set types.\n");
5754 return ptr_type_node;
5756 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5758 return reference_alias_ptr_type (DR_REF (first_dr));
5762 /* Function vectorizable_store.
5764 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5765 can be vectorized.
5766 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5767 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5768 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5770 static bool
5771 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5772 slp_tree slp_node)
5774 tree data_ref;
5775 tree op;
5776 tree vec_oprnd = NULL_TREE;
5777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5778 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5779 tree elem_type;
5780 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5781 struct loop *loop = NULL;
5782 machine_mode vec_mode;
5783 tree dummy;
5784 enum dr_alignment_support alignment_support_scheme;
5785 gimple *def_stmt;
5786 enum vect_def_type dt;
5787 stmt_vec_info prev_stmt_info = NULL;
5788 tree dataref_ptr = NULL_TREE;
5789 tree dataref_offset = NULL_TREE;
5790 gimple *ptr_incr = NULL;
5791 int ncopies;
5792 int j;
5793 gimple *next_stmt, *first_stmt;
5794 bool grouped_store;
5795 unsigned int group_size, i;
5796 vec<tree> oprnds = vNULL;
5797 vec<tree> result_chain = vNULL;
5798 bool inv_p;
5799 tree offset = NULL_TREE;
5800 vec<tree> vec_oprnds = vNULL;
5801 bool slp = (slp_node != NULL);
5802 unsigned int vec_num;
5803 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5804 vec_info *vinfo = stmt_info->vinfo;
5805 tree aggr_type;
5806 gather_scatter_info gs_info;
5807 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5808 gimple *new_stmt;
5809 poly_uint64 vf;
5810 vec_load_store_type vls_type;
5811 tree ref_type;
5813 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5814 return false;
5816 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5817 && ! vec_stmt)
5818 return false;
5820 /* Is vectorizable store? */
5822 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
5823 if (is_gimple_assign (stmt))
5825 tree scalar_dest = gimple_assign_lhs (stmt);
5826 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5827 && is_pattern_stmt_p (stmt_info))
5828 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5829 if (TREE_CODE (scalar_dest) != ARRAY_REF
5830 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5831 && TREE_CODE (scalar_dest) != INDIRECT_REF
5832 && TREE_CODE (scalar_dest) != COMPONENT_REF
5833 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5834 && TREE_CODE (scalar_dest) != REALPART_EXPR
5835 && TREE_CODE (scalar_dest) != MEM_REF)
5836 return false;
5838 else
5840 gcall *call = dyn_cast <gcall *> (stmt);
5841 if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
5842 return false;
5844 if (slp_node != NULL)
5846 if (dump_enabled_p ())
5847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5848 "SLP of masked stores not supported.\n");
5849 return false;
5852 ref_type = TREE_TYPE (gimple_call_arg (call, 1));
5853 mask = gimple_call_arg (call, 2);
5854 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
5855 return false;
5858 op = vect_get_store_rhs (stmt);
5860 /* Cannot have hybrid store SLP -- that would mean storing to the
5861 same location twice. */
5862 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5864 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5865 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5867 if (loop_vinfo)
5869 loop = LOOP_VINFO_LOOP (loop_vinfo);
5870 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5872 else
5873 vf = 1;
5875 /* Multiple types in SLP are handled by creating the appropriate number of
5876 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5877 case of SLP. */
5878 if (slp)
5879 ncopies = 1;
5880 else
5881 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5883 gcc_assert (ncopies >= 1);
5885 /* FORNOW. This restriction should be relaxed. */
5886 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5888 if (dump_enabled_p ())
5889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5890 "multiple types in nested loop.\n");
5891 return false;
5894 if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
5895 return false;
5897 elem_type = TREE_TYPE (vectype);
5898 vec_mode = TYPE_MODE (vectype);
5900 if (!STMT_VINFO_DATA_REF (stmt_info))
5901 return false;
5903 vect_memory_access_type memory_access_type;
5904 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
5905 &memory_access_type, &gs_info))
5906 return false;
5908 if (mask)
5910 if (memory_access_type == VMAT_CONTIGUOUS)
5912 if (!VECTOR_MODE_P (vec_mode)
5913 || !can_vec_mask_load_store_p (vec_mode,
5914 TYPE_MODE (mask_vectype), false))
5915 return false;
5917 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
5919 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5921 "unsupported access type for masked store.\n");
5922 return false;
5925 else
5927 /* FORNOW. In some cases we can vectorize even if the data type is not
5928 supported (e.g. array initialization with 0). */
5929 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5930 return false;
5933 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5934 if (grouped_store)
5936 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5937 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5938 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5940 else
5942 first_stmt = stmt;
5943 first_dr = dr;
5944 group_size = vec_num = 1;
5947 if (!vec_stmt) /* transformation not required. */
5949 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5951 if (loop_vinfo
5952 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
5953 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
5954 memory_access_type);
5956 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5957 /* The SLP costs are calculated during SLP analysis. */
5958 if (!PURE_SLP_STMT (stmt_info))
5959 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
5960 vls_type, NULL, NULL, NULL);
5961 return true;
5963 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5965 /* Transform. */
5967 ensure_base_align (dr);
5969 if (memory_access_type == VMAT_GATHER_SCATTER)
5971 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
5972 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5973 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5974 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5975 edge pe = loop_preheader_edge (loop);
5976 gimple_seq seq;
5977 basic_block new_bb;
5978 enum { NARROW, NONE, WIDEN } modifier;
5979 poly_uint64 scatter_off_nunits
5980 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5982 if (known_eq (nunits, scatter_off_nunits))
5983 modifier = NONE;
5984 else if (known_eq (nunits * 2, scatter_off_nunits))
5986 modifier = WIDEN;
5988 /* Currently gathers and scatters are only supported for
5989 fixed-length vectors. */
5990 unsigned int count = scatter_off_nunits.to_constant ();
5991 vec_perm_builder sel (count, count, 1);
5992 for (i = 0; i < (unsigned int) count; ++i)
5993 sel.quick_push (i | (count / 2));
5995 vec_perm_indices indices (sel, 1, count);
5996 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5997 indices);
5998 gcc_assert (perm_mask != NULL_TREE);
6000 else if (known_eq (nunits, scatter_off_nunits * 2))
6002 modifier = NARROW;
6004 /* Currently gathers and scatters are only supported for
6005 fixed-length vectors. */
6006 unsigned int count = nunits.to_constant ();
6007 vec_perm_builder sel (count, count, 1);
6008 for (i = 0; i < (unsigned int) count; ++i)
6009 sel.quick_push (i | (count / 2));
6011 vec_perm_indices indices (sel, 2, count);
6012 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6013 gcc_assert (perm_mask != NULL_TREE);
6014 ncopies *= 2;
6016 else
6017 gcc_unreachable ();
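/* Illustrative example of the selectors built above: with eight offset (or
   data) elements the loop pushes i | 4 for i = 0..7, giving
   { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the permutation used for every odd copy
   simply moves the high half of the wider vector into the lanes consumed
   by that copy.  */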
6019 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6020 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6021 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6022 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6023 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6024 scaletype = TREE_VALUE (arglist);
6026 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6027 && TREE_CODE (rettype) == VOID_TYPE);
6029 ptr = fold_convert (ptrtype, gs_info.base);
6030 if (!is_gimple_min_invariant (ptr))
6032 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6033 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6034 gcc_assert (!new_bb);
6037 /* Currently we support only unconditional scatter stores,
6038 so mask should be all ones. */
6039 mask = build_int_cst (masktype, -1);
6040 mask = vect_init_vector (stmt, mask, masktype, NULL);
6042 scale = build_int_cst (scaletype, gs_info.scale);
6044 prev_stmt_info = NULL;
6045 for (j = 0; j < ncopies; ++j)
6047 if (j == 0)
6049 src = vec_oprnd1
6050 = vect_get_vec_def_for_operand (op, stmt);
6051 op = vec_oprnd0
6052 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6054 else if (modifier != NONE && (j & 1))
6056 if (modifier == WIDEN)
6058 src = vec_oprnd1
6059 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6060 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6061 stmt, gsi);
6063 else if (modifier == NARROW)
6065 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6066 stmt, gsi);
6067 op = vec_oprnd0
6068 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6069 vec_oprnd0);
6071 else
6072 gcc_unreachable ();
6074 else
6076 src = vec_oprnd1
6077 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6078 op = vec_oprnd0
6079 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6080 vec_oprnd0);
6083 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6085 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6086 TYPE_VECTOR_SUBPARTS (srctype)));
6087 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6088 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6089 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6090 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6091 src = var;
6094 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6096 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6097 TYPE_VECTOR_SUBPARTS (idxtype)));
6098 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6099 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6100 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6102 op = var;
6105 new_stmt
6106 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6108 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6110 if (prev_stmt_info == NULL)
6111 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6112 else
6113 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6114 prev_stmt_info = vinfo_for_stmt (new_stmt);
6116 return true;
6119 if (grouped_store)
6121 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
6123 /* FORNOW */
6124 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6126 /* We vectorize all the stmts of the interleaving group when we
6127 reach the last stmt in the group. */
6128 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6129 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6130 && !slp)
6132 *vec_stmt = NULL;
6133 return true;
6136 if (slp)
6138 grouped_store = false;
6139 /* VEC_NUM is the number of vect stmts to be created for this
6140 group. */
6141 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6142 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6143 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6144 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6145 op = vect_get_store_rhs (first_stmt);
6147 else
6148 /* VEC_NUM is the number of vect stmts to be created for this
6149 group. */
6150 vec_num = group_size;
6152 ref_type = get_group_alias_ptr_type (first_stmt);
6154 else
6155 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6157 if (dump_enabled_p ())
6158 dump_printf_loc (MSG_NOTE, vect_location,
6159 "transform store. ncopies = %d\n", ncopies);
6161 if (memory_access_type == VMAT_ELEMENTWISE
6162 || memory_access_type == VMAT_STRIDED_SLP)
6164 gimple_stmt_iterator incr_gsi;
6165 bool insert_after;
6166 gimple *incr;
6167 tree offvar;
6168 tree ivstep;
6169 tree running_off;
6170 gimple_seq stmts = NULL;
6171 tree stride_base, stride_step, alias_off;
6172 tree vec_oprnd;
6173 unsigned int g;
6174 /* Checked by get_load_store_type. */
6175 unsigned int const_nunits = nunits.to_constant ();
6177 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6178 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6180 stride_base
6181 = fold_build_pointer_plus
6182 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6183 size_binop (PLUS_EXPR,
6184 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6185 convert_to_ptrofftype (DR_INIT (first_dr))));
6186 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6188 /* For a store with loop-invariant (but other than power-of-2)
6189 stride (i.e. not a grouped access) like so:
6191 for (i = 0; i < n; i += stride)
6192 array[i] = ...;
6194 we generate a new induction variable and new stores from
6195 the components of the (vectorized) rhs:
6197 for (j = 0; ; j += VF*stride)
6198 vectemp = ...;
6199 tmp1 = vectemp[0];
6200 array[j] = tmp1;
6201 tmp2 = vectemp[1];
6202 array[j + stride] = tmp2;
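For instance (purely illustrative), with V4SI vectors (so VF == 4 in the
simplest case) and stride == 3, one copy stores vectemp[0..3] to
array[j], array[j + 3], array[j + 6] and array[j + 9], after which the
induction variable advances by VF * stride == 12 elements.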
6206 unsigned nstores = const_nunits;
6207 unsigned lnel = 1;
6208 tree ltype = elem_type;
6209 tree lvectype = vectype;
6210 if (slp)
6212 if (group_size < const_nunits
6213 && const_nunits % group_size == 0)
6215 nstores = const_nunits / group_size;
6216 lnel = group_size;
6217 ltype = build_vector_type (elem_type, group_size);
6218 lvectype = vectype;
6220 /* First check if vec_extract optab doesn't support extraction
6221 of vector elts directly. */
6222 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6223 machine_mode vmode;
6224 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6225 || !VECTOR_MODE_P (vmode)
6226 || (convert_optab_handler (vec_extract_optab,
6227 TYPE_MODE (vectype), vmode)
6228 == CODE_FOR_nothing))
6230 /* Try to avoid emitting an extract of vector elements
6231 by performing the extracts using an integer type of the
6232 same size, extracting from a vector of those and then
6233 re-interpreting it as the original vector type if
6234 supported. */
6235 unsigned lsize
6236 = group_size * GET_MODE_BITSIZE (elmode);
6237 elmode = int_mode_for_size (lsize, 0).require ();
6238 unsigned int lnunits = const_nunits / group_size;
6239 /* If we can't construct such a vector fall back to
6240 element extracts from the original vector type and
6241 element size stores. */
6242 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6243 && VECTOR_MODE_P (vmode)
6244 && (convert_optab_handler (vec_extract_optab,
6245 vmode, elmode)
6246 != CODE_FOR_nothing))
6248 nstores = lnunits;
6249 lnel = group_size;
6250 ltype = build_nonstandard_integer_type (lsize, 1);
6251 lvectype = build_vector_type (ltype, nstores);
6253 /* Else fall back to vector extraction anyway.
6254 Fewer stores are more important than avoiding spilling
6255 of the vector we extract from. Compared to the
6256 construction case in vectorizable_load no store-forwarding
6257 issue exists here for reasonable archs. */
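/* Illustrative example of the integer fallback above: storing groups of
   two float elements out of a V4SF vector gives lsize == 64, so the vector
   is viewed as V2DI and two DImode element stores are emitted instead of
   four SFmode extracts; only if neither the V2DI view nor the DImode
   extract is supported do we fall back to per-element extracts.  */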
6260 else if (group_size >= const_nunits
6261 && group_size % const_nunits == 0)
6263 nstores = 1;
6264 lnel = const_nunits;
6265 ltype = vectype;
6266 lvectype = vectype;
6268 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6269 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6272 ivstep = stride_step;
6273 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6274 build_int_cst (TREE_TYPE (ivstep), vf));
6276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6278 create_iv (stride_base, ivstep, NULL,
6279 loop, &incr_gsi, insert_after,
6280 &offvar, NULL);
6281 incr = gsi_stmt (incr_gsi);
6282 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6284 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6285 if (stmts)
6286 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6288 prev_stmt_info = NULL;
6289 alias_off = build_int_cst (ref_type, 0);
6290 next_stmt = first_stmt;
6291 for (g = 0; g < group_size; g++)
6293 running_off = offvar;
6294 if (g)
6296 tree size = TYPE_SIZE_UNIT (ltype);
6297 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6298 size);
6299 tree newoff = copy_ssa_name (running_off, NULL);
6300 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6301 running_off, pos);
6302 vect_finish_stmt_generation (stmt, incr, gsi);
6303 running_off = newoff;
6305 unsigned int group_el = 0;
6306 unsigned HOST_WIDE_INT
6307 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6308 for (j = 0; j < ncopies; j++)
6310 /* We've set op and dt above, from vect_get_store_rhs,
6311 and first_stmt == stmt. */
6312 if (j == 0)
6314 if (slp)
6316 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6317 slp_node);
6318 vec_oprnd = vec_oprnds[0];
6320 else
6322 op = vect_get_store_rhs (next_stmt);
6323 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6326 else
6328 if (slp)
6329 vec_oprnd = vec_oprnds[j];
6330 else
6332 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6333 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6336 /* Pun the vector to extract from if necessary. */
6337 if (lvectype != vectype)
6339 tree tem = make_ssa_name (lvectype);
6340 gimple *pun
6341 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6342 lvectype, vec_oprnd));
6343 vect_finish_stmt_generation (stmt, pun, gsi);
6344 vec_oprnd = tem;
6346 for (i = 0; i < nstores; i++)
6348 tree newref, newoff;
6349 gimple *incr, *assign;
6350 tree size = TYPE_SIZE (ltype);
6351 /* Extract the i'th component. */
6352 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6353 bitsize_int (i), size);
6354 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6355 size, pos);
6357 elem = force_gimple_operand_gsi (gsi, elem, true,
6358 NULL_TREE, true,
6359 GSI_SAME_STMT);
6361 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6362 group_el * elsz);
6363 newref = build2 (MEM_REF, ltype,
6364 running_off, this_off);
6366 /* And store it to *running_off. */
6367 assign = gimple_build_assign (newref, elem);
6368 vect_finish_stmt_generation (stmt, assign, gsi);
6370 group_el += lnel;
6371 if (! slp
6372 || group_el == group_size)
6374 newoff = copy_ssa_name (running_off, NULL);
6375 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6376 running_off, stride_step);
6377 vect_finish_stmt_generation (stmt, incr, gsi);
6379 running_off = newoff;
6380 group_el = 0;
6382 if (g == group_size - 1
6383 && !slp)
6385 if (j == 0 && i == 0)
6386 STMT_VINFO_VEC_STMT (stmt_info)
6387 = *vec_stmt = assign;
6388 else
6389 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6390 prev_stmt_info = vinfo_for_stmt (assign);
6394 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6395 if (slp)
6396 break;
6399 vec_oprnds.release ();
6400 return true;
6403 auto_vec<tree> dr_chain (group_size);
6404 oprnds.create (group_size);
6406 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6407 gcc_assert (alignment_support_scheme);
6408 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6409 /* Targets with store-lane instructions must not require explicit
6410 realignment. vect_supportable_dr_alignment always returns either
6411 dr_aligned or dr_unaligned_supported for masked operations. */
6412 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6413 && !mask
6414 && !masked_loop_p)
6415 || alignment_support_scheme == dr_aligned
6416 || alignment_support_scheme == dr_unaligned_supported);
6418 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6419 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6420 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6422 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6423 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6424 else
6425 aggr_type = vectype;
6427 if (mask)
6428 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6430 /* In case the vectorization factor (VF) is bigger than the number
6431 of elements that we can fit in a vectype (nunits), we have to generate
6432 more than one vector stmt, i.e. we need to "unroll" the
6433 vector stmt by a factor VF/nunits. For more details see documentation in
6434 vect_get_vec_def_for_copy_stmt. */
6436 /* In case of interleaving (non-unit grouped access):
6438 S1: &base + 2 = x2
6439 S2: &base = x0
6440 S3: &base + 1 = x1
6441 S4: &base + 3 = x3
6443 We create vectorized stores starting from base address (the access of the
6444 first stmt in the chain (S2 in the above example), when the last store stmt
6445 of the chain (S4) is reached:
6447 VS1: &base = vx2
6448 VS2: &base + vec_size*1 = vx0
6449 VS3: &base + vec_size*2 = vx1
6450 VS4: &base + vec_size*3 = vx3
6452 Then permutation statements are generated:
6454 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6455 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6458 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6459 (the order of the data-refs in the output of vect_permute_store_chain
6460 corresponds to the order of scalar stmts in the interleaving chain - see
6461 the documentation of vect_permute_store_chain()).
6463 In case of both multiple types and interleaving, above vector stores and
6464 permutation stmts are created for every copy. The result vector stmts are
6465 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6466 STMT_VINFO_RELATED_STMT for the next copies.
6469 prev_stmt_info = NULL;
6470 tree vec_mask = NULL_TREE;
6471 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
6472 for (j = 0; j < ncopies; j++)
6475 if (j == 0)
6477 if (slp)
6479 /* Get vectorized arguments for SLP_NODE. */
6480 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6481 NULL, slp_node);
6483 vec_oprnd = vec_oprnds[0];
6485 else
6487 /* For interleaved stores we collect vectorized defs for all the
6488 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6489 used as an input to vect_permute_store_chain(), and OPRNDS as
6490 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6492 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6493 OPRNDS are of size 1. */
6494 next_stmt = first_stmt;
6495 for (i = 0; i < group_size; i++)
6497 /* Since gaps are not supported for interleaved stores,
6498 GROUP_SIZE is the exact number of stmts in the chain.
6499 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6500 there is no interleaving, GROUP_SIZE is 1, and only one
6501 iteration of the loop will be executed. */
6502 op = vect_get_store_rhs (next_stmt);
6503 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6504 dr_chain.quick_push (vec_oprnd);
6505 oprnds.quick_push (vec_oprnd);
6506 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6508 if (mask)
6509 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6510 mask_vectype);
6513 /* We should have caught mismatched types earlier. */
6514 gcc_assert (useless_type_conversion_p (vectype,
6515 TREE_TYPE (vec_oprnd)));
6516 bool simd_lane_access_p
6517 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6518 if (simd_lane_access_p
6519 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6520 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6521 && integer_zerop (DR_OFFSET (first_dr))
6522 && integer_zerop (DR_INIT (first_dr))
6523 && alias_sets_conflict_p (get_alias_set (aggr_type),
6524 get_alias_set (TREE_TYPE (ref_type))))
6526 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6527 dataref_offset = build_int_cst (ref_type, 0);
6528 inv_p = false;
6530 else
6531 dataref_ptr
6532 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6533 simd_lane_access_p ? loop : NULL,
6534 offset, &dummy, gsi, &ptr_incr,
6535 simd_lane_access_p, &inv_p);
6536 gcc_assert (bb_vinfo || !inv_p);
6538 else
6540 /* For interleaved stores we created vectorized defs for all the
6541 defs stored in OPRNDS in the previous iteration (previous copy).
6542 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6543 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6544 next copy.
6545 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6546 OPRNDS are of size 1. */
6547 for (i = 0; i < group_size; i++)
6549 op = oprnds[i];
6550 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6552 dr_chain[i] = vec_oprnd;
6553 oprnds[i] = vec_oprnd;
6555 if (mask)
6557 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
6558 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
6560 if (dataref_offset)
6561 dataref_offset
6562 = int_const_binop (PLUS_EXPR, dataref_offset,
6563 TYPE_SIZE_UNIT (aggr_type));
6564 else
6565 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6566 TYPE_SIZE_UNIT (aggr_type));
6569 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6571 tree vec_array;
6573 /* Combine all the vectors into an array. */
6574 vec_array = create_vector_array (vectype, vec_num);
6575 for (i = 0; i < vec_num; i++)
6577 vec_oprnd = dr_chain[i];
6578 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6581 tree final_mask = NULL;
6582 if (masked_loop_p)
6583 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
6584 if (vec_mask)
6585 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6586 vec_mask, gsi);
6588 gcall *call;
6589 if (final_mask)
6591 /* Emit:
6592 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6593 VEC_ARRAY). */
6594 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6595 tree alias_ptr = build_int_cst (ref_type, align);
6596 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6597 dataref_ptr, alias_ptr,
6598 final_mask, vec_array);
6600 else
6602 /* Emit:
6603 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6604 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6605 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6606 vec_array);
6607 gimple_call_set_lhs (call, data_ref);
6609 gimple_call_set_nothrow (call, true);
6610 new_stmt = call;
6611 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6613 else
6615 new_stmt = NULL;
6616 if (grouped_store)
6618 if (j == 0)
6619 result_chain.create (group_size);
6620 /* Permute. */
6621 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6622 &result_chain);
6625 next_stmt = first_stmt;
6626 for (i = 0; i < vec_num; i++)
6628 unsigned align, misalign;
6630 tree final_mask = NULL_TREE;
6631 if (masked_loop_p)
6632 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6633 vectype, vec_num * j + i);
6634 if (vec_mask)
6635 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6636 vec_mask, gsi);
6638 if (i > 0)
6639 /* Bump the vector pointer. */
6640 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6641 stmt, NULL_TREE);
6643 if (slp)
6644 vec_oprnd = vec_oprnds[i];
6645 else if (grouped_store)
6646 /* For grouped stores vectorized defs are interleaved in
6647 vect_permute_store_chain(). */
6648 vec_oprnd = result_chain[i];
6650 align = DR_TARGET_ALIGNMENT (first_dr);
6651 if (aligned_access_p (first_dr))
6652 misalign = 0;
6653 else if (DR_MISALIGNMENT (first_dr) == -1)
6655 align = dr_alignment (vect_dr_behavior (first_dr));
6656 misalign = 0;
6658 else
6659 misalign = DR_MISALIGNMENT (first_dr);
6660 if (dataref_offset == NULL_TREE
6661 && TREE_CODE (dataref_ptr) == SSA_NAME)
6662 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6663 misalign);
6665 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6667 tree perm_mask = perm_mask_for_reverse (vectype);
6668 tree perm_dest
6669 = vect_create_destination_var (vect_get_store_rhs (stmt),
6670 vectype);
6671 tree new_temp = make_ssa_name (perm_dest);
6673 /* Generate the permute statement. */
6674 gimple *perm_stmt
6675 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6676 vec_oprnd, perm_mask);
6677 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6679 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6680 vec_oprnd = new_temp;
6683 /* Arguments are ready. Create the new vector stmt. */
6684 if (final_mask)
6686 align = least_bit_hwi (misalign | align);
6687 tree ptr = build_int_cst (ref_type, align);
6688 gcall *call
6689 = gimple_build_call_internal (IFN_MASK_STORE, 4,
6690 dataref_ptr, ptr,
6691 final_mask, vec_oprnd);
6692 gimple_call_set_nothrow (call, true);
6693 new_stmt = call;
6695 else
6697 data_ref = fold_build2 (MEM_REF, vectype,
6698 dataref_ptr,
6699 dataref_offset
6700 ? dataref_offset
6701 : build_int_cst (ref_type, 0));
6702 if (aligned_access_p (first_dr))
6704 else if (DR_MISALIGNMENT (first_dr) == -1)
6705 TREE_TYPE (data_ref)
6706 = build_aligned_type (TREE_TYPE (data_ref),
6707 align * BITS_PER_UNIT);
6708 else
6709 TREE_TYPE (data_ref)
6710 = build_aligned_type (TREE_TYPE (data_ref),
6711 TYPE_ALIGN (elem_type));
6712 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6714 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6716 if (slp)
6717 continue;
6719 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6720 if (!next_stmt)
6721 break;
6724 if (!slp)
6726 if (j == 0)
6727 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6728 else
6729 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6730 prev_stmt_info = vinfo_for_stmt (new_stmt);
6734 oprnds.release ();
6735 result_chain.release ();
6736 vec_oprnds.release ();
6738 return true;
6741 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6742 VECTOR_CST mask. No checks are made that the target platform supports the
6743 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6744 vect_gen_perm_mask_checked. */
6746 tree
6747 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6749 tree mask_type;
6751 poly_uint64 nunits = sel.length ();
6752 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
6754 mask_type = build_vector_type (ssizetype, nunits);
6755 return vec_perm_indices_to_tree (mask_type, sel);
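/* Usage sketch (illustrative): a caller wanting to reverse a V4SI vector
   would fill a vec_perm_builder with { 3, 2, 1, 0 } and pass the resulting
   vec_perm_indices here (or to vect_gen_perm_mask_checked below) to obtain
   the VECTOR_CST of ssizetype elements used as the VEC_PERM_EXPR mask.  */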
6758 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6759 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6761 tree
6762 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6764 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6765 return vect_gen_perm_mask_any (vectype, sel);
6768 /* Given vector variables X and Y that were generated for the scalar
6769 STMT, generate instructions to permute the vector elements of X and Y
6770 using permutation mask MASK_VEC, insert them at *GSI and return the
6771 permuted vector variable. */
6773 static tree
6774 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6775 gimple_stmt_iterator *gsi)
6777 tree vectype = TREE_TYPE (x);
6778 tree perm_dest, data_ref;
6779 gimple *perm_stmt;
6781 tree scalar_dest = gimple_get_lhs (stmt);
6782 if (TREE_CODE (scalar_dest) == SSA_NAME)
6783 perm_dest = vect_create_destination_var (scalar_dest, vectype);
6784 else
6785 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
6786 data_ref = make_ssa_name (perm_dest);
6788 /* Generate the permute statement. */
6789 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6790 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6792 return data_ref;
6795 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6796 inserting them on the loop's preheader edge. Returns true if we
6797 were successful in doing so (and thus STMT can then be moved),
6798 otherwise returns false. */
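/* Illustrative use: when a loop-invariant load 'x = *p' is to be hoisted
   but the address computation 'p = base + off' happens to sit inside LOOP,
   that single defining statement (a non-PHI whose own operands are defined
   outside the loop) is moved to the preheader so that the caller can
   subsequently hoist the load itself.  */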
6800 static bool
6801 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6803 ssa_op_iter i;
6804 tree op;
6805 bool any = false;
6807 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6809 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6810 if (!gimple_nop_p (def_stmt)
6811 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6813 /* Make sure we don't need to recurse. While we could do
6814 so in simple cases, when there are more complex use webs
6815 we don't have an easy way to preserve stmt order to fulfil
6816 dependencies within them. */
6817 tree op2;
6818 ssa_op_iter i2;
6819 if (gimple_code (def_stmt) == GIMPLE_PHI)
6820 return false;
6821 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6823 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6824 if (!gimple_nop_p (def_stmt2)
6825 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6826 return false;
6828 any = true;
6832 if (!any)
6833 return true;
6835 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6837 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6838 if (!gimple_nop_p (def_stmt)
6839 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6841 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6842 gsi_remove (&gsi, false);
6843 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6847 return true;
6850 /* vectorizable_load.
6852 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6853 can be vectorized.
6854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6855 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6858 static bool
6859 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6860 slp_tree slp_node, slp_instance slp_node_instance)
6862 tree scalar_dest;
6863 tree vec_dest = NULL;
6864 tree data_ref = NULL;
6865 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6866 stmt_vec_info prev_stmt_info;
6867 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6868 struct loop *loop = NULL;
6869 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6870 bool nested_in_vect_loop = false;
6871 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6872 tree elem_type;
6873 tree new_temp;
6874 machine_mode mode;
6875 gimple *new_stmt = NULL;
6876 tree dummy;
6877 enum dr_alignment_support alignment_support_scheme;
6878 tree dataref_ptr = NULL_TREE;
6879 tree dataref_offset = NULL_TREE;
6880 gimple *ptr_incr = NULL;
6881 int ncopies;
6882 int i, j;
6883 unsigned int group_size;
6884 poly_uint64 group_gap_adj;
6885 tree msq = NULL_TREE, lsq;
6886 tree offset = NULL_TREE;
6887 tree byte_offset = NULL_TREE;
6888 tree realignment_token = NULL_TREE;
6889 gphi *phi = NULL;
6890 vec<tree> dr_chain = vNULL;
6891 bool grouped_load = false;
6892 gimple *first_stmt;
6893 gimple *first_stmt_for_drptr = NULL;
6894 bool inv_p;
6895 bool compute_in_loop = false;
6896 struct loop *at_loop;
6897 int vec_num;
6898 bool slp = (slp_node != NULL);
6899 bool slp_perm = false;
6900 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6901 poly_uint64 vf;
6902 tree aggr_type;
6903 gather_scatter_info gs_info;
6904 vec_info *vinfo = stmt_info->vinfo;
6905 tree ref_type;
6907 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6908 return false;
6910 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6911 && ! vec_stmt)
6912 return false;
6914 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6915 if (is_gimple_assign (stmt))
6917 scalar_dest = gimple_assign_lhs (stmt);
6918 if (TREE_CODE (scalar_dest) != SSA_NAME)
6919 return false;
6921 tree_code code = gimple_assign_rhs_code (stmt);
6922 if (code != ARRAY_REF
6923 && code != BIT_FIELD_REF
6924 && code != INDIRECT_REF
6925 && code != COMPONENT_REF
6926 && code != IMAGPART_EXPR
6927 && code != REALPART_EXPR
6928 && code != MEM_REF
6929 && TREE_CODE_CLASS (code) != tcc_declaration)
6930 return false;
6932 else
6934 gcall *call = dyn_cast <gcall *> (stmt);
6935 if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
6936 return false;
6938 scalar_dest = gimple_call_lhs (call);
6939 if (!scalar_dest)
6940 return false;
6942 if (slp_node != NULL)
6944 if (dump_enabled_p ())
6945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6946 "SLP of masked loads not supported.\n");
6947 return false;
6950 mask = gimple_call_arg (call, 2);
6951 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
6952 return false;
6955 if (!STMT_VINFO_DATA_REF (stmt_info))
6956 return false;
6958 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6959 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6961 if (loop_vinfo)
6963 loop = LOOP_VINFO_LOOP (loop_vinfo);
6964 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6965 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6967 else
6968 vf = 1;
6970 /* Multiple types in SLP are handled by creating the appropriate number of
6971 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6972 case of SLP. */
6973 if (slp)
6974 ncopies = 1;
6975 else
6976 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6978 gcc_assert (ncopies >= 1);
6980 /* FORNOW. This restriction should be relaxed. */
6981 if (nested_in_vect_loop && ncopies > 1)
6983 if (dump_enabled_p ())
6984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6985 "multiple types in nested loop.\n");
6986 return false;
6989 /* Invalidate assumptions made by dependence analysis when vectorization
6990 on the unrolled body effectively re-orders stmts. */
6991 if (ncopies > 1
6992 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6993 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6994 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6996 if (dump_enabled_p ())
6997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6998 "cannot perform implicit CSE when unrolling "
6999 "with negative dependence distance\n");
7000 return false;
7003 elem_type = TREE_TYPE (vectype);
7004 mode = TYPE_MODE (vectype);
7006 /* FORNOW. In some cases we can vectorize even if the data type is not
7007 supported (e.g. data copies). */
7008 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7010 if (dump_enabled_p ())
7011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7012 "Aligned load, but unsupported type.\n");
7013 return false;
7016 /* Check if the load is a part of an interleaving chain. */
7017 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7019 grouped_load = true;
7020 /* FORNOW */
7021 gcc_assert (!nested_in_vect_loop);
7022 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7024 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7025 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7027 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7028 slp_perm = true;
7030 /* Invalidate assumptions made by dependence analysis when vectorization
7031 on the unrolled body effectively re-orders stmts. */
7032 if (!PURE_SLP_STMT (stmt_info)
7033 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7034 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7035 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7037 if (dump_enabled_p ())
7038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7039 "cannot perform implicit CSE when performing "
7040 "group loads with negative dependence distance\n");
7041 return false;
7044 /* Similarly when the stmt is a load that is both part of a SLP
7045 instance and a loop vectorized stmt via the same-dr mechanism
7046 we have to give up. */
7047 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7048 && (STMT_SLP_TYPE (stmt_info)
7049 != STMT_SLP_TYPE (vinfo_for_stmt
7050 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7052 if (dump_enabled_p ())
7053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7054 "conflicting SLP types for CSEd load\n");
7055 return false;
7058 else
7059 group_size = 1;
7061 vect_memory_access_type memory_access_type;
7062 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7063 &memory_access_type, &gs_info))
7064 return false;
7066 if (mask)
7068 if (memory_access_type == VMAT_CONTIGUOUS)
7070 machine_mode vec_mode = TYPE_MODE (vectype);
7071 if (!VECTOR_MODE_P (vec_mode)
7072 || !can_vec_mask_load_store_p (vec_mode,
7073 TYPE_MODE (mask_vectype), true))
7074 return false;
7076 else if (memory_access_type == VMAT_GATHER_SCATTER)
7078 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7079 tree masktype
7080 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7081 if (TREE_CODE (masktype) == INTEGER_TYPE)
7083 if (dump_enabled_p ())
7084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7085 "masked gather with integer mask not"
7086 " supported.");
7087 return false;
7090 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
7092 if (dump_enabled_p ())
7093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7094 "unsupported access type for masked load.\n");
7095 return false;
7099 if (!vec_stmt) /* transformation not required. */
7101 if (!slp)
7102 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7104 if (loop_vinfo
7105 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7106 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7107 memory_access_type);
7109 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7110 /* The SLP costs are calculated during SLP analysis. */
7111 if (!PURE_SLP_STMT (stmt_info))
7112 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7113 NULL, NULL, NULL);
7114 return true;
7117 if (!slp)
7118 gcc_assert (memory_access_type
7119 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7121 if (dump_enabled_p ())
7122 dump_printf_loc (MSG_NOTE, vect_location,
7123 "transform load. ncopies = %d\n", ncopies);
7125 /* Transform. */
7127 ensure_base_align (dr);
7129 if (memory_access_type == VMAT_GATHER_SCATTER)
7131 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
7132 return true;
7135 if (memory_access_type == VMAT_ELEMENTWISE
7136 || memory_access_type == VMAT_STRIDED_SLP)
7138 gimple_stmt_iterator incr_gsi;
7139 bool insert_after;
7140 gimple *incr;
7141 tree offvar;
7142 tree ivstep;
7143 tree running_off;
7144 vec<constructor_elt, va_gc> *v = NULL;
7145 gimple_seq stmts = NULL;
7146 tree stride_base, stride_step, alias_off;
7147 /* Checked by get_load_store_type. */
7148 unsigned int const_nunits = nunits.to_constant ();
7150 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7151 gcc_assert (!nested_in_vect_loop);
7153 if (slp && grouped_load)
7155 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7156 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7157 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7158 ref_type = get_group_alias_ptr_type (first_stmt);
7160 else
7162 first_stmt = stmt;
7163 first_dr = dr;
7164 group_size = 1;
7165 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7168 stride_base
7169 = fold_build_pointer_plus
7170 (DR_BASE_ADDRESS (first_dr),
7171 size_binop (PLUS_EXPR,
7172 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7173 convert_to_ptrofftype (DR_INIT (first_dr))));
7174 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7176 /* For a load with loop-invariant (but other than power-of-2)
7177 stride (i.e. not a grouped access) like so:
7179 for (i = 0; i < n; i += stride)
7180 ... = array[i];
7182 we generate a new induction variable and new accesses to
7183 form a new vector (or vectors, depending on ncopies):
7185 for (j = 0; ; j += VF*stride)
7186 tmp1 = array[j];
7187 tmp2 = array[j + stride];
7189 vectemp = {tmp1, tmp2, ...}
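For instance (purely illustrative), with V4SI vectors (so VF == 4 in the
simplest case) and stride == 3, one copy loads array[j], array[j + 3],
array[j + 6] and array[j + 9] and combines them into vectemp, after which
the induction variable advances by VF * stride == 12 elements.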
7192 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7193 build_int_cst (TREE_TYPE (stride_step), vf));
7195 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7197 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7198 loop, &incr_gsi, insert_after,
7199 &offvar, NULL);
7200 incr = gsi_stmt (incr_gsi);
7201 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7203 stride_step = force_gimple_operand (unshare_expr (stride_step),
7204 &stmts, true, NULL_TREE);
7205 if (stmts)
7206 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7208 prev_stmt_info = NULL;
7209 running_off = offvar;
7210 alias_off = build_int_cst (ref_type, 0);
7211 int nloads = const_nunits;
7212 int lnel = 1;
7213 tree ltype = TREE_TYPE (vectype);
7214 tree lvectype = vectype;
7215 auto_vec<tree> dr_chain;
7216 if (memory_access_type == VMAT_STRIDED_SLP)
7218 if (group_size < const_nunits)
7220 /* First check if vec_init optab supports construction from
7221 vector elts directly. */
7222 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7223 machine_mode vmode;
7224 if (mode_for_vector (elmode, group_size).exists (&vmode)
7225 && VECTOR_MODE_P (vmode)
7226 && (convert_optab_handler (vec_init_optab,
7227 TYPE_MODE (vectype), vmode)
7228 != CODE_FOR_nothing))
7230 nloads = const_nunits / group_size;
7231 lnel = group_size;
7232 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7234 else
7236 /* Otherwise avoid emitting a constructor of vector elements
7237 by performing the loads using an integer type of the same
7238 size, constructing a vector of those and then
7239 re-interpreting it as the original vector type.
7240 This avoids a huge runtime penalty due to the general
7241 inability to perform store forwarding from smaller stores
7242 to a larger load. */
7243 unsigned lsize
7244 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7245 elmode = int_mode_for_size (lsize, 0).require ();
7246 unsigned int lnunits = const_nunits / group_size;
7247 /* If we can't construct such a vector fall back to
7248 element loads of the original vector type. */
7249 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7250 && VECTOR_MODE_P (vmode)
7251 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7252 != CODE_FOR_nothing))
7254 nloads = lnunits;
7255 lnel = group_size;
7256 ltype = build_nonstandard_integer_type (lsize, 1);
7257 lvectype = build_vector_type (ltype, nloads);
7261 else
7263 nloads = 1;
7264 lnel = const_nunits;
7265 ltype = vectype;
7267 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
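/* Illustrative example of the integer fallback above: loading groups of
   two float elements into a V4SF vector gives lsize == 64, so two DImode
   loads are emitted, combined into a V2DI constructor and VIEW_CONVERTed
   back to V4SF -- trading one vector punning statement for the avoidance
   of a store-forwarding stall from the smaller scalar stores.  */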
7269 if (slp)
7271 /* For SLP permutation support we need to load the whole group,
7272 not only the number of vector stmts the permutation result
7273 fits in. */
7274 if (slp_perm)
7276 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7277 variable VF. */
7278 unsigned int const_vf = vf.to_constant ();
7279 ncopies = CEIL (group_size * const_vf, const_nunits);
7280 dr_chain.create (ncopies);
7282 else
7283 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7285 unsigned int group_el = 0;
7286 unsigned HOST_WIDE_INT
7287 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7288 for (j = 0; j < ncopies; j++)
7290 if (nloads > 1)
7291 vec_alloc (v, nloads);
7292 for (i = 0; i < nloads; i++)
7294 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7295 group_el * elsz);
7296 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7297 build2 (MEM_REF, ltype,
7298 running_off, this_off));
7299 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7300 if (nloads > 1)
7301 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7302 gimple_assign_lhs (new_stmt));
7304 group_el += lnel;
7305 if (! slp
7306 || group_el == group_size)
7308 tree newoff = copy_ssa_name (running_off);
7309 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7310 running_off, stride_step);
7311 vect_finish_stmt_generation (stmt, incr, gsi);
7313 running_off = newoff;
7314 group_el = 0;
7317 if (nloads > 1)
7319 tree vec_inv = build_constructor (lvectype, v);
7320 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7321 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7322 if (lvectype != vectype)
7324 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7325 VIEW_CONVERT_EXPR,
7326 build1 (VIEW_CONVERT_EXPR,
7327 vectype, new_temp));
7328 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7332 if (slp)
7334 if (slp_perm)
7335 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7336 else
7337 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7339 else
7341 if (j == 0)
7342 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7343 else
7344 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7345 prev_stmt_info = vinfo_for_stmt (new_stmt);
7348 if (slp_perm)
7350 unsigned n_perms;
7351 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7352 slp_node_instance, false, &n_perms);
7354 return true;
7357 if (grouped_load)
7359 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7360 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7361 /* For SLP vectorization we directly vectorize a subchain
7362 without permutation. */
7363 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7364 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7365 /* For BB vectorization always use the first stmt to base
7366 the data ref pointer on. */
7367 if (bb_vinfo)
7368 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7370 /* Check if the chain of loads is already vectorized. */
7371 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7372 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7373 ??? But we can only do so if there is exactly one
7374 as we have no way to get at the rest. Leave the CSE
7375 opportunity alone.
7376 ??? With the group load eventually participating
7377 in multiple different permutations (having multiple
7378 slp nodes which refer to the same group) the CSE
7379 is even wrong code. See PR56270. */
7380 && !slp)
7382 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7383 return true;
7385 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7386 group_gap_adj = 0;
7388 /* VEC_NUM is the number of vect stmts to be created for this group. */
7389 if (slp)
7391 grouped_load = false;
7392 /* For SLP permutation support we need to load the whole group,
7393 not only the number of vector stmts the permutation result
7394 fits in. */
7395 if (slp_perm)
7397 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7398 variable VF. */
7399 unsigned int const_vf = vf.to_constant ();
7400 unsigned int const_nunits = nunits.to_constant ();
7401 vec_num = CEIL (group_size * const_vf, const_nunits);
7402 group_gap_adj = vf * group_size - nunits * vec_num;
7404 else
7406 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7407 group_gap_adj
7408 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7411 else
7412 vec_num = group_size;
7414 ref_type = get_group_alias_ptr_type (first_stmt);
7416 else
7418 first_stmt = stmt;
7419 first_dr = dr;
7420 group_size = vec_num = 1;
7421 group_gap_adj = 0;
7422 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7425 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7426 gcc_assert (alignment_support_scheme);
7427 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7428 /* Targets with load-lane instructions must not require explicit
7429 realignment. vect_supportable_dr_alignment always returns either
7430 dr_aligned or dr_unaligned_supported for masked operations. */
7431 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7432 && !mask
7433 && !masked_loop_p)
7434 || alignment_support_scheme == dr_aligned
7435 || alignment_support_scheme == dr_unaligned_supported);
7437 /* In case the vectorization factor (VF) is bigger than the number
7438 of elements that we can fit in a vectype (nunits), we have to generate
7439 more than one vector stmt - i.e - we need to "unroll" the
7440 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7441 from one copy of the vector stmt to the next, in the field
7442 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7443 stages to find the correct vector defs to be used when vectorizing
7444 stmts that use the defs of the current stmt. The example below
7445 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7446 need to create 4 vectorized stmts):
7448 before vectorization:
7449 RELATED_STMT VEC_STMT
7450 S1: x = memref - -
7451 S2: z = x + 1 - -
7453 step 1: vectorize stmt S1:
7454 We first create the vector stmt VS1_0, and, as usual, record a
7455 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7456 Next, we create the vector stmt VS1_1, and record a pointer to
7457 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7458 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7459 stmts and pointers:
7460 RELATED_STMT VEC_STMT
7461 VS1_0: vx0 = memref0 VS1_1 -
7462 VS1_1: vx1 = memref1 VS1_2 -
7463 VS1_2: vx2 = memref2 VS1_3 -
7464 VS1_3: vx3 = memref3 - -
7465 S1: x = load - VS1_0
7466 S2: z = x + 1 - -
7468 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7469 information recorded in the RELATED_STMT field is used to vectorize
7470 stmt S2. */
7472 /* In case of interleaving (non-unit grouped access):
7474 S1: x2 = &base + 2
7475 S2: x0 = &base
7476 S3: x1 = &base + 1
7477 S4: x3 = &base + 3
7479 Vectorized loads are created in the order of memory accesses
7480 starting from the access of the first stmt of the chain:
7482 VS1: vx0 = &base
7483 VS2: vx1 = &base + vec_size*1
7484 VS3: vx2 = &base + vec_size*2
7485 VS4: vx3 = &base + vec_size*3
7487 Then permutation statements are generated:
7489 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7490 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7493 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7494 (the order of the data-refs in the output of vect_permute_load_chain
7495 corresponds to the order of scalar stmts in the interleaving chain - see
7496 the documentation of vect_permute_load_chain()).
7497 The generation of permutation stmts and recording them in
7498 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7500 In case of both multiple types and interleaving, the vector loads and
7501 permutation stmts above are created for every copy. The result vector
7502 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7503 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7505 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7506 on a target that supports unaligned accesses (dr_unaligned_supported)
7507 we generate the following code:
7508 p = initial_addr;
7509 indx = 0;
7510 loop {
7511 p = p + indx * vectype_size;
7512 vec_dest = *(p);
7513 indx = indx + 1;
7516 Otherwise, the data reference is potentially unaligned on a target that
7517 does not support unaligned accesses (dr_explicit_realign_optimized) -
7518 then generate the following code, in which the data in each iteration is
7519 obtained by two vector loads, one from the previous iteration, and one
7520 from the current iteration:
7521 p1 = initial_addr;
7522 msq_init = *(floor(p1))
7523 p2 = initial_addr + VS - 1;
7524 realignment_token = call target_builtin;
7525 indx = 0;
7526 loop {
7527 p2 = p2 + indx * vectype_size
7528 lsq = *(floor(p2))
7529 vec_dest = realign_load (msq, lsq, realignment_token)
7530 indx = indx + 1;
7531 msq = lsq;
7532 } */
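  /* For example, with 16-byte vectors and a pointer that is 4 bytes
     past a 16-byte boundary, msq is the aligned vector holding the
     first 12 useful bytes and lsq the aligned vector holding the
     remaining 4; realign_load combines the two using the realignment
     token derived from the address.  */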
7534 /* If the misalignment remains the same throughout the execution of the
7535 loop, we can create the init_addr and permutation mask at the loop
7536 preheader. Otherwise, it needs to be created inside the loop.
7537 This can only occur when vectorizing memory accesses in the inner-loop
7538 nested within an outer-loop that is being vectorized. */
7540 if (nested_in_vect_loop
7541 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7542 GET_MODE_SIZE (TYPE_MODE (vectype))))
7544 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7545 compute_in_loop = true;
7548 if ((alignment_support_scheme == dr_explicit_realign_optimized
7549 || alignment_support_scheme == dr_explicit_realign)
7550 && !compute_in_loop)
7552 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7553 alignment_support_scheme, NULL_TREE,
7554 &at_loop);
7555 if (alignment_support_scheme == dr_explicit_realign_optimized)
7557 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7558 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7559 size_one_node);
7562 else
7563 at_loop = loop;
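  /* For a reverse (negative-step) access, bias the data-ref pointer
     back by NUNITS - 1 elements so that each vector load covers the
     element of the current scalar iteration together with the ones
     that precede it; the loaded vector is reversed with a permute
     further down.  */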
7565 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7566 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7568 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7569 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7570 else
7571 aggr_type = vectype;
7573 tree vec_mask = NULL_TREE;
7574 prev_stmt_info = NULL;
7575 poly_uint64 group_elt = 0;
7576 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
7577 for (j = 0; j < ncopies; j++)
7579 /* 1. Create the vector or array pointer update chain. */
7580 if (j == 0)
7582 bool simd_lane_access_p
7583 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7584 if (simd_lane_access_p
7585 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7586 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7587 && integer_zerop (DR_OFFSET (first_dr))
7588 && integer_zerop (DR_INIT (first_dr))
7589 && alias_sets_conflict_p (get_alias_set (aggr_type),
7590 get_alias_set (TREE_TYPE (ref_type)))
7591 && (alignment_support_scheme == dr_aligned
7592 || alignment_support_scheme == dr_unaligned_supported))
7594 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7595 dataref_offset = build_int_cst (ref_type, 0);
7596 inv_p = false;
7598 else if (first_stmt_for_drptr
7599 && first_stmt != first_stmt_for_drptr)
7601 dataref_ptr
7602 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7603 at_loop, offset, &dummy, gsi,
7604 &ptr_incr, simd_lane_access_p,
7605 &inv_p, byte_offset);
7606 /* Adjust the pointer by the difference to first_stmt. */
7607 data_reference_p ptrdr
7608 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7609 tree diff = fold_convert (sizetype,
7610 size_binop (MINUS_EXPR,
7611 DR_INIT (first_dr),
7612 DR_INIT (ptrdr)));
7613 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7614 stmt, diff);
7616 else
7617 dataref_ptr
7618 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7619 offset, &dummy, gsi, &ptr_incr,
7620 simd_lane_access_p, &inv_p,
7621 byte_offset);
7622 if (mask)
7623 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7624 mask_vectype);
7626 else
7628 if (dataref_offset)
7629 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7630 TYPE_SIZE_UNIT (aggr_type));
7631 else
7632 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7633 TYPE_SIZE_UNIT (aggr_type));
7634 if (mask)
7636 gimple *def_stmt;
7637 vect_def_type dt;
7638 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
7639 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
7643 if (grouped_load || slp_perm)
7644 dr_chain.create (vec_num);
7646 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7648 tree vec_array;
7650 vec_array = create_vector_array (vectype, vec_num);
7652 tree final_mask = NULL_TREE;
7653 if (masked_loop_p)
7654 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
7655 if (vec_mask)
7656 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7657 vec_mask, gsi);
7659 gcall *call;
7660 if (final_mask)
7662 /* Emit:
7663 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
7664 VEC_MASK). */
7665 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7666 tree alias_ptr = build_int_cst (ref_type, align);
7667 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
7668 dataref_ptr, alias_ptr,
7669 final_mask);
7671 else
7673 /* Emit:
7674 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7675 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7676 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7678 gimple_call_set_lhs (call, vec_array);
7679 gimple_call_set_nothrow (call, true);
7680 new_stmt = call;
7681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
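  /* The load-lanes call (masked or not) de-interleaves the group
     itself, so the vectors extracted from VEC_ARRAY below are already
     in scalar-stmt order and no separate permutation statements are
     needed (note the VMAT_LOAD_STORE_LANES check before
     vect_transform_grouped_load at the end of the function).  */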
7683 /* Extract each vector into an SSA_NAME. */
7684 for (i = 0; i < vec_num; i++)
7686 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7687 vec_array, i);
7688 dr_chain.quick_push (new_temp);
7691 /* Record the mapping between SSA_NAMEs and statements. */
7692 vect_record_grouped_load_vectors (stmt, dr_chain);
7694 else
7696 for (i = 0; i < vec_num; i++)
7698 tree final_mask = NULL_TREE;
7699 if (masked_loop_p
7700 && memory_access_type != VMAT_INVARIANT)
7701 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
7702 vectype, vec_num * j + i);
7703 if (vec_mask)
7704 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7705 vec_mask, gsi);
7707 if (i > 0)
7708 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7709 stmt, NULL_TREE);
7711 /* 2. Create the vector-load in the loop. */
7712 switch (alignment_support_scheme)
7714 case dr_aligned:
7715 case dr_unaligned_supported:
7717 unsigned int align, misalign;
7719 align = DR_TARGET_ALIGNMENT (dr);
7720 if (alignment_support_scheme == dr_aligned)
7722 gcc_assert (aligned_access_p (first_dr));
7723 misalign = 0;
7725 else if (DR_MISALIGNMENT (first_dr) == -1)
7727 align = dr_alignment (vect_dr_behavior (first_dr));
7728 misalign = 0;
7730 else
7731 misalign = DR_MISALIGNMENT (first_dr);
7732 if (dataref_offset == NULL_TREE
7733 && TREE_CODE (dataref_ptr) == SSA_NAME)
7734 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7735 align, misalign);
7737 if (final_mask)
7739 align = least_bit_hwi (misalign | align);
7740 tree ptr = build_int_cst (ref_type, align);
7741 gcall *call
7742 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
7743 dataref_ptr, ptr,
7744 final_mask);
7745 gimple_call_set_nothrow (call, true);
7746 new_stmt = call;
7747 data_ref = NULL_TREE;
7749 else
7751 data_ref
7752 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7753 dataref_offset
7754 ? dataref_offset
7755 : build_int_cst (ref_type, 0));
7756 if (alignment_support_scheme == dr_aligned)
7758 else if (DR_MISALIGNMENT (first_dr) == -1)
7759 TREE_TYPE (data_ref)
7760 = build_aligned_type (TREE_TYPE (data_ref),
7761 align * BITS_PER_UNIT);
7762 else
7763 TREE_TYPE (data_ref)
7764 = build_aligned_type (TREE_TYPE (data_ref),
7765 TYPE_ALIGN (elem_type));
7767 break;
7769 case dr_explicit_realign:
7771 tree ptr, bump;
7773 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7775 if (compute_in_loop)
7776 msq = vect_setup_realignment (first_stmt, gsi,
7777 &realignment_token,
7778 dr_explicit_realign,
7779 dataref_ptr, NULL);
7781 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7782 ptr = copy_ssa_name (dataref_ptr);
7783 else
7784 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7785 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7786 new_stmt = gimple_build_assign
7787 (ptr, BIT_AND_EXPR, dataref_ptr,
7788 build_int_cst
7789 (TREE_TYPE (dataref_ptr),
7790 -(HOST_WIDE_INT) align));
7791 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7792 data_ref
7793 = build2 (MEM_REF, vectype, ptr,
7794 build_int_cst (ref_type, 0));
7795 vec_dest = vect_create_destination_var (scalar_dest,
7796 vectype);
7797 new_stmt = gimple_build_assign (vec_dest, data_ref);
7798 new_temp = make_ssa_name (vec_dest, new_stmt);
7799 gimple_assign_set_lhs (new_stmt, new_temp);
7800 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7801 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7803 msq = new_temp;
7805 bump = size_binop (MULT_EXPR, vs,
7806 TYPE_SIZE_UNIT (elem_type));
7807 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7808 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7809 new_stmt = gimple_build_assign
7810 (NULL_TREE, BIT_AND_EXPR, ptr,
7811 build_int_cst
7812 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7813 ptr = copy_ssa_name (ptr, new_stmt);
7814 gimple_assign_set_lhs (new_stmt, ptr);
7815 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7816 data_ref
7817 = build2 (MEM_REF, vectype, ptr,
7818 build_int_cst (ref_type, 0));
7819 break;
7821 case dr_explicit_realign_optimized:
7823 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7824 new_temp = copy_ssa_name (dataref_ptr);
7825 else
7826 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7827 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7828 new_stmt = gimple_build_assign
7829 (new_temp, BIT_AND_EXPR, dataref_ptr,
7830 build_int_cst (TREE_TYPE (dataref_ptr),
7831 -(HOST_WIDE_INT) align));
7832 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7833 data_ref
7834 = build2 (MEM_REF, vectype, new_temp,
7835 build_int_cst (ref_type, 0));
7836 break;
7838 default:
7839 gcc_unreachable ();
7841 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7842 /* DATA_REF is null if we've already built the statement. */
7843 if (data_ref)
7844 new_stmt = gimple_build_assign (vec_dest, data_ref);
7845 new_temp = make_ssa_name (vec_dest, new_stmt);
7846 gimple_set_lhs (new_stmt, new_temp);
7847 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7849 /* 3. Handle explicit realignment if necessary/supported.
7850 Create in loop:
7851 vec_dest = realign_load (msq, lsq, realignment_token) */
7852 if (alignment_support_scheme == dr_explicit_realign_optimized
7853 || alignment_support_scheme == dr_explicit_realign)
7855 lsq = gimple_assign_lhs (new_stmt);
7856 if (!realignment_token)
7857 realignment_token = dataref_ptr;
7858 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7859 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7860 msq, lsq, realignment_token);
7861 new_temp = make_ssa_name (vec_dest, new_stmt);
7862 gimple_assign_set_lhs (new_stmt, new_temp);
7863 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7865 if (alignment_support_scheme == dr_explicit_realign_optimized)
7867 gcc_assert (phi);
7868 if (i == vec_num - 1 && j == ncopies - 1)
7869 add_phi_arg (phi, lsq,
7870 loop_latch_edge (containing_loop),
7871 UNKNOWN_LOCATION);
7872 msq = lsq;
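  /* PHI is the loop-carried definition of MSQ created by
     vect_setup_realignment; adding LSQ of the last copy as its latch
     argument lets the next loop iteration reuse this iteration's
     aligned load, matching the msq = lsq step of the scheme above.  */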
7876 /* 4. Handle invariant-load. */
7877 if (inv_p && !bb_vinfo)
7879 gcc_assert (!grouped_load);
7880 /* If we have versioned for aliasing or the loop doesn't
7881 have any data dependencies that would preclude this,
7882 then we are sure this is a loop invariant load and
7883 thus we can insert it on the preheader edge. */
7884 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7885 && !nested_in_vect_loop
7886 && hoist_defs_of_uses (stmt, loop))
7888 if (dump_enabled_p ())
7890 dump_printf_loc (MSG_NOTE, vect_location,
7891 "hoisting out of the vectorized "
7892 "loop: ");
7893 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7895 tree tem = copy_ssa_name (scalar_dest);
7896 gsi_insert_on_edge_immediate
7897 (loop_preheader_edge (loop),
7898 gimple_build_assign (tem,
7899 unshare_expr
7900 (gimple_assign_rhs1 (stmt))));
7901 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7902 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7903 set_vinfo_for_stmt (new_stmt,
7904 new_stmt_vec_info (new_stmt, vinfo));
7906 else
7908 gimple_stmt_iterator gsi2 = *gsi;
7909 gsi_next (&gsi2);
7910 new_temp = vect_init_vector (stmt, scalar_dest,
7911 vectype, &gsi2);
7912 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7916 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7918 tree perm_mask = perm_mask_for_reverse (vectype);
7919 new_temp = permute_vec_elements (new_temp, new_temp,
7920 perm_mask, stmt, gsi);
7921 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7924 /* Collect vector loads and later create their permutation in
7925 vect_transform_grouped_load (). */
7926 if (grouped_load || slp_perm)
7927 dr_chain.quick_push (new_temp);
7929 /* Store vector loads in the corresponding SLP_NODE. */
7930 if (slp && !slp_perm)
7931 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7933 /* With an SLP permutation we load the gaps as well; without one
7934 we need to skip the gaps once we have managed to fully load
7935 all elements. group_gap_adj is GROUP_SIZE here. */
7936 group_elt += nunits;
7937 if (maybe_ne (group_gap_adj, 0U)
7938 && !slp_perm
7939 && known_eq (group_elt, group_size - group_gap_adj))
7941 poly_wide_int bump_val
7942 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7943 * group_gap_adj);
7944 tree bump = wide_int_to_tree (sizetype, bump_val);
7945 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7946 stmt, bump);
7947 group_elt = 0;
7950 /* Bump the vector pointer to account for a gap or for excess
7951 elements loaded for a permuted SLP load. */
7952 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
7954 poly_wide_int bump_val
7955 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7956 * group_gap_adj);
7957 tree bump = wide_int_to_tree (sizetype, bump_val);
7958 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7959 stmt, bump);
7963 if (slp && !slp_perm)
7964 continue;
7966 if (slp_perm)
7968 unsigned n_perms;
7969 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7970 slp_node_instance, false,
7971 &n_perms))
7973 dr_chain.release ();
7974 return false;
7977 else
7979 if (grouped_load)
7981 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7982 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7983 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7985 else
7987 if (j == 0)
7988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7989 else
7990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7991 prev_stmt_info = vinfo_for_stmt (new_stmt);
7994 dr_chain.release ();
7997 return true;
8000 /* Function vect_is_simple_cond.
8002 Input:
8003 LOOP - the loop that is being vectorized.
8004 COND - Condition that is checked for simple use.
8006 Output:
8007 *COMP_VECTYPE - the vector type for the comparison.
8008 *DTS - The def types for the arguments of the comparison
8010 Returns whether a COND can be vectorized. Checks whether
8011 condition operands are supportable using vect_is_simple_use. */
8013 static bool
8014 vect_is_simple_cond (tree cond, vec_info *vinfo,
8015 tree *comp_vectype, enum vect_def_type *dts,
8016 tree vectype)
8018 tree lhs, rhs;
8019 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8021 /* Mask case. */
8022 if (TREE_CODE (cond) == SSA_NAME
8023 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8025 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8026 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8027 &dts[0], comp_vectype)
8028 || !*comp_vectype
8029 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8030 return false;
8031 return true;
8034 if (!COMPARISON_CLASS_P (cond))
8035 return false;
8037 lhs = TREE_OPERAND (cond, 0);
8038 rhs = TREE_OPERAND (cond, 1);
8040 if (TREE_CODE (lhs) == SSA_NAME)
8042 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8043 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8044 return false;
8046 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8047 || TREE_CODE (lhs) == FIXED_CST)
8048 dts[0] = vect_constant_def;
8049 else
8050 return false;
8052 if (TREE_CODE (rhs) == SSA_NAME)
8054 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8055 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8056 return false;
8058 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8059 || TREE_CODE (rhs) == FIXED_CST)
8060 dts[1] = vect_constant_def;
8061 else
8062 return false;
8064 if (vectype1 && vectype2
8065 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8066 TYPE_VECTOR_SUBPARTS (vectype2)))
8067 return false;
8069 *comp_vectype = vectype1 ? vectype1 : vectype2;
8070 /* Invariant comparison. */
8071 if (! *comp_vectype)
8073 tree scalar_type = TREE_TYPE (lhs);
8074 /* If we can widen the comparison to match vectype do so. */
8075 if (INTEGRAL_TYPE_P (scalar_type)
8076 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8077 TYPE_SIZE (TREE_TYPE (vectype))))
8078 scalar_type = build_nonstandard_integer_type
8079 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8080 TYPE_UNSIGNED (scalar_type));
8081 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
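  /* E.g. a comparison between 8-bit invariants feeding a vector of
     32-bit elements is widened above to a 32-bit integer type so that
     COMP_VECTYPE matches the layout of VECTYPE.  */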
8084 return true;
8087 /* vectorizable_condition.
8089 Check if STMT is a conditional modify expression that can be vectorized.
8090 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8091 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8092 at GSI.
8094 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8095 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
8096 else clause if it is 2).
8098 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8100 bool
8101 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8102 gimple **vec_stmt, tree reduc_def, int reduc_index,
8103 slp_tree slp_node)
8105 tree scalar_dest = NULL_TREE;
8106 tree vec_dest = NULL_TREE;
8107 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8108 tree then_clause, else_clause;
8109 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8110 tree comp_vectype = NULL_TREE;
8111 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8112 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8113 tree vec_compare;
8114 tree new_temp;
8115 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8116 enum vect_def_type dts[4]
8117 = {vect_unknown_def_type, vect_unknown_def_type,
8118 vect_unknown_def_type, vect_unknown_def_type};
8119 int ndts = 4;
8120 int ncopies;
8121 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8122 stmt_vec_info prev_stmt_info = NULL;
8123 int i, j;
8124 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8125 vec<tree> vec_oprnds0 = vNULL;
8126 vec<tree> vec_oprnds1 = vNULL;
8127 vec<tree> vec_oprnds2 = vNULL;
8128 vec<tree> vec_oprnds3 = vNULL;
8129 tree vec_cmp_type;
8130 bool masked = false;
8132 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8133 return false;
8135 vect_reduction_type reduction_type
8136 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8137 if (reduction_type == TREE_CODE_REDUCTION)
8139 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8140 return false;
8142 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8143 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8144 && reduc_def))
8145 return false;
8147 /* FORNOW: not yet supported. */
8148 if (STMT_VINFO_LIVE_P (stmt_info))
8150 if (dump_enabled_p ())
8151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8152 "value used after loop.\n");
8153 return false;
8157 /* Is this a vectorizable conditional operation? */
8158 if (!is_gimple_assign (stmt))
8159 return false;
8161 code = gimple_assign_rhs_code (stmt);
8163 if (code != COND_EXPR)
8164 return false;
8166 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8167 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8169 if (slp_node)
8170 ncopies = 1;
8171 else
8172 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8174 gcc_assert (ncopies >= 1);
8175 if (reduc_index && ncopies > 1)
8176 return false; /* FORNOW */
8178 cond_expr = gimple_assign_rhs1 (stmt);
8179 then_clause = gimple_assign_rhs2 (stmt);
8180 else_clause = gimple_assign_rhs3 (stmt);
8182 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8183 &comp_vectype, &dts[0], vectype)
8184 || !comp_vectype)
8185 return false;
8187 gimple *def_stmt;
8188 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8189 &vectype1))
8190 return false;
8191 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8192 &vectype2))
8193 return false;
8195 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8196 return false;
8198 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8199 return false;
8201 masked = !COMPARISON_CLASS_P (cond_expr);
8202 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8204 if (vec_cmp_type == NULL_TREE)
8205 return false;
8207 cond_code = TREE_CODE (cond_expr);
8208 if (!masked)
8210 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8211 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8214 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8216 /* Boolean values may have another representation in vectors
8217 and therefore we prefer bit operations over comparison for
8218 them (which also works for scalar masks). We store opcodes
8219 to use in bitop1 and bitop2. Statement is vectorized as
8220 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8221 depending on bitop1 and bitop2 arity. */
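  /* For example, on boolean operands a > b becomes a & ~b,
     a >= b becomes a | ~b, a != b becomes a ^ b and a == b becomes
     ~(a ^ b); LT/LE additionally swap the operands.  */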
8222 switch (cond_code)
8224 case GT_EXPR:
8225 bitop1 = BIT_NOT_EXPR;
8226 bitop2 = BIT_AND_EXPR;
8227 break;
8228 case GE_EXPR:
8229 bitop1 = BIT_NOT_EXPR;
8230 bitop2 = BIT_IOR_EXPR;
8231 break;
8232 case LT_EXPR:
8233 bitop1 = BIT_NOT_EXPR;
8234 bitop2 = BIT_AND_EXPR;
8235 std::swap (cond_expr0, cond_expr1);
8236 break;
8237 case LE_EXPR:
8238 bitop1 = BIT_NOT_EXPR;
8239 bitop2 = BIT_IOR_EXPR;
8240 std::swap (cond_expr0, cond_expr1);
8241 break;
8242 case NE_EXPR:
8243 bitop1 = BIT_XOR_EXPR;
8244 break;
8245 case EQ_EXPR:
8246 bitop1 = BIT_XOR_EXPR;
8247 bitop2 = BIT_NOT_EXPR;
8248 break;
8249 default:
8250 return false;
8252 cond_code = SSA_NAME;
8255 if (!vec_stmt)
8257 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8258 if (bitop1 != NOP_EXPR)
8260 machine_mode mode = TYPE_MODE (comp_vectype);
8261 optab optab;
8263 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8264 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8265 return false;
8267 if (bitop2 != NOP_EXPR)
8269 optab = optab_for_tree_code (bitop2, comp_vectype,
8270 optab_default);
8271 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8272 return false;
8275 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8276 cond_code))
8278 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8279 return true;
8281 return false;
8284 /* Transform. */
8286 if (!slp_node)
8288 vec_oprnds0.create (1);
8289 vec_oprnds1.create (1);
8290 vec_oprnds2.create (1);
8291 vec_oprnds3.create (1);
8294 /* Handle def. */
8295 scalar_dest = gimple_assign_lhs (stmt);
8296 if (reduction_type != EXTRACT_LAST_REDUCTION)
8297 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8299 /* Handle cond expr. */
8300 for (j = 0; j < ncopies; j++)
8302 gimple *new_stmt = NULL;
8303 if (j == 0)
8305 if (slp_node)
8307 auto_vec<tree, 4> ops;
8308 auto_vec<vec<tree>, 4> vec_defs;
8310 if (masked)
8311 ops.safe_push (cond_expr);
8312 else
8314 ops.safe_push (cond_expr0);
8315 ops.safe_push (cond_expr1);
8317 ops.safe_push (then_clause);
8318 ops.safe_push (else_clause);
8319 vect_get_slp_defs (ops, slp_node, &vec_defs);
8320 vec_oprnds3 = vec_defs.pop ();
8321 vec_oprnds2 = vec_defs.pop ();
8322 if (!masked)
8323 vec_oprnds1 = vec_defs.pop ();
8324 vec_oprnds0 = vec_defs.pop ();
8326 else
8328 gimple *gtemp;
8329 if (masked)
8331 vec_cond_lhs
8332 = vect_get_vec_def_for_operand (cond_expr, stmt,
8333 comp_vectype);
8334 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8335 &gtemp, &dts[0]);
8337 else
8339 vec_cond_lhs
8340 = vect_get_vec_def_for_operand (cond_expr0,
8341 stmt, comp_vectype);
8342 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8344 vec_cond_rhs
8345 = vect_get_vec_def_for_operand (cond_expr1,
8346 stmt, comp_vectype);
8347 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8349 if (reduc_index == 1)
8350 vec_then_clause = reduc_def;
8351 else
8353 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8354 stmt);
8355 vect_is_simple_use (then_clause, loop_vinfo,
8356 &gtemp, &dts[2]);
8358 if (reduc_index == 2)
8359 vec_else_clause = reduc_def;
8360 else
8362 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8363 stmt);
8364 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8368 else
8370 vec_cond_lhs
8371 = vect_get_vec_def_for_stmt_copy (dts[0],
8372 vec_oprnds0.pop ());
8373 if (!masked)
8374 vec_cond_rhs
8375 = vect_get_vec_def_for_stmt_copy (dts[1],
8376 vec_oprnds1.pop ());
8378 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8379 vec_oprnds2.pop ());
8380 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8381 vec_oprnds3.pop ());
8384 if (!slp_node)
8386 vec_oprnds0.quick_push (vec_cond_lhs);
8387 if (!masked)
8388 vec_oprnds1.quick_push (vec_cond_rhs);
8389 vec_oprnds2.quick_push (vec_then_clause);
8390 vec_oprnds3.quick_push (vec_else_clause);
8393 /* Arguments are ready. Create the new vector stmt. */
8394 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8396 vec_then_clause = vec_oprnds2[i];
8397 vec_else_clause = vec_oprnds3[i];
8399 if (masked)
8400 vec_compare = vec_cond_lhs;
8401 else
8403 vec_cond_rhs = vec_oprnds1[i];
8404 if (bitop1 == NOP_EXPR)
8405 vec_compare = build2 (cond_code, vec_cmp_type,
8406 vec_cond_lhs, vec_cond_rhs);
8407 else
8409 new_temp = make_ssa_name (vec_cmp_type);
8410 if (bitop1 == BIT_NOT_EXPR)
8411 new_stmt = gimple_build_assign (new_temp, bitop1,
8412 vec_cond_rhs);
8413 else
8414 new_stmt
8415 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8416 vec_cond_rhs);
8417 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8418 if (bitop2 == NOP_EXPR)
8419 vec_compare = new_temp;
8420 else if (bitop2 == BIT_NOT_EXPR)
8422 /* Instead of doing ~x ? y : z do x ? z : y. */
8423 vec_compare = new_temp;
8424 std::swap (vec_then_clause, vec_else_clause);
8426 else
8428 vec_compare = make_ssa_name (vec_cmp_type);
8429 new_stmt
8430 = gimple_build_assign (vec_compare, bitop2,
8431 vec_cond_lhs, new_temp);
8432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8436 if (reduction_type == EXTRACT_LAST_REDUCTION)
8438 if (!is_gimple_val (vec_compare))
8440 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8441 new_stmt = gimple_build_assign (vec_compare_name,
8442 vec_compare);
8443 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8444 vec_compare = vec_compare_name;
8446 gcc_assert (reduc_index == 2);
8447 new_stmt = gimple_build_call_internal
8448 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8449 vec_then_clause);
8450 gimple_call_set_lhs (new_stmt, scalar_dest);
8451 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8452 if (stmt == gsi_stmt (*gsi))
8453 vect_finish_replace_stmt (stmt, new_stmt);
8454 else
8456 /* In this case we're moving the definition to later in the
8457 block. That doesn't matter because the only uses of the
8458 lhs are in phi statements. */
8459 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8460 gsi_remove (&old_gsi, true);
8461 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8464 else
8466 new_temp = make_ssa_name (vec_dest);
8467 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8468 vec_compare, vec_then_clause,
8469 vec_else_clause);
8470 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8472 if (slp_node)
8473 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8476 if (slp_node)
8477 continue;
8479 if (j == 0)
8480 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8481 else
8482 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8484 prev_stmt_info = vinfo_for_stmt (new_stmt);
8487 vec_oprnds0.release ();
8488 vec_oprnds1.release ();
8489 vec_oprnds2.release ();
8490 vec_oprnds3.release ();
8492 return true;
8495 /* vectorizable_comparison.
8497 Check if STMT is a comparison expression that can be vectorized.
8498 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8499 comparison, put it in VEC_STMT, and insert it at GSI.
8501 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8503 static bool
8504 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8505 gimple **vec_stmt, tree reduc_def,
8506 slp_tree slp_node)
8508 tree lhs, rhs1, rhs2;
8509 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8510 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8511 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8512 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8513 tree new_temp;
8514 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8515 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8516 int ndts = 2;
8517 poly_uint64 nunits;
8518 int ncopies;
8519 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8520 stmt_vec_info prev_stmt_info = NULL;
8521 int i, j;
8522 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8523 vec<tree> vec_oprnds0 = vNULL;
8524 vec<tree> vec_oprnds1 = vNULL;
8525 gimple *def_stmt;
8526 tree mask_type;
8527 tree mask;
8529 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8530 return false;
8532 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8533 return false;
8535 mask_type = vectype;
8536 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8538 if (slp_node)
8539 ncopies = 1;
8540 else
8541 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8543 gcc_assert (ncopies >= 1);
8544 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8545 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8546 && reduc_def))
8547 return false;
8549 if (STMT_VINFO_LIVE_P (stmt_info))
8551 if (dump_enabled_p ())
8552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8553 "value used after loop.\n");
8554 return false;
8557 if (!is_gimple_assign (stmt))
8558 return false;
8560 code = gimple_assign_rhs_code (stmt);
8562 if (TREE_CODE_CLASS (code) != tcc_comparison)
8563 return false;
8565 rhs1 = gimple_assign_rhs1 (stmt);
8566 rhs2 = gimple_assign_rhs2 (stmt);
8568 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8569 &dts[0], &vectype1))
8570 return false;
8572 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8573 &dts[1], &vectype2))
8574 return false;
8576 if (vectype1 && vectype2
8577 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8578 TYPE_VECTOR_SUBPARTS (vectype2)))
8579 return false;
8581 vectype = vectype1 ? vectype1 : vectype2;
8583 /* Invariant comparison. */
8584 if (!vectype)
8586 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8587 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
8588 return false;
8590 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
8591 return false;
8593 /* Can't compare mask and non-mask types. */
8594 if (vectype1 && vectype2
8595 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8596 return false;
8598 /* Boolean values may have another representation in vectors
8599 and therefore we prefer bit operations over comparison for
8600 them (which also works for scalar masks). We store opcodes
8601 to use in bitop1 and bitop2. Statement is vectorized as
8602 BITOP2 (rhs1 BITOP1 rhs2) or
8603 rhs1 BITOP2 (BITOP1 rhs2)
8604 depending on bitop1 and bitop2 arity. */
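  /* As in vectorizable_condition: on boolean operands GT becomes
     rhs1 & ~rhs2, GE becomes rhs1 | ~rhs2, NE becomes rhs1 ^ rhs2 and
     EQ becomes ~(rhs1 ^ rhs2), with LT/LE handled by swapping the
     operands (and their def types).  */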
8605 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8607 if (code == GT_EXPR)
8609 bitop1 = BIT_NOT_EXPR;
8610 bitop2 = BIT_AND_EXPR;
8612 else if (code == GE_EXPR)
8614 bitop1 = BIT_NOT_EXPR;
8615 bitop2 = BIT_IOR_EXPR;
8617 else if (code == LT_EXPR)
8619 bitop1 = BIT_NOT_EXPR;
8620 bitop2 = BIT_AND_EXPR;
8621 std::swap (rhs1, rhs2);
8622 std::swap (dts[0], dts[1]);
8624 else if (code == LE_EXPR)
8626 bitop1 = BIT_NOT_EXPR;
8627 bitop2 = BIT_IOR_EXPR;
8628 std::swap (rhs1, rhs2);
8629 std::swap (dts[0], dts[1]);
8631 else
8633 bitop1 = BIT_XOR_EXPR;
8634 if (code == EQ_EXPR)
8635 bitop2 = BIT_NOT_EXPR;
8639 if (!vec_stmt)
8641 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8642 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8643 dts, ndts, NULL, NULL);
8644 if (bitop1 == NOP_EXPR)
8645 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8646 else
8648 machine_mode mode = TYPE_MODE (vectype);
8649 optab optab;
8651 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8652 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8653 return false;
8655 if (bitop2 != NOP_EXPR)
8657 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8658 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8659 return false;
8661 return true;
8665 /* Transform. */
8666 if (!slp_node)
8668 vec_oprnds0.create (1);
8669 vec_oprnds1.create (1);
8672 /* Handle def. */
8673 lhs = gimple_assign_lhs (stmt);
8674 mask = vect_create_destination_var (lhs, mask_type);
8676 /* Handle cmp expr. */
8677 for (j = 0; j < ncopies; j++)
8679 gassign *new_stmt = NULL;
8680 if (j == 0)
8682 if (slp_node)
8684 auto_vec<tree, 2> ops;
8685 auto_vec<vec<tree>, 2> vec_defs;
8687 ops.safe_push (rhs1);
8688 ops.safe_push (rhs2);
8689 vect_get_slp_defs (ops, slp_node, &vec_defs);
8690 vec_oprnds1 = vec_defs.pop ();
8691 vec_oprnds0 = vec_defs.pop ();
8693 else
8695 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8696 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8699 else
8701 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8702 vec_oprnds0.pop ());
8703 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8704 vec_oprnds1.pop ());
8707 if (!slp_node)
8709 vec_oprnds0.quick_push (vec_rhs1);
8710 vec_oprnds1.quick_push (vec_rhs2);
8713 /* Arguments are ready. Create the new vector stmt. */
8714 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8716 vec_rhs2 = vec_oprnds1[i];
8718 new_temp = make_ssa_name (mask);
8719 if (bitop1 == NOP_EXPR)
8721 new_stmt = gimple_build_assign (new_temp, code,
8722 vec_rhs1, vec_rhs2);
8723 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8725 else
8727 if (bitop1 == BIT_NOT_EXPR)
8728 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8729 else
8730 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8731 vec_rhs2);
8732 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8733 if (bitop2 != NOP_EXPR)
8735 tree res = make_ssa_name (mask);
8736 if (bitop2 == BIT_NOT_EXPR)
8737 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8738 else
8739 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8740 new_temp);
8741 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8744 if (slp_node)
8745 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8748 if (slp_node)
8749 continue;
8751 if (j == 0)
8752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8753 else
8754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8756 prev_stmt_info = vinfo_for_stmt (new_stmt);
8759 vec_oprnds0.release ();
8760 vec_oprnds1.release ();
8762 return true;
8765 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8766 can handle all live statements in the node. Otherwise return true
8767 if STMT is not live or if vectorizable_live_operation can handle it.
8768 GSI and VEC_STMT are as for vectorizable_live_operation. */
8770 static bool
8771 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8772 slp_tree slp_node, gimple **vec_stmt)
8774 if (slp_node)
8776 gimple *slp_stmt;
8777 unsigned int i;
8778 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8780 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8781 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8782 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8783 vec_stmt))
8784 return false;
8787 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8788 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8789 return false;
8791 return true;
8794 /* Make sure the statement is vectorizable. */
8796 bool
8797 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8798 slp_instance node_instance)
8800 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8801 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8802 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8803 bool ok;
8804 gimple *pattern_stmt;
8805 gimple_seq pattern_def_seq;
8807 if (dump_enabled_p ())
8809 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8810 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8813 if (gimple_has_volatile_ops (stmt))
8815 if (dump_enabled_p ())
8816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8817 "not vectorized: stmt has volatile operands\n");
8819 return false;
8822 /* Skip stmts that do not need to be vectorized. In loops this is expected
8823 to include:
8824 - the COND_EXPR which is the loop exit condition
8825 - any LABEL_EXPRs in the loop
8826 - computations that are used only for array indexing or loop control.
8827 In basic blocks we only analyze statements that are a part of some SLP
8828 instance, therefore, all the statements are relevant.
8830 A pattern statement needs to be analyzed instead of the original
8831 statement if the original statement is not relevant. Otherwise, we
8832 analyze both statements. In basic blocks we are called from some SLP
8833 instance traversal; there we do not analyze pattern stmts separately,
8834 since the pattern stmts will already be part of the SLP instance. */
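  /* For example, an increment of the loop counter that is used only
     by the loop's exit condition is marked irrelevant and skipped
     here rather than being vectorized.  */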
8836 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8837 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8838 && !STMT_VINFO_LIVE_P (stmt_info))
8840 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8841 && pattern_stmt
8842 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8843 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8845 /* Analyze PATTERN_STMT instead of the original stmt. */
8846 stmt = pattern_stmt;
8847 stmt_info = vinfo_for_stmt (pattern_stmt);
8848 if (dump_enabled_p ())
8850 dump_printf_loc (MSG_NOTE, vect_location,
8851 "==> examining pattern statement: ");
8852 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8855 else
8857 if (dump_enabled_p ())
8858 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8860 return true;
8863 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8864 && node == NULL
8865 && pattern_stmt
8866 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8867 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8869 /* Analyze PATTERN_STMT too. */
8870 if (dump_enabled_p ())
8872 dump_printf_loc (MSG_NOTE, vect_location,
8873 "==> examining pattern statement: ");
8874 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8877 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8878 node_instance))
8879 return false;
8882 if (is_pattern_stmt_p (stmt_info)
8883 && node == NULL
8884 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8886 gimple_stmt_iterator si;
8888 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8890 gimple *pattern_def_stmt = gsi_stmt (si);
8891 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8892 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8894 /* Analyze def stmt of STMT if it's a pattern stmt. */
8895 if (dump_enabled_p ())
8897 dump_printf_loc (MSG_NOTE, vect_location,
8898 "==> examining pattern def statement: ");
8899 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8902 if (!vect_analyze_stmt (pattern_def_stmt,
8903 need_to_vectorize, node, node_instance))
8904 return false;
8909 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8911 case vect_internal_def:
8912 break;
8914 case vect_reduction_def:
8915 case vect_nested_cycle:
8916 gcc_assert (!bb_vinfo
8917 && (relevance == vect_used_in_outer
8918 || relevance == vect_used_in_outer_by_reduction
8919 || relevance == vect_used_by_reduction
8920 || relevance == vect_unused_in_scope
8921 || relevance == vect_used_only_live));
8922 break;
8924 case vect_induction_def:
8925 gcc_assert (!bb_vinfo);
8926 break;
8928 case vect_constant_def:
8929 case vect_external_def:
8930 case vect_unknown_def_type:
8931 default:
8932 gcc_unreachable ();
8935 if (STMT_VINFO_RELEVANT_P (stmt_info))
8937 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8938 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8939 || (is_gimple_call (stmt)
8940 && gimple_call_lhs (stmt) == NULL_TREE));
8941 *need_to_vectorize = true;
8944 if (PURE_SLP_STMT (stmt_info) && !node)
8946 dump_printf_loc (MSG_NOTE, vect_location,
8947 "handled only by SLP analysis\n");
8948 return true;
8951 ok = true;
8952 if (!bb_vinfo
8953 && (STMT_VINFO_RELEVANT_P (stmt_info)
8954 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8955 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8956 || vectorizable_conversion (stmt, NULL, NULL, node)
8957 || vectorizable_shift (stmt, NULL, NULL, node)
8958 || vectorizable_operation (stmt, NULL, NULL, node)
8959 || vectorizable_assignment (stmt, NULL, NULL, node)
8960 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8961 || vectorizable_call (stmt, NULL, NULL, node)
8962 || vectorizable_store (stmt, NULL, NULL, node)
8963 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8964 || vectorizable_induction (stmt, NULL, NULL, node)
8965 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8966 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8967 else
8969 if (bb_vinfo)
8970 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8971 || vectorizable_conversion (stmt, NULL, NULL, node)
8972 || vectorizable_shift (stmt, NULL, NULL, node)
8973 || vectorizable_operation (stmt, NULL, NULL, node)
8974 || vectorizable_assignment (stmt, NULL, NULL, node)
8975 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8976 || vectorizable_call (stmt, NULL, NULL, node)
8977 || vectorizable_store (stmt, NULL, NULL, node)
8978 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8979 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8982 if (!ok)
8984 if (dump_enabled_p ())
8986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8987 "not vectorized: relevant stmt not ");
8988 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8989 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8992 return false;
8995 if (bb_vinfo)
8996 return true;
8998 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
8999 need extra handling, except for vectorizable reductions. */
9000 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9001 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9003 if (dump_enabled_p ())
9005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9006 "not vectorized: live stmt not supported: ");
9007 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9010 return false;
9013 return true;
9017 /* Function vect_transform_stmt.
9019 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9021 bool
9022 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9023 bool *grouped_store, slp_tree slp_node,
9024 slp_instance slp_node_instance)
9026 bool is_store = false;
9027 gimple *vec_stmt = NULL;
9028 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9029 bool done;
9031 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9032 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9034 switch (STMT_VINFO_TYPE (stmt_info))
9036 case type_demotion_vec_info_type:
9037 case type_promotion_vec_info_type:
9038 case type_conversion_vec_info_type:
9039 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9040 gcc_assert (done);
9041 break;
9043 case induc_vec_info_type:
9044 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9045 gcc_assert (done);
9046 break;
9048 case shift_vec_info_type:
9049 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9050 gcc_assert (done);
9051 break;
9053 case op_vec_info_type:
9054 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9055 gcc_assert (done);
9056 break;
9058 case assignment_vec_info_type:
9059 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9060 gcc_assert (done);
9061 break;
9063 case load_vec_info_type:
9064 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9065 slp_node_instance);
9066 gcc_assert (done);
9067 break;
9069 case store_vec_info_type:
9070 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9071 gcc_assert (done);
9072 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9074 /* In case of interleaving, the whole chain is vectorized when the
9075 last store in the chain is reached. Store stmts before the last
9076 one are skipped, and their vec_stmt_info shouldn't be freed
9077 meanwhile. */
9078 *grouped_store = true;
9079 if (STMT_VINFO_VEC_STMT (stmt_info))
9080 is_store = true;
9082 else
9083 is_store = true;
9084 break;
9086 case condition_vec_info_type:
9087 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9088 gcc_assert (done);
9089 break;
9091 case comparison_vec_info_type:
9092 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9093 gcc_assert (done);
9094 break;
9096 case call_vec_info_type:
9097 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9098 stmt = gsi_stmt (*gsi);
9099 break;
9101 case call_simd_clone_vec_info_type:
9102 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9103 stmt = gsi_stmt (*gsi);
9104 break;
9106 case reduc_vec_info_type:
9107 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9108 slp_node_instance);
9109 gcc_assert (done);
9110 break;
9112 default:
9113 if (!STMT_VINFO_LIVE_P (stmt_info))
9115 if (dump_enabled_p ())
9116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9117 "stmt not supported.\n");
9118 gcc_unreachable ();
9122 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9123 This would break hybrid SLP vectorization. */
9124 if (slp_node)
9125 gcc_assert (!vec_stmt
9126 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9128 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9129 is being vectorized, but outside the immediately enclosing loop. */
9130 if (vec_stmt
9131 && STMT_VINFO_LOOP_VINFO (stmt_info)
9132 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
9133 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
9134 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9135 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9136 || STMT_VINFO_RELEVANT (stmt_info) ==
9137 vect_used_in_outer_by_reduction))
9139 struct loop *innerloop = LOOP_VINFO_LOOP (
9140 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9141 imm_use_iterator imm_iter;
9142 use_operand_p use_p;
9143 tree scalar_dest;
9144 gimple *exit_phi;
9146 if (dump_enabled_p ())
9147 dump_printf_loc (MSG_NOTE, vect_location,
9148 "Record the vdef for outer-loop vectorization.\n");
9150 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9151 (to be used when vectorizing outer-loop stmts that use the DEF of
9152 STMT). */
9153 if (gimple_code (stmt) == GIMPLE_PHI)
9154 scalar_dest = PHI_RESULT (stmt);
9155 else
9156 scalar_dest = gimple_assign_lhs (stmt);
9158 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9160 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9162 exit_phi = USE_STMT (use_p);
9163 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9168 /* Handle stmts whose DEF is used outside the loop-nest that is
9169 being vectorized. */
9170 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9172 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9173 gcc_assert (done);
9176 if (vec_stmt)
9177 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9179 return is_store;
9183 /* Remove a group of stores (for SLP or interleaving), free their
9184 stmt_vec_info. */
9186 void
9187 vect_remove_stores (gimple *first_stmt)
9189 gimple *next = first_stmt;
9190 gimple *tmp;
9191 gimple_stmt_iterator next_si;
9193 while (next)
9195 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9197 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9198 if (is_pattern_stmt_p (stmt_info))
9199 next = STMT_VINFO_RELATED_STMT (stmt_info);
9200 /* Free the attached stmt_vec_info and remove the stmt. */
9201 next_si = gsi_for_stmt (next);
9202 unlink_stmt_vdef (next);
9203 gsi_remove (&next_si, true);
9204 release_defs (next);
9205 free_stmt_vec_info (next);
9206 next = tmp;
9211 /* Function new_stmt_vec_info.
9213 Create and initialize a new stmt_vec_info struct for STMT. */
9215 stmt_vec_info
9216 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9218 stmt_vec_info res;
9219 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9221 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9222 STMT_VINFO_STMT (res) = stmt;
9223 res->vinfo = vinfo;
9224 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9225 STMT_VINFO_LIVE_P (res) = false;
9226 STMT_VINFO_VECTYPE (res) = NULL;
9227 STMT_VINFO_VEC_STMT (res) = NULL;
9228 STMT_VINFO_VECTORIZABLE (res) = true;
9229 STMT_VINFO_IN_PATTERN_P (res) = false;
9230 STMT_VINFO_RELATED_STMT (res) = NULL;
9231 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9232 STMT_VINFO_DATA_REF (res) = NULL;
9233 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9234 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9236 if (gimple_code (stmt) == GIMPLE_PHI
9237 && is_loop_header_bb_p (gimple_bb (stmt)))
9238 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9239 else
9240 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9242 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9243 STMT_SLP_TYPE (res) = loop_vect;
9244 STMT_VINFO_NUM_SLP_USES (res) = 0;
9246 GROUP_FIRST_ELEMENT (res) = NULL;
9247 GROUP_NEXT_ELEMENT (res) = NULL;
9248 GROUP_SIZE (res) = 0;
9249 GROUP_STORE_COUNT (res) = 0;
9250 GROUP_GAP (res) = 0;
9251 GROUP_SAME_DR_STMT (res) = NULL;
9253 return res;
9257 /* Create the vector holding stmt_vec_info structs. */
9259 void
9260 init_stmt_vec_info_vec (void)
9262 gcc_assert (!stmt_vec_info_vec.exists ());
9263 stmt_vec_info_vec.create (50);
9267 /* Free the stmt_vec_info vector. */
9269 void
9270 free_stmt_vec_info_vec (void)
9272 unsigned int i;
9273 stmt_vec_info info;
9274 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9275 if (info != NULL)
9276 free_stmt_vec_info (STMT_VINFO_STMT (info));
9277 gcc_assert (stmt_vec_info_vec.exists ());
9278 stmt_vec_info_vec.release ();
9282 /* Free stmt vectorization related info. */
9284 void
9285 free_stmt_vec_info (gimple *stmt)
9287 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9289 if (!stmt_info)
9290 return;
9292 /* Check if this statement has a related "pattern stmt"
9293 (introduced by the vectorizer during the pattern recognition
9294 pass). Free the pattern's stmt_vec_info and the def stmt's stmt_vec_info
9295 too. */
9296 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9298 stmt_vec_info patt_info
9299 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9300 if (patt_info)
9302 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9303 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9304 gimple_set_bb (patt_stmt, NULL);
9305 tree lhs = gimple_get_lhs (patt_stmt);
9306 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9307 release_ssa_name (lhs);
9308 if (seq)
9310 gimple_stmt_iterator si;
9311 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9313 gimple *seq_stmt = gsi_stmt (si);
9314 gimple_set_bb (seq_stmt, NULL);
9315 lhs = gimple_get_lhs (seq_stmt);
9316 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9317 release_ssa_name (lhs);
9318 free_stmt_vec_info (seq_stmt);
9321 free_stmt_vec_info (patt_stmt);
9325 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9326 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9327 set_vinfo_for_stmt (stmt, NULL);
9328 free (stmt_info);
9332 /* Function get_vectype_for_scalar_type_and_size.
9334 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9335 by the target. */
9337 tree
9338 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9340 tree orig_scalar_type = scalar_type;
9341 scalar_mode inner_mode;
9342 machine_mode simd_mode;
9343 poly_uint64 nunits;
9344 tree vectype;
9346 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9347 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9348 return NULL_TREE;
9350 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9352 /* For vector types of elements whose mode precision doesn't
9353 match their type's precision we use an element type of mode
9354 precision. The vectorization routines will have to make sure
9355 they support the proper result truncation/extension.
9356 We also make sure to build vector types with INTEGER_TYPE
9357 component type only. */
9358 if (INTEGRAL_TYPE_P (scalar_type)
9359 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9360 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9361 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9362 TYPE_UNSIGNED (scalar_type));
9364 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9365 When the component mode passes the above test, simply use a type
9366 corresponding to that mode. The theory is that any use that
9367 would cause problems with this will disable vectorization anyway. */
9368 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9369 && !INTEGRAL_TYPE_P (scalar_type))
9370 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9372 /* We can't build a vector type of elements with alignment bigger than
9373 their size. */
9374 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9375 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9376 TYPE_UNSIGNED (scalar_type));
9378 /* If we fell back to using the mode, fail if there was
9379 no scalar type for it. */
9380 if (scalar_type == NULL_TREE)
9381 return NULL_TREE;
9383 /* If no size was supplied, use the mode the target prefers. Otherwise
9384 look up a vector mode of the specified size. */
9385 if (known_eq (size, 0U))
9386 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9387 else if (!multiple_p (size, nbytes, &nunits)
9388 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9389 return NULL_TREE;
9390 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9391 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9392 return NULL_TREE;
9394 vectype = build_vector_type (scalar_type, nunits);
9396 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9397 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9398 return NULL_TREE;
9400 /* Re-attach the address-space qualifier if we canonicalized the scalar
9401 type. */
9402 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9403 return build_qualified_type
9404 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9406 return vectype;
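/* Illustrative sketch, not part of the original file: a worked example of
   how the routine above resolves a vector type.  The helper name below is
   hypothetical, and the 16-byte size and 4-element result assume a target
   with 128-bit integer vector modes.  */

static tree
example_get_v4si_type (void)
{
  /* INT has SImode; 16 bytes / 4 bytes per element gives nunits == 4,
     so on the assumed target this builds a "vector(4) int" type.  A
     _Bool element (precision 1, QImode) would first be canonicalized to
     an 8-bit INTEGER_TYPE before the vector type is built.  */
  return get_vectype_for_scalar_type_and_size (integer_type_node, 16);
}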
9409 poly_uint64 current_vector_size;
9411 /* Function get_vectype_for_scalar_type.
9413 Returns the vector type corresponding to SCALAR_TYPE as supported
9414 by the target. */
9416 tree
9417 get_vectype_for_scalar_type (tree scalar_type)
9419 tree vectype;
9420 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9421 current_vector_size);
9422 if (vectype
9423 && known_eq (current_vector_size, 0U))
9424 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9425 return vectype;
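/* Illustrative sketch, not part of the original file: current_vector_size
   is latched by the first successful lookup above, so later lookups for
   other scalar types resolve to the same vector size.  The helper name is
   hypothetical.  */

static void
example_vector_size_latching (void)
{
  /* Assuming current_vector_size starts at 0, this call both returns a
     vector type for INT and records that type's size ...  */
  tree v_int = get_vectype_for_scalar_type (integer_type_node);
  /* ... so this lookup for SHORT is constrained to the same size.  */
  tree v_short = get_vectype_for_scalar_type (short_integer_type_node);
  (void) v_int;
  (void) v_short;
}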
9428 /* Function get_mask_type_for_scalar_type.
9430 Returns the mask type corresponding to the result of a comparison
9431 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9433 tree
9434 get_mask_type_for_scalar_type (tree scalar_type)
9436 tree vectype = get_vectype_for_scalar_type (scalar_type);
9438 if (!vectype)
9439 return NULL;
9441 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9442 current_vector_size);
9445 /* Function get_same_sized_vectype
9447 Returns a vector type corresponding to SCALAR_TYPE with the same
9448 size as VECTOR_TYPE, if supported by the target. */
9450 tree
9451 get_same_sized_vectype (tree scalar_type, tree vector_type)
9453 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9454 return build_same_sized_truth_vector_type (vector_type);
9456 return get_vectype_for_scalar_type_and_size
9457 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9460 /* Function vect_is_simple_use.
9462 Input:
9463 VINFO - the vect info of the loop or basic block that is being vectorized.
9464 OPERAND - operand in the loop or bb.
9465 Output:
9466 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9467 DT - the type of definition
9469 Returns whether a stmt with OPERAND can be vectorized.
9470 For loops, supportable operands are constants, loop invariants, and operands
9471 that are defined by the current iteration of the loop. Unsupportable
9472 operands are those that are defined by a previous iteration of the loop (as
9473 is the case in reduction/induction computations).
9474 For basic blocks, supportable operands are constants and bb invariants.
9475 For now, operands defined outside the basic block are not supported. */
9477 bool
9478 vect_is_simple_use (tree operand, vec_info *vinfo,
9479 gimple **def_stmt, enum vect_def_type *dt)
9481 *def_stmt = NULL;
9482 *dt = vect_unknown_def_type;
9484 if (dump_enabled_p ())
9486 dump_printf_loc (MSG_NOTE, vect_location,
9487 "vect_is_simple_use: operand ");
9488 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9489 dump_printf (MSG_NOTE, "\n");
9492 if (CONSTANT_CLASS_P (operand))
9494 *dt = vect_constant_def;
9495 return true;
9498 if (is_gimple_min_invariant (operand))
9500 *dt = vect_external_def;
9501 return true;
9504 if (TREE_CODE (operand) != SSA_NAME)
9506 if (dump_enabled_p ())
9507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9508 "not ssa-name.\n");
9509 return false;
9512 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9514 *dt = vect_external_def;
9515 return true;
9518 *def_stmt = SSA_NAME_DEF_STMT (operand);
9519 if (dump_enabled_p ())
9521 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9522 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9525 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9526 *dt = vect_external_def;
9527 else
9529 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9530 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9533 if (dump_enabled_p ())
9535 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9536 switch (*dt)
9538 case vect_uninitialized_def:
9539 dump_printf (MSG_NOTE, "uninitialized\n");
9540 break;
9541 case vect_constant_def:
9542 dump_printf (MSG_NOTE, "constant\n");
9543 break;
9544 case vect_external_def:
9545 dump_printf (MSG_NOTE, "external\n");
9546 break;
9547 case vect_internal_def:
9548 dump_printf (MSG_NOTE, "internal\n");
9549 break;
9550 case vect_induction_def:
9551 dump_printf (MSG_NOTE, "induction\n");
9552 break;
9553 case vect_reduction_def:
9554 dump_printf (MSG_NOTE, "reduction\n");
9555 break;
9556 case vect_double_reduction_def:
9557 dump_printf (MSG_NOTE, "double reduction\n");
9558 break;
9559 case vect_nested_cycle:
9560 dump_printf (MSG_NOTE, "nested cycle\n");
9561 break;
9562 case vect_unknown_def_type:
9563 dump_printf (MSG_NOTE, "unknown\n");
9564 break;
9568 if (*dt == vect_unknown_def_type)
9570 if (dump_enabled_p ())
9571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9572 "Unsupported pattern.\n");
9573 return false;
9576 switch (gimple_code (*def_stmt))
9578 case GIMPLE_PHI:
9579 case GIMPLE_ASSIGN:
9580 case GIMPLE_CALL:
9581 break;
9582 default:
9583 if (dump_enabled_p ())
9584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9585 "unsupported defining stmt:\n");
9586 return false;
9589 return true;
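/* Illustrative sketch, not part of the original file: the usual calling
   pattern for vect_is_simple_use as used throughout the vectorizer.  The
   helper name and the operand OP are placeholders.  */

static bool
example_classify_operand (tree op, vec_info *vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  /* Reject operands whose definition the vectorizer cannot handle.  */
  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
    return false;
  /* Constants and external defs have no in-region defining statement to
     vectorize; internal defs do, and DEF_STMT then points to it.  */
  return true;
}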
9592 /* Function vect_is_simple_use.
9594 Same as vect_is_simple_use but also determines the vector operand
9595 type of OPERAND and stores it to *VECTYPE. If the definition of
9596 OPERAND is vect_uninitialized_def, vect_constant_def or
9597 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9598 is responsible for computing the best-suited vector type for the
9599 scalar operand. */
9601 bool
9602 vect_is_simple_use (tree operand, vec_info *vinfo,
9603 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9605 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9606 return false;
9608 /* Now get a vector type if the def is internal, otherwise supply
9609 NULL_TREE and leave it up to the caller to figure out a proper
9610 type for the use stmt. */
9611 if (*dt == vect_internal_def
9612 || *dt == vect_induction_def
9613 || *dt == vect_reduction_def
9614 || *dt == vect_double_reduction_def
9615 || *dt == vect_nested_cycle)
9617 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9619 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9620 && !STMT_VINFO_RELEVANT (stmt_info)
9621 && !STMT_VINFO_LIVE_P (stmt_info))
9622 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9624 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9625 gcc_assert (*vectype != NULL_TREE);
9627 else if (*dt == vect_uninitialized_def
9628 || *dt == vect_constant_def
9629 || *dt == vect_external_def)
9630 *vectype = NULL_TREE;
9631 else
9632 gcc_unreachable ();
9634 return true;
9638 /* Function supportable_widening_operation
9640 Check whether an operation represented by the code CODE is a
9641 widening operation that is supported by the target platform in
9642 vector form (i.e., when operating on arguments of type VECTYPE_IN
9643 producing a result of type VECTYPE_OUT).
9645 Widening operations we currently support are NOP (CONVERT), FLOAT
9646 and WIDEN_MULT. This function checks if these operations are supported
9647 by the target platform either directly (via vector tree-codes), or via
9648 target builtins.
9650 Output:
9651 - CODE1 and CODE2 are codes of vector operations to be used when
9652 vectorizing the operation, if available.
9653 - MULTI_STEP_CVT determines the number of required intermediate steps in
9654 case of multi-step conversion (like char->short->int - in that case
9655 MULTI_STEP_CVT will be 1).
9656 - INTERM_TYPES contains the intermediate type required to perform the
9657 widening operation (short in the above example). */
9659 bool
9660 supportable_widening_operation (enum tree_code code, gimple *stmt,
9661 tree vectype_out, tree vectype_in,
9662 enum tree_code *code1, enum tree_code *code2,
9663 int *multi_step_cvt,
9664 vec<tree> *interm_types)
9666 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9667 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9668 struct loop *vect_loop = NULL;
9669 machine_mode vec_mode;
9670 enum insn_code icode1, icode2;
9671 optab optab1, optab2;
9672 tree vectype = vectype_in;
9673 tree wide_vectype = vectype_out;
9674 enum tree_code c1, c2;
9675 int i;
9676 tree prev_type, intermediate_type;
9677 machine_mode intermediate_mode, prev_mode;
9678 optab optab3, optab4;
9680 *multi_step_cvt = 0;
9681 if (loop_info)
9682 vect_loop = LOOP_VINFO_LOOP (loop_info);
9684 switch (code)
9686 case WIDEN_MULT_EXPR:
9687 /* The result of a vectorized widening operation usually requires
9688 two vectors (because the widened results do not fit into one vector).
9689 The generated vector results would normally be expected to be
9690 generated in the same order as in the original scalar computation,
9691 i.e. if 8 results are generated in each vector iteration, they are
9692 to be organized as follows:
9693 vect1: [res1,res2,res3,res4],
9694 vect2: [res5,res6,res7,res8].
9696 However, in the special case that the result of the widening
9697 operation is used in a reduction computation only, the order doesn't
9698 matter (because when vectorizing a reduction we change the order of
9699 the computation). Some targets can take advantage of this and
9700 generate more efficient code. For example, targets such as AltiVec,
9701 which support widen_mult using a sequence of {mult_even,mult_odd},
9702 generate the following vectors:
9703 vect1: [res1,res3,res5,res7],
9704 vect2: [res2,res4,res6,res8].
9706 When vectorizing outer-loops, we execute the inner-loop sequentially
9707 (each vectorized inner-loop iteration contributes to VF outer-loop
9708 iterations in parallel). We therefore don't allow changing the
9709 order of the computation in the inner-loop during outer-loop
9710 vectorization. */
9711 /* TODO: Another case in which order doesn't *really* matter is when we
9712 widen and then contract again, e.g. (short)((int)x * y >> 8).
9713 Normally, pack_trunc performs an even/odd permute, whereas the
9714 repack from an even/odd expansion would be an interleave, which
9715 would be significantly simpler for e.g. AVX2. */
9716 /* In any case, in order to avoid duplicating the code below, recurse
9717 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9718 are properly set up for the caller. If we fail, we'll continue with
9719 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9720 if (vect_loop
9721 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9722 && !nested_in_vect_loop_p (vect_loop, stmt)
9723 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9724 stmt, vectype_out, vectype_in,
9725 code1, code2, multi_step_cvt,
9726 interm_types))
9728 /* Elements in a vector with the vect_used_by_reduction property cannot
9729 be reordered if the use chain with this property does not have the
9730 same operation. One such example is s += a * b, where elements
9731 in a and b cannot be reordered. Here we check whether the vector defined
9732 by STMT is only used directly in the reduction statement. */
9733 tree lhs = gimple_assign_lhs (stmt);
9734 use_operand_p dummy;
9735 gimple *use_stmt;
9736 stmt_vec_info use_stmt_info = NULL;
9737 if (single_imm_use (lhs, &dummy, &use_stmt)
9738 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9739 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9740 return true;
9742 c1 = VEC_WIDEN_MULT_LO_EXPR;
9743 c2 = VEC_WIDEN_MULT_HI_EXPR;
9744 break;
9746 case DOT_PROD_EXPR:
9747 c1 = DOT_PROD_EXPR;
9748 c2 = DOT_PROD_EXPR;
9749 break;
9751 case SAD_EXPR:
9752 c1 = SAD_EXPR;
9753 c2 = SAD_EXPR;
9754 break;
9756 case VEC_WIDEN_MULT_EVEN_EXPR:
9757 /* Support the recursion induced just above. */
9758 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9759 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9760 break;
9762 case WIDEN_LSHIFT_EXPR:
9763 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9764 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9765 break;
9767 CASE_CONVERT:
9768 c1 = VEC_UNPACK_LO_EXPR;
9769 c2 = VEC_UNPACK_HI_EXPR;
9770 break;
9772 case FLOAT_EXPR:
9773 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9774 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9775 break;
9777 case FIX_TRUNC_EXPR:
9778 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9779 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9780 computing the operation. */
9781 return false;
9783 default:
9784 gcc_unreachable ();
9787 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9788 std::swap (c1, c2);
9790 if (code == FIX_TRUNC_EXPR)
9793 /* The signedness is determined from the output operand. */
9793 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9794 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9796 else
9798 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9799 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9802 if (!optab1 || !optab2)
9803 return false;
9805 vec_mode = TYPE_MODE (vectype);
9806 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9807 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9808 return false;
9810 *code1 = c1;
9811 *code2 = c2;
9813 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9814 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9815 /* For scalar masks we may have different boolean
9816 vector types having the same QImode. Thus we
9817 add an additional check on the number of elements. */
9818 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9819 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
9820 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9822 /* Check if it's a multi-step conversion that can be done using intermediate
9823 types. */
9825 prev_type = vectype;
9826 prev_mode = vec_mode;
9828 if (!CONVERT_EXPR_CODE_P (code))
9829 return false;
9831 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9832 intermediate steps in the promotion sequence. We try
9833 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9834 not. */
9835 interm_types->create (MAX_INTERM_CVT_STEPS);
9836 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9838 intermediate_mode = insn_data[icode1].operand[0].mode;
9839 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9841 intermediate_type = vect_halve_mask_nunits (prev_type);
9842 if (intermediate_mode != TYPE_MODE (intermediate_type))
9843 return false;
9845 else
9846 intermediate_type
9847 = lang_hooks.types.type_for_mode (intermediate_mode,
9848 TYPE_UNSIGNED (prev_type));
9850 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9851 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9853 if (!optab3 || !optab4
9854 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9855 || insn_data[icode1].operand[0].mode != intermediate_mode
9856 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9857 || insn_data[icode2].operand[0].mode != intermediate_mode
9858 || ((icode1 = optab_handler (optab3, intermediate_mode))
9859 == CODE_FOR_nothing)
9860 || ((icode2 = optab_handler (optab4, intermediate_mode))
9861 == CODE_FOR_nothing))
9862 break;
9864 interm_types->quick_push (intermediate_type);
9865 (*multi_step_cvt)++;
9867 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9868 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9869 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9870 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
9871 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9873 prev_type = intermediate_type;
9874 prev_mode = intermediate_mode;
9877 interm_types->release ();
9878 return false;
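/* Illustrative sketch, not part of the original file: how a caller might
   query widening support for a char -> int conversion.  The helper name is
   hypothetical; STMT, VECTYPE_OUT and VECTYPE_IN are assumed to describe
   the conversion (e.g. an int vector result from a char vector input).  */

static bool
example_check_widening_convert (gimple *stmt, tree vectype_out,
				tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
					    vectype_in, &code1, &code2,
					    &multi_step_cvt, &interm_types);
  /* For char -> int a single intermediate step through a short vector
     type is typical: MULTI_STEP_CVT == 1 and INTERM_TYPES holds that
     intermediate vector type.  */
  interm_types.release ();
  return ok;
}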
9882 /* Function supportable_narrowing_operation
9884 Check whether an operation represented by the code CODE is a
9885 narrowing operation that is supported by the target platform in
9886 vector form (i.e., when operating on arguments of type VECTYPE_IN
9887 and producing a result of type VECTYPE_OUT).
9889 Narrowing operations we currently support are NOP (CONVERT) and
9890 FIX_TRUNC. This function checks if these operations are supported by
9891 the target platform directly via vector tree-codes.
9893 Output:
9894 - CODE1 is the code of a vector operation to be used when
9895 vectorizing the operation, if available.
9896 - MULTI_STEP_CVT determines the number of required intermediate steps in
9897 case of multi-step conversion (like int->short->char - in that case
9898 MULTI_STEP_CVT will be 1).
9899 - INTERM_TYPES contains the intermediate type required to perform the
9900 narrowing operation (short in the above example). */
9902 bool
9903 supportable_narrowing_operation (enum tree_code code,
9904 tree vectype_out, tree vectype_in,
9905 enum tree_code *code1, int *multi_step_cvt,
9906 vec<tree> *interm_types)
9908 machine_mode vec_mode;
9909 enum insn_code icode1;
9910 optab optab1, interm_optab;
9911 tree vectype = vectype_in;
9912 tree narrow_vectype = vectype_out;
9913 enum tree_code c1;
9914 tree intermediate_type, prev_type;
9915 machine_mode intermediate_mode, prev_mode;
9916 int i;
9917 bool uns;
9919 *multi_step_cvt = 0;
9920 switch (code)
9922 CASE_CONVERT:
9923 c1 = VEC_PACK_TRUNC_EXPR;
9924 break;
9926 case FIX_TRUNC_EXPR:
9927 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9928 break;
9930 case FLOAT_EXPR:
9931 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9932 tree code and optabs used for computing the operation. */
9933 return false;
9935 default:
9936 gcc_unreachable ();
9939 if (code == FIX_TRUNC_EXPR)
9940 /* The signedness is determined from the output operand. */
9941 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9942 else
9943 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9945 if (!optab1)
9946 return false;
9948 vec_mode = TYPE_MODE (vectype);
9949 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9950 return false;
9952 *code1 = c1;
9954 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9955 /* For scalar masks we may have different boolean
9956 vector types having the same QImode. Thus we
9957 add an additional check on the number of elements. */
9958 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9959 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
9960 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9962 /* Check if it's a multi-step conversion that can be done using intermediate
9963 types. */
9964 prev_mode = vec_mode;
9965 prev_type = vectype;
9966 if (code == FIX_TRUNC_EXPR)
9967 uns = TYPE_UNSIGNED (vectype_out);
9968 else
9969 uns = TYPE_UNSIGNED (vectype);
9971 /* For a multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
9972 integer conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR
9973 is often more costly than signed. */
9974 if (code == FIX_TRUNC_EXPR && uns)
9976 enum insn_code icode2;
9978 intermediate_type
9979 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9980 interm_optab
9981 = optab_for_tree_code (c1, intermediate_type, optab_default);
9982 if (interm_optab != unknown_optab
9983 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9984 && insn_data[icode1].operand[0].mode
9985 == insn_data[icode2].operand[0].mode)
9987 uns = false;
9988 optab1 = interm_optab;
9989 icode1 = icode2;
9993 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9994 intermediate steps in the narrowing sequence. We try
9995 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9996 interm_types->create (MAX_INTERM_CVT_STEPS);
9997 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9999 intermediate_mode = insn_data[icode1].operand[0].mode;
10000 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10002 intermediate_type = vect_double_mask_nunits (prev_type);
10003 if (intermediate_mode != TYPE_MODE (intermediate_type))
10004 return false;
10006 else
10007 intermediate_type
10008 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10009 interm_optab
10010 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10011 optab_default);
10012 if (!interm_optab
10013 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10014 || insn_data[icode1].operand[0].mode != intermediate_mode
10015 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10016 == CODE_FOR_nothing))
10017 break;
10019 interm_types->quick_push (intermediate_type);
10020 (*multi_step_cvt)++;
10022 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10023 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10024 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10025 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10027 prev_mode = intermediate_mode;
10028 prev_type = intermediate_type;
10029 optab1 = interm_optab;
10032 interm_types->release ();
10033 return false;
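/* Worked example added for illustration, not in the original file: for an
   int -> char conversion with VECTYPE_IN "vector(4) int" and VECTYPE_OUT
   "vector(16) char" (assuming 128-bit vector modes), CODE1 is
   VEC_PACK_TRUNC_EXPR and one intermediate step through a short vector
   type is needed, so MULTI_STEP_CVT is 1 and INTERM_TYPES holds that
   short vector type.  */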
10036 /* Generate and return a statement that sets vector mask MASK such that
10037 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10039 gcall *
10040 vect_gen_while (tree mask, tree start_index, tree end_index)
10042 tree cmp_type = TREE_TYPE (start_index);
10043 tree mask_type = TREE_TYPE (mask);
10044 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10045 cmp_type, mask_type,
10046 OPTIMIZE_FOR_SPEED));
10047 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10048 start_index, end_index,
10049 build_zero_cst (mask_type));
10050 gimple_call_set_lhs (call, mask);
10051 return call;
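/* Illustrative example, not part of the original file: in gimple dumps the
   call built above appears roughly as

     mask = .WHILE_ULT (start_index, end_index, { 0, ... });

   i.e. element I of MASK is set as long as START_INDEX + I < END_INDEX,
   which is how fully-masked loops deactivate the trailing lanes of the
   final iteration.  */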
10054 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10055 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10057 tree
10058 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10059 tree end_index)
10061 tree tmp = make_ssa_name (mask_type);
10062 gcall *call = vect_gen_while (tmp, start_index, end_index);
10063 gimple_seq_add_stmt (seq, call);
10064 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);