gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
53 /* For lang_hooks.types.type_for_mode. */
54 #include "langhooks.h"
56 /* Says whether a statement is a load, a store of a vectorized statement
57 result, or a store of an invariant value. */
58 enum vec_load_store_type {
59 VLS_LOAD,
60 VLS_STORE,
61 VLS_STORE_INVARIANT
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (struct _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
80 struct loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
97 int misalign, enum vect_cost_model_location where)
99 if ((kind == vector_load || kind == unaligned_load)
100 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
104 kind = vector_scatter_store;
105 if (body_cost_vec)
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 stmt_info_for_cost si = { count, kind,
109 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
110 misalign };
111 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
115 else
116 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
117 count, kind, stmt_info, misalign, where);
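/* Illustrative sketch (hypothetical helper, not part of the original file):
   how a caller might use record_stmt_cost above to accumulate the body cost
   of NCOPIES unaligned vector loads with an assumed misalignment of 8 bytes.
   The helper name and the values are made up for the example.  */
#if 0
static unsigned
example_cost_unaligned_loads (stmt_vec_info stmt_info, int ncopies,
			      stmt_vector_for_cost *body_cost_vec)
{
  /* With a non-null BODY_COST_VEC the cost is pushed onto the vector and a
     preliminary estimate is returned; otherwise it would go straight to the
     target cost model via add_stmt_cost.  */
  return record_stmt_cost (body_cost_vec, ncopies, unaligned_load,
			   stmt_info, /* misalign = */ 8, vect_body);
}
#endif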
120 /* Return a variable of type ELEM_TYPE[NELEMS]. */
122 static tree
123 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
125 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
126 "vect_array");
129 /* ARRAY is an array of vectors created by create_vector_array.
130 Return an SSA_NAME for the vector in index N. The reference
131 is part of the vectorization of STMT and the vector is associated
132 with scalar destination SCALAR_DEST. */
134 static tree
135 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
136 tree array, unsigned HOST_WIDE_INT n)
138 tree vect_type, vect, vect_name, array_ref;
139 gimple *new_stmt;
141 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
142 vect_type = TREE_TYPE (TREE_TYPE (array));
143 vect = vect_create_destination_var (scalar_dest, vect_type);
144 array_ref = build4 (ARRAY_REF, vect_type, array,
145 build_int_cst (size_type_node, n),
146 NULL_TREE, NULL_TREE);
148 new_stmt = gimple_build_assign (vect, array_ref);
149 vect_name = make_ssa_name (vect, new_stmt);
150 gimple_assign_set_lhs (new_stmt, vect_name);
151 vect_finish_stmt_generation (stmt, new_stmt, gsi);
153 return vect_name;
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Emit code to store SSA_NAME VECT in index N of the array.
158 The store is part of the vectorization of STMT. */
160 static void
161 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
162 tree array, unsigned HOST_WIDE_INT n)
164 tree array_ref;
165 gimple *new_stmt;
167 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
168 build_int_cst (size_type_node, n),
169 NULL_TREE, NULL_TREE);
171 new_stmt = gimple_build_assign (array_ref, vect);
172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
175 /* PTR is a pointer to an array of type TYPE. Return a representation
176 of *PTR. The memory reference replaces those in FIRST_DR
177 (and its group). */
179 static tree
180 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
182 tree mem_ref;
184 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
185 /* Arrays have the same alignment as their type. */
186 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
187 return mem_ref;
190 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192 /* Function vect_mark_relevant.
194 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
196 static void
197 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
198 enum vect_relevant relevant, bool live_p)
200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
203 gimple *pattern_stmt;
205 if (dump_enabled_p ())
207 dump_printf_loc (MSG_NOTE, vect_location,
208 "mark relevant %d, live %d: ", relevant, live_p);
209 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern, in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
223 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_info = vinfo_for_stmt (pattern_stmt);
230 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
231 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
232 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
233 stmt = pattern_stmt;
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
260 tree op;
261 gimple *def_stmt;
262 ssa_op_iter iter;
264 if (!is_gimple_assign (stmt))
265 return false;
267 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 enum vect_def_type dt = vect_uninitialized_def;
271 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
275 "use not simple.\n");
276 return false;
279 if (dt != vect_external_def && dt != vect_constant_def)
280 return false;
282 return true;
285 /* Function vect_stmt_relevant_p.
287 Return true if STMT in loop that is represented by LOOP_VINFO is
288 "relevant for vectorization".
290 A stmt is considered "relevant for vectorization" if:
291 - it has uses outside the loop.
292 - it has vdefs (it alters memory).
293 - it is a control stmt in the loop (except for the exit condition).
295 CHECKME: what other side effects would the vectorizer allow? */
297 static bool
298 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
299 enum vect_relevant *relevant, bool *live_p)
301 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
302 ssa_op_iter op_iter;
303 imm_use_iterator imm_iter;
304 use_operand_p use_p;
305 def_operand_p def_p;
307 *relevant = vect_unused_in_scope;
308 *live_p = false;
310 /* cond stmt other than loop exit cond. */
311 if (is_ctrl_stmt (stmt)
312 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
313 != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt)
319 && !gimple_clobber_p (stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (dump_enabled_p ())
336 dump_printf_loc (MSG_NOTE, vect_location,
337 "vec_stmt_relevant_p: used out of loop.\n");
339 if (is_gimple_debug (USE_STMT (use_p)))
340 continue;
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT. Check if USE is
368 used in STMT for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
373 tree operand;
374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
376 /* USE corresponds to some operand in STMT. If there is no data
377 reference in STMT, then any operand that corresponds to USE
378 is not indexing an array. */
379 if (!STMT_VINFO_DATA_REF (stmt_info))
380 return true;
382 /* STMT has a data_ref. FORNOW this means that it's of one of
383 the following forms:
384 -1- ARRAY_REF = var
385 -2- var = ARRAY_REF
386 (This should have been verified in analyze_data_refs).
388 'var' in the second case corresponds to a def, not a use,
389 so USE cannot correspond to any operands that are not used
390 for array indexing.
392 Therefore, all we need to check is if STMT falls into the
393 first case, and whether var corresponds to USE. */
395 if (!gimple_assign_copy_p (stmt))
397 if (is_gimple_call (stmt)
398 && gimple_call_internal_p (stmt))
399 switch (gimple_call_internal_fn (stmt))
401 case IFN_MASK_STORE:
402 operand = gimple_call_arg (stmt, 3);
403 if (operand == use)
404 return true;
405 /* FALLTHRU */
406 case IFN_MASK_LOAD:
407 operand = gimple_call_arg (stmt, 2);
408 if (operand == use)
409 return true;
410 break;
411 default:
412 break;
414 return false;
417 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
418 return false;
419 operand = gimple_assign_rhs1 (stmt);
420 if (TREE_CODE (operand) != SSA_NAME)
421 return false;
423 if (operand == use)
424 return true;
426 return false;
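/* Illustrative sketch (hypothetical scalar loop, not part of the original
   file): in the store below, the use of "i" only feeds the array index, so
   exist_non_indexing_operands_for_use_p returns false for it, while the use
   of "x" is the stored value itself and the function returns true.  */
#if 0
void
example_store (int *a, int x, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = x;	/* USE "i": indexing only.  USE "x": real operand.  */
}
#endif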
431 /* Function process_use.
433 Inputs:
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 be performed.
441 Outputs:
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 Exceptions:
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451 skip DEF_STMT because it has already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
457 static bool
458 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
459 enum vect_relevant relevant, vec<gimple *> *worklist,
460 bool force)
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
464 stmt_vec_info dstmt_vinfo;
465 basic_block bb, def_bb;
466 gimple *def_stmt;
467 enum vect_def_type dt;
469 /* case 1: we are only interested in uses that need to be vectorized. Uses
470 that are used for address computation are not considered relevant. */
471 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
472 return true;
474 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
476 if (dump_enabled_p ())
477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
478 "not vectorized: unsupported use in stmt.\n");
479 return false;
482 if (!def_stmt || gimple_nop_p (def_stmt))
483 return true;
485 def_bb = gimple_bb (def_stmt);
486 if (!flow_bb_inside_loop_p (loop, def_bb))
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
490 return true;
493 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
494 DEF_STMT must have already been processed, because this should be the
495 only way that STMT, which is a reduction-phi, was put in the worklist,
496 as there should be no other uses for DEF_STMT in the loop. So we just
497 check that everything is as expected, and we are done. */
498 dstmt_vinfo = vinfo_for_stmt (def_stmt);
499 bb = gimple_bb (stmt);
500 if (gimple_code (stmt) == GIMPLE_PHI
501 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
502 && gimple_code (def_stmt) != GIMPLE_PHI
503 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
504 && bb->loop_father == def_bb->loop_father)
506 if (dump_enabled_p ())
507 dump_printf_loc (MSG_NOTE, vect_location,
508 "reduc-stmt defining reduc-phi in the same nest.\n");
509 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
510 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
511 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
512 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
513 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
514 return true;
517 /* case 3a: outer-loop stmt defining an inner-loop stmt:
518 outer-loop-header-bb:
519 d = def_stmt
520 inner-loop:
521 stmt # use (d)
522 outer-loop-tail-bb:
523 ... */
524 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
526 if (dump_enabled_p ())
527 dump_printf_loc (MSG_NOTE, vect_location,
528 "outer-loop def-stmt defining inner-loop stmt.\n");
530 switch (relevant)
532 case vect_unused_in_scope:
533 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
534 vect_used_in_scope : vect_unused_in_scope;
535 break;
537 case vect_used_in_outer_by_reduction:
538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
539 relevant = vect_used_by_reduction;
540 break;
542 case vect_used_in_outer:
543 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
544 relevant = vect_used_in_scope;
545 break;
547 case vect_used_in_scope:
548 break;
550 default:
551 gcc_unreachable ();
555 /* case 3b: inner-loop stmt defining an outer-loop stmt:
556 outer-loop-header-bb:
558 inner-loop:
559 d = def_stmt
560 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
561 stmt # use (d) */
562 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
564 if (dump_enabled_p ())
565 dump_printf_loc (MSG_NOTE, vect_location,
566 "inner-loop def-stmt defining outer-loop stmt.\n");
568 switch (relevant)
570 case vect_unused_in_scope:
571 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
572 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
573 vect_used_in_outer_by_reduction : vect_unused_in_scope;
574 break;
576 case vect_used_by_reduction:
577 case vect_used_only_live:
578 relevant = vect_used_in_outer_by_reduction;
579 break;
581 case vect_used_in_scope:
582 relevant = vect_used_in_outer;
583 break;
585 default:
586 gcc_unreachable ();
589 /* We are also not interested in uses on loop PHI backedges that are
590 inductions. Otherwise we'll needlessly vectorize the IV increment
591 and cause hybrid SLP for SLP inductions. Unless the PHI is live
592 of course. */
593 else if (gimple_code (stmt) == GIMPLE_PHI
594 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
595 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
596 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
597 == use))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "induction value on backedge.\n");
602 return true;
606 vect_mark_relevant (worklist, def_stmt, relevant, false);
607 return true;
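/* Illustrative sketch (hypothetical loop nest, not part of the original
   file): case 3a above.  "d" is defined by an outer-loop statement and used
   by an inner-loop statement, so when the inner-loop use is processed the
   relevance recorded for the definition of "d" is adjusted by the first
   switch in process_use.  */
#if 0
void
example_nest (int *a, const int *b, int n, int m)
{
  for (int i = 0; i < n; i++)
    {
      int d = b[i];		/* outer-loop def (DEF_STMT).  */
      for (int j = 0; j < m; j++)
	a[i * m + j] += d;	/* inner-loop use of "d" (STMT).  */
    }
}
#endif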
611 /* Function vect_mark_stmts_to_be_vectorized.
613 Not all stmts in the loop need to be vectorized. For example:
615 for i...
616 for j...
617 1. T0 = i + j
618 2. T1 = a[T0]
620 3. j = j + 1
622 Stmt 1 and 3 do not need to be vectorized, because loop control and
623 addressing of vectorized data-refs are handled differently.
625 This pass detects such stmts. */
627 bool
628 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
631 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
632 unsigned int nbbs = loop->num_nodes;
633 gimple_stmt_iterator si;
634 gimple *stmt;
635 unsigned int i;
636 stmt_vec_info stmt_vinfo;
637 basic_block bb;
638 gimple *phi;
639 bool live_p;
640 enum vect_relevant relevant;
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "=== vect_mark_stmts_to_be_vectorized ===\n");
646 auto_vec<gimple *, 64> worklist;
648 /* 1. Init worklist. */
649 for (i = 0; i < nbbs; i++)
651 bb = bbs[i];
652 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
654 phi = gsi_stmt (si);
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
661 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
662 vect_mark_relevant (&worklist, phi, relevant, live_p);
664 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
666 stmt = gsi_stmt (si);
667 if (dump_enabled_p ())
669 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
670 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
673 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
674 vect_mark_relevant (&worklist, stmt, relevant, live_p);
678 /* 2. Process_worklist */
679 while (worklist.length () > 0)
681 use_operand_p use_p;
682 ssa_op_iter iter;
684 stmt = worklist.pop ();
685 if (dump_enabled_p ())
687 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
688 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
691 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
692 (DEF_STMT) as relevant/irrelevant according to the relevance property
693 of STMT. */
694 stmt_vinfo = vinfo_for_stmt (stmt);
695 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
697 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
698 propagated as is to the DEF_STMTs of its USEs.
700 One exception is when STMT has been identified as defining a reduction
701 variable; in this case we set the relevance to vect_used_by_reduction.
702 This is because we distinguish between two kinds of relevant stmts -
703 those that are used by a reduction computation, and those that are
704 (also) used by a regular computation. This allows us later on to
705 identify stmts that are used solely by a reduction, and therefore the
706 order of the results that they produce does not have to be kept. */
708 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
710 case vect_reduction_def:
711 gcc_assert (relevant != vect_unused_in_scope);
712 if (relevant != vect_unused_in_scope
713 && relevant != vect_used_in_scope
714 && relevant != vect_used_by_reduction
715 && relevant != vect_used_only_live)
717 if (dump_enabled_p ())
718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
719 "unsupported use of reduction.\n");
720 return false;
722 break;
724 case vect_nested_cycle:
725 if (relevant != vect_unused_in_scope
726 && relevant != vect_used_in_outer_by_reduction
727 && relevant != vect_used_in_outer)
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
731 "unsupported use of nested cycle.\n");
733 return false;
735 break;
737 case vect_double_reduction_def:
738 if (relevant != vect_unused_in_scope
739 && relevant != vect_used_by_reduction
740 && relevant != vect_used_only_live)
742 if (dump_enabled_p ())
743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
744 "unsupported use of double reduction.\n");
746 return false;
748 break;
750 default:
751 break;
754 if (is_pattern_stmt_p (stmt_vinfo))
756 /* Pattern statements are not inserted into the code, so
757 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
758 have to scan the RHS or function arguments instead. */
759 if (is_gimple_assign (stmt))
761 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
762 tree op = gimple_assign_rhs1 (stmt);
764 i = 1;
765 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
767 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
768 relevant, &worklist, false)
769 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
770 relevant, &worklist, false))
771 return false;
772 i = 2;
774 for (; i < gimple_num_ops (stmt); i++)
776 op = gimple_op (stmt, i);
777 if (TREE_CODE (op) == SSA_NAME
778 && !process_use (stmt, op, loop_vinfo, relevant,
779 &worklist, false))
780 return false;
783 else if (is_gimple_call (stmt))
785 for (i = 0; i < gimple_call_num_args (stmt); i++)
787 tree arg = gimple_call_arg (stmt, i);
788 if (!process_use (stmt, arg, loop_vinfo, relevant,
789 &worklist, false))
790 return false;
794 else
795 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
797 tree op = USE_FROM_PTR (use_p);
798 if (!process_use (stmt, op, loop_vinfo, relevant,
799 &worklist, false))
800 return false;
803 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
805 gather_scatter_info gs_info;
806 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
807 gcc_unreachable ();
808 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
809 &worklist, true))
810 return false;
812 } /* while worklist */
814 return true;
818 /* Function vect_model_simple_cost.
820 Models cost for simple operations, i.e. those that only emit ncopies of a
821 single op. Right now, this does not account for multiple insns that could
822 be generated for the single vector op. We will handle that shortly. */
824 void
825 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
826 enum vect_def_type *dt,
827 int ndts,
828 stmt_vector_for_cost *prologue_cost_vec,
829 stmt_vector_for_cost *body_cost_vec)
831 int i;
832 int inside_cost = 0, prologue_cost = 0;
834 /* The SLP costs were already calculated during SLP tree build. */
835 if (PURE_SLP_STMT (stmt_info))
836 return;
838 /* Cost the "broadcast" of a scalar operand into a vector operand.
839 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
840 cost model. */
841 for (i = 0; i < ndts; i++)
842 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
843 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
844 stmt_info, 0, vect_prologue);
846 /* Pass the inside-of-loop statements to the target-specific cost model. */
847 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
848 stmt_info, 0, vect_body);
850 if (dump_enabled_p ())
851 dump_printf_loc (MSG_NOTE, vect_location,
852 "vect_model_simple_cost: inside_cost = %d, "
853 "prologue_cost = %d .\n", inside_cost, prologue_cost);
857 /* Model cost for type demotion and promotion operations. PWR is normally
858 zero for single-step promotions and demotions. It will be one if
859 two-step promotion/demotion is required, and so on. Each additional
860 step doubles the number of instructions required. */
862 static void
863 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
864 enum vect_def_type *dt, int pwr)
866 int i, tmp;
867 int inside_cost = 0, prologue_cost = 0;
868 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
869 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
870 void *target_cost_data;
872 /* The SLP costs were already calculated during SLP tree build. */
873 if (PURE_SLP_STMT (stmt_info))
874 return;
876 if (loop_vinfo)
877 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
878 else
879 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
881 for (i = 0; i < pwr + 1; i++)
883 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
884 (i + 1) : i;
885 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
886 vec_promote_demote, stmt_info, 0,
887 vect_body);
890 /* FORNOW: Assuming maximum 2 args per stmt. */
891 for (i = 0; i < 2; i++)
892 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
893 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
894 stmt_info, 0, vect_prologue);
896 if (dump_enabled_p ())
897 dump_printf_loc (MSG_NOTE, vect_location,
898 "vect_model_promotion_demotion_cost: inside_cost = %d, "
899 "prologue_cost = %d .\n", inside_cost, prologue_cost);
902 /* Function vect_model_store_cost
904 Models cost for stores. In the case of grouped accesses, one access
905 has the overhead of the grouped access attributed to it. */
907 void
908 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
909 vect_memory_access_type memory_access_type,
910 enum vect_def_type dt, slp_tree slp_node,
911 stmt_vector_for_cost *prologue_cost_vec,
912 stmt_vector_for_cost *body_cost_vec)
914 unsigned int inside_cost = 0, prologue_cost = 0;
915 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
916 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
917 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
919 if (dt == vect_constant_def || dt == vect_external_def)
920 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
921 stmt_info, 0, vect_prologue);
923 /* Grouped stores update all elements in the group at once,
924 so we want the DR for the first statement. */
925 if (!slp_node && grouped_access_p)
927 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
928 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 /* N scalar stores plus extracting the elements. */
961 inside_cost += record_stmt_cost (body_cost_vec,
962 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
963 scalar_store, stmt_info, 0, vect_body);
964 else
965 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
967 if (memory_access_type == VMAT_ELEMENTWISE
968 || memory_access_type == VMAT_STRIDED_SLP)
969 inside_cost += record_stmt_cost (body_cost_vec,
970 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
971 vec_to_scalar, stmt_info, 0, vect_body);
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_model_store_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost, prologue_cost);
980 /* Calculate cost of DR's memory access. */
981 void
982 vect_get_store_cost (struct data_reference *dr, int ncopies,
983 unsigned int *inside_cost,
984 stmt_vector_for_cost *body_cost_vec)
986 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
987 gimple *stmt = DR_STMT (dr);
988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
990 switch (alignment_support_scheme)
992 case dr_aligned:
994 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
995 vector_store, stmt_info, 0,
996 vect_body);
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "vect_model_store_cost: aligned.\n");
1001 break;
1004 case dr_unaligned_supported:
1006 /* Here, we assign an additional cost for the unaligned store. */
1007 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1008 unaligned_store, stmt_info,
1009 DR_MISALIGNMENT (dr), vect_body);
1010 if (dump_enabled_p ())
1011 dump_printf_loc (MSG_NOTE, vect_location,
1012 "vect_model_store_cost: unaligned supported by "
1013 "hardware.\n");
1014 break;
1017 case dr_unaligned_unsupported:
1019 *inside_cost = VECT_MAX_COST;
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1023 "vect_model_store_cost: unsupported access.\n");
1024 break;
1027 default:
1028 gcc_unreachable ();
1033 /* Function vect_model_load_cost
1035 Models cost for loads. In the case of grouped accesses, one access has
1036 the overhead of the grouped access attributed to it. Since unaligned
1037 accesses are supported for loads, we also account for the costs of the
1038 access scheme chosen. */
1040 void
1041 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1042 vect_memory_access_type memory_access_type,
1043 slp_tree slp_node,
1044 stmt_vector_for_cost *prologue_cost_vec,
1045 stmt_vector_for_cost *body_cost_vec)
1047 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1048 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1049 unsigned int inside_cost = 0, prologue_cost = 0;
1050 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1052 /* Grouped loads read all elements in the group at once,
1053 so we want the DR for the first statement. */
1054 if (!slp_node && grouped_access_p)
1056 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1057 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1060 /* True if we should include any once-per-group costs as well as
1061 the cost of the statement itself. For SLP we only get called
1062 once per group anyhow. */
1063 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1065 /* We assume that the cost of a single load-lanes instruction is
1066 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1067 access is instead being provided by a load-and-permute operation,
1068 include the cost of the permutes. */
1069 if (first_stmt_p
1070 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1072 /* Uses even and odd extract operations or shuffle operations
1073 for each needed permute. */
1074 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1075 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1076 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1077 stmt_info, 0, vect_body);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_load_cost: strided group_size = %d .\n",
1082 group_size);
1085 /* The loads themselves. */
1086 if (memory_access_type == VMAT_ELEMENTWISE
1087 || memory_access_type == VMAT_GATHER_SCATTER)
1089 /* N scalar loads plus gathering them into a vector. */
1090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1091 inside_cost += record_stmt_cost (body_cost_vec,
1092 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1093 scalar_load, stmt_info, 0, vect_body);
1095 else
1096 vect_get_load_cost (dr, ncopies, first_stmt_p,
1097 &inside_cost, &prologue_cost,
1098 prologue_cost_vec, body_cost_vec, true);
1099 if (memory_access_type == VMAT_ELEMENTWISE
1100 || memory_access_type == VMAT_STRIDED_SLP)
1101 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102 stmt_info, 0, vect_body);
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_NOTE, vect_location,
1106 "vect_model_load_cost: inside_cost = %d, "
1107 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1111 /* Calculate cost of DR's memory access. */
1112 void
1113 vect_get_load_cost (struct data_reference *dr, int ncopies,
1114 bool add_realign_cost, unsigned int *inside_cost,
1115 unsigned int *prologue_cost,
1116 stmt_vector_for_cost *prologue_cost_vec,
1117 stmt_vector_for_cost *body_cost_vec,
1118 bool record_prologue_costs)
1120 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1121 gimple *stmt = DR_STMT (dr);
1122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1124 switch (alignment_support_scheme)
1126 case dr_aligned:
1128 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129 stmt_info, 0, vect_body);
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: aligned.\n");
1135 break;
1137 case dr_unaligned_supported:
1139 /* Here, we assign an additional cost for the unaligned load. */
1140 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1141 unaligned_load, stmt_info,
1142 DR_MISALIGNMENT (dr), vect_body);
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: unaligned supported by "
1147 "hardware.\n");
1149 break;
1151 case dr_explicit_realign:
1153 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154 vector_load, stmt_info, 0, vect_body);
1155 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156 vec_perm, stmt_info, 0, vect_body);
1158 /* FIXME: If the misalignment remains fixed across the iterations of
1159 the containing loop, the following cost should be added to the
1160 prologue costs. */
1161 if (targetm.vectorize.builtin_mask_for_load)
1162 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163 stmt_info, 0, vect_body);
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "vect_model_load_cost: explicit realign\n");
1169 break;
1171 case dr_explicit_realign_optimized:
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: unaligned software "
1176 "pipelined.\n");
1178 /* Unaligned software pipeline has a load of an address, an initial
1179 load, and possibly a mask operation to "prime" the loop. However,
1180 if this is an access in a group of loads, which provide grouped
1181 access, then the above cost should only be considered for one
1182 access in the group. Inside the loop, there is a load op
1183 and a realignment op. */
1185 if (add_realign_cost && record_prologue_costs)
1187 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188 vector_stmt, stmt_info,
1189 0, vect_prologue);
1190 if (targetm.vectorize.builtin_mask_for_load)
1191 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192 vector_stmt, stmt_info,
1193 0, vect_prologue);
1196 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197 stmt_info, 0, vect_body);
1198 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199 stmt_info, 0, vect_body);
1201 if (dump_enabled_p ())
1202 dump_printf_loc (MSG_NOTE, vect_location,
1203 "vect_model_load_cost: explicit realign optimized"
1204 "\n");
1206 break;
1209 case dr_unaligned_unsupported:
1211 *inside_cost = VECT_MAX_COST;
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1215 "vect_model_load_cost: unsupported access.\n");
1216 break;
1219 default:
1220 gcc_unreachable ();
1224 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225 the loop preheader for the vectorized stmt STMT. */
1227 static void
1228 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1230 if (gsi)
1231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1232 else
1234 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1237 if (loop_vinfo)
1239 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1240 basic_block new_bb;
1241 edge pe;
1243 if (nested_in_vect_loop_p (loop, stmt))
1244 loop = loop->inner;
1246 pe = loop_preheader_edge (loop);
1247 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1248 gcc_assert (!new_bb);
1250 else
1252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1253 basic_block bb;
1254 gimple_stmt_iterator gsi_bb_start;
1256 gcc_assert (bb_vinfo);
1257 bb = BB_VINFO_BB (bb_vinfo);
1258 gsi_bb_start = gsi_after_labels (bb);
1259 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1263 if (dump_enabled_p ())
1265 dump_printf_loc (MSG_NOTE, vect_location,
1266 "created new init_stmt: ");
1267 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1271 /* Function vect_init_vector.
1273 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1275 vector type a vector with all elements equal to VAL is created first.
1276 Place the initialization at BSI if it is not NULL. Otherwise, place the
1277 initialization at the loop preheader.
1278 Return the DEF of INIT_STMT.
1279 It will be used in the vectorization of STMT. */
1281 tree
1282 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1284 gimple *init_stmt;
1285 tree new_temp;
1287 /* We abuse this function to push something to an SSA name with initial 'val'. */
1288 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1290 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1291 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1293 /* Scalar boolean value should be transformed into
1294 all zeros or all ones value before building a vector. */
1295 if (VECTOR_BOOLEAN_TYPE_P (type))
1297 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1298 tree false_val = build_zero_cst (TREE_TYPE (type));
1300 if (CONSTANT_CLASS_P (val))
1301 val = integer_zerop (val) ? false_val : true_val;
1302 else
1304 new_temp = make_ssa_name (TREE_TYPE (type));
1305 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1306 val, true_val, false_val);
1307 vect_init_vector_1 (stmt, init_stmt, gsi);
1308 val = new_temp;
1311 else if (CONSTANT_CLASS_P (val))
1312 val = fold_convert (TREE_TYPE (type), val);
1313 else
1315 new_temp = make_ssa_name (TREE_TYPE (type));
1316 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1317 init_stmt = gimple_build_assign (new_temp,
1318 fold_build1 (VIEW_CONVERT_EXPR,
1319 TREE_TYPE (type),
1320 val));
1321 else
1322 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1323 vect_init_vector_1 (stmt, init_stmt, gsi);
1324 val = new_temp;
1327 val = build_vector_from_val (type, val);
1330 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1331 init_stmt = gimple_build_assign (new_temp, val);
1332 vect_init_vector_1 (stmt, init_stmt, gsi);
1333 return new_temp;
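/* Illustrative sketch (hypothetical helper, not part of the original file):
   materialising a loop-invariant scalar VAL as a vector of type VECTYPE.
   With GSI == NULL the initialisation statement is placed in the loop
   preheader and the new vector SSA_NAME is returned.  */
#if 0
static tree
example_splat_invariant (gimple *stmt, tree val, tree vectype)
{
  return vect_init_vector (stmt, val, vectype, NULL);
}
#endif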
1336 /* Function vect_get_vec_def_for_operand_1.
1338 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339 DT that will be used in the vectorized stmt. */
1341 tree
1342 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1344 tree vec_oprnd;
1345 gimple *vec_stmt;
1346 stmt_vec_info def_stmt_info = NULL;
1348 switch (dt)
1350 /* operand is a constant or a loop invariant. */
1351 case vect_constant_def:
1352 case vect_external_def:
1353 /* Code should use vect_get_vec_def_for_operand. */
1354 gcc_unreachable ();
1356 /* operand is defined inside the loop. */
1357 case vect_internal_def:
1359 /* Get the def from the vectorized stmt. */
1360 def_stmt_info = vinfo_for_stmt (def_stmt);
1362 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1363 /* Get vectorized pattern statement. */
1364 if (!vec_stmt
1365 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366 && !STMT_VINFO_RELEVANT (def_stmt_info))
1367 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1369 gcc_assert (vec_stmt);
1370 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371 vec_oprnd = PHI_RESULT (vec_stmt);
1372 else if (is_gimple_call (vec_stmt))
1373 vec_oprnd = gimple_call_lhs (vec_stmt);
1374 else
1375 vec_oprnd = gimple_assign_lhs (vec_stmt);
1376 return vec_oprnd;
1379 /* operand is defined by a loop header phi. */
1380 case vect_reduction_def:
1381 case vect_double_reduction_def:
1382 case vect_nested_cycle:
1383 case vect_induction_def:
1385 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1387 /* Get the def from the vectorized stmt. */
1388 def_stmt_info = vinfo_for_stmt (def_stmt);
1389 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 vec_oprnd = PHI_RESULT (vec_stmt);
1392 else
1393 vec_oprnd = gimple_get_lhs (vec_stmt);
1394 return vec_oprnd;
1397 default:
1398 gcc_unreachable ();
1403 /* Function vect_get_vec_def_for_operand.
1405 OP is an operand in STMT. This function returns a (vector) def that will be
1406 used in the vectorized stmt for STMT.
1408 In the case that OP is an SSA_NAME which is defined in the loop, then
1409 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1411 In case OP is an invariant or constant, a new stmt that creates a vector def
1412 needs to be introduced. VECTYPE may be used to specify a required type for
1413 vector invariant. */
1415 tree
1416 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1418 gimple *def_stmt;
1419 enum vect_def_type dt;
1420 bool is_simple_use;
1421 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1424 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "vect_get_vec_def_for_operand: ");
1428 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429 dump_printf (MSG_NOTE, "\n");
1432 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433 gcc_assert (is_simple_use);
1434 if (def_stmt && dump_enabled_p ())
1436 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1437 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1440 if (dt == vect_constant_def || dt == vect_external_def)
1442 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443 tree vector_type;
1445 if (vectype)
1446 vector_type = vectype;
1447 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1448 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450 else
1451 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1453 gcc_assert (vector_type);
1454 return vect_init_vector (stmt, op, vector_type, NULL);
1456 else
1457 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1461 /* Function vect_get_vec_def_for_stmt_copy
1463 Return a vector-def for an operand. This function is used when the
1464 vectorized stmt to be created (by the caller to this function) is a "copy"
1465 created in case the vectorized result cannot fit in one vector, and several
1466 copies of the vector-stmt are required. In this case the vector-def is
1467 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468 of the stmt that defines VEC_OPRND.
1469 DT is the type of the vector def VEC_OPRND.
1471 Context:
1472 In case the vectorization factor (VF) is bigger than the number
1473 of elements that can fit in a vectype (nunits), we have to generate
1474 more than one vector stmt to vectorize the scalar stmt. This situation
1475 arises when there are multiple data-types operated upon in the loop; the
1476 smallest data-type determines the VF, and as a result, when vectorizing
1477 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478 vector stmt (each computing a vector of 'nunits' results, and together
1479 computing 'VF' results in each iteration). This function is called when
1480 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481 which VF=16 and nunits=4, so the number of copies required is 4):
1483 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1485 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1486 VS1.1: vx.1 = memref1 VS1.2
1487 VS1.2: vx.2 = memref2 VS1.3
1488 VS1.3: vx.3 = memref3
1490 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1491 VSnew.1: vz1 = vx.1 + ... VSnew.2
1492 VSnew.2: vz2 = vx.2 + ... VSnew.3
1493 VSnew.3: vz3 = vx.3 + ...
1495 The vectorization of S1 is explained in vectorizable_load.
1496 The vectorization of S2:
1497 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498 the function 'vect_get_vec_def_for_operand' is called to
1499 get the relevant vector-def for each operand of S2. For operand x it
1500 returns the vector-def 'vx.0'.
1502 To create the remaining copies of the vector-stmt (VSnew.j), this
1503 function is called to get the relevant vector-def for each operand. It is
1504 obtained from the respective VS1.j stmt, which is recorded in the
1505 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1507 For example, to obtain the vector-def 'vx.1' in order to create the
1508 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1510 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511 and return its def ('vx.1').
1512 Overall, to create the above sequence this function will be called 3 times:
1513 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1517 tree
1518 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1520 gimple *vec_stmt_for_operand;
1521 stmt_vec_info def_stmt_info;
1523 /* Do nothing; can reuse same def. */
1524 if (dt == vect_external_def || dt == vect_constant_def )
1525 return vec_oprnd;
1527 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529 gcc_assert (def_stmt_info);
1530 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531 gcc_assert (vec_stmt_for_operand);
1532 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534 else
1535 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536 return vec_oprnd;
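/* Illustrative sketch (hypothetical helper, not part of the original file):
   the typical NCOPIES chain that the comment above describes.  The first
   copy takes its operand from vect_get_vec_def_for_operand; each later copy
   takes it from the previous one via vect_get_vec_def_for_stmt_copy.  */
#if 0
static void
example_copy_chain (tree op, gimple *stmt, enum vect_def_type dt, int ncopies)
{
  tree vec_oprnd = NULL_TREE;
  for (int j = 0; j < ncopies; j++)
    {
      if (j == 0)
	vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL_TREE);
      else
	vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
      /* ... use VEC_OPRND to build copy number J of the vector stmt ...  */
    }
}
#endif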
1540 /* Get vectorized definitions for the operands to create a copy of an original
1541 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1543 void
1544 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1545 vec<tree> *vec_oprnds0,
1546 vec<tree> *vec_oprnds1)
1548 tree vec_oprnd = vec_oprnds0->pop ();
1550 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1551 vec_oprnds0->quick_push (vec_oprnd);
1553 if (vec_oprnds1 && vec_oprnds1->length ())
1555 vec_oprnd = vec_oprnds1->pop ();
1556 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1557 vec_oprnds1->quick_push (vec_oprnd);
1562 /* Get vectorized definitions for OP0 and OP1. */
1564 void
1565 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566 vec<tree> *vec_oprnds0,
1567 vec<tree> *vec_oprnds1,
1568 slp_tree slp_node)
1570 if (slp_node)
1572 int nops = (op1 == NULL_TREE) ? 1 : 2;
1573 auto_vec<tree> ops (nops);
1574 auto_vec<vec<tree> > vec_defs (nops);
1576 ops.quick_push (op0);
1577 if (op1)
1578 ops.quick_push (op1);
1580 vect_get_slp_defs (ops, slp_node, &vec_defs);
1582 *vec_oprnds0 = vec_defs[0];
1583 if (op1)
1584 *vec_oprnds1 = vec_defs[1];
1586 else
1588 tree vec_oprnd;
1590 vec_oprnds0->create (1);
1591 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592 vec_oprnds0->quick_push (vec_oprnd);
1594 if (op1)
1596 vec_oprnds1->create (1);
1597 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598 vec_oprnds1->quick_push (vec_oprnd);
1604 /* Function vect_finish_stmt_generation.
1606 Insert a new stmt. */
1608 void
1609 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1610 gimple_stmt_iterator *gsi)
1612 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1613 vec_info *vinfo = stmt_info->vinfo;
1615 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1617 if (!gsi_end_p (*gsi)
1618 && gimple_has_mem_ops (vec_stmt))
1620 gimple *at_stmt = gsi_stmt (*gsi);
1621 tree vuse = gimple_vuse (at_stmt);
1622 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1624 tree vdef = gimple_vdef (at_stmt);
1625 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1626 /* If we have an SSA vuse and insert a store, update virtual
1627 SSA form to avoid triggering the renamer. Do so only
1628 if we can easily see all uses - which is what almost always
1629 happens with the way vectorized stmts are inserted. */
1630 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1631 && ((is_gimple_assign (vec_stmt)
1632 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1633 || (is_gimple_call (vec_stmt)
1634 && !(gimple_call_flags (vec_stmt)
1635 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1637 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1638 gimple_set_vdef (vec_stmt, new_vdef);
1639 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1643 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1645 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1647 if (dump_enabled_p ())
1649 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1650 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1653 gimple_set_location (vec_stmt, gimple_location (stmt));
1655 /* While EH edges will generally prevent vectorization, stmt might
1656 e.g. be in a must-not-throw region. Ensure newly created stmts
1657 that could throw are part of the same region. */
1658 int lp_nr = lookup_stmt_eh_lp (stmt);
1659 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1660 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1663 /* We want to vectorize a call to combined function CFN with function
1664 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1665 as the types of all inputs. Check whether this is possible using
1666 an internal function, returning its code if so or IFN_LAST if not. */
1668 static internal_fn
1669 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1670 tree vectype_out, tree vectype_in)
1672 internal_fn ifn;
1673 if (internal_fn_p (cfn))
1674 ifn = as_internal_fn (cfn);
1675 else
1676 ifn = associated_internal_fn (fndecl);
1677 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1679 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1680 if (info.vectorizable)
1682 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1683 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1684 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1685 OPTIMIZE_FOR_SPEED))
1686 return ifn;
1689 return IFN_LAST;
1693 static tree permute_vec_elements (tree, tree, tree, gimple *,
1694 gimple_stmt_iterator *);
1696 /* STMT is a non-strided load or store, meaning that it accesses
1697 elements with a known constant step. Return -1 if that step
1698 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1700 static int
1701 compare_step_with_zero (gimple *stmt)
1703 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1704 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1705 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1706 size_zero_node);
1709 /* If the target supports a permute mask that reverses the elements in
1710 a vector of type VECTYPE, return that mask, otherwise return null. */
1712 static tree
1713 perm_mask_for_reverse (tree vectype)
1715 int i, nunits;
1717 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1719 auto_vec_perm_indices sel (nunits);
1720 for (i = 0; i < nunits; ++i)
1721 sel.quick_push (nunits - 1 - i);
1723 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
1724 return NULL_TREE;
1725 return vect_gen_perm_mask_checked (vectype, sel);
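/* Worked example of the mask above: for a four-element vector the selector
   pushed is { 3, 2, 1, 0 }, i.e. the permutation that reverses the element
   order, and it is only returned if can_vec_perm_p says the target supports
   it.  */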
1728 /* A subroutine of get_load_store_type, with a subset of the same
1729 arguments. Handle the case where STMT is part of a grouped load
1730 or store.
1732 For stores, the statements in the group are all consecutive
1733 and there is no gap at the end. For loads, the statements in the
1734 group might not be consecutive; there can be gaps between statements
1735 as well as at the end. */
1737 static bool
1738 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1739 vec_load_store_type vls_type,
1740 vect_memory_access_type *memory_access_type)
1742 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1743 vec_info *vinfo = stmt_info->vinfo;
1744 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1745 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1746 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1747 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1748 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1749 bool single_element_p = (stmt == first_stmt
1750 && !GROUP_NEXT_ELEMENT (stmt_info));
1751 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1752 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1754 /* True if the vectorized statements would access beyond the last
1755 statement in the group. */
1756 bool overrun_p = false;
1758 /* True if we can cope with such overrun by peeling for gaps, so that
1759 there is at least one final scalar iteration after the vector loop. */
1760 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1762 /* There can only be a gap at the end of the group if the stride is
1763 known at compile time. */
1764 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1766 /* Stores can't yet have gaps. */
1767 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1769 if (slp)
1771 if (STMT_VINFO_STRIDED_P (stmt_info))
1773 /* Try to use consecutive accesses of GROUP_SIZE elements,
1774 separated by the stride, until we have a complete vector.
1775 Fall back to scalar accesses if that isn't possible. */
1776 if (nunits % group_size == 0)
1777 *memory_access_type = VMAT_STRIDED_SLP;
1778 else
1779 *memory_access_type = VMAT_ELEMENTWISE;
1781 else
1783 overrun_p = loop_vinfo && gap != 0;
1784 if (overrun_p && vls_type != VLS_LOAD)
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787 "Grouped store with gaps requires"
1788 " non-consecutive accesses\n");
1789 return false;
1791 /* An overrun is fine if the trailing elements are smaller
1792 than the alignment boundary B. Every vector access will
1793 be a multiple of B and so we are guaranteed to access a
1794 non-gap element in the same B-sized block. */
1795 if (overrun_p
1796 && gap < (vect_known_alignment_in_bytes (first_dr)
1797 / vect_get_scalar_dr_size (first_dr)))
1798 overrun_p = false;
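	      /* For instance, with a known 16-byte alignment and 4-byte
		 scalar elements the division above allows a trailing gap
		 of up to 3 elements: every vector access then still falls
		 inside an aligned 16-byte block that contains at least
		 one real group element (illustrative numbers only).  */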
1799 if (overrun_p && !can_overrun_p)
1801 if (dump_enabled_p ())
1802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1803 "Peeling for outer loop is not supported\n");
1804 return false;
1806 *memory_access_type = VMAT_CONTIGUOUS;
1809 else
1811 /* We can always handle this case using elementwise accesses,
1812 but see if something more efficient is available. */
1813 *memory_access_type = VMAT_ELEMENTWISE;
1815 /* If there is a gap at the end of the group then these optimizations
1816 would access excess elements in the last iteration. */
1817 bool would_overrun_p = (gap != 0);
1818 /* An overrun is fine if the trailing elements are smaller than the
1819 alignment boundary B. Every vector access will be a multiple of B
1820 and so we are guaranteed to access a non-gap element in the
1821 same B-sized block. */
1822 if (would_overrun_p
1823 && gap < (vect_known_alignment_in_bytes (first_dr)
1824 / vect_get_scalar_dr_size (first_dr)))
1825 would_overrun_p = false;
1827 if (!STMT_VINFO_STRIDED_P (stmt_info)
1828 && (can_overrun_p || !would_overrun_p)
1829 && compare_step_with_zero (stmt) > 0)
1831 /* First try using LOAD/STORE_LANES. */
1832 if (vls_type == VLS_LOAD
1833 ? vect_load_lanes_supported (vectype, group_size)
1834 : vect_store_lanes_supported (vectype, group_size))
1836 *memory_access_type = VMAT_LOAD_STORE_LANES;
1837 overrun_p = would_overrun_p;
 1840 /* If that fails, try using permuting loads or stores. */
1841 if (*memory_access_type == VMAT_ELEMENTWISE
1842 && (vls_type == VLS_LOAD
1843 ? vect_grouped_load_supported (vectype, single_element_p,
1844 group_size)
1845 : vect_grouped_store_supported (vectype, group_size)))
1847 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1848 overrun_p = would_overrun_p;
1853 if (vls_type != VLS_LOAD && first_stmt == stmt)
1855 /* STMT is the leader of the group. Check the operands of all the
1856 stmts of the group. */
1857 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1858 while (next_stmt)
1860 gcc_assert (gimple_assign_single_p (next_stmt));
1861 tree op = gimple_assign_rhs1 (next_stmt);
1862 gimple *def_stmt;
1863 enum vect_def_type dt;
1864 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1866 if (dump_enabled_p ())
1867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1868 "use not simple.\n");
1869 return false;
1871 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1875 if (overrun_p)
1877 gcc_assert (can_overrun_p);
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1880 "Data access with gaps requires scalar "
1881 "epilogue loop\n");
1882 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1885 return true;
1888 /* A subroutine of get_load_store_type, with a subset of the same
1889 arguments. Handle the case where STMT is a load or store that
1890 accesses consecutive elements with a negative step. */
1892 static vect_memory_access_type
1893 get_negative_load_store_type (gimple *stmt, tree vectype,
1894 vec_load_store_type vls_type,
1895 unsigned int ncopies)
1897 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1898 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1899 dr_alignment_support alignment_support_scheme;
1901 if (ncopies > 1)
1903 if (dump_enabled_p ())
1904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1905 "multiple types with negative step.\n");
1906 return VMAT_ELEMENTWISE;
1909 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1910 if (alignment_support_scheme != dr_aligned
1911 && alignment_support_scheme != dr_unaligned_supported)
1913 if (dump_enabled_p ())
1914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1915 "negative step but alignment required.\n");
1916 return VMAT_ELEMENTWISE;
1919 if (vls_type == VLS_STORE_INVARIANT)
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_NOTE, vect_location,
1923 "negative step with invariant source;"
1924 " no permute needed.\n");
1925 return VMAT_CONTIGUOUS_DOWN;
1928 if (!perm_mask_for_reverse (vectype))
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932 "negative step and reversing not supported.\n");
1933 return VMAT_ELEMENTWISE;
1936 return VMAT_CONTIGUOUS_REVERSE;
1939 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1940 if there is a memory access type that the vectorized form can use,
1941 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1942 or scatters, fill in GS_INFO accordingly.
1944 SLP says whether we're performing SLP rather than loop vectorization.
1945 VECTYPE is the vector type that the vectorized statements will use.
1946 NCOPIES is the number of vector statements that will be needed. */
1948 static bool
1949 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1950 vec_load_store_type vls_type, unsigned int ncopies,
1951 vect_memory_access_type *memory_access_type,
1952 gather_scatter_info *gs_info)
1954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1955 vec_info *vinfo = stmt_info->vinfo;
1956 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1957 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1959 *memory_access_type = VMAT_GATHER_SCATTER;
1960 gimple *def_stmt;
1961 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1962 gcc_unreachable ();
1963 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1964 &gs_info->offset_dt,
1965 &gs_info->offset_vectype))
1967 if (dump_enabled_p ())
1968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1969 "%s index use not simple.\n",
1970 vls_type == VLS_LOAD ? "gather" : "scatter");
1971 return false;
1974 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1976 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1977 memory_access_type))
1978 return false;
1980 else if (STMT_VINFO_STRIDED_P (stmt_info))
1982 gcc_assert (!slp);
1983 *memory_access_type = VMAT_ELEMENTWISE;
1985 else
1987 int cmp = compare_step_with_zero (stmt);
1988 if (cmp < 0)
1989 *memory_access_type = get_negative_load_store_type
1990 (stmt, vectype, vls_type, ncopies);
1991 else if (cmp == 0)
1993 gcc_assert (vls_type == VLS_LOAD);
1994 *memory_access_type = VMAT_INVARIANT;
1996 else
1997 *memory_access_type = VMAT_CONTIGUOUS;
2000 /* FIXME: At the moment the cost model seems to underestimate the
2001 cost of using elementwise accesses. This check preserves the
2002 traditional behavior until that can be fixed. */
2003 if (*memory_access_type == VMAT_ELEMENTWISE
2004 && !STMT_VINFO_STRIDED_P (stmt_info))
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2008 "not falling back to elementwise accesses\n");
2009 return false;
2011 return true;
2014 /* Function vectorizable_mask_load_store.
2016 Check if STMT performs a conditional load or store that can be vectorized.
2017 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2018 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2019 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2021 static bool
2022 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2023 gimple **vec_stmt, slp_tree slp_node)
2025 tree vec_dest = NULL;
2026 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2027 stmt_vec_info prev_stmt_info;
2028 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2029 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2030 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2032 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2033 tree rhs_vectype = NULL_TREE;
2034 tree mask_vectype;
2035 tree elem_type;
2036 gimple *new_stmt;
2037 tree dummy;
2038 tree dataref_ptr = NULL_TREE;
2039 gimple *ptr_incr;
2040 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2041 int ncopies;
2042 int i, j;
2043 bool inv_p;
2044 gather_scatter_info gs_info;
2045 vec_load_store_type vls_type;
2046 tree mask;
2047 gimple *def_stmt;
2048 enum vect_def_type dt;
2050 if (slp_node != NULL)
2051 return false;
2053 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2054 gcc_assert (ncopies >= 1);
2056 mask = gimple_call_arg (stmt, 2);
2058 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2059 return false;
2061 /* FORNOW. This restriction should be relaxed. */
2062 if (nested_in_vect_loop && ncopies > 1)
2064 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2066 "multiple types in nested loop.");
2067 return false;
2070 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2071 return false;
2073 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2074 && ! vec_stmt)
2075 return false;
2077 if (!STMT_VINFO_DATA_REF (stmt_info))
2078 return false;
2080 elem_type = TREE_TYPE (vectype);
2082 if (TREE_CODE (mask) != SSA_NAME)
2083 return false;
2085 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2086 return false;
2088 if (!mask_vectype)
2089 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2091 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2092 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2093 return false;
2095 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2097 tree rhs = gimple_call_arg (stmt, 3);
2098 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2099 return false;
2100 if (dt == vect_constant_def || dt == vect_external_def)
2101 vls_type = VLS_STORE_INVARIANT;
2102 else
2103 vls_type = VLS_STORE;
2105 else
2106 vls_type = VLS_LOAD;
2108 vect_memory_access_type memory_access_type;
2109 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2110 &memory_access_type, &gs_info))
2111 return false;
2113 if (memory_access_type == VMAT_GATHER_SCATTER)
2115 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2116 tree masktype
2117 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2118 if (TREE_CODE (masktype) == INTEGER_TYPE)
2120 if (dump_enabled_p ())
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2122 "masked gather with integer mask not supported.");
2123 return false;
2126 else if (memory_access_type != VMAT_CONTIGUOUS)
2128 if (dump_enabled_p ())
2129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2130 "unsupported access type for masked %s.\n",
2131 vls_type == VLS_LOAD ? "load" : "store");
2132 return false;
2134 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2135 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2136 TYPE_MODE (mask_vectype),
2137 vls_type == VLS_LOAD)
2138 || (rhs_vectype
2139 && !useless_type_conversion_p (vectype, rhs_vectype)))
2140 return false;
2142 if (!vec_stmt) /* transformation not required. */
2144 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2145 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2146 if (vls_type == VLS_LOAD)
2147 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2148 NULL, NULL, NULL);
2149 else
2150 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2151 dt, NULL, NULL, NULL);
2152 return true;
2154 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2156 /* Transform. */
2158 if (memory_access_type == VMAT_GATHER_SCATTER)
2160 tree vec_oprnd0 = NULL_TREE, op;
2161 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2162 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2163 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2164 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2165 tree mask_perm_mask = NULL_TREE;
2166 edge pe = loop_preheader_edge (loop);
2167 gimple_seq seq;
2168 basic_block new_bb;
2169 enum { NARROW, NONE, WIDEN } modifier;
2170 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
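      /* The argument list of the gather builtin is walked below in
	 declaration order; for the masked gathers handled here that is
	 (source, pointer, index, mask, scale).  */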
2172 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2173 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2174 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2175 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2176 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2177 scaletype = TREE_VALUE (arglist);
2178 gcc_checking_assert (types_compatible_p (srctype, rettype)
2179 && types_compatible_p (srctype, masktype));
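      /* Relate the number of offset elements to the number of data
	 elements: with equal counts no adjustment is needed; if the
	 offset vector holds twice as many elements (WIDEN below), one
	 offset vector feeds two copies and the odd copies first permute
	 the upper offsets into place; if it holds half as many (NARROW),
	 two gather results are combined with a permute and NCOPIES is
	 doubled.  (Summary of the three cases handled below.)  */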
2181 if (nunits == gather_off_nunits)
2182 modifier = NONE;
2183 else if (nunits == gather_off_nunits / 2)
2185 modifier = WIDEN;
2187 auto_vec_perm_indices sel (gather_off_nunits);
2188 for (i = 0; i < gather_off_nunits; ++i)
2189 sel.quick_push (i | nunits);
2191 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2193 else if (nunits == gather_off_nunits * 2)
2195 modifier = NARROW;
2197 auto_vec_perm_indices sel (nunits);
2198 sel.quick_grow (nunits);
2199 for (i = 0; i < nunits; ++i)
2200 sel[i] = i < gather_off_nunits
2201 ? i : i + nunits - gather_off_nunits;
2203 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2204 ncopies *= 2;
2205 for (i = 0; i < nunits; ++i)
2206 sel[i] = i | gather_off_nunits;
2207 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2209 else
2210 gcc_unreachable ();
2212 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2214 ptr = fold_convert (ptrtype, gs_info.base);
2215 if (!is_gimple_min_invariant (ptr))
2217 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2218 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2219 gcc_assert (!new_bb);
2222 scale = build_int_cst (scaletype, gs_info.scale);
2224 prev_stmt_info = NULL;
2225 for (j = 0; j < ncopies; ++j)
2227 if (modifier == WIDEN && (j & 1))
2228 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2229 perm_mask, stmt, gsi);
2230 else if (j == 0)
2231 op = vec_oprnd0
2232 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2233 else
2234 op = vec_oprnd0
2235 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2237 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2239 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2240 == TYPE_VECTOR_SUBPARTS (idxtype));
2241 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2242 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2243 new_stmt
2244 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2246 op = var;
2249 if (mask_perm_mask && (j & 1))
2250 mask_op = permute_vec_elements (mask_op, mask_op,
2251 mask_perm_mask, stmt, gsi);
2252 else
2254 if (j == 0)
2255 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2256 else
2258 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2259 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2262 mask_op = vec_mask;
2263 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2265 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2266 == TYPE_VECTOR_SUBPARTS (masktype));
2267 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2268 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2269 new_stmt
2270 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2271 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2272 mask_op = var;
2276 new_stmt
2277 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2278 scale);
2280 if (!useless_type_conversion_p (vectype, rettype))
2282 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2283 == TYPE_VECTOR_SUBPARTS (rettype));
2284 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2285 gimple_call_set_lhs (new_stmt, op);
2286 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2287 var = make_ssa_name (vec_dest);
2288 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2289 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2291 else
2293 var = make_ssa_name (vec_dest, new_stmt);
2294 gimple_call_set_lhs (new_stmt, var);
2297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2299 if (modifier == NARROW)
2301 if ((j & 1) == 0)
2303 prev_res = var;
2304 continue;
2306 var = permute_vec_elements (prev_res, var,
2307 perm_mask, stmt, gsi);
2308 new_stmt = SSA_NAME_DEF_STMT (var);
2311 if (prev_stmt_info == NULL)
2312 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2313 else
2314 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2315 prev_stmt_info = vinfo_for_stmt (new_stmt);
2318 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2319 from the IL. */
2320 if (STMT_VINFO_RELATED_STMT (stmt_info))
2322 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2323 stmt_info = vinfo_for_stmt (stmt);
2325 tree lhs = gimple_call_lhs (stmt);
2326 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2327 set_vinfo_for_stmt (new_stmt, stmt_info);
2328 set_vinfo_for_stmt (stmt, NULL);
2329 STMT_VINFO_STMT (stmt_info) = new_stmt;
2330 gsi_replace (gsi, new_stmt, true);
2331 return true;
2333 else if (vls_type != VLS_LOAD)
2335 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2336 prev_stmt_info = NULL;
2337 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2338 for (i = 0; i < ncopies; i++)
2340 unsigned align, misalign;
2342 if (i == 0)
2344 tree rhs = gimple_call_arg (stmt, 3);
2345 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2346 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2347 mask_vectype);
 2348 /* We should have caught mismatched types earlier. */
2349 gcc_assert (useless_type_conversion_p (vectype,
2350 TREE_TYPE (vec_rhs)));
2351 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2352 NULL_TREE, &dummy, gsi,
2353 &ptr_incr, false, &inv_p);
2354 gcc_assert (!inv_p);
2356 else
2358 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2359 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2360 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2361 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2362 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2363 TYPE_SIZE_UNIT (vectype));
2366 align = DR_TARGET_ALIGNMENT (dr);
2367 if (aligned_access_p (dr))
2368 misalign = 0;
2369 else if (DR_MISALIGNMENT (dr) == -1)
2371 align = TYPE_ALIGN_UNIT (elem_type);
2372 misalign = 0;
2374 else
2375 misalign = DR_MISALIGNMENT (dr);
2376 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2377 misalign);
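	  /* The second argument of the IFN_MASK_STORE / IFN_MASK_LOAD
	     calls built below carries the guaranteed pointer alignment:
	     the target alignment when the access is known to be aligned,
	     the element alignment when the misalignment is unknown, and
	     otherwise the largest power of two dividing the known
	     misalignment.  */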
2378 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2379 misalign ? least_bit_hwi (misalign) : align);
2380 gcall *call
2381 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2382 ptr, vec_mask, vec_rhs);
2383 gimple_call_set_nothrow (call, true);
2384 new_stmt = call;
2385 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2386 if (i == 0)
2387 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2388 else
2389 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2390 prev_stmt_info = vinfo_for_stmt (new_stmt);
2393 else
2395 tree vec_mask = NULL_TREE;
2396 prev_stmt_info = NULL;
2397 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2398 for (i = 0; i < ncopies; i++)
2400 unsigned align, misalign;
2402 if (i == 0)
2404 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2405 mask_vectype);
2406 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2407 NULL_TREE, &dummy, gsi,
2408 &ptr_incr, false, &inv_p);
2409 gcc_assert (!inv_p);
2411 else
2413 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2414 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2415 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2416 TYPE_SIZE_UNIT (vectype));
2419 align = DR_TARGET_ALIGNMENT (dr);
2420 if (aligned_access_p (dr))
2421 misalign = 0;
2422 else if (DR_MISALIGNMENT (dr) == -1)
2424 align = TYPE_ALIGN_UNIT (elem_type);
2425 misalign = 0;
2427 else
2428 misalign = DR_MISALIGNMENT (dr);
2429 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2430 misalign);
2431 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2432 misalign ? least_bit_hwi (misalign) : align);
2433 gcall *call
2434 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2435 ptr, vec_mask);
2436 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2437 gimple_call_set_nothrow (call, true);
2438 vect_finish_stmt_generation (stmt, call, gsi);
2439 if (i == 0)
2440 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2441 else
2442 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2443 prev_stmt_info = vinfo_for_stmt (call);
2447 if (vls_type == VLS_LOAD)
2449 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2450 from the IL. */
2451 if (STMT_VINFO_RELATED_STMT (stmt_info))
2453 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2454 stmt_info = vinfo_for_stmt (stmt);
2456 tree lhs = gimple_call_lhs (stmt);
2457 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2458 set_vinfo_for_stmt (new_stmt, stmt_info);
2459 set_vinfo_for_stmt (stmt, NULL);
2460 STMT_VINFO_STMT (stmt_info) = new_stmt;
2461 gsi_replace (gsi, new_stmt, true);
2464 return true;
2467 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2469 static bool
2470 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2471 gimple **vec_stmt, slp_tree slp_node,
2472 tree vectype_in, enum vect_def_type *dt)
2474 tree op, vectype;
2475 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2476 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2477 unsigned ncopies, nunits;
2479 op = gimple_call_arg (stmt, 0);
2480 vectype = STMT_VINFO_VECTYPE (stmt_info);
2481 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2483 /* Multiple types in SLP are handled by creating the appropriate number of
2484 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2485 case of SLP. */
2486 if (slp_node)
2487 ncopies = 1;
2488 else
2489 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2491 gcc_assert (ncopies >= 1);
2493 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2494 if (! char_vectype)
2495 return false;
2497 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2498 unsigned word_bytes = num_bytes / nunits;
2500 auto_vec_perm_indices elts (num_bytes);
2501 for (unsigned i = 0; i < nunits; ++i)
2502 for (unsigned j = 0; j < word_bytes; ++j)
2503 elts.quick_push ((i + 1) * word_bytes - j - 1);
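  /* For example, for __builtin_bswap32 with 4-byte words in a 16-byte
     char vector the selector built above is
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
     i.e. the bytes of each word are reversed in place (illustrative
     sizes only).  */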
2505 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts))
2506 return false;
2508 if (! vec_stmt)
2510 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2511 if (dump_enabled_p ())
2512 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2513 "\n");
2514 if (! PURE_SLP_STMT (stmt_info))
2516 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2517 1, vector_stmt, stmt_info, 0, vect_prologue);
2518 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2519 ncopies, vec_perm, stmt_info, 0, vect_body);
2521 return true;
2524 tree_vector_builder telts (char_vectype, num_bytes, 1);
2525 for (unsigned i = 0; i < num_bytes; ++i)
2526 telts.quick_push (build_int_cst (char_type_node, elts[i]));
2527 tree bswap_vconst = telts.build ();
2529 /* Transform. */
2530 vec<tree> vec_oprnds = vNULL;
2531 gimple *new_stmt = NULL;
2532 stmt_vec_info prev_stmt_info = NULL;
2533 for (unsigned j = 0; j < ncopies; j++)
2535 /* Handle uses. */
2536 if (j == 0)
2537 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2538 else
2539 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
 2541 /* Arguments are ready. Create the new vector stmt. */
2542 unsigned i;
2543 tree vop;
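      /* Each operand is reinterpreted as a vector of chars, its bytes
	 are permuted according to BSWAP_VCONST, and the result is
	 reinterpreted back as a vector of the original element type.  */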
2544 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2546 tree tem = make_ssa_name (char_vectype);
2547 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2548 char_vectype, vop));
2549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2550 tree tem2 = make_ssa_name (char_vectype);
2551 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2552 tem, tem, bswap_vconst);
2553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2554 tem = make_ssa_name (vectype);
2555 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2556 vectype, tem2));
2557 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2558 if (slp_node)
2559 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2562 if (slp_node)
2563 continue;
2565 if (j == 0)
2566 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2567 else
2568 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2570 prev_stmt_info = vinfo_for_stmt (new_stmt);
2573 vec_oprnds.release ();
2574 return true;
2577 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2578 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2579 in a single step. On success, store the binary pack code in
2580 *CONVERT_CODE. */
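/* For example, two V2DI results of an internal function call can be
   narrowed into a single V4SI result with one VEC_PACK_TRUNC_EXPR
   (assuming the target provides the corresponding pack optab).  */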
2582 static bool
2583 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2584 tree_code *convert_code)
2586 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2587 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2588 return false;
2590 tree_code code;
2591 int multi_step_cvt = 0;
2592 auto_vec <tree, 8> interm_types;
2593 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2594 &code, &multi_step_cvt,
2595 &interm_types)
2596 || multi_step_cvt)
2597 return false;
2599 *convert_code = code;
2600 return true;
2603 /* Function vectorizable_call.
2605 Check if GS performs a function call that can be vectorized.
2606 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2607 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2608 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2610 static bool
2611 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2612 slp_tree slp_node)
2614 gcall *stmt;
2615 tree vec_dest;
2616 tree scalar_dest;
2617 tree op, type;
2618 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2619 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2620 tree vectype_out, vectype_in;
2621 int nunits_in;
2622 int nunits_out;
2623 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2624 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2625 vec_info *vinfo = stmt_info->vinfo;
2626 tree fndecl, new_temp, rhs_type;
2627 gimple *def_stmt;
2628 enum vect_def_type dt[3]
2629 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2630 int ndts = 3;
2631 gimple *new_stmt = NULL;
2632 int ncopies, j;
2633 vec<tree> vargs = vNULL;
2634 enum { NARROW, NONE, WIDEN } modifier;
2635 size_t i, nargs;
2636 tree lhs;
2638 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2639 return false;
2641 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2642 && ! vec_stmt)
2643 return false;
2645 /* Is GS a vectorizable call? */
2646 stmt = dyn_cast <gcall *> (gs);
2647 if (!stmt)
2648 return false;
2650 if (gimple_call_internal_p (stmt)
2651 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2652 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2653 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2654 slp_node);
2656 if (gimple_call_lhs (stmt) == NULL_TREE
2657 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2658 return false;
2660 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2662 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2664 /* Process function arguments. */
2665 rhs_type = NULL_TREE;
2666 vectype_in = NULL_TREE;
2667 nargs = gimple_call_num_args (stmt);
 2669 /* Bail out if the function has more than three arguments; we do not have
 2670 interesting builtin functions to vectorize with more than two arguments
 2671 except for fma. A call with no arguments is not vectorizable either. */
2672 if (nargs == 0 || nargs > 3)
2673 return false;
 2675 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2676 if (gimple_call_internal_p (stmt)
2677 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2679 nargs = 0;
2680 rhs_type = unsigned_type_node;
2683 for (i = 0; i < nargs; i++)
2685 tree opvectype;
2687 op = gimple_call_arg (stmt, i);
2689 /* We can only handle calls with arguments of the same type. */
2690 if (rhs_type
2691 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2693 if (dump_enabled_p ())
2694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2695 "argument types differ.\n");
2696 return false;
2698 if (!rhs_type)
2699 rhs_type = TREE_TYPE (op);
2701 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2703 if (dump_enabled_p ())
2704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2705 "use not simple.\n");
2706 return false;
2709 if (!vectype_in)
2710 vectype_in = opvectype;
2711 else if (opvectype
2712 && opvectype != vectype_in)
2714 if (dump_enabled_p ())
2715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2716 "argument vector types differ.\n");
2717 return false;
2720 /* If all arguments are external or constant defs use a vector type with
2721 the same size as the output vector type. */
2722 if (!vectype_in)
2723 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2724 if (vec_stmt)
2725 gcc_assert (vectype_in);
2726 if (!vectype_in)
2728 if (dump_enabled_p ())
2730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2731 "no vectype for scalar type ");
2732 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2733 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2736 return false;
2739 /* FORNOW */
2740 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2741 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
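  /* NARROW: the output vector holds twice as many elements as the
     input vectors, so two input vectors produce one result; WIDEN is
     the converse case; NONE needs no adjustment.  */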
2742 if (nunits_in == nunits_out / 2)
2743 modifier = NARROW;
2744 else if (nunits_out == nunits_in)
2745 modifier = NONE;
2746 else if (nunits_out == nunits_in / 2)
2747 modifier = WIDEN;
2748 else
2749 return false;
2751 /* We only handle functions that do not read or clobber memory. */
2752 if (gimple_vuse (stmt))
2754 if (dump_enabled_p ())
2755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2756 "function reads from or writes to memory.\n");
2757 return false;
 2760 /* For now, we only vectorize functions if a target-specific builtin
 2761 is available. TODO -- in some cases, it might be profitable to
 2762 insert the calls for pieces of the vector, in order to be able
 2763 to vectorize other operations in the loop. */
2764 fndecl = NULL_TREE;
2765 internal_fn ifn = IFN_LAST;
2766 combined_fn cfn = gimple_call_combined_fn (stmt);
2767 tree callee = gimple_call_fndecl (stmt);
2769 /* First try using an internal function. */
2770 tree_code convert_code = ERROR_MARK;
2771 if (cfn != CFN_LAST
2772 && (modifier == NONE
2773 || (modifier == NARROW
2774 && simple_integer_narrowing (vectype_out, vectype_in,
2775 &convert_code))))
2776 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2777 vectype_in);
2779 /* If that fails, try asking for a target-specific built-in function. */
2780 if (ifn == IFN_LAST)
2782 if (cfn != CFN_LAST)
2783 fndecl = targetm.vectorize.builtin_vectorized_function
2784 (cfn, vectype_out, vectype_in);
2785 else
2786 fndecl = targetm.vectorize.builtin_md_vectorized_function
2787 (callee, vectype_out, vectype_in);
2790 if (ifn == IFN_LAST && !fndecl)
2792 if (cfn == CFN_GOMP_SIMD_LANE
2793 && !slp_node
2794 && loop_vinfo
2795 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2796 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2797 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2798 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2800 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2801 { 0, 1, 2, ... vf - 1 } vector. */
2802 gcc_assert (nargs == 0);
2804 else if (modifier == NONE
2805 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2806 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2807 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2808 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2809 vectype_in, dt);
2810 else
2812 if (dump_enabled_p ())
2813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2814 "function is not vectorizable.\n");
2815 return false;
2819 if (slp_node)
2820 ncopies = 1;
2821 else if (modifier == NARROW && ifn == IFN_LAST)
2822 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2823 else
2824 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2826 /* Sanity check: make sure that at least one copy of the vectorized stmt
2827 needs to be generated. */
2828 gcc_assert (ncopies >= 1);
2830 if (!vec_stmt) /* transformation not required. */
2832 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2833 if (dump_enabled_p ())
2834 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2835 "\n");
2836 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2837 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2838 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2839 vec_promote_demote, stmt_info, 0, vect_body);
2841 return true;
2844 /* Transform. */
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2849 /* Handle def. */
2850 scalar_dest = gimple_call_lhs (stmt);
2851 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2853 prev_stmt_info = NULL;
2854 if (modifier == NONE || ifn != IFN_LAST)
2856 tree prev_res = NULL_TREE;
2857 for (j = 0; j < ncopies; ++j)
2859 /* Build argument list for the vectorized call. */
2860 if (j == 0)
2861 vargs.create (nargs);
2862 else
2863 vargs.truncate (0);
2865 if (slp_node)
2867 auto_vec<vec<tree> > vec_defs (nargs);
2868 vec<tree> vec_oprnds0;
2870 for (i = 0; i < nargs; i++)
2871 vargs.quick_push (gimple_call_arg (stmt, i));
2872 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2873 vec_oprnds0 = vec_defs[0];
2875 /* Arguments are ready. Create the new vector stmt. */
2876 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2878 size_t k;
2879 for (k = 0; k < nargs; k++)
2881 vec<tree> vec_oprndsk = vec_defs[k];
2882 vargs[k] = vec_oprndsk[i];
2884 if (modifier == NARROW)
2886 tree half_res = make_ssa_name (vectype_in);
2887 gcall *call
2888 = gimple_build_call_internal_vec (ifn, vargs);
2889 gimple_call_set_lhs (call, half_res);
2890 gimple_call_set_nothrow (call, true);
2891 new_stmt = call;
2892 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2893 if ((i & 1) == 0)
2895 prev_res = half_res;
2896 continue;
2898 new_temp = make_ssa_name (vec_dest);
2899 new_stmt = gimple_build_assign (new_temp, convert_code,
2900 prev_res, half_res);
2902 else
2904 gcall *call;
2905 if (ifn != IFN_LAST)
2906 call = gimple_build_call_internal_vec (ifn, vargs);
2907 else
2908 call = gimple_build_call_vec (fndecl, vargs);
2909 new_temp = make_ssa_name (vec_dest, call);
2910 gimple_call_set_lhs (call, new_temp);
2911 gimple_call_set_nothrow (call, true);
2912 new_stmt = call;
2914 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2915 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2918 for (i = 0; i < nargs; i++)
2920 vec<tree> vec_oprndsi = vec_defs[i];
2921 vec_oprndsi.release ();
2923 continue;
2926 for (i = 0; i < nargs; i++)
2928 op = gimple_call_arg (stmt, i);
2929 if (j == 0)
2930 vec_oprnd0
2931 = vect_get_vec_def_for_operand (op, stmt);
2932 else
2934 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2935 vec_oprnd0
2936 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2939 vargs.quick_push (vec_oprnd0);
2942 if (gimple_call_internal_p (stmt)
2943 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2945 tree_vector_builder v (vectype_out, 1, 3);
2946 for (int k = 0; k < 3; ++k)
2947 v.quick_push (build_int_cst (unsigned_type_node,
2948 j * nunits_out + k));
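	  /* The builder is given a single pattern of three elements,
	     which it extrapolates to the linear series
	     { J * NUNITS_OUT, J * NUNITS_OUT + 1, ... } covering the
	     whole vector.  */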
2949 tree cst = v.build ();
2950 tree new_var
2951 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2952 gimple *init_stmt = gimple_build_assign (new_var, cst);
2953 vect_init_vector_1 (stmt, init_stmt, NULL);
2954 new_temp = make_ssa_name (vec_dest);
2955 new_stmt = gimple_build_assign (new_temp, new_var);
2957 else if (modifier == NARROW)
2959 tree half_res = make_ssa_name (vectype_in);
2960 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2961 gimple_call_set_lhs (call, half_res);
2962 gimple_call_set_nothrow (call, true);
2963 new_stmt = call;
2964 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2965 if ((j & 1) == 0)
2967 prev_res = half_res;
2968 continue;
2970 new_temp = make_ssa_name (vec_dest);
2971 new_stmt = gimple_build_assign (new_temp, convert_code,
2972 prev_res, half_res);
2974 else
2976 gcall *call;
2977 if (ifn != IFN_LAST)
2978 call = gimple_build_call_internal_vec (ifn, vargs);
2979 else
2980 call = gimple_build_call_vec (fndecl, vargs);
2981 new_temp = make_ssa_name (vec_dest, new_stmt);
2982 gimple_call_set_lhs (call, new_temp);
2983 gimple_call_set_nothrow (call, true);
2984 new_stmt = call;
2986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2988 if (j == (modifier == NARROW ? 1 : 0))
2989 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2990 else
2991 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2993 prev_stmt_info = vinfo_for_stmt (new_stmt);
2996 else if (modifier == NARROW)
2998 for (j = 0; j < ncopies; ++j)
3000 /* Build argument list for the vectorized call. */
3001 if (j == 0)
3002 vargs.create (nargs * 2);
3003 else
3004 vargs.truncate (0);
3006 if (slp_node)
3008 auto_vec<vec<tree> > vec_defs (nargs);
3009 vec<tree> vec_oprnds0;
3011 for (i = 0; i < nargs; i++)
3012 vargs.quick_push (gimple_call_arg (stmt, i));
3013 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3014 vec_oprnds0 = vec_defs[0];
3016 /* Arguments are ready. Create the new vector stmt. */
3017 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3019 size_t k;
3020 vargs.truncate (0);
3021 for (k = 0; k < nargs; k++)
3023 vec<tree> vec_oprndsk = vec_defs[k];
3024 vargs.quick_push (vec_oprndsk[i]);
3025 vargs.quick_push (vec_oprndsk[i + 1]);
3027 gcall *call;
3028 if (ifn != IFN_LAST)
3029 call = gimple_build_call_internal_vec (ifn, vargs);
3030 else
3031 call = gimple_build_call_vec (fndecl, vargs);
3032 new_temp = make_ssa_name (vec_dest, call);
3033 gimple_call_set_lhs (call, new_temp);
3034 gimple_call_set_nothrow (call, true);
3035 new_stmt = call;
3036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3037 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3040 for (i = 0; i < nargs; i++)
3042 vec<tree> vec_oprndsi = vec_defs[i];
3043 vec_oprndsi.release ();
3045 continue;
3048 for (i = 0; i < nargs; i++)
3050 op = gimple_call_arg (stmt, i);
3051 if (j == 0)
3053 vec_oprnd0
3054 = vect_get_vec_def_for_operand (op, stmt);
3055 vec_oprnd1
3056 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3058 else
3060 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3061 vec_oprnd0
3062 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3063 vec_oprnd1
3064 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3067 vargs.quick_push (vec_oprnd0);
3068 vargs.quick_push (vec_oprnd1);
3071 new_stmt = gimple_build_call_vec (fndecl, vargs);
3072 new_temp = make_ssa_name (vec_dest, new_stmt);
3073 gimple_call_set_lhs (new_stmt, new_temp);
3074 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3076 if (j == 0)
3077 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3078 else
3079 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3081 prev_stmt_info = vinfo_for_stmt (new_stmt);
3084 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3086 else
3087 /* No current target implements this case. */
3088 return false;
3090 vargs.release ();
 3092 /* The call in STMT might prevent it from being removed in DCE.
 3093 However, we cannot remove it here, because of the way the SSA name
 3094 it defines is mapped to the new definition. So just replace the
 3095 RHS of the statement with something harmless. */
3097 if (slp_node)
3098 return true;
3100 type = TREE_TYPE (scalar_dest);
3101 if (is_pattern_stmt_p (stmt_info))
3102 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3103 else
3104 lhs = gimple_call_lhs (stmt);
3106 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3107 set_vinfo_for_stmt (new_stmt, stmt_info);
3108 set_vinfo_for_stmt (stmt, NULL);
3109 STMT_VINFO_STMT (stmt_info) = new_stmt;
3110 gsi_replace (gsi, new_stmt, false);
3112 return true;
3116 struct simd_call_arg_info
3118 tree vectype;
3119 tree op;
3120 HOST_WIDE_INT linear_step;
3121 enum vect_def_type dt;
3122 unsigned int align;
3123 bool simd_lane_linear;
3126 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3127 is linear within simd lane (but not within whole loop), note it in
3128 *ARGINFO. */
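/* For example, for an address computed as BASE + (sizetype)
   (GOMP_SIMD_LANE () * 4) the walk below records BASE and a linear
   step of 4 (illustrative step value).  */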
3130 static void
3131 vect_simd_lane_linear (tree op, struct loop *loop,
3132 struct simd_call_arg_info *arginfo)
3134 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3136 if (!is_gimple_assign (def_stmt)
3137 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3138 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3139 return;
3141 tree base = gimple_assign_rhs1 (def_stmt);
3142 HOST_WIDE_INT linear_step = 0;
3143 tree v = gimple_assign_rhs2 (def_stmt);
3144 while (TREE_CODE (v) == SSA_NAME)
3146 tree t;
3147 def_stmt = SSA_NAME_DEF_STMT (v);
3148 if (is_gimple_assign (def_stmt))
3149 switch (gimple_assign_rhs_code (def_stmt))
3151 case PLUS_EXPR:
3152 t = gimple_assign_rhs2 (def_stmt);
3153 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3154 return;
3155 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3156 v = gimple_assign_rhs1 (def_stmt);
3157 continue;
3158 case MULT_EXPR:
3159 t = gimple_assign_rhs2 (def_stmt);
3160 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3161 return;
3162 linear_step = tree_to_shwi (t);
3163 v = gimple_assign_rhs1 (def_stmt);
3164 continue;
3165 CASE_CONVERT:
3166 t = gimple_assign_rhs1 (def_stmt);
3167 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3168 || (TYPE_PRECISION (TREE_TYPE (v))
3169 < TYPE_PRECISION (TREE_TYPE (t))))
3170 return;
3171 if (!linear_step)
3172 linear_step = 1;
3173 v = t;
3174 continue;
3175 default:
3176 return;
3178 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3179 && loop->simduid
3180 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3181 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3182 == loop->simduid))
3184 if (!linear_step)
3185 linear_step = 1;
3186 arginfo->linear_step = linear_step;
3187 arginfo->op = base;
3188 arginfo->simd_lane_linear = true;
3189 return;
3194 /* Function vectorizable_simd_clone_call.
3196 Check if STMT performs a function call that can be vectorized
3197 by calling a simd clone of the function.
3198 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3199 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3200 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3202 static bool
3203 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3204 gimple **vec_stmt, slp_tree slp_node)
3206 tree vec_dest;
3207 tree scalar_dest;
3208 tree op, type;
3209 tree vec_oprnd0 = NULL_TREE;
3210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3211 tree vectype;
3212 unsigned int nunits;
3213 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3214 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3215 vec_info *vinfo = stmt_info->vinfo;
3216 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3217 tree fndecl, new_temp;
3218 gimple *def_stmt;
3219 gimple *new_stmt = NULL;
3220 int ncopies, j;
3221 auto_vec<simd_call_arg_info> arginfo;
3222 vec<tree> vargs = vNULL;
3223 size_t i, nargs;
3224 tree lhs, rtype, ratype;
3225 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3227 /* Is STMT a vectorizable call? */
3228 if (!is_gimple_call (stmt))
3229 return false;
3231 fndecl = gimple_call_fndecl (stmt);
3232 if (fndecl == NULL_TREE)
3233 return false;
3235 struct cgraph_node *node = cgraph_node::get (fndecl);
3236 if (node == NULL || node->simd_clones == NULL)
3237 return false;
3239 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3240 return false;
3242 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3243 && ! vec_stmt)
3244 return false;
3246 if (gimple_call_lhs (stmt)
3247 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3248 return false;
3250 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3252 vectype = STMT_VINFO_VECTYPE (stmt_info);
3254 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3255 return false;
3257 /* FORNOW */
3258 if (slp_node)
3259 return false;
3261 /* Process function arguments. */
3262 nargs = gimple_call_num_args (stmt);
3264 /* Bail out if the function has zero arguments. */
3265 if (nargs == 0)
3266 return false;
3268 arginfo.reserve (nargs, true);
3270 for (i = 0; i < nargs; i++)
3272 simd_call_arg_info thisarginfo;
3273 affine_iv iv;
3275 thisarginfo.linear_step = 0;
3276 thisarginfo.align = 0;
3277 thisarginfo.op = NULL_TREE;
3278 thisarginfo.simd_lane_linear = false;
3280 op = gimple_call_arg (stmt, i);
3281 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3282 &thisarginfo.vectype)
3283 || thisarginfo.dt == vect_uninitialized_def)
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "use not simple.\n");
3288 return false;
3291 if (thisarginfo.dt == vect_constant_def
3292 || thisarginfo.dt == vect_external_def)
3293 gcc_assert (thisarginfo.vectype == NULL_TREE);
3294 else
3295 gcc_assert (thisarginfo.vectype != NULL_TREE);
3297 /* For linear arguments, the analyze phase should have saved
3298 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3299 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3300 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3302 gcc_assert (vec_stmt);
3303 thisarginfo.linear_step
3304 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3305 thisarginfo.op
3306 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3307 thisarginfo.simd_lane_linear
3308 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3309 == boolean_true_node);
 3310 /* If the loop has been peeled for alignment, the saved base needs to be adjusted accordingly. */
3311 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3312 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3313 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3315 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3316 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3317 tree opt = TREE_TYPE (thisarginfo.op);
3318 bias = fold_convert (TREE_TYPE (step), bias);
3319 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3320 thisarginfo.op
3321 = fold_build2 (POINTER_TYPE_P (opt)
3322 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3323 thisarginfo.op, bias);
3326 else if (!vec_stmt
3327 && thisarginfo.dt != vect_constant_def
3328 && thisarginfo.dt != vect_external_def
3329 && loop_vinfo
3330 && TREE_CODE (op) == SSA_NAME
3331 && simple_iv (loop, loop_containing_stmt (stmt), op,
3332 &iv, false)
3333 && tree_fits_shwi_p (iv.step))
3335 thisarginfo.linear_step = tree_to_shwi (iv.step);
3336 thisarginfo.op = iv.base;
3338 else if ((thisarginfo.dt == vect_constant_def
3339 || thisarginfo.dt == vect_external_def)
3340 && POINTER_TYPE_P (TREE_TYPE (op)))
3341 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3342 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3343 linear too. */
3344 if (POINTER_TYPE_P (TREE_TYPE (op))
3345 && !thisarginfo.linear_step
3346 && !vec_stmt
3347 && thisarginfo.dt != vect_constant_def
3348 && thisarginfo.dt != vect_external_def
3349 && loop_vinfo
3350 && !slp_node
3351 && TREE_CODE (op) == SSA_NAME)
3352 vect_simd_lane_linear (op, loop, &thisarginfo);
3354 arginfo.quick_push (thisarginfo);
3357 unsigned int badness = 0;
3358 struct cgraph_node *bestn = NULL;
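  /* Otherwise score every candidate clone: 1024 for each halving of
     its simdlen below the vectorization factor, 2048 if it is an
     in-branch clone, 512 times the target's penalty, plus smaller
     penalties for arguments that have to be rebuilt or realigned;
     the clone with the lowest badness wins.  */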
3359 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3360 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3361 else
3362 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3363 n = n->simdclone->next_clone)
3365 unsigned int this_badness = 0;
3366 if (n->simdclone->simdlen
3367 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3368 || n->simdclone->nargs != nargs)
3369 continue;
3370 if (n->simdclone->simdlen
3371 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3372 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3373 - exact_log2 (n->simdclone->simdlen)) * 1024;
3374 if (n->simdclone->inbranch)
3375 this_badness += 2048;
3376 int target_badness = targetm.simd_clone.usable (n);
3377 if (target_badness < 0)
3378 continue;
3379 this_badness += target_badness * 512;
 3380 /* FORNOW: code for passing the mask argument still has to be added. */
3381 if (n->simdclone->inbranch)
3382 continue;
3383 for (i = 0; i < nargs; i++)
3385 switch (n->simdclone->args[i].arg_type)
3387 case SIMD_CLONE_ARG_TYPE_VECTOR:
3388 if (!useless_type_conversion_p
3389 (n->simdclone->args[i].orig_type,
3390 TREE_TYPE (gimple_call_arg (stmt, i))))
3391 i = -1;
3392 else if (arginfo[i].dt == vect_constant_def
3393 || arginfo[i].dt == vect_external_def
3394 || arginfo[i].linear_step)
3395 this_badness += 64;
3396 break;
3397 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3398 if (arginfo[i].dt != vect_constant_def
3399 && arginfo[i].dt != vect_external_def)
3400 i = -1;
3401 break;
3402 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3403 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3404 if (arginfo[i].dt == vect_constant_def
3405 || arginfo[i].dt == vect_external_def
3406 || (arginfo[i].linear_step
3407 != n->simdclone->args[i].linear_step))
3408 i = -1;
3409 break;
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3411 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3412 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3413 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3414 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3415 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3416 /* FORNOW */
3417 i = -1;
3418 break;
3419 case SIMD_CLONE_ARG_TYPE_MASK:
3420 gcc_unreachable ();
3422 if (i == (size_t) -1)
3423 break;
3424 if (n->simdclone->args[i].alignment > arginfo[i].align)
3426 i = -1;
3427 break;
3429 if (arginfo[i].align)
3430 this_badness += (exact_log2 (arginfo[i].align)
3431 - exact_log2 (n->simdclone->args[i].alignment));
3433 if (i == (size_t) -1)
3434 continue;
3435 if (bestn == NULL || this_badness < badness)
3437 bestn = n;
3438 badness = this_badness;
3442 if (bestn == NULL)
3443 return false;
3445 for (i = 0; i < nargs; i++)
3446 if ((arginfo[i].dt == vect_constant_def
3447 || arginfo[i].dt == vect_external_def)
3448 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3450 arginfo[i].vectype
3451 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3452 i)));
3453 if (arginfo[i].vectype == NULL
3454 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3455 > bestn->simdclone->simdlen))
3456 return false;
3459 fndecl = bestn->decl;
3460 nunits = bestn->simdclone->simdlen;
3461 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
 3463 /* If the function isn't const, only allow it in simd loops where the user
 3464 has asserted that at least nunits consecutive iterations can be
 3465 performed using SIMD instructions. */
3466 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3467 && gimple_vuse (stmt))
3468 return false;
3470 /* Sanity check: make sure that at least one copy of the vectorized stmt
3471 needs to be generated. */
3472 gcc_assert (ncopies >= 1);
3474 if (!vec_stmt) /* transformation not required. */
3476 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3477 for (i = 0; i < nargs; i++)
3478 if ((bestn->simdclone->args[i].arg_type
3479 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3480 || (bestn->simdclone->args[i].arg_type
3481 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3483 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3484 + 1);
3485 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3486 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3487 ? size_type_node : TREE_TYPE (arginfo[i].op);
3488 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3489 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3490 tree sll = arginfo[i].simd_lane_linear
3491 ? boolean_true_node : boolean_false_node;
3492 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3494 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3495 if (dump_enabled_p ())
3496 dump_printf_loc (MSG_NOTE, vect_location,
3497 "=== vectorizable_simd_clone_call ===\n");
3498 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3499 return true;
3502 /* Transform. */
3504 if (dump_enabled_p ())
3505 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3507 /* Handle def. */
3508 scalar_dest = gimple_call_lhs (stmt);
3509 vec_dest = NULL_TREE;
3510 rtype = NULL_TREE;
3511 ratype = NULL_TREE;
3512 if (scalar_dest)
3514 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3515 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3516 if (TREE_CODE (rtype) == ARRAY_TYPE)
3518 ratype = rtype;
3519 rtype = TREE_TYPE (ratype);
3523 prev_stmt_info = NULL;
3524 for (j = 0; j < ncopies; ++j)
3526 /* Build argument list for the vectorized call. */
3527 if (j == 0)
3528 vargs.create (nargs);
3529 else
3530 vargs.truncate (0);
3532 for (i = 0; i < nargs; i++)
3534 unsigned int k, l, m, o;
3535 tree atype;
3536 op = gimple_call_arg (stmt, i);
3537 switch (bestn->simdclone->args[i].arg_type)
3539 case SIMD_CLONE_ARG_TYPE_VECTOR:
3540 atype = bestn->simdclone->args[i].vector_type;
3541 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
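	    /* Each copy of the call consumes O argument vectors of the
	       clone's type ATYPE.  An incoming vector def is either cut
	       up with BIT_FIELD_REFs (when ATYPE has fewer elements) or
	       several defs are glued together with a CONSTRUCTOR (when
	       ATYPE has more elements).  */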
3542 for (m = j * o; m < (j + 1) * o; m++)
3544 if (TYPE_VECTOR_SUBPARTS (atype)
3545 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3547 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3548 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3549 / TYPE_VECTOR_SUBPARTS (atype));
3550 gcc_assert ((k & (k - 1)) == 0);
3551 if (m == 0)
3552 vec_oprnd0
3553 = vect_get_vec_def_for_operand (op, stmt);
3554 else
3556 vec_oprnd0 = arginfo[i].op;
3557 if ((m & (k - 1)) == 0)
3558 vec_oprnd0
3559 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3560 vec_oprnd0);
3562 arginfo[i].op = vec_oprnd0;
3563 vec_oprnd0
3564 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3565 bitsize_int (prec),
3566 bitsize_int ((m & (k - 1)) * prec));
3567 new_stmt
3568 = gimple_build_assign (make_ssa_name (atype),
3569 vec_oprnd0);
3570 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3571 vargs.safe_push (gimple_assign_lhs (new_stmt));
3573 else
3575 k = (TYPE_VECTOR_SUBPARTS (atype)
3576 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3577 gcc_assert ((k & (k - 1)) == 0);
3578 vec<constructor_elt, va_gc> *ctor_elts;
3579 if (k != 1)
3580 vec_alloc (ctor_elts, k);
3581 else
3582 ctor_elts = NULL;
3583 for (l = 0; l < k; l++)
3585 if (m == 0 && l == 0)
3586 vec_oprnd0
3587 = vect_get_vec_def_for_operand (op, stmt);
3588 else
3589 vec_oprnd0
3590 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3591 arginfo[i].op);
3592 arginfo[i].op = vec_oprnd0;
3593 if (k == 1)
3594 break;
3595 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3596 vec_oprnd0);
3598 if (k == 1)
3599 vargs.safe_push (vec_oprnd0);
3600 else
3602 vec_oprnd0 = build_constructor (atype, ctor_elts);
3603 new_stmt
3604 = gimple_build_assign (make_ssa_name (atype),
3605 vec_oprnd0);
3606 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3607 vargs.safe_push (gimple_assign_lhs (new_stmt));
3611 break;
3612 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3613 vargs.safe_push (op);
3614 break;
3615 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3616 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
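	    /* Copy J of the clone receives BASE + J * NUNITS * STEP.
	       For the first copy a PHI is created in the loop header
	       that starts at BASE and advances by NCOPIES * NUNITS * STEP
	       on every iteration of the vectorized loop; later copies
	       just add their constant offset to the PHI result.  */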
3617 if (j == 0)
3619 gimple_seq stmts;
3620 arginfo[i].op
3621 = force_gimple_operand (arginfo[i].op, &stmts, true,
3622 NULL_TREE);
3623 if (stmts != NULL)
3625 basic_block new_bb;
3626 edge pe = loop_preheader_edge (loop);
3627 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3628 gcc_assert (!new_bb);
3630 if (arginfo[i].simd_lane_linear)
3632 vargs.safe_push (arginfo[i].op);
3633 break;
3635 tree phi_res = copy_ssa_name (op);
3636 gphi *new_phi = create_phi_node (phi_res, loop->header);
3637 set_vinfo_for_stmt (new_phi,
3638 new_stmt_vec_info (new_phi, loop_vinfo));
3639 add_phi_arg (new_phi, arginfo[i].op,
3640 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3641 enum tree_code code
3642 = POINTER_TYPE_P (TREE_TYPE (op))
3643 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3644 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3645 ? sizetype : TREE_TYPE (op);
3646 widest_int cst
3647 = wi::mul (bestn->simdclone->args[i].linear_step,
3648 ncopies * nunits);
3649 tree tcst = wide_int_to_tree (type, cst);
3650 tree phi_arg = copy_ssa_name (op);
3651 new_stmt
3652 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3653 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3654 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3655 set_vinfo_for_stmt (new_stmt,
3656 new_stmt_vec_info (new_stmt, loop_vinfo));
3657 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3658 UNKNOWN_LOCATION);
3659 arginfo[i].op = phi_res;
3660 vargs.safe_push (phi_res);
3662 else
3664 enum tree_code code
3665 = POINTER_TYPE_P (TREE_TYPE (op))
3666 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3667 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3668 ? sizetype : TREE_TYPE (op);
3669 widest_int cst
3670 = wi::mul (bestn->simdclone->args[i].linear_step,
3671 j * nunits);
3672 tree tcst = wide_int_to_tree (type, cst);
3673 new_temp = make_ssa_name (TREE_TYPE (op));
3674 new_stmt = gimple_build_assign (new_temp, code,
3675 arginfo[i].op, tcst);
3676 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3677 vargs.safe_push (new_temp);
3679 break;
3680 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3681 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3682 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3683 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3684 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3685 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3686 default:
3687 gcc_unreachable ();
3691 new_stmt = gimple_build_call_vec (fndecl, vargs);
3692 if (vec_dest)
3694 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3695 if (ratype)
3696 new_temp = create_tmp_var (ratype);
3697 else if (TYPE_VECTOR_SUBPARTS (vectype)
3698 == TYPE_VECTOR_SUBPARTS (rtype))
3699 new_temp = make_ssa_name (vec_dest, new_stmt);
3700 else
3701 new_temp = make_ssa_name (rtype, new_stmt);
3702 gimple_call_set_lhs (new_stmt, new_temp);
3704 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3706 if (vec_dest)
3708 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3710 unsigned int k, l;
3711 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3712 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3713 gcc_assert ((k & (k - 1)) == 0);
3714 for (l = 0; l < k; l++)
3716 tree t;
3717 if (ratype)
3719 t = build_fold_addr_expr (new_temp);
3720 t = build2 (MEM_REF, vectype, t,
3721 build_int_cst (TREE_TYPE (t),
3722 l * prec / BITS_PER_UNIT));
3724 else
3725 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3726 bitsize_int (prec), bitsize_int (l * prec));
3727 new_stmt
3728 = gimple_build_assign (make_ssa_name (vectype), t);
3729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3730 if (j == 0 && l == 0)
3731 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3732 else
3733 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3735 prev_stmt_info = vinfo_for_stmt (new_stmt);
3738 if (ratype)
3740 tree clobber = build_constructor (ratype, NULL);
3741 TREE_THIS_VOLATILE (clobber) = 1;
3742 new_stmt = gimple_build_assign (new_temp, clobber);
3743 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3745 continue;
3747 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3749 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3750 / TYPE_VECTOR_SUBPARTS (rtype));
3751 gcc_assert ((k & (k - 1)) == 0);
3752 if ((j & (k - 1)) == 0)
3753 vec_alloc (ret_ctor_elts, k);
3754 if (ratype)
3756 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3757 for (m = 0; m < o; m++)
3759 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3760 size_int (m), NULL_TREE, NULL_TREE);
3761 new_stmt
3762 = gimple_build_assign (make_ssa_name (rtype), tem);
3763 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3764 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3765 gimple_assign_lhs (new_stmt));
3767 tree clobber = build_constructor (ratype, NULL);
3768 TREE_THIS_VOLATILE (clobber) = 1;
3769 new_stmt = gimple_build_assign (new_temp, clobber);
3770 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3772 else
3773 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3774 if ((j & (k - 1)) != k - 1)
3775 continue;
3776 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3777 new_stmt
3778 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3779 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3781 if ((unsigned) j == k - 1)
3782 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3783 else
3784 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3786 prev_stmt_info = vinfo_for_stmt (new_stmt);
3787 continue;
3789 else if (ratype)
3791 tree t = build_fold_addr_expr (new_temp);
3792 t = build2 (MEM_REF, vectype, t,
3793 build_int_cst (TREE_TYPE (t), 0));
3794 new_stmt
3795 = gimple_build_assign (make_ssa_name (vec_dest), t);
3796 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3797 tree clobber = build_constructor (ratype, NULL);
3798 TREE_THIS_VOLATILE (clobber) = 1;
3799 vect_finish_stmt_generation (stmt,
3800 gimple_build_assign (new_temp,
3801 clobber), gsi);
3805 if (j == 0)
3806 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3807 else
3808 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3810 prev_stmt_info = vinfo_for_stmt (new_stmt);
3813 vargs.release ();
3815 /* The call in STMT might prevent it from being removed in DCE.
3816 We however cannot remove it here, due to the way the SSA name
3817 it defines is mapped to the new definition. So just replace the
3818 rhs of the statement with something harmless. */
3820 if (slp_node)
3821 return true;
3823 if (scalar_dest)
3825 type = TREE_TYPE (scalar_dest);
3826 if (is_pattern_stmt_p (stmt_info))
3827 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3828 else
3829 lhs = gimple_call_lhs (stmt);
3830 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3832 else
3833 new_stmt = gimple_build_nop ();
3834 set_vinfo_for_stmt (new_stmt, stmt_info);
3835 set_vinfo_for_stmt (stmt, NULL);
3836 STMT_VINFO_STMT (stmt_info) = new_stmt;
3837 gsi_replace (gsi, new_stmt, true);
3838 unlink_stmt_vdef (stmt);
3840 return true;
3844 /* Function vect_gen_widened_results_half
3846 Create a vector stmt whose code, number of operands, and result
3847 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
3848 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3849 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3850 needs to be created (DECL is a function-decl of a target-builtin).
3851 STMT is the original scalar stmt that we are vectorizing. */
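   /* For example, with 128-bit vectors a widening multiply of two V8HI
      operands produces two V4SI results, one from the low halves and one
      from the high halves of the inputs; this helper emits the statement
      for one such half, selected by CODE (e.g. VEC_WIDEN_MULT_LO_EXPR
      vs. VEC_WIDEN_MULT_HI_EXPR) or by the target builtin DECL.  */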
3853 static gimple *
3854 vect_gen_widened_results_half (enum tree_code code,
3855 tree decl,
3856 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3857 tree vec_dest, gimple_stmt_iterator *gsi,
3858 gimple *stmt)
3860 gimple *new_stmt;
3861 tree new_temp;
3863 /* Generate half of the widened result: */
3864 if (code == CALL_EXPR)
3866 /* Target specific support */
3867 if (op_type == binary_op)
3868 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3869 else
3870 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3871 new_temp = make_ssa_name (vec_dest, new_stmt);
3872 gimple_call_set_lhs (new_stmt, new_temp);
3874 else
3876 /* Generic support */
3877 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3878 if (op_type != binary_op)
3879 vec_oprnd1 = NULL;
3880 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3881 new_temp = make_ssa_name (vec_dest, new_stmt);
3882 gimple_assign_set_lhs (new_stmt, new_temp);
3884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3886 return new_stmt;
3890 /* Get vectorized definitions for loop-based vectorization. For the first
3891 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3892 the scalar operand), and for the rest we get a copy with
3893 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3894 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3895 The vectors are collected into VEC_OPRNDS. */
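   /* Note that each invocation pushes two vector defs and then recurses
      while MULTI_STEP_CVT is nonzero, so a call with MULTI_STEP_CVT set to
      vect_pow2 (multi_step_cvt) - 1 (as done by vectorizable_conversion for
      the NARROW case) collects 2 * 2^multi_step_cvt defs in total.  */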
3897 static void
3898 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3899 vec<tree> *vec_oprnds, int multi_step_cvt)
3901 tree vec_oprnd;
3903 /* Get first vector operand. */
3904 /* All the vector operands except the very first one (that is, the scalar
3905 operand) are stmt copies. */
3906 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3907 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3908 else
3909 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3911 vec_oprnds->quick_push (vec_oprnd);
3913 /* Get second vector operand. */
3914 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3915 vec_oprnds->quick_push (vec_oprnd);
3917 *oprnd = vec_oprnd;
3919 /* For conversion in multiple steps, continue to get operands
3920 recursively. */
3921 if (multi_step_cvt)
3922 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3926 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3927 For multi-step conversions store the resulting vectors and call the function
3928 recursively. */
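   /* For example, a two-step demotion from int to char with 128-bit vectors
      first packs four V4SI operands pairwise into two V8HI vectors and then
      packs those into a single V16QI vector; each level of recursion below
      halves the number of operands kept in VEC_OPRNDS.  */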
3930 static void
3931 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3932 int multi_step_cvt, gimple *stmt,
3933 vec<tree> vec_dsts,
3934 gimple_stmt_iterator *gsi,
3935 slp_tree slp_node, enum tree_code code,
3936 stmt_vec_info *prev_stmt_info)
3938 unsigned int i;
3939 tree vop0, vop1, new_tmp, vec_dest;
3940 gimple *new_stmt;
3941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3943 vec_dest = vec_dsts.pop ();
3945 for (i = 0; i < vec_oprnds->length (); i += 2)
3947 /* Create demotion operation. */
3948 vop0 = (*vec_oprnds)[i];
3949 vop1 = (*vec_oprnds)[i + 1];
3950 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3951 new_tmp = make_ssa_name (vec_dest, new_stmt);
3952 gimple_assign_set_lhs (new_stmt, new_tmp);
3953 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3955 if (multi_step_cvt)
3956 /* Store the resulting vector for next recursive call. */
3957 (*vec_oprnds)[i/2] = new_tmp;
3958 else
3960 /* This is the last step of the conversion sequence. Store the
3961 vectors in SLP_NODE or in the vector info of the scalar statement
3962 (or in the STMT_VINFO_RELATED_STMT chain). */
3963 if (slp_node)
3964 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3965 else
3967 if (!*prev_stmt_info)
3968 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3969 else
3970 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3972 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3977 /* For multi-step demotion operations we first generate demotion operations
3978 from the source type to the intermediate types, and then combine the
3979 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3980 type. */
3981 if (multi_step_cvt)
3983 /* At each level of recursion we have half of the operands we had at the
3984 previous level. */
3985 vec_oprnds->truncate ((i+1)/2);
3986 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3987 stmt, vec_dsts, gsi, slp_node,
3988 VEC_PACK_TRUNC_EXPR,
3989 prev_stmt_info);
3992 vec_dsts.quick_push (vec_dest);
3996 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3997 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3998 the resulting vectors and call the function recursively. */
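   /* Each input vector yields two output vectors (a low and a high half),
      so every pass through this function doubles the number of defs stored
      back into VEC_OPRNDS0; e.g. promoting V16QI operands to V4SI takes two
      such passes: V16QI -> 2x V8HI -> 4x V4SI.  */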
4000 static void
4001 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4002 vec<tree> *vec_oprnds1,
4003 gimple *stmt, tree vec_dest,
4004 gimple_stmt_iterator *gsi,
4005 enum tree_code code1,
4006 enum tree_code code2, tree decl1,
4007 tree decl2, int op_type)
4009 int i;
4010 tree vop0, vop1, new_tmp1, new_tmp2;
4011 gimple *new_stmt1, *new_stmt2;
4012 vec<tree> vec_tmp = vNULL;
4014 vec_tmp.create (vec_oprnds0->length () * 2);
4015 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4017 if (op_type == binary_op)
4018 vop1 = (*vec_oprnds1)[i];
4019 else
4020 vop1 = NULL_TREE;
4022 /* Generate the two halves of the promotion operation. */
4023 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4024 op_type, vec_dest, gsi, stmt);
4025 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4026 op_type, vec_dest, gsi, stmt);
4027 if (is_gimple_call (new_stmt1))
4029 new_tmp1 = gimple_call_lhs (new_stmt1);
4030 new_tmp2 = gimple_call_lhs (new_stmt2);
4032 else
4034 new_tmp1 = gimple_assign_lhs (new_stmt1);
4035 new_tmp2 = gimple_assign_lhs (new_stmt2);
4038 /* Store the results for the next step. */
4039 vec_tmp.quick_push (new_tmp1);
4040 vec_tmp.quick_push (new_tmp2);
4043 vec_oprnds0->release ();
4044 *vec_oprnds0 = vec_tmp;
4048 /* Check if STMT performs a conversion operation that can be vectorized.
4049 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4050 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4051 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
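   /* The conversion is classified below as NONE, WIDEN or NARROW depending
      on how the element counts of the input and output vector types compare;
      e.g., assuming 128-bit vectors, int -> float (V4SI -> V4SF) is NONE,
      short -> int (V8HI -> 2x V4SI) is WIDEN, and int -> short
      (2x V4SI -> V8HI) is NARROW.  */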
4053 static bool
4054 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4055 gimple **vec_stmt, slp_tree slp_node)
4057 tree vec_dest;
4058 tree scalar_dest;
4059 tree op0, op1 = NULL_TREE;
4060 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4062 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4063 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4064 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4065 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4066 tree new_temp;
4067 gimple *def_stmt;
4068 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4069 int ndts = 2;
4070 gimple *new_stmt = NULL;
4071 stmt_vec_info prev_stmt_info;
4072 int nunits_in;
4073 int nunits_out;
4074 tree vectype_out, vectype_in;
4075 int ncopies, i, j;
4076 tree lhs_type, rhs_type;
4077 enum { NARROW, NONE, WIDEN } modifier;
4078 vec<tree> vec_oprnds0 = vNULL;
4079 vec<tree> vec_oprnds1 = vNULL;
4080 tree vop0;
4081 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4082 vec_info *vinfo = stmt_info->vinfo;
4083 int multi_step_cvt = 0;
4084 vec<tree> interm_types = vNULL;
4085 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4086 int op_type;
4087 unsigned short fltsz;
4089 /* Is STMT a vectorizable conversion? */
4091 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4092 return false;
4094 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4095 && ! vec_stmt)
4096 return false;
4098 if (!is_gimple_assign (stmt))
4099 return false;
4101 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4102 return false;
4104 code = gimple_assign_rhs_code (stmt);
4105 if (!CONVERT_EXPR_CODE_P (code)
4106 && code != FIX_TRUNC_EXPR
4107 && code != FLOAT_EXPR
4108 && code != WIDEN_MULT_EXPR
4109 && code != WIDEN_LSHIFT_EXPR)
4110 return false;
4112 op_type = TREE_CODE_LENGTH (code);
4114 /* Check types of lhs and rhs. */
4115 scalar_dest = gimple_assign_lhs (stmt);
4116 lhs_type = TREE_TYPE (scalar_dest);
4117 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4119 op0 = gimple_assign_rhs1 (stmt);
4120 rhs_type = TREE_TYPE (op0);
4122 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4123 && !((INTEGRAL_TYPE_P (lhs_type)
4124 && INTEGRAL_TYPE_P (rhs_type))
4125 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4126 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4127 return false;
4129 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4130 && ((INTEGRAL_TYPE_P (lhs_type)
4131 && !type_has_mode_precision_p (lhs_type))
4132 || (INTEGRAL_TYPE_P (rhs_type)
4133 && !type_has_mode_precision_p (rhs_type))))
4135 if (dump_enabled_p ())
4136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4137 "type conversion to/from bit-precision unsupported."
4138 "\n");
4139 return false;
4142 /* Check the operands of the operation. */
4143 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4147 "use not simple.\n");
4148 return false;
4150 if (op_type == binary_op)
4152 bool ok;
4154 op1 = gimple_assign_rhs2 (stmt);
4155 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4156 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4157 OP1. */
4158 if (CONSTANT_CLASS_P (op0))
4159 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4160 else
4161 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4163 if (!ok)
4165 if (dump_enabled_p ())
4166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4167 "use not simple.\n");
4168 return false;
4172 /* If op0 is an external or constant def, use a vector type of
4173 the same size as the output vector type. */
4174 if (!vectype_in)
4175 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4176 if (vec_stmt)
4177 gcc_assert (vectype_in);
4178 if (!vectype_in)
4180 if (dump_enabled_p ())
4182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4183 "no vectype for scalar type ");
4184 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4185 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4188 return false;
4191 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4192 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4194 if (dump_enabled_p ())
4196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4197 "can't convert between boolean and non "
4198 "boolean vectors");
4199 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4200 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4203 return false;
4206 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4207 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4208 if (nunits_in < nunits_out)
4209 modifier = NARROW;
4210 else if (nunits_out == nunits_in)
4211 modifier = NONE;
4212 else
4213 modifier = WIDEN;
4215 /* Multiple types in SLP are handled by creating the appropriate number of
4216 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4217 case of SLP. */
4218 if (slp_node)
4219 ncopies = 1;
4220 else if (modifier == NARROW)
4221 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4222 else
4223 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4225 /* Sanity check: make sure that at least one copy of the vectorized stmt
4226 needs to be generated. */
4227 gcc_assert (ncopies >= 1);
4229 bool found_mode = false;
4230 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4231 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4232 opt_scalar_mode rhs_mode_iter;
4234 /* Supportable by target? */
4235 switch (modifier)
4237 case NONE:
4238 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4239 return false;
4240 if (supportable_convert_operation (code, vectype_out, vectype_in,
4241 &decl1, &code1))
4242 break;
4243 /* FALLTHRU */
4244 unsupported:
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4247 "conversion not supported by target.\n");
4248 return false;
4250 case WIDEN:
4251 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4252 &code1, &code2, &multi_step_cvt,
4253 &interm_types))
4255 /* Binary widening operation can only be supported directly by the
4256 architecture. */
4257 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4258 break;
4261 if (code != FLOAT_EXPR
4262 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4263 goto unsupported;
4265 fltsz = GET_MODE_SIZE (lhs_mode);
4266 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4268 rhs_mode = rhs_mode_iter.require ();
4269 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4270 break;
4272 cvt_type
4273 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4274 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4275 if (cvt_type == NULL_TREE)
4276 goto unsupported;
4278 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4280 if (!supportable_convert_operation (code, vectype_out,
4281 cvt_type, &decl1, &codecvt1))
4282 goto unsupported;
4284 else if (!supportable_widening_operation (code, stmt, vectype_out,
4285 cvt_type, &codecvt1,
4286 &codecvt2, &multi_step_cvt,
4287 &interm_types))
4288 continue;
4289 else
4290 gcc_assert (multi_step_cvt == 0);
4292 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4293 vectype_in, &code1, &code2,
4294 &multi_step_cvt, &interm_types))
4296 found_mode = true;
4297 break;
4301 if (!found_mode)
4302 goto unsupported;
4304 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4305 codecvt2 = ERROR_MARK;
4306 else
4308 multi_step_cvt++;
4309 interm_types.safe_push (cvt_type);
4310 cvt_type = NULL_TREE;
4312 break;
4314 case NARROW:
4315 gcc_assert (op_type == unary_op);
4316 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4317 &code1, &multi_step_cvt,
4318 &interm_types))
4319 break;
4321 if (code != FIX_TRUNC_EXPR
4322 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4323 goto unsupported;
4325 cvt_type
4326 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4327 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4328 if (cvt_type == NULL_TREE)
4329 goto unsupported;
4330 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4331 &decl1, &codecvt1))
4332 goto unsupported;
4333 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4334 &code1, &multi_step_cvt,
4335 &interm_types))
4336 break;
4337 goto unsupported;
4339 default:
4340 gcc_unreachable ();
4343 if (!vec_stmt) /* transformation not required. */
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_NOTE, vect_location,
4347 "=== vectorizable_conversion ===\n");
4348 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4350 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4351 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4353 else if (modifier == NARROW)
4355 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4356 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4358 else
4360 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4361 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4363 interm_types.release ();
4364 return true;
4367 /* Transform. */
4368 if (dump_enabled_p ())
4369 dump_printf_loc (MSG_NOTE, vect_location,
4370 "transform conversion. ncopies = %d.\n", ncopies);
4372 if (op_type == binary_op)
4374 if (CONSTANT_CLASS_P (op0))
4375 op0 = fold_convert (TREE_TYPE (op1), op0);
4376 else if (CONSTANT_CLASS_P (op1))
4377 op1 = fold_convert (TREE_TYPE (op0), op1);
4380 /* In case of multi-step conversion, we first generate conversion operations
4381 to the intermediate types, and then from those types to the final one.
4382 We create vector destinations for the intermediate types (TYPES) received
4383 from supportable_*_operation, and store them in the correct order
4384 for future use in vect_create_vectorized_*_stmts (). */
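   /* The destination variable for the final vector type (or for CVT_TYPE in
      the WIDEN case) is pushed first, and the intermediate types are pushed
      in reverse order; vec_dsts[multi_step_cvt] therefore corresponds to the
      type closest to the input, which is the one the promotion/demotion
      code below starts from.  */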
4385 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4386 vec_dest = vect_create_destination_var (scalar_dest,
4387 (cvt_type && modifier == WIDEN)
4388 ? cvt_type : vectype_out);
4389 vec_dsts.quick_push (vec_dest);
4391 if (multi_step_cvt)
4393 for (i = interm_types.length () - 1;
4394 interm_types.iterate (i, &intermediate_type); i--)
4396 vec_dest = vect_create_destination_var (scalar_dest,
4397 intermediate_type);
4398 vec_dsts.quick_push (vec_dest);
4402 if (cvt_type)
4403 vec_dest = vect_create_destination_var (scalar_dest,
4404 modifier == WIDEN
4405 ? vectype_out : cvt_type);
4407 if (!slp_node)
4409 if (modifier == WIDEN)
4411 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4412 if (op_type == binary_op)
4413 vec_oprnds1.create (1);
4415 else if (modifier == NARROW)
4416 vec_oprnds0.create (
4417 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4419 else if (code == WIDEN_LSHIFT_EXPR)
4420 vec_oprnds1.create (slp_node->vec_stmts_size);
4422 last_oprnd = op0;
4423 prev_stmt_info = NULL;
4424 switch (modifier)
4426 case NONE:
4427 for (j = 0; j < ncopies; j++)
4429 if (j == 0)
4430 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4431 else
4432 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4434 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4436 /* Arguments are ready. Create the new vector stmt. */
4437 if (code1 == CALL_EXPR)
4439 new_stmt = gimple_build_call (decl1, 1, vop0);
4440 new_temp = make_ssa_name (vec_dest, new_stmt);
4441 gimple_call_set_lhs (new_stmt, new_temp);
4443 else
4445 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4446 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4447 new_temp = make_ssa_name (vec_dest, new_stmt);
4448 gimple_assign_set_lhs (new_stmt, new_temp);
4451 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4452 if (slp_node)
4453 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4454 else
4456 if (!prev_stmt_info)
4457 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4458 else
4459 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4460 prev_stmt_info = vinfo_for_stmt (new_stmt);
4464 break;
4466 case WIDEN:
4467 /* In case the vectorization factor (VF) is bigger than the number
4468 of elements that we can fit in a vectype (nunits), we have to
4469 generate more than one vector stmt - i.e., we need to "unroll"
4470 the vector stmt by a factor VF/nunits. */
4471 for (j = 0; j < ncopies; j++)
4473 /* Handle uses. */
4474 if (j == 0)
4476 if (slp_node)
4478 if (code == WIDEN_LSHIFT_EXPR)
4480 unsigned int k;
4482 vec_oprnd1 = op1;
4483 /* Store vec_oprnd1 for every vector stmt to be created
4484 for SLP_NODE. We check during the analysis that all
4485 the shift arguments are the same. */
4486 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4487 vec_oprnds1.quick_push (vec_oprnd1);
4489 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4490 slp_node);
4492 else
4493 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4494 &vec_oprnds1, slp_node);
4496 else
4498 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4499 vec_oprnds0.quick_push (vec_oprnd0);
4500 if (op_type == binary_op)
4502 if (code == WIDEN_LSHIFT_EXPR)
4503 vec_oprnd1 = op1;
4504 else
4505 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4506 vec_oprnds1.quick_push (vec_oprnd1);
4510 else
4512 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4513 vec_oprnds0.truncate (0);
4514 vec_oprnds0.quick_push (vec_oprnd0);
4515 if (op_type == binary_op)
4517 if (code == WIDEN_LSHIFT_EXPR)
4518 vec_oprnd1 = op1;
4519 else
4520 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4521 vec_oprnd1);
4522 vec_oprnds1.truncate (0);
4523 vec_oprnds1.quick_push (vec_oprnd1);
4527 /* Arguments are ready. Create the new vector stmts. */
4528 for (i = multi_step_cvt; i >= 0; i--)
4530 tree this_dest = vec_dsts[i];
4531 enum tree_code c1 = code1, c2 = code2;
4532 if (i == 0 && codecvt2 != ERROR_MARK)
4534 c1 = codecvt1;
4535 c2 = codecvt2;
4537 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4538 &vec_oprnds1,
4539 stmt, this_dest, gsi,
4540 c1, c2, decl1, decl2,
4541 op_type);
4544 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4546 if (cvt_type)
4548 if (codecvt1 == CALL_EXPR)
4550 new_stmt = gimple_build_call (decl1, 1, vop0);
4551 new_temp = make_ssa_name (vec_dest, new_stmt);
4552 gimple_call_set_lhs (new_stmt, new_temp);
4554 else
4556 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4557 new_temp = make_ssa_name (vec_dest);
4558 new_stmt = gimple_build_assign (new_temp, codecvt1,
4559 vop0);
4562 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4564 else
4565 new_stmt = SSA_NAME_DEF_STMT (vop0);
4567 if (slp_node)
4568 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4569 else
4571 if (!prev_stmt_info)
4572 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4573 else
4574 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4575 prev_stmt_info = vinfo_for_stmt (new_stmt);
4580 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4581 break;
4583 case NARROW:
4584 /* In case the vectorization factor (VF) is bigger than the number
4585 of elements that we can fit in a vectype (nunits), we have to
4586 generate more than one vector stmt - i.e., we need to "unroll"
4587 the vector stmt by a factor VF/nunits. */
4588 for (j = 0; j < ncopies; j++)
4590 /* Handle uses. */
4591 if (slp_node)
4592 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4593 slp_node);
4594 else
4596 vec_oprnds0.truncate (0);
4597 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4598 vect_pow2 (multi_step_cvt) - 1);
4601 /* Arguments are ready. Create the new vector stmts. */
4602 if (cvt_type)
4603 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4605 if (codecvt1 == CALL_EXPR)
4607 new_stmt = gimple_build_call (decl1, 1, vop0);
4608 new_temp = make_ssa_name (vec_dest, new_stmt);
4609 gimple_call_set_lhs (new_stmt, new_temp);
4611 else
4613 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4614 new_temp = make_ssa_name (vec_dest);
4615 new_stmt = gimple_build_assign (new_temp, codecvt1,
4616 vop0);
4619 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4620 vec_oprnds0[i] = new_temp;
4623 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4624 stmt, vec_dsts, gsi,
4625 slp_node, code1,
4626 &prev_stmt_info);
4629 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4630 break;
4633 vec_oprnds0.release ();
4634 vec_oprnds1.release ();
4635 interm_types.release ();
4637 return true;
4641 /* Function vectorizable_assignment.
4643 Check if STMT performs an assignment (copy) that can be vectorized.
4644 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4645 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4646 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4648 static bool
4649 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4650 gimple **vec_stmt, slp_tree slp_node)
4652 tree vec_dest;
4653 tree scalar_dest;
4654 tree op;
4655 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4656 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4657 tree new_temp;
4658 gimple *def_stmt;
4659 enum vect_def_type dt[1] = {vect_unknown_def_type};
4660 int ndts = 1;
4661 int ncopies;
4662 int i, j;
4663 vec<tree> vec_oprnds = vNULL;
4664 tree vop;
4665 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4666 vec_info *vinfo = stmt_info->vinfo;
4667 gimple *new_stmt = NULL;
4668 stmt_vec_info prev_stmt_info = NULL;
4669 enum tree_code code;
4670 tree vectype_in;
4672 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4673 return false;
4675 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4676 && ! vec_stmt)
4677 return false;
4679 /* Is vectorizable assignment? */
4680 if (!is_gimple_assign (stmt))
4681 return false;
4683 scalar_dest = gimple_assign_lhs (stmt);
4684 if (TREE_CODE (scalar_dest) != SSA_NAME)
4685 return false;
4687 code = gimple_assign_rhs_code (stmt);
4688 if (gimple_assign_single_p (stmt)
4689 || code == PAREN_EXPR
4690 || CONVERT_EXPR_CODE_P (code))
4691 op = gimple_assign_rhs1 (stmt);
4692 else
4693 return false;
4695 if (code == VIEW_CONVERT_EXPR)
4696 op = TREE_OPERAND (op, 0);
4698 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4699 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4701 /* Multiple types in SLP are handled by creating the appropriate number of
4702 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4703 case of SLP. */
4704 if (slp_node)
4705 ncopies = 1;
4706 else
4707 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4709 gcc_assert (ncopies >= 1);
4711 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4713 if (dump_enabled_p ())
4714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4715 "use not simple.\n");
4716 return false;
4719 /* We can handle NOP_EXPR conversions that do not change the number
4720 of elements or the vector size. */
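   /* E.g. a conversion from unsigned int to int keeps both the element
      count and the vector size, so it can be carried out as a plain
      VIEW_CONVERT_EXPR copy below.  */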
4721 if ((CONVERT_EXPR_CODE_P (code)
4722 || code == VIEW_CONVERT_EXPR)
4723 && (!vectype_in
4724 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4725 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4726 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4727 return false;
4729 /* We do not handle bit-precision changes. */
4730 if ((CONVERT_EXPR_CODE_P (code)
4731 || code == VIEW_CONVERT_EXPR)
4732 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4733 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4734 || !type_has_mode_precision_p (TREE_TYPE (op)))
4735 /* But a conversion that does not change the bit-pattern is ok. */
4736 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4737 > TYPE_PRECISION (TREE_TYPE (op)))
4738 && TYPE_UNSIGNED (TREE_TYPE (op)))
4739 /* Conversion between boolean types of different sizes is
4740 a simple assignment in case their vectypes are the same
4741 boolean vectors. */
4742 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4743 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4745 if (dump_enabled_p ())
4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4747 "type conversion to/from bit-precision "
4748 "unsupported.\n");
4749 return false;
4752 if (!vec_stmt) /* transformation not required. */
4754 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4755 if (dump_enabled_p ())
4756 dump_printf_loc (MSG_NOTE, vect_location,
4757 "=== vectorizable_assignment ===\n");
4758 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4759 return true;
4762 /* Transform. */
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4766 /* Handle def. */
4767 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4769 /* Handle use. */
4770 for (j = 0; j < ncopies; j++)
4772 /* Handle uses. */
4773 if (j == 0)
4774 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4775 else
4776 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4778 /* Arguments are ready. Create the new vector stmt. */
4779 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4781 if (CONVERT_EXPR_CODE_P (code)
4782 || code == VIEW_CONVERT_EXPR)
4783 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4784 new_stmt = gimple_build_assign (vec_dest, vop);
4785 new_temp = make_ssa_name (vec_dest, new_stmt);
4786 gimple_assign_set_lhs (new_stmt, new_temp);
4787 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4788 if (slp_node)
4789 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4792 if (slp_node)
4793 continue;
4795 if (j == 0)
4796 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4797 else
4798 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4800 prev_stmt_info = vinfo_for_stmt (new_stmt);
4803 vec_oprnds.release ();
4804 return true;
4808 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4809 either as shift by a scalar or by a vector. */
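   /* The check below tries the vector-by-scalar optab first and falls back
      to the vector-by-vector optab, mirroring the strategy used during the
      transformation in vectorizable_shift.  */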
4811 bool
4812 vect_supportable_shift (enum tree_code code, tree scalar_type)
4815 machine_mode vec_mode;
4816 optab optab;
4817 int icode;
4818 tree vectype;
4820 vectype = get_vectype_for_scalar_type (scalar_type);
4821 if (!vectype)
4822 return false;
4824 optab = optab_for_tree_code (code, vectype, optab_scalar);
4825 if (!optab
4826 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4828 optab = optab_for_tree_code (code, vectype, optab_vector);
4829 if (!optab
4830 || (optab_handler (optab, TYPE_MODE (vectype))
4831 == CODE_FOR_nothing))
4832 return false;
4835 vec_mode = TYPE_MODE (vectype);
4836 icode = (int) optab_handler (optab, vec_mode);
4837 if (icode == CODE_FOR_nothing)
4838 return false;
4840 return true;
4844 /* Function vectorizable_shift.
4846 Check if STMT performs a shift operation that can be vectorized.
4847 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4848 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4849 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4851 static bool
4852 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4853 gimple **vec_stmt, slp_tree slp_node)
4855 tree vec_dest;
4856 tree scalar_dest;
4857 tree op0, op1 = NULL;
4858 tree vec_oprnd1 = NULL_TREE;
4859 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4860 tree vectype;
4861 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4862 enum tree_code code;
4863 machine_mode vec_mode;
4864 tree new_temp;
4865 optab optab;
4866 int icode;
4867 machine_mode optab_op2_mode;
4868 gimple *def_stmt;
4869 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4870 int ndts = 2;
4871 gimple *new_stmt = NULL;
4872 stmt_vec_info prev_stmt_info;
4873 int nunits_in;
4874 int nunits_out;
4875 tree vectype_out;
4876 tree op1_vectype;
4877 int ncopies;
4878 int j, i;
4879 vec<tree> vec_oprnds0 = vNULL;
4880 vec<tree> vec_oprnds1 = vNULL;
4881 tree vop0, vop1;
4882 unsigned int k;
4883 bool scalar_shift_arg = true;
4884 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4885 vec_info *vinfo = stmt_info->vinfo;
4887 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4888 return false;
4890 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4891 && ! vec_stmt)
4892 return false;
4894 /* Is STMT a vectorizable binary/unary operation? */
4895 if (!is_gimple_assign (stmt))
4896 return false;
4898 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4899 return false;
4901 code = gimple_assign_rhs_code (stmt);
4903 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4904 || code == RROTATE_EXPR))
4905 return false;
4907 scalar_dest = gimple_assign_lhs (stmt);
4908 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4909 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4911 if (dump_enabled_p ())
4912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4913 "bit-precision shifts not supported.\n");
4914 return false;
4917 op0 = gimple_assign_rhs1 (stmt);
4918 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4922 "use not simple.\n");
4923 return false;
4925 /* If op0 is an external or constant def use a vector type with
4926 the same size as the output vector type. */
4927 if (!vectype)
4928 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4929 if (vec_stmt)
4930 gcc_assert (vectype);
4931 if (!vectype)
4933 if (dump_enabled_p ())
4934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4935 "no vectype for scalar type\n");
4936 return false;
4939 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4940 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4941 if (nunits_out != nunits_in)
4942 return false;
4944 op1 = gimple_assign_rhs2 (stmt);
4945 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4947 if (dump_enabled_p ())
4948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4949 "use not simple.\n");
4950 return false;
4953 /* Multiple types in SLP are handled by creating the appropriate number of
4954 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4955 case of SLP. */
4956 if (slp_node)
4957 ncopies = 1;
4958 else
4959 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4961 gcc_assert (ncopies >= 1);
4963 /* Determine whether the shift amount is a vector or a scalar. If the
4964 shift/rotate amount is a vector, use the vector/vector shift optabs. */
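   /* E.g. for "x << 3" or a shift by a loop-invariant amount the scalar
      form can be used, whereas "x << y" with Y varying inside the loop
      needs the vector/vector form.  */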
4966 if ((dt[1] == vect_internal_def
4967 || dt[1] == vect_induction_def)
4968 && !slp_node)
4969 scalar_shift_arg = false;
4970 else if (dt[1] == vect_constant_def
4971 || dt[1] == vect_external_def
4972 || dt[1] == vect_internal_def)
4974 /* In SLP, we need to check whether the shift count is the same
4975 for all statements; in loops, if it is a constant or invariant,
4976 it is always a scalar shift. */
4977 if (slp_node)
4979 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4980 gimple *slpstmt;
4982 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4983 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4984 scalar_shift_arg = false;
4987 /* If the shift amount is computed by a pattern stmt, we cannot
4988 use the scalar amount directly, so give up and use a vector
4989 shift. */
4990 if (dt[1] == vect_internal_def)
4992 gimple *def = SSA_NAME_DEF_STMT (op1);
4993 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4994 scalar_shift_arg = false;
4997 else
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5001 "operand mode requires invariant argument.\n");
5002 return false;
5005 /* Vector shifted by vector. */
5006 if (!scalar_shift_arg)
5008 optab = optab_for_tree_code (code, vectype, optab_vector);
5009 if (dump_enabled_p ())
5010 dump_printf_loc (MSG_NOTE, vect_location,
5011 "vector/vector shift/rotate found.\n");
5013 if (!op1_vectype)
5014 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5015 if (op1_vectype == NULL_TREE
5016 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5018 if (dump_enabled_p ())
5019 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5020 "unusable type for last operand in"
5021 " vector/vector shift/rotate.\n");
5022 return false;
5025 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5026 whether it has a vector-shifted-by-vector insn. */
5027 else
5029 optab = optab_for_tree_code (code, vectype, optab_scalar);
5030 if (optab
5031 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5033 if (dump_enabled_p ())
5034 dump_printf_loc (MSG_NOTE, vect_location,
5035 "vector/scalar shift/rotate found.\n");
5037 else
5039 optab = optab_for_tree_code (code, vectype, optab_vector);
5040 if (optab
5041 && (optab_handler (optab, TYPE_MODE (vectype))
5042 != CODE_FOR_nothing))
5044 scalar_shift_arg = false;
5046 if (dump_enabled_p ())
5047 dump_printf_loc (MSG_NOTE, vect_location,
5048 "vector/vector shift/rotate found.\n");
5050 /* Unlike the other binary operators, shifts/rotates have
5051 an int rhs rather than one of the same type as the lhs,
5052 so make sure the scalar is of the right type if we are
5053 dealing with vectors of long long/long/short/char. */
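   /* E.g. when a vector of chars is shifted by an int constant, the
      constant is first converted to the element type (char) so that the
      vector shift amount built from it has a matching element width.  */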
5054 if (dt[1] == vect_constant_def)
5055 op1 = fold_convert (TREE_TYPE (vectype), op1);
5056 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5057 TREE_TYPE (op1)))
5059 if (slp_node
5060 && TYPE_MODE (TREE_TYPE (vectype))
5061 != TYPE_MODE (TREE_TYPE (op1)))
5063 if (dump_enabled_p ())
5064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5065 "unusable type for last operand in"
5066 " vector/vector shift/rotate.\n");
5067 return false;
5069 if (vec_stmt && !slp_node)
5071 op1 = fold_convert (TREE_TYPE (vectype), op1);
5072 op1 = vect_init_vector (stmt, op1,
5073 TREE_TYPE (vectype), NULL);
5080 /* Supportable by target? */
5081 if (!optab)
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5085 "no optab.\n");
5086 return false;
5088 vec_mode = TYPE_MODE (vectype);
5089 icode = (int) optab_handler (optab, vec_mode);
5090 if (icode == CODE_FOR_nothing)
5092 if (dump_enabled_p ())
5093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5094 "op not supported by target.\n");
5095 /* Check only during analysis. */
5096 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5097 || (!vec_stmt
5098 && !vect_worthwhile_without_simd_p (vinfo, code)))
5099 return false;
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_NOTE, vect_location,
5102 "proceeding using word mode.\n");
5105 /* Worthwhile without SIMD support? Check only during analysis. */
5106 if (!vec_stmt
5107 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5108 && !vect_worthwhile_without_simd_p (vinfo, code))
5110 if (dump_enabled_p ())
5111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5112 "not worthwhile without SIMD support.\n");
5113 return false;
5116 if (!vec_stmt) /* transformation not required. */
5118 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_NOTE, vect_location,
5121 "=== vectorizable_shift ===\n");
5122 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5123 return true;
5126 /* Transform. */
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_NOTE, vect_location,
5130 "transform binary/unary operation.\n");
5132 /* Handle def. */
5133 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5135 prev_stmt_info = NULL;
5136 for (j = 0; j < ncopies; j++)
5138 /* Handle uses. */
5139 if (j == 0)
5141 if (scalar_shift_arg)
5143 /* Vector shl and shr insn patterns can be defined with scalar
5144 operand 2 (shift operand). In this case, use constant or loop
5145 invariant op1 directly, without extending it to vector mode
5146 first. */
5147 optab_op2_mode = insn_data[icode].operand[2].mode;
5148 if (!VECTOR_MODE_P (optab_op2_mode))
5150 if (dump_enabled_p ())
5151 dump_printf_loc (MSG_NOTE, vect_location,
5152 "operand 1 using scalar mode.\n");
5153 vec_oprnd1 = op1;
5154 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5155 vec_oprnds1.quick_push (vec_oprnd1);
5156 if (slp_node)
5158 /* Store vec_oprnd1 for every vector stmt to be created
5159 for SLP_NODE. We check during the analysis that all
5160 the shift arguments are the same.
5161 TODO: Allow different constants for different vector
5162 stmts generated for an SLP instance. */
5163 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5164 vec_oprnds1.quick_push (vec_oprnd1);
5169 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5170 (a special case for certain kinds of vector shifts); otherwise,
5171 operand 1 should be of a vector type (the usual case). */
5172 if (vec_oprnd1)
5173 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5174 slp_node);
5175 else
5176 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5177 slp_node);
5179 else
5180 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5182 /* Arguments are ready. Create the new vector stmt. */
5183 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5185 vop1 = vec_oprnds1[i];
5186 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5187 new_temp = make_ssa_name (vec_dest, new_stmt);
5188 gimple_assign_set_lhs (new_stmt, new_temp);
5189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5190 if (slp_node)
5191 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5194 if (slp_node)
5195 continue;
5197 if (j == 0)
5198 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5199 else
5200 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5201 prev_stmt_info = vinfo_for_stmt (new_stmt);
5204 vec_oprnds0.release ();
5205 vec_oprnds1.release ();
5207 return true;
5211 /* Function vectorizable_operation.
5213 Check if STMT performs a binary, unary or ternary operation that can
5214 be vectorized.
5215 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5216 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5217 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5219 static bool
5220 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5221 gimple **vec_stmt, slp_tree slp_node)
5223 tree vec_dest;
5224 tree scalar_dest;
5225 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5227 tree vectype;
5228 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5229 enum tree_code code, orig_code;
5230 machine_mode vec_mode;
5231 tree new_temp;
5232 int op_type;
5233 optab optab;
5234 bool target_support_p;
5235 gimple *def_stmt;
5236 enum vect_def_type dt[3]
5237 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5238 int ndts = 3;
5239 gimple *new_stmt = NULL;
5240 stmt_vec_info prev_stmt_info;
5241 int nunits_in;
5242 int nunits_out;
5243 tree vectype_out;
5244 int ncopies;
5245 int j, i;
5246 vec<tree> vec_oprnds0 = vNULL;
5247 vec<tree> vec_oprnds1 = vNULL;
5248 vec<tree> vec_oprnds2 = vNULL;
5249 tree vop0, vop1, vop2;
5250 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5251 vec_info *vinfo = stmt_info->vinfo;
5253 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5254 return false;
5256 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5257 && ! vec_stmt)
5258 return false;
5260 /* Is STMT a vectorizable binary/unary operation? */
5261 if (!is_gimple_assign (stmt))
5262 return false;
5264 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5265 return false;
5267 orig_code = code = gimple_assign_rhs_code (stmt);
5269 /* For pointer addition and subtraction, we should use the normal
5270 plus and minus for the vector operation. */
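   /* E.g. a POINTER_PLUS_EXPR such as "p + 4" is vectorized as an ordinary
      PLUS_EXPR on vectors of pointer-sized unsigned elements.  */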
5271 if (code == POINTER_PLUS_EXPR)
5272 code = PLUS_EXPR;
5273 if (code == POINTER_DIFF_EXPR)
5274 code = MINUS_EXPR;
5276 /* Support only unary, binary, or ternary operations. */
5277 op_type = TREE_CODE_LENGTH (code);
5278 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5282 "num. args = %d (not unary/binary/ternary op).\n",
5283 op_type);
5284 return false;
5287 scalar_dest = gimple_assign_lhs (stmt);
5288 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5290 /* Most operations cannot handle bit-precision types without extra
5291 truncations. */
5292 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5293 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5294 /* Exceptions are bitwise binary operations. */
5295 && code != BIT_IOR_EXPR
5296 && code != BIT_XOR_EXPR
5297 && code != BIT_AND_EXPR)
5299 if (dump_enabled_p ())
5300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5301 "bit-precision arithmetic not supported.\n");
5302 return false;
5305 op0 = gimple_assign_rhs1 (stmt);
5306 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5308 if (dump_enabled_p ())
5309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5310 "use not simple.\n");
5311 return false;
5313 /* If op0 is an external or constant def use a vector type with
5314 the same size as the output vector type. */
5315 if (!vectype)
5317 /* For a boolean type we cannot determine the vectype from an
5318 invariant value (we don't know whether it is a vector
5319 of booleans or a vector of integers). We use the output
5320 vectype because operations on booleans don't change the
5321 type. */
5322 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5324 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5326 if (dump_enabled_p ())
5327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5328 "not supported operation on bool value.\n");
5329 return false;
5331 vectype = vectype_out;
5333 else
5334 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5336 if (vec_stmt)
5337 gcc_assert (vectype);
5338 if (!vectype)
5340 if (dump_enabled_p ())
5342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5343 "no vectype for scalar type ");
5344 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5345 TREE_TYPE (op0));
5346 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5349 return false;
5352 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5353 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5354 if (nunits_out != nunits_in)
5355 return false;
5357 if (op_type == binary_op || op_type == ternary_op)
5359 op1 = gimple_assign_rhs2 (stmt);
5360 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5362 if (dump_enabled_p ())
5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5364 "use not simple.\n");
5365 return false;
5368 if (op_type == ternary_op)
5370 op2 = gimple_assign_rhs3 (stmt);
5371 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5373 if (dump_enabled_p ())
5374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5375 "use not simple.\n");
5376 return false;
5380 /* Multiple types in SLP are handled by creating the appropriate number of
5381 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5382 case of SLP. */
5383 if (slp_node)
5384 ncopies = 1;
5385 else
5386 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5388 gcc_assert (ncopies >= 1);
5390 /* Shifts are handled in vectorizable_shift (). */
5391 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5392 || code == RROTATE_EXPR)
5393 return false;
5395 /* Supportable by target? */
5397 vec_mode = TYPE_MODE (vectype);
5398 if (code == MULT_HIGHPART_EXPR)
5399 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5400 else
5402 optab = optab_for_tree_code (code, vectype, optab_default);
5403 if (!optab)
5405 if (dump_enabled_p ())
5406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5407 "no optab.\n");
5408 return false;
5410 target_support_p = (optab_handler (optab, vec_mode)
5411 != CODE_FOR_nothing);
5414 if (!target_support_p)
5416 if (dump_enabled_p ())
5417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5418 "op not supported by target.\n");
5419 /* Check only during analysis. */
5420 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5421 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5422 return false;
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_NOTE, vect_location,
5425 "proceeding using word mode.\n");
5428 /* Worthwhile without SIMD support? Check only during analysis. */
5429 if (!VECTOR_MODE_P (vec_mode)
5430 && !vec_stmt
5431 && !vect_worthwhile_without_simd_p (vinfo, code))
5433 if (dump_enabled_p ())
5434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5435 "not worthwhile without SIMD support.\n");
5436 return false;
5439 if (!vec_stmt) /* transformation not required. */
5441 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5442 if (dump_enabled_p ())
5443 dump_printf_loc (MSG_NOTE, vect_location,
5444 "=== vectorizable_operation ===\n");
5445 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5446 return true;
5449 /* Transform. */
5451 if (dump_enabled_p ())
5452 dump_printf_loc (MSG_NOTE, vect_location,
5453 "transform binary/unary operation.\n");
5455 /* Handle def. */
5456 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5458 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5459 vectors with unsigned elements, but the result is signed. So, we
5460 need to compute the MINUS_EXPR into a vectype temporary and
5461 VIEW_CONVERT_EXPR it into the final vectype_out result. */
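   /* Roughly, for "p - q" the subtraction is done on the unsigned element
      vectors and the signed difference vector is then produced by the
      VIEW_CONVERT_EXPR emitted into VEC_CVT_DEST further below.  */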
5462 tree vec_cvt_dest = NULL_TREE;
5463 if (orig_code == POINTER_DIFF_EXPR)
5464 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5466 /* In case the vectorization factor (VF) is bigger than the number
5467 of elements that we can fit in a vectype (nunits), we have to generate
5468 more than one vector stmt - i.e., we need to "unroll" the
5469 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5470 from one copy of the vector stmt to the next, in the field
5471 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5472 stages to find the correct vector defs to be used when vectorizing
5473 stmts that use the defs of the current stmt. The example below
5474 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5475 we need to create 4 vectorized stmts):
5477 before vectorization:
5478                              RELATED_STMT    VEC_STMT
5479    S1:     x = memref        -               -
5480    S2:     z = x + 1         -               -
5482 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5483 there):
5484                              RELATED_STMT    VEC_STMT
5485    VS1_0:  vx0 = memref0     VS1_1           -
5486    VS1_1:  vx1 = memref1     VS1_2           -
5487    VS1_2:  vx2 = memref2     VS1_3           -
5488    VS1_3:  vx3 = memref3     -               -
5489    S1:     x = load          -               VS1_0
5490    S2:     z = x + 1         -               -
5492 step2: vectorize stmt S2 (done here):
5493 To vectorize stmt S2 we first need to find the relevant vector
5494 def for the first operand 'x'. This is, as usual, obtained from
5495 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5496 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5497 relevant vector def 'vx0'. Having found 'vx0' we can generate
5498 the vector stmt VS2_0, and as usual, record it in the
5499 STMT_VINFO_VEC_STMT of stmt S2.
5500 When creating the second copy (VS2_1), we obtain the relevant vector
5501 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5502 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5503 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5504 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5505 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5506 chain of stmts and pointers:
5507                              RELATED_STMT    VEC_STMT
5508    VS1_0:  vx0 = memref0     VS1_1           -
5509    VS1_1:  vx1 = memref1     VS1_2           -
5510    VS1_2:  vx2 = memref2     VS1_3           -
5511    VS1_3:  vx3 = memref3     -               -
5512    S1:     x = load          -               VS1_0
5513    VS2_0:  vz0 = vx0 + v1    VS2_1           -
5514    VS2_1:  vz1 = vx1 + v1    VS2_2           -
5515    VS2_2:  vz2 = vx2 + v1    VS2_3           -
5516    VS2_3:  vz3 = vx3 + v1    -               -
5517    S2:     z = x + 1         -               VS2_0  */
5519 prev_stmt_info = NULL;
5520 for (j = 0; j < ncopies; j++)
5522 /* Handle uses. */
5523 if (j == 0)
5525 if (op_type == binary_op || op_type == ternary_op)
5526 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5527 slp_node);
5528 else
5529 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5530 slp_node);
5531 if (op_type == ternary_op)
5532 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5533 slp_node);
5535 else
5537 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5538 if (op_type == ternary_op)
5540 tree vec_oprnd = vec_oprnds2.pop ();
5541 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5542 vec_oprnd));
5546 /* Arguments are ready. Create the new vector stmt. */
5547 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5549 vop1 = ((op_type == binary_op || op_type == ternary_op)
5550 ? vec_oprnds1[i] : NULL_TREE);
5551 vop2 = ((op_type == ternary_op)
5552 ? vec_oprnds2[i] : NULL_TREE);
5553 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5554 new_temp = make_ssa_name (vec_dest, new_stmt);
5555 gimple_assign_set_lhs (new_stmt, new_temp);
5556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5557 if (vec_cvt_dest)
5559 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5560 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5561 new_temp);
5562 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5563 gimple_assign_set_lhs (new_stmt, new_temp);
5564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5566 if (slp_node)
5567 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5570 if (slp_node)
5571 continue;
5573 if (j == 0)
5574 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5575 else
5576 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5577 prev_stmt_info = vinfo_for_stmt (new_stmt);
5580 vec_oprnds0.release ();
5581 vec_oprnds1.release ();
5582 vec_oprnds2.release ();
5584 return true;
5587 /* A helper function to ensure data reference DR's base alignment. */
5589 static void
5590 ensure_base_align (struct data_reference *dr)
5592 if (!dr->aux)
5593 return;
5595 if (DR_VECT_AUX (dr)->base_misaligned)
5597 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5599 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5601 if (decl_in_symtab_p (base_decl))
5602 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5603 else
5605 SET_DECL_ALIGN (base_decl, align_base_to);
5606 DECL_USER_ALIGN (base_decl) = 1;
5608 DR_VECT_AUX (dr)->base_misaligned = false;
5613 /* Function get_group_alias_ptr_type.
5615 Return the alias type for the group starting at FIRST_STMT. */
5617 static tree
5618 get_group_alias_ptr_type (gimple *first_stmt)
5620 struct data_reference *first_dr, *next_dr;
5621 gimple *next_stmt;
5623 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5624 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5625 while (next_stmt)
5627 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5628 if (get_alias_set (DR_REF (first_dr))
5629 != get_alias_set (DR_REF (next_dr)))
5631 if (dump_enabled_p ())
5632 dump_printf_loc (MSG_NOTE, vect_location,
5633 "conflicting alias set types.\n");
5634 return ptr_type_node;
5636 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5638 return reference_alias_ptr_type (DR_REF (first_dr));
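/* For illustration (a hypothetical group, not taken from this file): if one
   store in the group writes through an int * and another through a float *
   (e.g. via a union), the alias sets of their DR_REFs differ and the group
   falls back to ptr_type_node, so the vectorized accesses are treated as
   aliasing everything.  */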
5642 /* Function vectorizable_store.
5644 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5645 can be vectorized.
5646 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5647 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5648 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5650 static bool
5651 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5652 slp_tree slp_node)
5654 tree scalar_dest;
5655 tree data_ref;
5656 tree op;
5657 tree vec_oprnd = NULL_TREE;
5658 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5659 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5660 tree elem_type;
5661 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5662 struct loop *loop = NULL;
5663 machine_mode vec_mode;
5664 tree dummy;
5665 enum dr_alignment_support alignment_support_scheme;
5666 gimple *def_stmt;
5667 enum vect_def_type dt;
5668 stmt_vec_info prev_stmt_info = NULL;
5669 tree dataref_ptr = NULL_TREE;
5670 tree dataref_offset = NULL_TREE;
5671 gimple *ptr_incr = NULL;
5672 int ncopies;
5673 int j;
5674 gimple *next_stmt, *first_stmt;
5675 bool grouped_store;
5676 unsigned int group_size, i;
5677 vec<tree> oprnds = vNULL;
5678 vec<tree> result_chain = vNULL;
5679 bool inv_p;
5680 tree offset = NULL_TREE;
5681 vec<tree> vec_oprnds = vNULL;
5682 bool slp = (slp_node != NULL);
5683 unsigned int vec_num;
5684 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5685 vec_info *vinfo = stmt_info->vinfo;
5686 tree aggr_type;
5687 gather_scatter_info gs_info;
5688 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5689 gimple *new_stmt;
5690 int vf;
5691 vec_load_store_type vls_type;
5692 tree ref_type;
5694 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5695 return false;
5697 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5698 && ! vec_stmt)
5699 return false;
5701 /* Is vectorizable store? */
5703 if (!is_gimple_assign (stmt))
5704 return false;
5706 scalar_dest = gimple_assign_lhs (stmt);
5707 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5708 && is_pattern_stmt_p (stmt_info))
5709 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5710 if (TREE_CODE (scalar_dest) != ARRAY_REF
5711 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5712 && TREE_CODE (scalar_dest) != INDIRECT_REF
5713 && TREE_CODE (scalar_dest) != COMPONENT_REF
5714 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5715 && TREE_CODE (scalar_dest) != REALPART_EXPR
5716 && TREE_CODE (scalar_dest) != MEM_REF)
5717 return false;
5719 /* Cannot have hybrid store SLP -- that would mean storing to the
5720 same location twice. */
5721 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5723 gcc_assert (gimple_assign_single_p (stmt));
5725 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5726 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5728 if (loop_vinfo)
5730 loop = LOOP_VINFO_LOOP (loop_vinfo);
5731 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5733 else
5734 vf = 1;
5736 /* Multiple types in SLP are handled by creating the appropriate number of
5737 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5738 case of SLP. */
5739 if (slp)
5740 ncopies = 1;
5741 else
5742 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5744 gcc_assert (ncopies >= 1);
5746 /* FORNOW. This restriction should be relaxed. */
5747 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5749 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5751 "multiple types in nested loop.\n");
5752 return false;
5755 op = gimple_assign_rhs1 (stmt);
5757 /* If this is a store from a constant, make sure
5758 native_encode_expr can handle it. */
5759 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
5760 return false;
5762 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5764 if (dump_enabled_p ())
5765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5766 "use not simple.\n");
5767 return false;
5770 if (dt == vect_constant_def || dt == vect_external_def)
5771 vls_type = VLS_STORE_INVARIANT;
5772 else
5773 vls_type = VLS_STORE;
5775 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5776 return false;
5778 elem_type = TREE_TYPE (vectype);
5779 vec_mode = TYPE_MODE (vectype);
5781 /* FORNOW. In some cases we can vectorize even if the data-type is not
5782 supported (e.g. array initialization with 0). */
5783 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5784 return false;
5786 if (!STMT_VINFO_DATA_REF (stmt_info))
5787 return false;
5789 vect_memory_access_type memory_access_type;
5790 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5791 &memory_access_type, &gs_info))
5792 return false;
5794 if (!vec_stmt) /* transformation not required. */
5796 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5797 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5798 /* The SLP costs are calculated during SLP analysis. */
5799 if (!PURE_SLP_STMT (stmt_info))
5800 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5801 NULL, NULL, NULL);
5802 return true;
5804 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5806 /* Transform. */
5808 ensure_base_align (dr);
5810 if (memory_access_type == VMAT_GATHER_SCATTER)
5812 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5813 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5814 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5815 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5816 edge pe = loop_preheader_edge (loop);
5817 gimple_seq seq;
5818 basic_block new_bb;
5819 enum { NARROW, NONE, WIDEN } modifier;
5820 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5822 if (nunits == (unsigned int) scatter_off_nunits)
5823 modifier = NONE;
5824 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5826 modifier = WIDEN;
5828 auto_vec_perm_indices sel (scatter_off_nunits);
5829 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5830 sel.quick_push (i | nunits);
5832 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5833 gcc_assert (perm_mask != NULL_TREE);
5835 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5837 modifier = NARROW;
5839 auto_vec_perm_indices sel (nunits);
5840 for (i = 0; i < (unsigned int) nunits; ++i)
5841 sel.quick_push (i | scatter_off_nunits);
5843 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5844 gcc_assert (perm_mask != NULL_TREE);
5845 ncopies *= 2;
5847 else
5848 gcc_unreachable ();
5850 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5851 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5852 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5853 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5854 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5855 scaletype = TREE_VALUE (arglist);
5857 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5858 && TREE_CODE (rettype) == VOID_TYPE);
5860 ptr = fold_convert (ptrtype, gs_info.base);
5861 if (!is_gimple_min_invariant (ptr))
5863 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5864 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5865 gcc_assert (!new_bb);
5868 /* Currently we support only unconditional scatter stores,
5869 so the mask should be all ones. */
5870 mask = build_int_cst (masktype, -1);
5871 mask = vect_init_vector (stmt, mask, masktype, NULL);
5873 scale = build_int_cst (scaletype, gs_info.scale);
5875 prev_stmt_info = NULL;
5876 for (j = 0; j < ncopies; ++j)
5878 if (j == 0)
5880 src = vec_oprnd1
5881 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5882 op = vec_oprnd0
5883 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5885 else if (modifier != NONE && (j & 1))
5887 if (modifier == WIDEN)
5889 src = vec_oprnd1
5890 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5891 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5892 stmt, gsi);
5894 else if (modifier == NARROW)
5896 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5897 stmt, gsi);
5898 op = vec_oprnd0
5899 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5900 vec_oprnd0);
5902 else
5903 gcc_unreachable ();
5905 else
5907 src = vec_oprnd1
5908 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5909 op = vec_oprnd0
5910 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5911 vec_oprnd0);
5914 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5916 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5917 == TYPE_VECTOR_SUBPARTS (srctype));
5918 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5919 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5920 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5922 src = var;
5925 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5927 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5928 == TYPE_VECTOR_SUBPARTS (idxtype));
5929 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5930 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5931 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5932 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5933 op = var;
5936 new_stmt
5937 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5941 if (prev_stmt_info == NULL)
5942 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5943 else
5944 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5945 prev_stmt_info = vinfo_for_stmt (new_stmt);
5947 return true;
5950 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5951 if (grouped_store)
5953 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5954 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5955 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5957 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5959 /* FORNOW */
5960 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5962 /* We vectorize all the stmts of the interleaving group when we
5963 reach the last stmt in the group. */
5964 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5965 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5966 && !slp)
5968 *vec_stmt = NULL;
5969 return true;
5972 if (slp)
5974 grouped_store = false;
5975 /* VEC_NUM is the number of vect stmts to be created for this
5976 group. */
5977 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5978 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5979 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5980 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5981 op = gimple_assign_rhs1 (first_stmt);
5983 else
5984 /* VEC_NUM is the number of vect stmts to be created for this
5985 group. */
5986 vec_num = group_size;
5988 ref_type = get_group_alias_ptr_type (first_stmt);
5990 else
5992 first_stmt = stmt;
5993 first_dr = dr;
5994 group_size = vec_num = 1;
5995 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5998 if (dump_enabled_p ())
5999 dump_printf_loc (MSG_NOTE, vect_location,
6000 "transform store. ncopies = %d\n", ncopies);
6002 if (memory_access_type == VMAT_ELEMENTWISE
6003 || memory_access_type == VMAT_STRIDED_SLP)
6005 gimple_stmt_iterator incr_gsi;
6006 bool insert_after;
6007 gimple *incr;
6008 tree offvar;
6009 tree ivstep;
6010 tree running_off;
6011 gimple_seq stmts = NULL;
6012 tree stride_base, stride_step, alias_off;
6013 tree vec_oprnd;
6014 unsigned int g;
6016 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6018 stride_base
6019 = fold_build_pointer_plus
6020 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6021 size_binop (PLUS_EXPR,
6022 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6023 convert_to_ptrofftype (DR_INIT (first_dr))));
6024 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6026 /* For a store with loop-invariant (but other than power-of-2)
6027 stride (i.e. not a grouped access) like so:
6029 for (i = 0; i < n; i += stride)
6030 array[i] = ...;
6032 we generate a new induction variable and new stores from
6033 the components of the (vectorized) rhs:
6035 for (j = 0; ; j += VF*stride)
6036 vectemp = ...;
6037 tmp1 = vectemp[0];
6038 array[j] = tmp1;
6039 tmp2 = vectemp[1];
6040 array[j + stride] = tmp2;
6044 unsigned nstores = nunits;
6045 unsigned lnel = 1;
6046 tree ltype = elem_type;
6047 tree lvectype = vectype;
6048 if (slp)
6050 if (group_size < nunits
6051 && nunits % group_size == 0)
6053 nstores = nunits / group_size;
6054 lnel = group_size;
6055 ltype = build_vector_type (elem_type, group_size);
6056 lvectype = vectype;
6058 /* First check whether the vec_extract optab supports extraction
6059 of vector elts directly; if not, try the integer-type fallback below. */
6060 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6061 machine_mode vmode;
6062 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6063 || !VECTOR_MODE_P (vmode)
6064 || (convert_optab_handler (vec_extract_optab,
6065 TYPE_MODE (vectype), vmode)
6066 == CODE_FOR_nothing))
6068 /* Try to avoid emitting an extract of vector elements
6069 by performing the extracts using an integer type of the
6070 same size, extracting from a vector of those and then
6071 re-interpreting it as the original vector type if
6072 supported. */
6073 unsigned lsize
6074 = group_size * GET_MODE_BITSIZE (elmode);
6075 elmode = int_mode_for_size (lsize, 0).require ();
6076 /* If we can't construct such a vector fall back to
6077 element extracts from the original vector type and
6078 element size stores. */
6079 if (mode_for_vector (elmode,
6080 nunits / group_size).exists (&vmode)
6081 && VECTOR_MODE_P (vmode)
6082 && (convert_optab_handler (vec_extract_optab,
6083 vmode, elmode)
6084 != CODE_FOR_nothing))
6086 nstores = nunits / group_size;
6087 lnel = group_size;
6088 ltype = build_nonstandard_integer_type (lsize, 1);
6089 lvectype = build_vector_type (ltype, nstores);
6091 /* Else fall back to vector extraction anyway.
6092 Fewer stores are more important than avoiding spilling
6093 of the vector we extract from. Compared to the
6094 construction case in vectorizable_load no store-forwarding
6095 issue exists here for reasonable archs. */
6098 else if (group_size >= nunits
6099 && group_size % nunits == 0)
6101 nstores = 1;
6102 lnel = nunits;
6103 ltype = vectype;
6104 lvectype = vectype;
6106 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6107 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6110 ivstep = stride_step;
6111 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6112 build_int_cst (TREE_TYPE (ivstep), vf));
6114 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6116 create_iv (stride_base, ivstep, NULL,
6117 loop, &incr_gsi, insert_after,
6118 &offvar, NULL);
6119 incr = gsi_stmt (incr_gsi);
6120 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6122 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6123 if (stmts)
6124 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6126 prev_stmt_info = NULL;
6127 alias_off = build_int_cst (ref_type, 0);
6128 next_stmt = first_stmt;
6129 for (g = 0; g < group_size; g++)
6131 running_off = offvar;
6132 if (g)
6134 tree size = TYPE_SIZE_UNIT (ltype);
6135 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6136 size);
6137 tree newoff = copy_ssa_name (running_off, NULL);
6138 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6139 running_off, pos);
6140 vect_finish_stmt_generation (stmt, incr, gsi);
6141 running_off = newoff;
6143 unsigned int group_el = 0;
6144 unsigned HOST_WIDE_INT
6145 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6146 for (j = 0; j < ncopies; j++)
6148 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6149 and first_stmt == stmt. */
6150 if (j == 0)
6152 if (slp)
6154 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6155 slp_node);
6156 vec_oprnd = vec_oprnds[0];
6158 else
6160 gcc_assert (gimple_assign_single_p (next_stmt));
6161 op = gimple_assign_rhs1 (next_stmt);
6162 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6165 else
6167 if (slp)
6168 vec_oprnd = vec_oprnds[j];
6169 else
6171 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6172 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6175 /* Pun the vector to extract from if necessary. */
6176 if (lvectype != vectype)
6178 tree tem = make_ssa_name (lvectype);
6179 gimple *pun
6180 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6181 lvectype, vec_oprnd));
6182 vect_finish_stmt_generation (stmt, pun, gsi);
6183 vec_oprnd = tem;
6185 for (i = 0; i < nstores; i++)
6187 tree newref, newoff;
6188 gimple *incr, *assign;
6189 tree size = TYPE_SIZE (ltype);
6190 /* Extract the i'th component. */
6191 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6192 bitsize_int (i), size);
6193 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6194 size, pos);
6196 elem = force_gimple_operand_gsi (gsi, elem, true,
6197 NULL_TREE, true,
6198 GSI_SAME_STMT);
6200 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6201 group_el * elsz);
6202 newref = build2 (MEM_REF, ltype,
6203 running_off, this_off);
6205 /* And store it to *running_off. */
6206 assign = gimple_build_assign (newref, elem);
6207 vect_finish_stmt_generation (stmt, assign, gsi);
6209 group_el += lnel;
6210 if (! slp
6211 || group_el == group_size)
6213 newoff = copy_ssa_name (running_off, NULL);
6214 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6215 running_off, stride_step);
6216 vect_finish_stmt_generation (stmt, incr, gsi);
6218 running_off = newoff;
6219 group_el = 0;
6221 if (g == group_size - 1
6222 && !slp)
6224 if (j == 0 && i == 0)
6225 STMT_VINFO_VEC_STMT (stmt_info)
6226 = *vec_stmt = assign;
6227 else
6228 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6229 prev_stmt_info = vinfo_for_stmt (assign);
6233 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6234 if (slp)
6235 break;
6238 vec_oprnds.release ();
6239 return true;
6242 auto_vec<tree> dr_chain (group_size);
6243 oprnds.create (group_size);
6245 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6246 gcc_assert (alignment_support_scheme);
6247 /* Targets with store-lane instructions must not require explicit
6248 realignment. */
6249 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6250 || alignment_support_scheme == dr_aligned
6251 || alignment_support_scheme == dr_unaligned_supported);
6253 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6254 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6255 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6257 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6258 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6259 else
6260 aggr_type = vectype;
6262 /* In case the vectorization factor (VF) is bigger than the number
6263 of elements that we can fit in a vectype (nunits), we have to generate
6264 more than one vector stmt - i.e., we need to "unroll" the
6265 vector stmt by a factor VF/nunits. For more details see documentation in
6266 vect_get_vec_def_for_copy_stmt. */
6268 /* In case of interleaving (non-unit grouped access):
6270 S1: &base + 2 = x2
6271 S2: &base = x0
6272 S3: &base + 1 = x1
6273 S4: &base + 3 = x3
6275 We create vectorized stores starting from the base address (the access of
6276 the first stmt in the chain, S2 in the above example) when the last store
6277 stmt of the chain (S4) is reached:
6279 VS1: &base = vx2
6280 VS2: &base + vec_size*1 = vx0
6281 VS3: &base + vec_size*2 = vx1
6282 VS4: &base + vec_size*3 = vx3
6284 Then permutation statements are generated:
6286 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6287 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6290 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6291 (the order of the data-refs in the output of vect_permute_store_chain
6292 corresponds to the order of scalar stmts in the interleaving chain - see
6293 the documentation of vect_permute_store_chain()).
6295 In case of both multiple types and interleaving, the above vector stores and
6296 permutation stmts are created for every copy. The result vector stmts are
6297 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6298 STMT_VINFO_RELATED_STMT for the next copies.
6301 prev_stmt_info = NULL;
6302 for (j = 0; j < ncopies; j++)
6305 if (j == 0)
6307 if (slp)
6309 /* Get vectorized arguments for SLP_NODE. */
6310 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6311 NULL, slp_node);
6313 vec_oprnd = vec_oprnds[0];
6315 else
6317 /* For interleaved stores we collect vectorized defs for all the
6318 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6319 used as an input to vect_permute_store_chain(), and OPRNDS as
6320 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6322 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6323 OPRNDS are of size 1. */
6324 next_stmt = first_stmt;
6325 for (i = 0; i < group_size; i++)
6327 /* Since gaps are not supported for interleaved stores,
6328 GROUP_SIZE is the exact number of stmts in the chain.
6329 Therefore, NEXT_STMT can't be NULL_TREE. If there is
6330 no interleaving, GROUP_SIZE is 1, and only one
6331 iteration of the loop will be executed. */
6332 gcc_assert (next_stmt
6333 && gimple_assign_single_p (next_stmt));
6334 op = gimple_assign_rhs1 (next_stmt);
6336 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6337 dr_chain.quick_push (vec_oprnd);
6338 oprnds.quick_push (vec_oprnd);
6339 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6343 /* We should have caught mismatched types earlier. */
6344 gcc_assert (useless_type_conversion_p (vectype,
6345 TREE_TYPE (vec_oprnd)));
6346 bool simd_lane_access_p
6347 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6348 if (simd_lane_access_p
6349 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6350 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6351 && integer_zerop (DR_OFFSET (first_dr))
6352 && integer_zerop (DR_INIT (first_dr))
6353 && alias_sets_conflict_p (get_alias_set (aggr_type),
6354 get_alias_set (TREE_TYPE (ref_type))))
6356 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6357 dataref_offset = build_int_cst (ref_type, 0);
6358 inv_p = false;
6360 else
6361 dataref_ptr
6362 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6363 simd_lane_access_p ? loop : NULL,
6364 offset, &dummy, gsi, &ptr_incr,
6365 simd_lane_access_p, &inv_p);
6366 gcc_assert (bb_vinfo || !inv_p);
6368 else
6370 /* For interleaved stores we created vectorized defs for all the
6371 defs stored in OPRNDS in the previous iteration (previous copy).
6372 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6373 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6374 next copy.
6375 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6376 OPRNDS are of size 1. */
6377 for (i = 0; i < group_size; i++)
6379 op = oprnds[i];
6380 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6381 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6382 dr_chain[i] = vec_oprnd;
6383 oprnds[i] = vec_oprnd;
6385 if (dataref_offset)
6386 dataref_offset
6387 = int_const_binop (PLUS_EXPR, dataref_offset,
6388 TYPE_SIZE_UNIT (aggr_type));
6389 else
6390 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6391 TYPE_SIZE_UNIT (aggr_type));
6394 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6396 tree vec_array;
6398 /* Combine all the vectors into an array. */
6399 vec_array = create_vector_array (vectype, vec_num);
6400 for (i = 0; i < vec_num; i++)
6402 vec_oprnd = dr_chain[i];
6403 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6406 /* Emit:
6407 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6408 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6409 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6410 vec_array);
6411 gimple_call_set_lhs (call, data_ref);
6412 gimple_call_set_nothrow (call, true);
6413 new_stmt = call;
6414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6416 else
6418 new_stmt = NULL;
6419 if (grouped_store)
6421 if (j == 0)
6422 result_chain.create (group_size);
6423 /* Permute. */
6424 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6425 &result_chain);
6428 next_stmt = first_stmt;
6429 for (i = 0; i < vec_num; i++)
6431 unsigned align, misalign;
6433 if (i > 0)
6434 /* Bump the vector pointer. */
6435 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6436 stmt, NULL_TREE);
6438 if (slp)
6439 vec_oprnd = vec_oprnds[i];
6440 else if (grouped_store)
6441 /* For grouped stores vectorized defs are interleaved in
6442 vect_permute_store_chain(). */
6443 vec_oprnd = result_chain[i];
6445 data_ref = fold_build2 (MEM_REF, vectype,
6446 dataref_ptr,
6447 dataref_offset
6448 ? dataref_offset
6449 : build_int_cst (ref_type, 0));
6450 align = DR_TARGET_ALIGNMENT (first_dr);
6451 if (aligned_access_p (first_dr))
6452 misalign = 0;
6453 else if (DR_MISALIGNMENT (first_dr) == -1)
6455 align = dr_alignment (vect_dr_behavior (first_dr));
6456 misalign = 0;
6457 TREE_TYPE (data_ref)
6458 = build_aligned_type (TREE_TYPE (data_ref),
6459 align * BITS_PER_UNIT);
6461 else
6463 TREE_TYPE (data_ref)
6464 = build_aligned_type (TREE_TYPE (data_ref),
6465 TYPE_ALIGN (elem_type));
6466 misalign = DR_MISALIGNMENT (first_dr);
6468 if (dataref_offset == NULL_TREE
6469 && TREE_CODE (dataref_ptr) == SSA_NAME)
6470 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6471 misalign);
6473 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6475 tree perm_mask = perm_mask_for_reverse (vectype);
6476 tree perm_dest
6477 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6478 vectype);
6479 tree new_temp = make_ssa_name (perm_dest);
6481 /* Generate the permute statement. */
6482 gimple *perm_stmt
6483 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6484 vec_oprnd, perm_mask);
6485 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6487 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6488 vec_oprnd = new_temp;
6491 /* Arguments are ready. Create the new vector stmt. */
6492 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6493 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6495 if (slp)
6496 continue;
6498 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6499 if (!next_stmt)
6500 break;
6503 if (!slp)
6505 if (j == 0)
6506 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6507 else
6508 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6509 prev_stmt_info = vinfo_for_stmt (new_stmt);
6513 oprnds.release ();
6514 result_chain.release ();
6515 vec_oprnds.release ();
6517 return true;
6520 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6521 VECTOR_CST mask. No checks are made that the target platform supports the
6522 mask, so callers may wish to test can_vec_perm_p separately, or use
6523 vect_gen_perm_mask_checked. */
6525 tree
6526 vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel)
6528 tree mask_elt_type, mask_type;
6530 unsigned int nunits = sel.length ();
6531 gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6533 mask_elt_type = lang_hooks.types.type_for_mode
6534 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
6535 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6537 tree_vector_builder mask_elts (mask_type, nunits, 1);
6538 for (unsigned int i = 0; i < nunits; ++i)
6539 mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
6540 return mask_elts.build ();
6543 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6544 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6546 tree
6547 vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel)
6549 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel));
6550 return vect_gen_perm_mask_any (vectype, sel);
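/* As an illustrative example (assuming a 4-element vector type and a target
   that supports the permutation; vectype, dest and x are placeholder names):
   a mask that reverses a vector can be built and applied like so:

     auto_vec_perm_indices sel (4);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     tree mask = vect_gen_perm_mask_checked (vectype, sel);
     gimple *perm = gimple_build_assign (dest, VEC_PERM_EXPR, x, x, mask);

   i.e. the mask is the VECTOR_CST {3, 2, 1, 0}; this is the same pattern
   perm_mask_for_reverse uses above.  */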
6553 /* Given vector variables X and Y that were generated for the scalar
6554 STMT, generate instructions to permute the vector elements of X and Y
6555 using permutation mask MASK_VEC, insert them at *GSI and return the
6556 permuted vector variable. */
6558 static tree
6559 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6560 gimple_stmt_iterator *gsi)
6562 tree vectype = TREE_TYPE (x);
6563 tree perm_dest, data_ref;
6564 gimple *perm_stmt;
6566 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6567 data_ref = make_ssa_name (perm_dest);
6569 /* Generate the permute statement. */
6570 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6571 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6573 return data_ref;
6576 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6577 inserting them on the loop's preheader edge. Returns true if we
6578 were successful in doing so (and thus STMT can then be moved),
6579 otherwise returns false. */
6581 static bool
6582 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6584 ssa_op_iter i;
6585 tree op;
6586 bool any = false;
6588 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6590 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6591 if (!gimple_nop_p (def_stmt)
6592 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6594 /* Make sure we don't need to recurse. While we could do
6595 so in simple cases, when there are more complex use webs
6596 we don't have an easy way to preserve stmt order to fulfil
6597 dependencies within them. */
6598 tree op2;
6599 ssa_op_iter i2;
6600 if (gimple_code (def_stmt) == GIMPLE_PHI)
6601 return false;
6602 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6604 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6605 if (!gimple_nop_p (def_stmt2)
6606 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6607 return false;
6609 any = true;
6613 if (!any)
6614 return true;
6616 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6618 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6619 if (!gimple_nop_p (def_stmt)
6620 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6622 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6623 gsi_remove (&gsi, false);
6624 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6628 return true;
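/* For illustration (a hypothetical case): if STMT is the invariant load
   x_3 = MEM[p_2] inside LOOP and its only SSA use p_2 is defined in the loop
   as p_2 = &a + 16 from loop-invariant operands, the definition of p_2 is
   moved to the preheader edge so that STMT itself can then be hoisted by the
   caller.  */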
6631 /* vectorizable_load.
6633 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6634 can be vectorized.
6635 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6636 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6637 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6639 static bool
6640 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6641 slp_tree slp_node, slp_instance slp_node_instance)
6643 tree scalar_dest;
6644 tree vec_dest = NULL;
6645 tree data_ref = NULL;
6646 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6647 stmt_vec_info prev_stmt_info;
6648 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6649 struct loop *loop = NULL;
6650 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6651 bool nested_in_vect_loop = false;
6652 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6653 tree elem_type;
6654 tree new_temp;
6655 machine_mode mode;
6656 gimple *new_stmt = NULL;
6657 tree dummy;
6658 enum dr_alignment_support alignment_support_scheme;
6659 tree dataref_ptr = NULL_TREE;
6660 tree dataref_offset = NULL_TREE;
6661 gimple *ptr_incr = NULL;
6662 int ncopies;
6663 int i, j, group_size, group_gap_adj;
6664 tree msq = NULL_TREE, lsq;
6665 tree offset = NULL_TREE;
6666 tree byte_offset = NULL_TREE;
6667 tree realignment_token = NULL_TREE;
6668 gphi *phi = NULL;
6669 vec<tree> dr_chain = vNULL;
6670 bool grouped_load = false;
6671 gimple *first_stmt;
6672 gimple *first_stmt_for_drptr = NULL;
6673 bool inv_p;
6674 bool compute_in_loop = false;
6675 struct loop *at_loop;
6676 int vec_num;
6677 bool slp = (slp_node != NULL);
6678 bool slp_perm = false;
6679 enum tree_code code;
6680 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6681 int vf;
6682 tree aggr_type;
6683 gather_scatter_info gs_info;
6684 vec_info *vinfo = stmt_info->vinfo;
6685 tree ref_type;
6687 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6688 return false;
6690 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6691 && ! vec_stmt)
6692 return false;
6694 /* Is vectorizable load? */
6695 if (!is_gimple_assign (stmt))
6696 return false;
6698 scalar_dest = gimple_assign_lhs (stmt);
6699 if (TREE_CODE (scalar_dest) != SSA_NAME)
6700 return false;
6702 code = gimple_assign_rhs_code (stmt);
6703 if (code != ARRAY_REF
6704 && code != BIT_FIELD_REF
6705 && code != INDIRECT_REF
6706 && code != COMPONENT_REF
6707 && code != IMAGPART_EXPR
6708 && code != REALPART_EXPR
6709 && code != MEM_REF
6710 && TREE_CODE_CLASS (code) != tcc_declaration)
6711 return false;
6713 if (!STMT_VINFO_DATA_REF (stmt_info))
6714 return false;
6716 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6717 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6719 if (loop_vinfo)
6721 loop = LOOP_VINFO_LOOP (loop_vinfo);
6722 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6723 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6725 else
6726 vf = 1;
6728 /* Multiple types in SLP are handled by creating the appropriate number of
6729 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6730 case of SLP. */
6731 if (slp)
6732 ncopies = 1;
6733 else
6734 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6736 gcc_assert (ncopies >= 1);
6738 /* FORNOW. This restriction should be relaxed. */
6739 if (nested_in_vect_loop && ncopies > 1)
6741 if (dump_enabled_p ())
6742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6743 "multiple types in nested loop.\n");
6744 return false;
6747 /* Invalidate assumptions made by dependence analysis when vectorization
6748 on the unrolled body effectively re-orders stmts. */
6749 if (ncopies > 1
6750 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6751 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6752 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6754 if (dump_enabled_p ())
6755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6756 "cannot perform implicit CSE when unrolling "
6757 "with negative dependence distance\n");
6758 return false;
6761 elem_type = TREE_TYPE (vectype);
6762 mode = TYPE_MODE (vectype);
6764 /* FORNOW. In some cases we can vectorize even if the data-type is not
6765 supported (e.g. data copies). */
6766 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6768 if (dump_enabled_p ())
6769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6770 "Aligned load, but unsupported type.\n");
6771 return false;
6774 /* Check if the load is a part of an interleaving chain. */
6775 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6777 grouped_load = true;
6778 /* FORNOW */
6779 gcc_assert (!nested_in_vect_loop);
6780 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6782 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6783 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6785 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6786 slp_perm = true;
6788 /* Invalidate assumptions made by dependence analysis when vectorization
6789 on the unrolled body effectively re-orders stmts. */
6790 if (!PURE_SLP_STMT (stmt_info)
6791 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6792 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6793 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6795 if (dump_enabled_p ())
6796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6797 "cannot perform implicit CSE when performing "
6798 "group loads with negative dependence distance\n");
6799 return false;
6802 /* Similarly, when the stmt is a load that is both part of an SLP
6803 instance and a loop vectorized stmt via the same-dr mechanism,
6804 we have to give up. */
6805 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6806 && (STMT_SLP_TYPE (stmt_info)
6807 != STMT_SLP_TYPE (vinfo_for_stmt
6808 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6810 if (dump_enabled_p ())
6811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6812 "conflicting SLP types for CSEd load\n");
6813 return false;
6817 vect_memory_access_type memory_access_type;
6818 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6819 &memory_access_type, &gs_info))
6820 return false;
6822 if (!vec_stmt) /* transformation not required. */
6824 if (!slp)
6825 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6826 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6827 /* The SLP costs are calculated during SLP analysis. */
6828 if (!PURE_SLP_STMT (stmt_info))
6829 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6830 NULL, NULL, NULL);
6831 return true;
6834 if (!slp)
6835 gcc_assert (memory_access_type
6836 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6838 if (dump_enabled_p ())
6839 dump_printf_loc (MSG_NOTE, vect_location,
6840 "transform load. ncopies = %d\n", ncopies);
6842 /* Transform. */
6844 ensure_base_align (dr);
6846 if (memory_access_type == VMAT_GATHER_SCATTER)
6848 tree vec_oprnd0 = NULL_TREE, op;
6849 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6850 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6851 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6852 edge pe = loop_preheader_edge (loop);
6853 gimple_seq seq;
6854 basic_block new_bb;
6855 enum { NARROW, NONE, WIDEN } modifier;
6856 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6858 if (nunits == gather_off_nunits)
6859 modifier = NONE;
6860 else if (nunits == gather_off_nunits / 2)
6862 modifier = WIDEN;
6864 auto_vec_perm_indices sel (gather_off_nunits);
6865 for (i = 0; i < gather_off_nunits; ++i)
6866 sel.quick_push (i | nunits);
6868 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6870 else if (nunits == gather_off_nunits * 2)
6872 modifier = NARROW;
6874 auto_vec_perm_indices sel (nunits);
6875 for (i = 0; i < nunits; ++i)
6876 sel.quick_push (i < gather_off_nunits
6877 ? i : i + nunits - gather_off_nunits);
6879 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6880 ncopies *= 2;
6882 else
6883 gcc_unreachable ();
6885 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6886 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6887 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6888 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6889 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6890 scaletype = TREE_VALUE (arglist);
6891 gcc_checking_assert (types_compatible_p (srctype, rettype));
6893 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6895 ptr = fold_convert (ptrtype, gs_info.base);
6896 if (!is_gimple_min_invariant (ptr))
6898 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6899 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6900 gcc_assert (!new_bb);
6903 /* Currently we support only unconditional gather loads,
6904 so the mask should be all ones. */
6905 if (TREE_CODE (masktype) == INTEGER_TYPE)
6906 mask = build_int_cst (masktype, -1);
6907 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6909 mask = build_int_cst (TREE_TYPE (masktype), -1);
6910 mask = build_vector_from_val (masktype, mask);
6911 mask = vect_init_vector (stmt, mask, masktype, NULL);
6913 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6915 REAL_VALUE_TYPE r;
6916 long tmp[6];
6917 for (j = 0; j < 6; ++j)
6918 tmp[j] = -1;
6919 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6920 mask = build_real (TREE_TYPE (masktype), r);
6921 mask = build_vector_from_val (masktype, mask);
6922 mask = vect_init_vector (stmt, mask, masktype, NULL);
6924 else
6925 gcc_unreachable ();
6927 scale = build_int_cst (scaletype, gs_info.scale);
6929 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6930 merge = build_int_cst (TREE_TYPE (rettype), 0);
6931 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6933 REAL_VALUE_TYPE r;
6934 long tmp[6];
6935 for (j = 0; j < 6; ++j)
6936 tmp[j] = 0;
6937 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6938 merge = build_real (TREE_TYPE (rettype), r);
6940 else
6941 gcc_unreachable ();
6942 merge = build_vector_from_val (rettype, merge);
6943 merge = vect_init_vector (stmt, merge, rettype, NULL);
6945 prev_stmt_info = NULL;
6946 for (j = 0; j < ncopies; ++j)
6948 if (modifier == WIDEN && (j & 1))
6949 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6950 perm_mask, stmt, gsi);
6951 else if (j == 0)
6952 op = vec_oprnd0
6953 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6954 else
6955 op = vec_oprnd0
6956 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6958 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6960 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6961 == TYPE_VECTOR_SUBPARTS (idxtype));
6962 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6963 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6964 new_stmt
6965 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6966 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6967 op = var;
6970 new_stmt
6971 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6973 if (!useless_type_conversion_p (vectype, rettype))
6975 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6976 == TYPE_VECTOR_SUBPARTS (rettype));
6977 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6978 gimple_call_set_lhs (new_stmt, op);
6979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6980 var = make_ssa_name (vec_dest);
6981 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6982 new_stmt
6983 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6985 else
6987 var = make_ssa_name (vec_dest, new_stmt);
6988 gimple_call_set_lhs (new_stmt, var);
6991 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6993 if (modifier == NARROW)
6995 if ((j & 1) == 0)
6997 prev_res = var;
6998 continue;
7000 var = permute_vec_elements (prev_res, var,
7001 perm_mask, stmt, gsi);
7002 new_stmt = SSA_NAME_DEF_STMT (var);
7005 if (prev_stmt_info == NULL)
7006 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7007 else
7008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7009 prev_stmt_info = vinfo_for_stmt (new_stmt);
7011 return true;
7014 if (memory_access_type == VMAT_ELEMENTWISE
7015 || memory_access_type == VMAT_STRIDED_SLP)
7017 gimple_stmt_iterator incr_gsi;
7018 bool insert_after;
7019 gimple *incr;
7020 tree offvar;
7021 tree ivstep;
7022 tree running_off;
7023 vec<constructor_elt, va_gc> *v = NULL;
7024 gimple_seq stmts = NULL;
7025 tree stride_base, stride_step, alias_off;
7027 gcc_assert (!nested_in_vect_loop);
7029 if (slp && grouped_load)
7031 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7032 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7033 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7034 ref_type = get_group_alias_ptr_type (first_stmt);
7036 else
7038 first_stmt = stmt;
7039 first_dr = dr;
7040 group_size = 1;
7041 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7044 stride_base
7045 = fold_build_pointer_plus
7046 (DR_BASE_ADDRESS (first_dr),
7047 size_binop (PLUS_EXPR,
7048 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7049 convert_to_ptrofftype (DR_INIT (first_dr))));
7050 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7052 /* For a load with loop-invariant (but other than power-of-2)
7053 stride (i.e. not a grouped access) like so:
7055 for (i = 0; i < n; i += stride)
7056 ... = array[i];
7058 we generate a new induction variable and new accesses to
7059 form a new vector (or vectors, depending on ncopies):
7061 for (j = 0; ; j += VF*stride)
7062 tmp1 = array[j];
7063 tmp2 = array[j + stride];
7065 vectemp = {tmp1, tmp2, ...}
7068 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7069 build_int_cst (TREE_TYPE (stride_step), vf));
7071 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7073 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7074 loop, &incr_gsi, insert_after,
7075 &offvar, NULL);
7076 incr = gsi_stmt (incr_gsi);
7077 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7079 stride_step = force_gimple_operand (unshare_expr (stride_step),
7080 &stmts, true, NULL_TREE);
7081 if (stmts)
7082 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7084 prev_stmt_info = NULL;
7085 running_off = offvar;
7086 alias_off = build_int_cst (ref_type, 0);
7087 int nloads = nunits;
7088 int lnel = 1;
7089 tree ltype = TREE_TYPE (vectype);
7090 tree lvectype = vectype;
7091 auto_vec<tree> dr_chain;
7092 if (memory_access_type == VMAT_STRIDED_SLP)
7094 if (group_size < nunits)
7096 /* First check if vec_init optab supports construction from
7097 vector elts directly. */
7098 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7099 machine_mode vmode;
7100 if (mode_for_vector (elmode, group_size).exists (&vmode)
7101 && VECTOR_MODE_P (vmode)
7102 && (convert_optab_handler (vec_init_optab,
7103 TYPE_MODE (vectype), vmode)
7104 != CODE_FOR_nothing))
7106 nloads = nunits / group_size;
7107 lnel = group_size;
7108 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7110 else
7112 /* Otherwise avoid emitting a constructor of vector elements
7113 by performing the loads using an integer type of the same
7114 size, constructing a vector of those and then
7115 re-interpreting it as the original vector type.
7116 This avoids a huge runtime penalty due to the general
7117 inability to perform store forwarding from smaller stores
7118 to a larger load. */
7119 unsigned lsize
7120 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7121 elmode = int_mode_for_size (lsize, 0).require ();
7122 /* If we can't construct such a vector fall back to
7123 element loads of the original vector type. */
7124 if (mode_for_vector (elmode,
7125 nunits / group_size).exists (&vmode)
7126 && VECTOR_MODE_P (vmode)
7127 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7128 != CODE_FOR_nothing))
7130 nloads = nunits / group_size;
7131 lnel = group_size;
7132 ltype = build_nonstandard_integer_type (lsize, 1);
7133 lvectype = build_vector_type (ltype, nloads);
7137 else
7139 nloads = 1;
7140 lnel = nunits;
7141 ltype = vectype;
7143 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7145 if (slp)
7147 /* For SLP permutation support we need to load the whole group,
7148 not only the number of vector stmts the permutation result
7149 fits in. */
7150 if (slp_perm)
7152 ncopies = (group_size * vf + nunits - 1) / nunits;
7153 dr_chain.create (ncopies);
7155 else
7156 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7158 int group_el = 0;
7159 unsigned HOST_WIDE_INT
7160 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7161 for (j = 0; j < ncopies; j++)
7163 if (nloads > 1)
7164 vec_alloc (v, nloads);
7165 for (i = 0; i < nloads; i++)
7167 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7168 group_el * elsz);
7169 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7170 build2 (MEM_REF, ltype,
7171 running_off, this_off));
7172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7173 if (nloads > 1)
7174 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7175 gimple_assign_lhs (new_stmt));
7177 group_el += lnel;
7178 if (! slp
7179 || group_el == group_size)
7181 tree newoff = copy_ssa_name (running_off);
7182 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7183 running_off, stride_step);
7184 vect_finish_stmt_generation (stmt, incr, gsi);
7186 running_off = newoff;
7187 group_el = 0;
7190 if (nloads > 1)
7192 tree vec_inv = build_constructor (lvectype, v);
7193 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7194 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7195 if (lvectype != vectype)
7197 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7198 VIEW_CONVERT_EXPR,
7199 build1 (VIEW_CONVERT_EXPR,
7200 vectype, new_temp));
7201 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7205 if (slp)
7207 if (slp_perm)
7208 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7209 else
7210 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7212 else
7214 if (j == 0)
7215 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7216 else
7217 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7218 prev_stmt_info = vinfo_for_stmt (new_stmt);
7221 if (slp_perm)
7223 unsigned n_perms;
7224 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7225 slp_node_instance, false, &n_perms);
7227 return true;
7230 if (grouped_load)
7232 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7233 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7234 /* For SLP vectorization we directly vectorize a subchain
7235 without permutation. */
7236 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7237 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7238 /* For BB vectorization always use the first stmt to base
7239 the data ref pointer on. */
7240 if (bb_vinfo)
7241 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7243 /* Check if the chain of loads is already vectorized. */
7244 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7245 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7246 ??? But we can only do so if there is exactly one
7247 as we have no way to get at the rest. Leave the CSE
7248 opportunity alone.
7249 ??? With the group load eventually participating
7250 in multiple different permutations (having multiple
7251 slp nodes which refer to the same group) the CSE
7252 would even produce wrong code. See PR56270. */
7253 && !slp)
7255 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7256 return true;
7258 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7259 group_gap_adj = 0;
7261 /* VEC_NUM is the number of vect stmts to be created for this group. */
7262 if (slp)
7264 grouped_load = false;
7265 /* For SLP permutation support we need to load the whole group,
7266 not only the number of vector stmts the permutation result
7267 fits in. */
7268 if (slp_perm)
7270 vec_num = (group_size * vf + nunits - 1) / nunits;
7271 group_gap_adj = vf * group_size - nunits * vec_num;
7273 else
7275 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7276 group_gap_adj
7277 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7280 else
7281 vec_num = group_size;
7283 ref_type = get_group_alias_ptr_type (first_stmt);
7285 else
7287 first_stmt = stmt;
7288 first_dr = dr;
7289 group_size = vec_num = 1;
7290 group_gap_adj = 0;
7291 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7294 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7295 gcc_assert (alignment_support_scheme);
7296 /* Targets with load-lane instructions must not require explicit
7297 realignment. */
7298 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7299 || alignment_support_scheme == dr_aligned
7300 || alignment_support_scheme == dr_unaligned_supported);
7302 /* In case the vectorization factor (VF) is bigger than the number
7303 of elements that we can fit in a vectype (nunits), we have to generate
7304 more than one vector stmt - i.e., we need to "unroll" the
7305 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7306 from one copy of the vector stmt to the next, in the field
7307 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7308 stages to find the correct vector defs to be used when vectorizing
7309 stmts that use the defs of the current stmt. The example below
7310 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7311 need to create 4 vectorized stmts):
7313 before vectorization:
7314 RELATED_STMT VEC_STMT
7315 S1: x = memref - -
7316 S2: z = x + 1 - -
7318 step 1: vectorize stmt S1:
7319 We first create the vector stmt VS1_0, and, as usual, record a
7320 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7321 Next, we create the vector stmt VS1_1, and record a pointer to
7322 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7323 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7324 stmts and pointers:
7325 RELATED_STMT VEC_STMT
7326 VS1_0: vx0 = memref0 VS1_1 -
7327 VS1_1: vx1 = memref1 VS1_2 -
7328 VS1_2: vx2 = memref2 VS1_3 -
7329 VS1_3: vx3 = memref3 - -
7330 S1: x = load - VS1_0
7331 S2: z = x + 1 - -
7333 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7334 information we recorded in the RELATED_STMT field is used to vectorize
7335 stmt S2. */
7337 /* In case of interleaving (non-unit grouped access):
7339 S1: x2 = &base + 2
7340 S2: x0 = &base
7341 S3: x1 = &base + 1
7342 S4: x3 = &base + 3
7344 Vectorized loads are created in the order of memory accesses
7345 starting from the access of the first stmt of the chain:
7347 VS1: vx0 = &base
7348 VS2: vx1 = &base + vec_size*1
7349 VS3: vx3 = &base + vec_size*2
7350 VS4: vx4 = &base + vec_size*3
7352 Then permutation statements are generated:
7354 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7355 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7358 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7359 (the order of the data-refs in the output of vect_permute_load_chain
7360 corresponds to the order of scalar stmts in the interleaving chain - see
7361 the documentation of vect_permute_load_chain()).
7362 The generation of permutation stmts and recording them in
7363 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7365 In case of both multiple types and interleaving, the vector loads and
7366 permutation stmts above are created for every copy. The result vector
7367 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7368 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7370 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7371 on a target that supports unaligned accesses (dr_unaligned_supported)
7372 we generate the following code:
7373 p = initial_addr;
7374 indx = 0;
7375 loop {
7376 p = p + indx * vectype_size;
7377 vec_dest = *(p);
7378 indx = indx + 1;
7381 Otherwise, the data reference is potentially unaligned on a target that
7382 does not support unaligned accesses (dr_explicit_realign_optimized) -
7383 then generate the following code, in which the data in each iteration is
7384 obtained by two vector loads, one from the previous iteration, and one
7385 from the current iteration:
7386 p1 = initial_addr;
7387 msq_init = *(floor(p1))
7388 p2 = initial_addr + VS - 1;
7389 realignment_token = call target_builtin;
7390 indx = 0;
7391 loop {
7392 p2 = p2 + indx * vectype_size
7393 lsq = *(floor(p2))
7394 vec_dest = realign_load (msq, lsq, realignment_token)
7395 indx = indx + 1;
7396 msq = lsq;
7397 } */
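/* A rough element-level sketch of the realignment scheme above, assuming
   hypothetical 4-element vectors of int and a misaligned pointer p, where
   floor(x) stands for x rounded down to the vector alignment:

     off = (p - floor (p)) / sizeof (int);     // invariant across iterations
     msq[0..3] = floor (p)[0..3];              // loaded once, before the loop
     loop {
       lsq[0..3]      = floor (p + 3)[0..3];   // *(floor(p2)), p2 = p + VS - 1
       vec_dest[0..3] = concat (msq, lsq)[off .. off+3];   // realign_load
       msq = lsq;
       p += 4;
     }
*/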
7399 /* If the misalignment remains the same throughout the execution of the
7400 loop, we can create the init_addr and permutation mask at the loop
7401 preheader. Otherwise, it needs to be created inside the loop.
7402 This can only occur when vectorizing memory accesses in the inner-loop
7403 nested within an outer-loop that is being vectorized. */
7405 if (nested_in_vect_loop
7406 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7408 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7409 compute_in_loop = true;
7412 if ((alignment_support_scheme == dr_explicit_realign_optimized
7413 || alignment_support_scheme == dr_explicit_realign)
7414 && !compute_in_loop)
7416 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7417 alignment_support_scheme, NULL_TREE,
7418 &at_loop);
7419 if (alignment_support_scheme == dr_explicit_realign_optimized)
7421 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7422 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7423 size_one_node);
7426 else
7427 at_loop = loop;
7429 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7430 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7432 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7433 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7434 else
7435 aggr_type = vectype;
7437 prev_stmt_info = NULL;
7438 int group_elt = 0;
7439 for (j = 0; j < ncopies; j++)
7441 /* 1. Create the vector or array pointer update chain. */
7442 if (j == 0)
7444 bool simd_lane_access_p
7445 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7446 if (simd_lane_access_p
7447 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7448 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7449 && integer_zerop (DR_OFFSET (first_dr))
7450 && integer_zerop (DR_INIT (first_dr))
7451 && alias_sets_conflict_p (get_alias_set (aggr_type),
7452 get_alias_set (TREE_TYPE (ref_type)))
7453 && (alignment_support_scheme == dr_aligned
7454 || alignment_support_scheme == dr_unaligned_supported))
7456 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7457 dataref_offset = build_int_cst (ref_type, 0);
7458 inv_p = false;
7460 else if (first_stmt_for_drptr
7461 && first_stmt != first_stmt_for_drptr)
7463 dataref_ptr
7464 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7465 at_loop, offset, &dummy, gsi,
7466 &ptr_incr, simd_lane_access_p,
7467 &inv_p, byte_offset);
7468 /* Adjust the pointer by the difference to first_stmt. */
7469 data_reference_p ptrdr
7470 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7471 tree diff = fold_convert (sizetype,
7472 size_binop (MINUS_EXPR,
7473 DR_INIT (first_dr),
7474 DR_INIT (ptrdr)));
7475 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7476 stmt, diff);
7478 else
7479 dataref_ptr
7480 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7481 offset, &dummy, gsi, &ptr_incr,
7482 simd_lane_access_p, &inv_p,
7483 byte_offset);
7485 else if (dataref_offset)
7486 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7487 TYPE_SIZE_UNIT (aggr_type));
7488 else
7489 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7490 TYPE_SIZE_UNIT (aggr_type));
7492 if (grouped_load || slp_perm)
7493 dr_chain.create (vec_num);
7495 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7497 tree vec_array;
7499 vec_array = create_vector_array (vectype, vec_num);
7501 /* Emit:
7502 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7503 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7504 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7505 data_ref);
7506 gimple_call_set_lhs (call, vec_array);
7507 gimple_call_set_nothrow (call, true);
7508 new_stmt = call;
7509 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7511 /* Extract each vector into an SSA_NAME. */
7512 for (i = 0; i < vec_num; i++)
7514 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7515 vec_array, i);
7516 dr_chain.quick_push (new_temp);
7519 /* Record the mapping between SSA_NAMEs and statements. */
7520 vect_record_grouped_load_vectors (stmt, dr_chain);
7522 else
7524 for (i = 0; i < vec_num; i++)
7526 if (i > 0)
7527 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7528 stmt, NULL_TREE);
7530 /* 2. Create the vector-load in the loop. */
7531 switch (alignment_support_scheme)
7533 case dr_aligned:
7534 case dr_unaligned_supported:
7536 unsigned int align, misalign;
7538 data_ref
7539 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7540 dataref_offset
7541 ? dataref_offset
7542 : build_int_cst (ref_type, 0));
7543 align = DR_TARGET_ALIGNMENT (dr);
7544 if (alignment_support_scheme == dr_aligned)
7546 gcc_assert (aligned_access_p (first_dr));
7547 misalign = 0;
7549 else if (DR_MISALIGNMENT (first_dr) == -1)
7551 align = dr_alignment (vect_dr_behavior (first_dr));
7552 misalign = 0;
7553 TREE_TYPE (data_ref)
7554 = build_aligned_type (TREE_TYPE (data_ref),
7555 align * BITS_PER_UNIT);
7557 else
7559 TREE_TYPE (data_ref)
7560 = build_aligned_type (TREE_TYPE (data_ref),
7561 TYPE_ALIGN (elem_type));
7562 misalign = DR_MISALIGNMENT (first_dr);
7564 if (dataref_offset == NULL_TREE
7565 && TREE_CODE (dataref_ptr) == SSA_NAME)
7566 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7567 align, misalign);
7568 break;
7570 case dr_explicit_realign:
7572 tree ptr, bump;
7574 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7576 if (compute_in_loop)
7577 msq = vect_setup_realignment (first_stmt, gsi,
7578 &realignment_token,
7579 dr_explicit_realign,
7580 dataref_ptr, NULL);
7582 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7583 ptr = copy_ssa_name (dataref_ptr);
7584 else
7585 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7586 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7587 new_stmt = gimple_build_assign
7588 (ptr, BIT_AND_EXPR, dataref_ptr,
7589 build_int_cst
7590 (TREE_TYPE (dataref_ptr),
7591 -(HOST_WIDE_INT) align));
7592 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7593 data_ref
7594 = build2 (MEM_REF, vectype, ptr,
7595 build_int_cst (ref_type, 0));
7596 vec_dest = vect_create_destination_var (scalar_dest,
7597 vectype);
7598 new_stmt = gimple_build_assign (vec_dest, data_ref);
7599 new_temp = make_ssa_name (vec_dest, new_stmt);
7600 gimple_assign_set_lhs (new_stmt, new_temp);
7601 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7602 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7603 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7604 msq = new_temp;
7606 bump = size_binop (MULT_EXPR, vs,
7607 TYPE_SIZE_UNIT (elem_type));
7608 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7609 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7610 new_stmt = gimple_build_assign
7611 (NULL_TREE, BIT_AND_EXPR, ptr,
7612 build_int_cst
7613 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7614 ptr = copy_ssa_name (ptr, new_stmt);
7615 gimple_assign_set_lhs (new_stmt, ptr);
7616 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7617 data_ref
7618 = build2 (MEM_REF, vectype, ptr,
7619 build_int_cst (ref_type, 0));
7620 break;
7622 case dr_explicit_realign_optimized:
7624 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7625 new_temp = copy_ssa_name (dataref_ptr);
7626 else
7627 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7628 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7629 new_stmt = gimple_build_assign
7630 (new_temp, BIT_AND_EXPR, dataref_ptr,
7631 build_int_cst (TREE_TYPE (dataref_ptr),
7632 -(HOST_WIDE_INT) align));
7633 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7634 data_ref
7635 = build2 (MEM_REF, vectype, new_temp,
7636 build_int_cst (ref_type, 0));
7637 break;
7639 default:
7640 gcc_unreachable ();
7642 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7643 new_stmt = gimple_build_assign (vec_dest, data_ref);
7644 new_temp = make_ssa_name (vec_dest, new_stmt);
7645 gimple_assign_set_lhs (new_stmt, new_temp);
7646 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7648 /* 3. Handle explicit realignment if necessary/supported.
7649 Create in loop:
7650 vec_dest = realign_load (msq, lsq, realignment_token) */
7651 if (alignment_support_scheme == dr_explicit_realign_optimized
7652 || alignment_support_scheme == dr_explicit_realign)
7654 lsq = gimple_assign_lhs (new_stmt);
7655 if (!realignment_token)
7656 realignment_token = dataref_ptr;
7657 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7658 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7659 msq, lsq, realignment_token);
7660 new_temp = make_ssa_name (vec_dest, new_stmt);
7661 gimple_assign_set_lhs (new_stmt, new_temp);
7662 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7664 if (alignment_support_scheme == dr_explicit_realign_optimized)
7666 gcc_assert (phi);
7667 if (i == vec_num - 1 && j == ncopies - 1)
7668 add_phi_arg (phi, lsq,
7669 loop_latch_edge (containing_loop),
7670 UNKNOWN_LOCATION);
7671 msq = lsq;
7675 /* 4. Handle invariant-load. */
7676 if (inv_p && !bb_vinfo)
7678 gcc_assert (!grouped_load);
7679 /* If we have versioned for aliasing or the loop doesn't
7680 have any data dependencies that would preclude this,
7681 then we are sure this is a loop invariant load and
7682 thus we can insert it on the preheader edge. */
7683 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7684 && !nested_in_vect_loop
7685 && hoist_defs_of_uses (stmt, loop))
7687 if (dump_enabled_p ())
7689 dump_printf_loc (MSG_NOTE, vect_location,
7690 "hoisting out of the vectorized "
7691 "loop: ");
7692 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7694 tree tem = copy_ssa_name (scalar_dest);
7695 gsi_insert_on_edge_immediate
7696 (loop_preheader_edge (loop),
7697 gimple_build_assign (tem,
7698 unshare_expr
7699 (gimple_assign_rhs1 (stmt))));
7700 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7701 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7702 set_vinfo_for_stmt (new_stmt,
7703 new_stmt_vec_info (new_stmt, vinfo));
7705 else
7707 gimple_stmt_iterator gsi2 = *gsi;
7708 gsi_next (&gsi2);
7709 new_temp = vect_init_vector (stmt, scalar_dest,
7710 vectype, &gsi2);
7711 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7715 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7717 tree perm_mask = perm_mask_for_reverse (vectype);
7718 new_temp = permute_vec_elements (new_temp, new_temp,
7719 perm_mask, stmt, gsi);
7720 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7723 /* Collect vector loads and later create their permutation in
7724 vect_transform_grouped_load (). */
7725 if (grouped_load || slp_perm)
7726 dr_chain.quick_push (new_temp);
7728 /* Store vector loads in the corresponding SLP_NODE. */
7729 if (slp && !slp_perm)
7730 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7732 /* With SLP permutation we load the gaps as well; without it
7733 we need to skip the gaps after we manage to fully load
7734 all the elements. group_gap_adj is GROUP_SIZE here. */
7735 group_elt += nunits;
7736 if (group_gap_adj != 0 && ! slp_perm
7737 && group_elt == group_size - group_gap_adj)
7739 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7740 * group_gap_adj);
7741 tree bump = wide_int_to_tree (sizetype, bump_val);
7742 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7743 stmt, bump);
7744 group_elt = 0;
7747 /* Bump the vector pointer to account for a gap or for excess
7748 elements loaded for a permuted SLP load. */
7749 if (group_gap_adj != 0 && slp_perm)
7751 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7752 * group_gap_adj);
7753 tree bump = wide_int_to_tree (sizetype, bump_val);
7754 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7755 stmt, bump);
7759 if (slp && !slp_perm)
7760 continue;
7762 if (slp_perm)
7764 unsigned n_perms;
7765 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7766 slp_node_instance, false,
7767 &n_perms))
7769 dr_chain.release ();
7770 return false;
7773 else
7775 if (grouped_load)
7777 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7778 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7779 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7781 else
7783 if (j == 0)
7784 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7785 else
7786 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7787 prev_stmt_info = vinfo_for_stmt (new_stmt);
7790 dr_chain.release ();
7793 return true;
7796 /* Function vect_is_simple_cond.
7798 Input:
7799 LOOP - the loop that is being vectorized.
7800 COND - Condition that is checked for simple use.
7802 Output:
7803 *COMP_VECTYPE - the vector type for the comparison.
7804 *DTS - The def types for the arguments of the comparison.
7806 Returns whether a COND can be vectorized. Checks whether
7807 the condition operands are supportable using vect_is_simple_use. */
7809 static bool
7810 vect_is_simple_cond (tree cond, vec_info *vinfo,
7811 tree *comp_vectype, enum vect_def_type *dts,
7812 tree vectype)
7814 tree lhs, rhs;
7815 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7817 /* Mask case. */
7818 if (TREE_CODE (cond) == SSA_NAME
7819 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7821 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7822 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7823 &dts[0], comp_vectype)
7824 || !*comp_vectype
7825 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7826 return false;
7827 return true;
7830 if (!COMPARISON_CLASS_P (cond))
7831 return false;
7833 lhs = TREE_OPERAND (cond, 0);
7834 rhs = TREE_OPERAND (cond, 1);
7836 if (TREE_CODE (lhs) == SSA_NAME)
7838 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7839 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7840 return false;
7842 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7843 || TREE_CODE (lhs) == FIXED_CST)
7844 dts[0] = vect_constant_def;
7845 else
7846 return false;
7848 if (TREE_CODE (rhs) == SSA_NAME)
7850 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7851 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7852 return false;
7854 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7855 || TREE_CODE (rhs) == FIXED_CST)
7856 dts[1] = vect_constant_def;
7857 else
7858 return false;
7860 if (vectype1 && vectype2
7861 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7862 return false;
7864 *comp_vectype = vectype1 ? vectype1 : vectype2;
7865 /* Invariant comparison. */
7866 if (! *comp_vectype)
7868 tree scalar_type = TREE_TYPE (lhs);
7869 /* If we can widen the comparison to match vectype do so. */
7870 if (INTEGRAL_TYPE_P (scalar_type)
7871 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7872 TYPE_SIZE (TREE_TYPE (vectype))))
7873 scalar_type = build_nonstandard_integer_type
7874 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7875 TYPE_UNSIGNED (scalar_type));
7876 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7879 return true;
7882 /* vectorizable_condition.
7884 Check if STMT is a conditional modify expression that can be vectorized.
7885 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7886 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7887 at GSI.
7889 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7890 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7891 the else clause if it is 2).
7893 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
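/* For example (illustrative GIMPLE, all SSA names made up), the scalar stmt

     x_1 = a_2 < b_3 ? c_4 : d_5;

   would be replaced by something along the lines of

     vect_x_6 = VEC_COND_EXPR <vect_a_7 < vect_b_8, vect_c_9, vect_d_10>;  */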
7895 bool
7896 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7897 gimple **vec_stmt, tree reduc_def, int reduc_index,
7898 slp_tree slp_node)
7900 tree scalar_dest = NULL_TREE;
7901 tree vec_dest = NULL_TREE;
7902 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7903 tree then_clause, else_clause;
7904 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7905 tree comp_vectype = NULL_TREE;
7906 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7907 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7908 tree vec_compare;
7909 tree new_temp;
7910 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7911 enum vect_def_type dts[4]
7912 = {vect_unknown_def_type, vect_unknown_def_type,
7913 vect_unknown_def_type, vect_unknown_def_type};
7914 int ndts = 4;
7915 int ncopies;
7916 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7917 stmt_vec_info prev_stmt_info = NULL;
7918 int i, j;
7919 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7920 vec<tree> vec_oprnds0 = vNULL;
7921 vec<tree> vec_oprnds1 = vNULL;
7922 vec<tree> vec_oprnds2 = vNULL;
7923 vec<tree> vec_oprnds3 = vNULL;
7924 tree vec_cmp_type;
7925 bool masked = false;
7927 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7928 return false;
7930 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7932 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7933 return false;
7935 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7936 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7937 && reduc_def))
7938 return false;
7940 /* FORNOW: not yet supported. */
7941 if (STMT_VINFO_LIVE_P (stmt_info))
7943 if (dump_enabled_p ())
7944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7945 "value used after loop.\n");
7946 return false;
7950 /* Is vectorizable conditional operation? */
7951 if (!is_gimple_assign (stmt))
7952 return false;
7954 code = gimple_assign_rhs_code (stmt);
7956 if (code != COND_EXPR)
7957 return false;
7959 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7960 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7962 if (slp_node)
7963 ncopies = 1;
7964 else
7965 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7967 gcc_assert (ncopies >= 1);
7968 if (reduc_index && ncopies > 1)
7969 return false; /* FORNOW */
7971 cond_expr = gimple_assign_rhs1 (stmt);
7972 then_clause = gimple_assign_rhs2 (stmt);
7973 else_clause = gimple_assign_rhs3 (stmt);
7975 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7976 &comp_vectype, &dts[0], vectype)
7977 || !comp_vectype)
7978 return false;
7980 gimple *def_stmt;
7981 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7982 &vectype1))
7983 return false;
7984 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7985 &vectype2))
7986 return false;
7988 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7989 return false;
7991 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7992 return false;
7994 masked = !COMPARISON_CLASS_P (cond_expr);
7995 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7997 if (vec_cmp_type == NULL_TREE)
7998 return false;
8000 cond_code = TREE_CODE (cond_expr);
8001 if (!masked)
8003 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8004 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8007 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8009 /* Boolean values may have another representation in vectors
8010 and therefore we prefer bit operations over comparison for
8011 them (which also works for scalar masks). We store opcodes
8012 to use in bitop1 and bitop2. Statement is vectorized as
8013 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8014 depending on bitop1 and bitop2 arity. */
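/* For example, with boolean elements a GT_EXPR a > b is true only for
   a = 1, b = 0, i.e. it is equivalent to a & ~b; hence the GT_EXPR case
   below uses bitop1 = BIT_NOT_EXPR (unary, applied to the second operand)
   and bitop2 = BIT_AND_EXPR, giving rhs1 BITOP2 (BITOP1 rhs2).  */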
8015 switch (cond_code)
8017 case GT_EXPR:
8018 bitop1 = BIT_NOT_EXPR;
8019 bitop2 = BIT_AND_EXPR;
8020 break;
8021 case GE_EXPR:
8022 bitop1 = BIT_NOT_EXPR;
8023 bitop2 = BIT_IOR_EXPR;
8024 break;
8025 case LT_EXPR:
8026 bitop1 = BIT_NOT_EXPR;
8027 bitop2 = BIT_AND_EXPR;
8028 std::swap (cond_expr0, cond_expr1);
8029 break;
8030 case LE_EXPR:
8031 bitop1 = BIT_NOT_EXPR;
8032 bitop2 = BIT_IOR_EXPR;
8033 std::swap (cond_expr0, cond_expr1);
8034 break;
8035 case NE_EXPR:
8036 bitop1 = BIT_XOR_EXPR;
8037 break;
8038 case EQ_EXPR:
8039 bitop1 = BIT_XOR_EXPR;
8040 bitop2 = BIT_NOT_EXPR;
8041 break;
8042 default:
8043 return false;
8045 cond_code = SSA_NAME;
8048 if (!vec_stmt)
8050 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8051 if (bitop1 != NOP_EXPR)
8053 machine_mode mode = TYPE_MODE (comp_vectype);
8054 optab optab;
8056 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8057 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8058 return false;
8060 if (bitop2 != NOP_EXPR)
8062 optab = optab_for_tree_code (bitop2, comp_vectype,
8063 optab_default);
8064 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8065 return false;
8068 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8069 cond_code))
8071 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8072 return true;
8074 return false;
8077 /* Transform. */
8079 if (!slp_node)
8081 vec_oprnds0.create (1);
8082 vec_oprnds1.create (1);
8083 vec_oprnds2.create (1);
8084 vec_oprnds3.create (1);
8087 /* Handle def. */
8088 scalar_dest = gimple_assign_lhs (stmt);
8089 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8091 /* Handle cond expr. */
8092 for (j = 0; j < ncopies; j++)
8094 gassign *new_stmt = NULL;
8095 if (j == 0)
8097 if (slp_node)
8099 auto_vec<tree, 4> ops;
8100 auto_vec<vec<tree>, 4> vec_defs;
8102 if (masked)
8103 ops.safe_push (cond_expr);
8104 else
8106 ops.safe_push (cond_expr0);
8107 ops.safe_push (cond_expr1);
8109 ops.safe_push (then_clause);
8110 ops.safe_push (else_clause);
8111 vect_get_slp_defs (ops, slp_node, &vec_defs);
8112 vec_oprnds3 = vec_defs.pop ();
8113 vec_oprnds2 = vec_defs.pop ();
8114 if (!masked)
8115 vec_oprnds1 = vec_defs.pop ();
8116 vec_oprnds0 = vec_defs.pop ();
8118 else
8120 gimple *gtemp;
8121 if (masked)
8123 vec_cond_lhs
8124 = vect_get_vec_def_for_operand (cond_expr, stmt,
8125 comp_vectype);
8126 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8127 &gtemp, &dts[0]);
8129 else
8131 vec_cond_lhs
8132 = vect_get_vec_def_for_operand (cond_expr0,
8133 stmt, comp_vectype);
8134 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8136 vec_cond_rhs
8137 = vect_get_vec_def_for_operand (cond_expr1,
8138 stmt, comp_vectype);
8139 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8141 if (reduc_index == 1)
8142 vec_then_clause = reduc_def;
8143 else
8145 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8146 stmt);
8147 vect_is_simple_use (then_clause, loop_vinfo,
8148 &gtemp, &dts[2]);
8150 if (reduc_index == 2)
8151 vec_else_clause = reduc_def;
8152 else
8154 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8155 stmt);
8156 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8160 else
8162 vec_cond_lhs
8163 = vect_get_vec_def_for_stmt_copy (dts[0],
8164 vec_oprnds0.pop ());
8165 if (!masked)
8166 vec_cond_rhs
8167 = vect_get_vec_def_for_stmt_copy (dts[1],
8168 vec_oprnds1.pop ());
8170 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8171 vec_oprnds2.pop ());
8172 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8173 vec_oprnds3.pop ());
8176 if (!slp_node)
8178 vec_oprnds0.quick_push (vec_cond_lhs);
8179 if (!masked)
8180 vec_oprnds1.quick_push (vec_cond_rhs);
8181 vec_oprnds2.quick_push (vec_then_clause);
8182 vec_oprnds3.quick_push (vec_else_clause);
8185 /* Arguments are ready. Create the new vector stmt. */
8186 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8188 vec_then_clause = vec_oprnds2[i];
8189 vec_else_clause = vec_oprnds3[i];
8191 if (masked)
8192 vec_compare = vec_cond_lhs;
8193 else
8195 vec_cond_rhs = vec_oprnds1[i];
8196 if (bitop1 == NOP_EXPR)
8197 vec_compare = build2 (cond_code, vec_cmp_type,
8198 vec_cond_lhs, vec_cond_rhs);
8199 else
8201 new_temp = make_ssa_name (vec_cmp_type);
8202 if (bitop1 == BIT_NOT_EXPR)
8203 new_stmt = gimple_build_assign (new_temp, bitop1,
8204 vec_cond_rhs);
8205 else
8206 new_stmt
8207 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8208 vec_cond_rhs);
8209 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8210 if (bitop2 == NOP_EXPR)
8211 vec_compare = new_temp;
8212 else if (bitop2 == BIT_NOT_EXPR)
8214 /* Instead of doing ~x ? y : z do x ? z : y. */
8215 vec_compare = new_temp;
8216 std::swap (vec_then_clause, vec_else_clause);
8218 else
8220 vec_compare = make_ssa_name (vec_cmp_type);
8221 new_stmt
8222 = gimple_build_assign (vec_compare, bitop2,
8223 vec_cond_lhs, new_temp);
8224 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8228 new_temp = make_ssa_name (vec_dest);
8229 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8230 vec_compare, vec_then_clause,
8231 vec_else_clause);
8232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8233 if (slp_node)
8234 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8237 if (slp_node)
8238 continue;
8240 if (j == 0)
8241 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8242 else
8243 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8245 prev_stmt_info = vinfo_for_stmt (new_stmt);
8248 vec_oprnds0.release ();
8249 vec_oprnds1.release ();
8250 vec_oprnds2.release ();
8251 vec_oprnds3.release ();
8253 return true;
8256 /* vectorizable_comparison.
8258 Check if STMT is a comparison expression that can be vectorized.
8259 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8260 comparison, put it in VEC_STMT, and insert it at GSI.
8262 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8264 static bool
8265 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8266 gimple **vec_stmt, tree reduc_def,
8267 slp_tree slp_node)
8269 tree lhs, rhs1, rhs2;
8270 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8271 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8272 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8273 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8274 tree new_temp;
8275 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8276 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8277 int ndts = 2;
8278 unsigned nunits;
8279 int ncopies;
8280 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8281 stmt_vec_info prev_stmt_info = NULL;
8282 int i, j;
8283 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8284 vec<tree> vec_oprnds0 = vNULL;
8285 vec<tree> vec_oprnds1 = vNULL;
8286 gimple *def_stmt;
8287 tree mask_type;
8288 tree mask;
8290 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8291 return false;
8293 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8294 return false;
8296 mask_type = vectype;
8297 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8299 if (slp_node)
8300 ncopies = 1;
8301 else
8302 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8304 gcc_assert (ncopies >= 1);
8305 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8306 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8307 && reduc_def))
8308 return false;
8310 if (STMT_VINFO_LIVE_P (stmt_info))
8312 if (dump_enabled_p ())
8313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8314 "value used after loop.\n");
8315 return false;
8318 if (!is_gimple_assign (stmt))
8319 return false;
8321 code = gimple_assign_rhs_code (stmt);
8323 if (TREE_CODE_CLASS (code) != tcc_comparison)
8324 return false;
8326 rhs1 = gimple_assign_rhs1 (stmt);
8327 rhs2 = gimple_assign_rhs2 (stmt);
8329 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8330 &dts[0], &vectype1))
8331 return false;
8333 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8334 &dts[1], &vectype2))
8335 return false;
8337 if (vectype1 && vectype2
8338 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8339 return false;
8341 vectype = vectype1 ? vectype1 : vectype2;
8343 /* Invariant comparison. */
8344 if (!vectype)
8346 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8347 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8348 return false;
8350 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8351 return false;
8353 /* Can't compare mask and non-mask types. */
8354 if (vectype1 && vectype2
8355 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8356 return false;
8358 /* Boolean values may have another representation in vectors
8359 and therefore we prefer bit operations over comparison for
8360 them (which also works for scalar masks). We store opcodes
8361 to use in bitop1 and bitop2. Statement is vectorized as
8362 BITOP2 (rhs1 BITOP1 rhs2) or
8363 rhs1 BITOP2 (BITOP1 rhs2)
8364 depending on bitop1 and bitop2 arity. */
8365 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8367 if (code == GT_EXPR)
8369 bitop1 = BIT_NOT_EXPR;
8370 bitop2 = BIT_AND_EXPR;
8372 else if (code == GE_EXPR)
8374 bitop1 = BIT_NOT_EXPR;
8375 bitop2 = BIT_IOR_EXPR;
8377 else if (code == LT_EXPR)
8379 bitop1 = BIT_NOT_EXPR;
8380 bitop2 = BIT_AND_EXPR;
8381 std::swap (rhs1, rhs2);
8382 std::swap (dts[0], dts[1]);
8384 else if (code == LE_EXPR)
8386 bitop1 = BIT_NOT_EXPR;
8387 bitop2 = BIT_IOR_EXPR;
8388 std::swap (rhs1, rhs2);
8389 std::swap (dts[0], dts[1]);
8391 else
8393 bitop1 = BIT_XOR_EXPR;
8394 if (code == EQ_EXPR)
8395 bitop2 = BIT_NOT_EXPR;
8399 if (!vec_stmt)
8401 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8402 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8403 dts, ndts, NULL, NULL);
8404 if (bitop1 == NOP_EXPR)
8405 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8406 else
8408 machine_mode mode = TYPE_MODE (vectype);
8409 optab optab;
8411 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8412 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8413 return false;
8415 if (bitop2 != NOP_EXPR)
8417 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8418 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8419 return false;
8421 return true;
8425 /* Transform. */
8426 if (!slp_node)
8428 vec_oprnds0.create (1);
8429 vec_oprnds1.create (1);
8432 /* Handle def. */
8433 lhs = gimple_assign_lhs (stmt);
8434 mask = vect_create_destination_var (lhs, mask_type);
8436 /* Handle cmp expr. */
8437 for (j = 0; j < ncopies; j++)
8439 gassign *new_stmt = NULL;
8440 if (j == 0)
8442 if (slp_node)
8444 auto_vec<tree, 2> ops;
8445 auto_vec<vec<tree>, 2> vec_defs;
8447 ops.safe_push (rhs1);
8448 ops.safe_push (rhs2);
8449 vect_get_slp_defs (ops, slp_node, &vec_defs);
8450 vec_oprnds1 = vec_defs.pop ();
8451 vec_oprnds0 = vec_defs.pop ();
8453 else
8455 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8456 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8459 else
8461 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8462 vec_oprnds0.pop ());
8463 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8464 vec_oprnds1.pop ());
8467 if (!slp_node)
8469 vec_oprnds0.quick_push (vec_rhs1);
8470 vec_oprnds1.quick_push (vec_rhs2);
8473 /* Arguments are ready. Create the new vector stmt. */
8474 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8476 vec_rhs2 = vec_oprnds1[i];
8478 new_temp = make_ssa_name (mask);
8479 if (bitop1 == NOP_EXPR)
8481 new_stmt = gimple_build_assign (new_temp, code,
8482 vec_rhs1, vec_rhs2);
8483 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8485 else
8487 if (bitop1 == BIT_NOT_EXPR)
8488 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8489 else
8490 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8491 vec_rhs2);
8492 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8493 if (bitop2 != NOP_EXPR)
8495 tree res = make_ssa_name (mask);
8496 if (bitop2 == BIT_NOT_EXPR)
8497 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8498 else
8499 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8500 new_temp);
8501 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8504 if (slp_node)
8505 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8508 if (slp_node)
8509 continue;
8511 if (j == 0)
8512 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8513 else
8514 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8516 prev_stmt_info = vinfo_for_stmt (new_stmt);
8519 vec_oprnds0.release ();
8520 vec_oprnds1.release ();
8522 return true;
8525 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8526 can handle all live statements in the node. Otherwise return true
8527 if STMT is not live or if vectorizable_live_operation can handle it.
8528 GSI and VEC_STMT are as for vectorizable_live_operation. */
8530 static bool
8531 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8532 slp_tree slp_node, gimple **vec_stmt)
8534 if (slp_node)
8536 gimple *slp_stmt;
8537 unsigned int i;
8538 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8540 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8541 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8542 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8543 vec_stmt))
8544 return false;
8547 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8548 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8549 return false;
8551 return true;
8554 /* Make sure the statement is vectorizable. */
8556 bool
8557 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8558 slp_instance node_instance)
8560 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8561 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8562 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8563 bool ok;
8564 gimple *pattern_stmt;
8565 gimple_seq pattern_def_seq;
8567 if (dump_enabled_p ())
8569 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8570 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8573 if (gimple_has_volatile_ops (stmt))
8575 if (dump_enabled_p ())
8576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8577 "not vectorized: stmt has volatile operands\n");
8579 return false;
8582 /* Skip stmts that do not need to be vectorized. In loops this is expected
8583 to include:
8584 - the COND_EXPR which is the loop exit condition
8585 - any LABEL_EXPRs in the loop
8586 - computations that are used only for array indexing or loop control.
8587 In basic blocks we only analyze statements that are a part of some SLP
8588 instance, therefore, all the statements are relevant.
8590 Pattern statement needs to be analyzed instead of the original statement
8591 if the original statement is not relevant. Otherwise, we analyze both
8592 statements. In basic blocks we are called from some SLP instance
8593 traversal; don't analyze pattern stmts instead, since the pattern stmts
8594 will already be part of the SLP instance. */
8596 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8597 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8598 && !STMT_VINFO_LIVE_P (stmt_info))
8600 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8601 && pattern_stmt
8602 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8603 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8605 /* Analyze PATTERN_STMT instead of the original stmt. */
8606 stmt = pattern_stmt;
8607 stmt_info = vinfo_for_stmt (pattern_stmt);
8608 if (dump_enabled_p ())
8610 dump_printf_loc (MSG_NOTE, vect_location,
8611 "==> examining pattern statement: ");
8612 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8615 else
8617 if (dump_enabled_p ())
8618 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8620 return true;
8623 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8624 && node == NULL
8625 && pattern_stmt
8626 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8627 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8629 /* Analyze PATTERN_STMT too. */
8630 if (dump_enabled_p ())
8632 dump_printf_loc (MSG_NOTE, vect_location,
8633 "==> examining pattern statement: ");
8634 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8637 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8638 node_instance))
8639 return false;
8642 if (is_pattern_stmt_p (stmt_info)
8643 && node == NULL
8644 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8646 gimple_stmt_iterator si;
8648 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8650 gimple *pattern_def_stmt = gsi_stmt (si);
8651 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8652 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8654 /* Analyze def stmt of STMT if it's a pattern stmt. */
8655 if (dump_enabled_p ())
8657 dump_printf_loc (MSG_NOTE, vect_location,
8658 "==> examining pattern def statement: ");
8659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8662 if (!vect_analyze_stmt (pattern_def_stmt,
8663 need_to_vectorize, node, node_instance))
8664 return false;
8669 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8671 case vect_internal_def:
8672 break;
8674 case vect_reduction_def:
8675 case vect_nested_cycle:
8676 gcc_assert (!bb_vinfo
8677 && (relevance == vect_used_in_outer
8678 || relevance == vect_used_in_outer_by_reduction
8679 || relevance == vect_used_by_reduction
8680 || relevance == vect_unused_in_scope
8681 || relevance == vect_used_only_live));
8682 break;
8684 case vect_induction_def:
8685 gcc_assert (!bb_vinfo);
8686 break;
8688 case vect_constant_def:
8689 case vect_external_def:
8690 case vect_unknown_def_type:
8691 default:
8692 gcc_unreachable ();
8695 if (STMT_VINFO_RELEVANT_P (stmt_info))
8697 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8698 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8699 || (is_gimple_call (stmt)
8700 && gimple_call_lhs (stmt) == NULL_TREE));
8701 *need_to_vectorize = true;
8704 if (PURE_SLP_STMT (stmt_info) && !node)
8706 dump_printf_loc (MSG_NOTE, vect_location,
8707 "handled only by SLP analysis\n");
8708 return true;
8711 ok = true;
8712 if (!bb_vinfo
8713 && (STMT_VINFO_RELEVANT_P (stmt_info)
8714 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8715 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8716 || vectorizable_conversion (stmt, NULL, NULL, node)
8717 || vectorizable_shift (stmt, NULL, NULL, node)
8718 || vectorizable_operation (stmt, NULL, NULL, node)
8719 || vectorizable_assignment (stmt, NULL, NULL, node)
8720 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8721 || vectorizable_call (stmt, NULL, NULL, node)
8722 || vectorizable_store (stmt, NULL, NULL, node)
8723 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8724 || vectorizable_induction (stmt, NULL, NULL, node)
8725 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8726 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8727 else
8729 if (bb_vinfo)
8730 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8731 || vectorizable_conversion (stmt, NULL, NULL, node)
8732 || vectorizable_shift (stmt, NULL, NULL, node)
8733 || vectorizable_operation (stmt, NULL, NULL, node)
8734 || vectorizable_assignment (stmt, NULL, NULL, node)
8735 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8736 || vectorizable_call (stmt, NULL, NULL, node)
8737 || vectorizable_store (stmt, NULL, NULL, node)
8738 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8739 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8742 if (!ok)
8744 if (dump_enabled_p ())
8746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8747 "not vectorized: relevant stmt not ");
8748 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8749 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8752 return false;
8755 if (bb_vinfo)
8756 return true;
8758 /* Stmts that are (also) "live" (i.e., used outside of the loop)
8759 need extra handling, except for vectorizable reductions. */
8760 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8761 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8763 if (dump_enabled_p ())
8765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8766 "not vectorized: live stmt not supported: ");
8767 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8770 return false;
8773 return true;
8777 /* Function vect_transform_stmt.
8779 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8781 bool
8782 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8783 bool *grouped_store, slp_tree slp_node,
8784 slp_instance slp_node_instance)
8786 bool is_store = false;
8787 gimple *vec_stmt = NULL;
8788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8789 bool done;
8791 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8792 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8794 switch (STMT_VINFO_TYPE (stmt_info))
8796 case type_demotion_vec_info_type:
8797 case type_promotion_vec_info_type:
8798 case type_conversion_vec_info_type:
8799 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8800 gcc_assert (done);
8801 break;
8803 case induc_vec_info_type:
8804 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8805 gcc_assert (done);
8806 break;
8808 case shift_vec_info_type:
8809 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8810 gcc_assert (done);
8811 break;
8813 case op_vec_info_type:
8814 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8815 gcc_assert (done);
8816 break;
8818 case assignment_vec_info_type:
8819 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8820 gcc_assert (done);
8821 break;
8823 case load_vec_info_type:
8824 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8825 slp_node_instance);
8826 gcc_assert (done);
8827 break;
8829 case store_vec_info_type:
8830 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8831 gcc_assert (done);
8832 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8834 /* In case of interleaving, the whole chain is vectorized when the
8835 last store in the chain is reached. Store stmts before the last
8836 one are skipped, and their stmt_vec_info shouldn't be freed
8837 meanwhile. */
8838 *grouped_store = true;
8839 if (STMT_VINFO_VEC_STMT (stmt_info))
8840 is_store = true;
8842 else
8843 is_store = true;
8844 break;
8846 case condition_vec_info_type:
8847 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8848 gcc_assert (done);
8849 break;
8851 case comparison_vec_info_type:
8852 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8853 gcc_assert (done);
8854 break;
8856 case call_vec_info_type:
8857 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8858 stmt = gsi_stmt (*gsi);
8859 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8860 is_store = true;
8861 break;
8863 case call_simd_clone_vec_info_type:
8864 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8865 stmt = gsi_stmt (*gsi);
8866 break;
8868 case reduc_vec_info_type:
8869 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8870 slp_node_instance);
8871 gcc_assert (done);
8872 break;
8874 default:
8875 if (!STMT_VINFO_LIVE_P (stmt_info))
8877 if (dump_enabled_p ())
8878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8879 "stmt not supported.\n");
8880 gcc_unreachable ();
8884 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8885 This would break hybrid SLP vectorization. */
8886 if (slp_node)
8887 gcc_assert (!vec_stmt
8888 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8890 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8891 is being vectorized, but outside the immediately enclosing loop. */
8892 if (vec_stmt
8893 && STMT_VINFO_LOOP_VINFO (stmt_info)
8894 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8895 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8896 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8897 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8898 || STMT_VINFO_RELEVANT (stmt_info) ==
8899 vect_used_in_outer_by_reduction))
8901 struct loop *innerloop = LOOP_VINFO_LOOP (
8902 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8903 imm_use_iterator imm_iter;
8904 use_operand_p use_p;
8905 tree scalar_dest;
8906 gimple *exit_phi;
8908 if (dump_enabled_p ())
8909 dump_printf_loc (MSG_NOTE, vect_location,
8910 "Record the vdef for outer-loop vectorization.\n");
8912 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8913 (to be used when vectorizing outer-loop stmts that use the DEF of
8914 STMT). */
8915 if (gimple_code (stmt) == GIMPLE_PHI)
8916 scalar_dest = PHI_RESULT (stmt);
8917 else
8918 scalar_dest = gimple_assign_lhs (stmt);
8920 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8922 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8924 exit_phi = USE_STMT (use_p);
8925 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8930 /* Handle stmts whose DEF is used outside the loop-nest that is
8931 being vectorized. */
8932 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8934 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8935 gcc_assert (done);
8938 if (vec_stmt)
8939 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8941 return is_store;
8945 /* Remove a group of stores (for SLP or interleaving), free their
8946 stmt_vec_info. */
8948 void
8949 vect_remove_stores (gimple *first_stmt)
8951 gimple *next = first_stmt;
8952 gimple *tmp;
8953 gimple_stmt_iterator next_si;
8955 while (next)
8957 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8959 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8960 if (is_pattern_stmt_p (stmt_info))
8961 next = STMT_VINFO_RELATED_STMT (stmt_info);
8962 /* Free the attached stmt_vec_info and remove the stmt. */
8963 next_si = gsi_for_stmt (next);
8964 unlink_stmt_vdef (next);
8965 gsi_remove (&next_si, true);
8966 release_defs (next);
8967 free_stmt_vec_info (next);
8968 next = tmp;
8973 /* Function new_stmt_vec_info.
8975 Create and initialize a new stmt_vec_info struct for STMT. */
8977 stmt_vec_info
8978 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8980 stmt_vec_info res;
8981 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8983 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8984 STMT_VINFO_STMT (res) = stmt;
8985 res->vinfo = vinfo;
8986 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8987 STMT_VINFO_LIVE_P (res) = false;
8988 STMT_VINFO_VECTYPE (res) = NULL;
8989 STMT_VINFO_VEC_STMT (res) = NULL;
8990 STMT_VINFO_VECTORIZABLE (res) = true;
8991 STMT_VINFO_IN_PATTERN_P (res) = false;
8992 STMT_VINFO_RELATED_STMT (res) = NULL;
8993 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8994 STMT_VINFO_DATA_REF (res) = NULL;
8995 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8996 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8998 if (gimple_code (stmt) == GIMPLE_PHI
8999 && is_loop_header_bb_p (gimple_bb (stmt)))
9000 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9001 else
9002 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9004 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9005 STMT_SLP_TYPE (res) = loop_vect;
9006 STMT_VINFO_NUM_SLP_USES (res) = 0;
9008 GROUP_FIRST_ELEMENT (res) = NULL;
9009 GROUP_NEXT_ELEMENT (res) = NULL;
9010 GROUP_SIZE (res) = 0;
9011 GROUP_STORE_COUNT (res) = 0;
9012 GROUP_GAP (res) = 0;
9013 GROUP_SAME_DR_STMT (res) = NULL;
9015 return res;
9019 /* Create a hash table for stmt_vec_info. */
9021 void
9022 init_stmt_vec_info_vec (void)
9024 gcc_assert (!stmt_vec_info_vec.exists ());
9025 stmt_vec_info_vec.create (50);
9029 /* Free hash table for stmt_vec_info. */
9031 void
9032 free_stmt_vec_info_vec (void)
9034 unsigned int i;
9035 stmt_vec_info info;
9036 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9037 if (info != NULL)
9038 free_stmt_vec_info (STMT_VINFO_STMT (info));
9039 gcc_assert (stmt_vec_info_vec.exists ());
9040 stmt_vec_info_vec.release ();
9044 /* Free stmt vectorization related info. */
9046 void
9047 free_stmt_vec_info (gimple *stmt)
9049 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9051 if (!stmt_info)
9052 return;
9054 /* Check if this statement has a related "pattern stmt"
9055 (introduced by the vectorizer during the pattern recognition
9056 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9057 too. */
9058 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9060 stmt_vec_info patt_info
9061 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9062 if (patt_info)
9064 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9065 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9066 gimple_set_bb (patt_stmt, NULL);
9067 tree lhs = gimple_get_lhs (patt_stmt);
9068 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9069 release_ssa_name (lhs);
9070 if (seq)
9072 gimple_stmt_iterator si;
9073 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9075 gimple *seq_stmt = gsi_stmt (si);
9076 gimple_set_bb (seq_stmt, NULL);
9077 lhs = gimple_get_lhs (seq_stmt);
9078 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9079 release_ssa_name (lhs);
9080 free_stmt_vec_info (seq_stmt);
9083 free_stmt_vec_info (patt_stmt);
9087 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9088 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9089 set_vinfo_for_stmt (stmt, NULL);
9090 free (stmt_info);
9094 /* Function get_vectype_for_scalar_type_and_size.
9096 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9097 by the target. */
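/* For example (assuming a hypothetical target whose preferred vector size
   is 16 bytes), SCALAR_TYPE == int (4 bytes) and SIZE == 16 would yield a
   4-element integer vector type, while SIZE == 0 makes the function use
   whatever SIMD mode the target prefers for the int mode.  */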
9099 static tree
9100 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9102 tree orig_scalar_type = scalar_type;
9103 scalar_mode inner_mode;
9104 machine_mode simd_mode;
9105 int nunits;
9106 tree vectype;
9108 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9109 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9110 return NULL_TREE;
9112 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9114 /* For vector types of elements whose mode precision doesn't
9115 match their type's precision we use an element type of mode
9116 precision. The vectorization routines will have to make sure
9117 they support the proper result truncation/extension.
9118 We also make sure to build vector types with INTEGER_TYPE
9119 component type only. */
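/* For example (C names used purely for illustration), a C _Bool typically
   has TYPE_PRECISION 1 but lives in an 8-bit mode and is a BOOLEAN_TYPE,
   so the code below would substitute an 8-bit unsigned INTEGER_TYPE as
   the vector element type.  */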
9120 if (INTEGRAL_TYPE_P (scalar_type)
9121 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9122 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9123 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9124 TYPE_UNSIGNED (scalar_type));
9126 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9127 When the component mode passes the above test simply use a type
9128 corresponding to that mode. The theory is that any use that
9129 would cause problems with this will disable vectorization anyway. */
9130 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9131 && !INTEGRAL_TYPE_P (scalar_type))
9132 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9134 /* We can't build a vector type of elements with alignment bigger than
9135 their size. */
9136 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9137 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9138 TYPE_UNSIGNED (scalar_type));
9140 /* If we fell back to using the mode, fail if there was
9141 no scalar type for it. */
9142 if (scalar_type == NULL_TREE)
9143 return NULL_TREE;
9145 /* If no size was supplied use the mode the target prefers. Otherwise
9146 look up a vector mode of the specified size. */
9147 if (size == 0)
9148 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9149 else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
9150 return NULL_TREE;
9151 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9152 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9153 if (nunits < 1)
9154 return NULL_TREE;
9156 vectype = build_vector_type (scalar_type, nunits);
9158 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9159 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9160 return NULL_TREE;
9162 /* Re-attach the address-space qualifier if we canonicalized the scalar
9163 type. */
9164 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9165 return build_qualified_type
9166 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9168 return vectype;
9171 unsigned int current_vector_size;
9173 /* Function get_vectype_for_scalar_type.
9175 Returns the vector type corresponding to SCALAR_TYPE as supported
9176 by the target. */
9178 tree
9179 get_vectype_for_scalar_type (tree scalar_type)
9181 tree vectype;
9182 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9183 current_vector_size);
9184 if (vectype
9185 && current_vector_size == 0)
9186 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9187 return vectype;
9190 /* Function get_mask_type_for_scalar_type.
9192 Returns the mask type corresponding to a result of comparison
9193 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9195 tree
9196 get_mask_type_for_scalar_type (tree scalar_type)
9198 tree vectype = get_vectype_for_scalar_type (scalar_type);
9200 if (!vectype)
9201 return NULL;
9203 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9204 current_vector_size);
9207 /* Function get_same_sized_vectype
9209 Returns a vector type corresponding to SCALAR_TYPE with the same
9210 size as VECTOR_TYPE, if supported by the target. */
9212 tree
9213 get_same_sized_vectype (tree scalar_type, tree vector_type)
9215 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9216 return build_same_sized_truth_vector_type (vector_type);
9218 return get_vectype_for_scalar_type_and_size
9219 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9222 /* Function vect_is_simple_use.
9224 Input:
9225 VINFO - the vect info of the loop or basic block that is being vectorized.
9226 OPERAND - operand in the loop or bb.
9227 Output:
9228 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9229 DT - the type of definition
9231 Returns whether a stmt with OPERAND can be vectorized.
9232 For loops, supportable operands are constants, loop invariants, and operands
9233 that are defined by the current iteration of the loop. Unsupportable
9234 operands are those that are defined by a previous iteration of the loop (as
9235 is the case in reduction/induction computations).
9236 For basic blocks, supportable operands are constants and bb invariants.
9237 For now, operands defined outside the basic block are not supported. */
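/* A typical use from an analysis routine looks along these lines
   (illustrative only; op and vinfo stand for the operand and vec_info at
   hand):

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
       return false;                 // OP cannot be handled
     if (dt == vect_constant_def || dt == vect_external_def)
       ...                           // invariant operand, no def stmt in the region
*/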
9239 bool
9240 vect_is_simple_use (tree operand, vec_info *vinfo,
9241 gimple **def_stmt, enum vect_def_type *dt)
9243 *def_stmt = NULL;
9244 *dt = vect_unknown_def_type;
9246 if (dump_enabled_p ())
9248 dump_printf_loc (MSG_NOTE, vect_location,
9249 "vect_is_simple_use: operand ");
9250 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9251 dump_printf (MSG_NOTE, "\n");
9254 if (CONSTANT_CLASS_P (operand))
9256 *dt = vect_constant_def;
9257 return true;
9260 if (is_gimple_min_invariant (operand))
9262 *dt = vect_external_def;
9263 return true;
9266 if (TREE_CODE (operand) != SSA_NAME)
9268 if (dump_enabled_p ())
9269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9270 "not ssa-name.\n");
9271 return false;
9274 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9276 *dt = vect_external_def;
9277 return true;
9280 *def_stmt = SSA_NAME_DEF_STMT (operand);
9281 if (dump_enabled_p ())
9283 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9284 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9287 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9288 *dt = vect_external_def;
9289 else
9291 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9292 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9295 if (dump_enabled_p ())
9297 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9298 switch (*dt)
9300 case vect_uninitialized_def:
9301 dump_printf (MSG_NOTE, "uninitialized\n");
9302 break;
9303 case vect_constant_def:
9304 dump_printf (MSG_NOTE, "constant\n");
9305 break;
9306 case vect_external_def:
9307 dump_printf (MSG_NOTE, "external\n");
9308 break;
9309 case vect_internal_def:
9310 dump_printf (MSG_NOTE, "internal\n");
9311 break;
9312 case vect_induction_def:
9313 dump_printf (MSG_NOTE, "induction\n");
9314 break;
9315 case vect_reduction_def:
9316 dump_printf (MSG_NOTE, "reduction\n");
9317 break;
9318 case vect_double_reduction_def:
9319 dump_printf (MSG_NOTE, "double reduction\n");
9320 break;
9321 case vect_nested_cycle:
9322 dump_printf (MSG_NOTE, "nested cycle\n");
9323 break;
9324 case vect_unknown_def_type:
9325 dump_printf (MSG_NOTE, "unknown\n");
9326 break;
9330 if (*dt == vect_unknown_def_type)
9332 if (dump_enabled_p ())
9333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9334 "Unsupported pattern.\n");
9335 return false;
9338 switch (gimple_code (*def_stmt))
9340 case GIMPLE_PHI:
9341 case GIMPLE_ASSIGN:
9342 case GIMPLE_CALL:
9343 break;
9344 default:
9345 if (dump_enabled_p ())
9346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9347 "unsupported defining stmt:\n");
9348 return false;
9351 return true;
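/* A minimal caller sketch for vect_is_simple_use, mirroring how the
   analysis routines in this file classify their operands; OP and VINFO
   stand for whatever operand and vec_info the caller already has:

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
       return false;
     if (dt == vect_reduction_def)
       ... handle the reduction operand specially ...  */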
9354 /* Function vect_is_simple_use.
9356 Same as vect_is_simple_use but also determines the vector operand
9357 type of OPERAND and stores it to *VECTYPE. If the definition of
9358 OPERAND is vect_uninitialized_def, vect_constant_def or
9359 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9360 is responsible for computing the best-suited vector type for the
9361 scalar operand. */
9363 bool
9364 vect_is_simple_use (tree operand, vec_info *vinfo,
9365 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9367 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9368 return false;
9370 /* Now get a vector type if the def is internal, otherwise supply
9371 NULL_TREE and leave it up to the caller to figure out a proper
9372 type for the use stmt. */
9373 if (*dt == vect_internal_def
9374 || *dt == vect_induction_def
9375 || *dt == vect_reduction_def
9376 || *dt == vect_double_reduction_def
9377 || *dt == vect_nested_cycle)
9379 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9381 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9382 && !STMT_VINFO_RELEVANT (stmt_info)
9383 && !STMT_VINFO_LIVE_P (stmt_info))
9384 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9386 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9387 gcc_assert (*vectype != NULL_TREE);
9389 else if (*dt == vect_uninitialized_def
9390 || *dt == vect_constant_def
9391 || *dt == vect_external_def)
9392 *vectype = NULL_TREE;
9393 else
9394 gcc_unreachable ();
9396 return true;
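/* A minimal caller sketch for the overload above: when NULL_TREE comes
   back for a constant or external definition, callers typically pick a
   vector type themselves.  OP and VINFO are placeholders for the
   caller's own data:

     gimple *def_stmt;
     enum vect_def_type dt;
     tree op_vectype = NULL_TREE;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &op_vectype))
       return false;
     if (!op_vectype)
       op_vectype = get_vectype_for_scalar_type (TREE_TYPE (op));  */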
9400 /* Function supportable_widening_operation
9402 Check whether an operation represented by the code CODE is a
9403 widening operation that is supported by the target platform in
9404 vector form (i.e., when operating on arguments of type VECTYPE_IN
9405 producing a result of type VECTYPE_OUT).
9407 Widening operations we currently support are NOP (CONVERT), FLOAT,
9408 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD, SAD and VEC_WIDEN_MULT_EVEN.
9409 This function checks if these operations are supported by the target
9410 platform either directly (via vector tree-codes), or via target builtins.
9412 Output:
9413 - CODE1 and CODE2 are codes of vector operations to be used when
9414 vectorizing the operation, if available.
9415 - MULTI_STEP_CVT determines the number of required intermediate steps in
9416 case of multi-step conversion (like char->short->int - in that case
9417 MULTI_STEP_CVT will be 1).
9418 - INTERM_TYPES contains the intermediate type required to perform the
9419 widening operation (short in the above example). */
9421 bool
9422 supportable_widening_operation (enum tree_code code, gimple *stmt,
9423 tree vectype_out, tree vectype_in,
9424 enum tree_code *code1, enum tree_code *code2,
9425 int *multi_step_cvt,
9426 vec<tree> *interm_types)
9428 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9429 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9430 struct loop *vect_loop = NULL;
9431 machine_mode vec_mode;
9432 enum insn_code icode1, icode2;
9433 optab optab1, optab2;
9434 tree vectype = vectype_in;
9435 tree wide_vectype = vectype_out;
9436 enum tree_code c1, c2;
9437 int i;
9438 tree prev_type, intermediate_type;
9439 machine_mode intermediate_mode, prev_mode;
9440 optab optab3, optab4;
9442 *multi_step_cvt = 0;
9443 if (loop_info)
9444 vect_loop = LOOP_VINFO_LOOP (loop_info);
9446 switch (code)
9448 case WIDEN_MULT_EXPR:
9449 /* The result of a vectorized widening operation usually requires
9450 two vectors (because the widened results do not fit into one vector).
9451 The generated vector results would normally be expected to be
9452 generated in the same order as in the original scalar computation,
9453 i.e. if 8 results are generated in each vector iteration, they are
9454 to be organized as follows:
9455 vect1: [res1,res2,res3,res4],
9456 vect2: [res5,res6,res7,res8].
9458 However, in the special case that the result of the widening
9459 operation is used in a reduction computation only, the order doesn't
9460 matter (because when vectorizing a reduction we change the order of
9461 the computation). Some targets can take advantage of this and
9462 generate more efficient code. For example, targets like Altivec,
9463 that support widen_mult using a sequence of {mult_even,mult_odd}
9464 generate the following vectors:
9465 vect1: [res1,res3,res5,res7],
9466 vect2: [res2,res4,res6,res8].
9468 When vectorizing outer-loops, we execute the inner-loop sequentially
9469 (each vectorized inner-loop iteration contributes to VF outer-loop
9470 iterations in parallel). We therefore don't allow changing the
9471 order of the computation in the inner-loop during outer-loop
9472 vectorization. */
9473 /* TODO: Another case in which order doesn't *really* matter is when we
9474 widen and then contract again, e.g. (short)((int)x * y >> 8).
9475 Normally, pack_trunc performs an even/odd permute, whereas the
9476 repack from an even/odd expansion would be an interleave, which
9477 would be significantly simpler for e.g. AVX2. */
9478 /* In any case, in order to avoid duplicating the code below, recurse
9479 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9480 are properly set up for the caller. If we fail, we'll continue with
9481 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9482 if (vect_loop
9483 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9484 && !nested_in_vect_loop_p (vect_loop, stmt)
9485 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9486 stmt, vectype_out, vectype_in,
9487 code1, code2, multi_step_cvt,
9488 interm_types))
9490 /* Elements in a vector with the vect_used_by_reduction property cannot
9491 be reordered if the use chain with this property does not have the
9492 same operation. One such example is s += a * b, where elements
9493 in a and b cannot be reordered. Here we check if the vector defined
9494 by STMT is only directly used in the reduction statement. */
9495 tree lhs = gimple_assign_lhs (stmt);
9496 use_operand_p dummy;
9497 gimple *use_stmt;
9498 stmt_vec_info use_stmt_info = NULL;
9499 if (single_imm_use (lhs, &dummy, &use_stmt)
9500 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9501 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9502 return true;
9504 c1 = VEC_WIDEN_MULT_LO_EXPR;
9505 c2 = VEC_WIDEN_MULT_HI_EXPR;
9506 break;
9508 case DOT_PROD_EXPR:
9509 c1 = DOT_PROD_EXPR;
9510 c2 = DOT_PROD_EXPR;
9511 break;
9513 case SAD_EXPR:
9514 c1 = SAD_EXPR;
9515 c2 = SAD_EXPR;
9516 break;
9518 case VEC_WIDEN_MULT_EVEN_EXPR:
9519 /* Support the recursion induced just above. */
9520 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9521 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9522 break;
9524 case WIDEN_LSHIFT_EXPR:
9525 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9526 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9527 break;
9529 CASE_CONVERT:
9530 c1 = VEC_UNPACK_LO_EXPR;
9531 c2 = VEC_UNPACK_HI_EXPR;
9532 break;
9534 case FLOAT_EXPR:
9535 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9536 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9537 break;
9539 case FIX_TRUNC_EXPR:
9540 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9541 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9542 computing the operation. */
9543 return false;
9545 default:
9546 gcc_unreachable ();
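/* The _LO/_HI variants pick out the two halves of the input vector by
   element position; on big-endian targets that mapping is reversed, so
   the pair of codes is swapped below.  The even/odd multiplication
   variants do not depend on element order and are left untouched.  */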
9549 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9550 std::swap (c1, c2);
9552 if (code == FIX_TRUNC_EXPR)
9554 /* The signedness is determined from the output operand. */
9555 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9556 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9558 else
9560 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9561 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9564 if (!optab1 || !optab2)
9565 return false;
9567 vec_mode = TYPE_MODE (vectype);
9568 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9569 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9570 return false;
9572 *code1 = c1;
9573 *code2 = c2;
9575 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9576 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9577 /* For scalar masks we may have different boolean
9578 vector types sharing the same QImode. Thus we
9579 add an additional check on the number of elements. */
9580 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9581 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9582 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9584 /* Check if it's a multi-step conversion that can be done using intermediate
9585 types. */
9587 prev_type = vectype;
9588 prev_mode = vec_mode;
9590 if (!CONVERT_EXPR_CODE_P (code))
9591 return false;
9593 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9594 intermediate steps in the promotion sequence. We try
9595 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9596 not. */
9597 interm_types->create (MAX_INTERM_CVT_STEPS);
9598 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9600 intermediate_mode = insn_data[icode1].operand[0].mode;
9601 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9603 intermediate_type
9604 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9605 current_vector_size);
9606 if (intermediate_mode != TYPE_MODE (intermediate_type))
9607 return false;
9609 else
9610 intermediate_type
9611 = lang_hooks.types.type_for_mode (intermediate_mode,
9612 TYPE_UNSIGNED (prev_type));
9614 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9615 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9617 if (!optab3 || !optab4
9618 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9619 || insn_data[icode1].operand[0].mode != intermediate_mode
9620 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9621 || insn_data[icode2].operand[0].mode != intermediate_mode
9622 || ((icode1 = optab_handler (optab3, intermediate_mode))
9623 == CODE_FOR_nothing)
9624 || ((icode2 = optab_handler (optab4, intermediate_mode))
9625 == CODE_FOR_nothing))
9626 break;
9628 interm_types->quick_push (intermediate_type);
9629 (*multi_step_cvt)++;
9631 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9632 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9633 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9634 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9635 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9637 prev_type = intermediate_type;
9638 prev_mode = intermediate_mode;
9641 interm_types->release ();
9642 return false;
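/* A minimal caller sketch for supportable_widening_operation, using the
   char -> int example from the comment above and assuming 128-bit
   vectors (VECTYPE_IN = V16QI, VECTYPE_OUT = V4SI) on a target that
   implements the unpack optabs.  STMT is the conversion statement being
   analyzed:

     enum tree_code code1, code2;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     if (supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                         vectype_in, &code1, &code2,
                                         &multi_step_cvt, &interm_types))
       ... expect VEC_UNPACK_LO/HI_EXPR, MULTI_STEP_CVT == 1 and one
           intermediate short vector type (char -> short -> int) ...

   On success the caller is responsible for releasing INTERM_TYPES;
   on failure the function releases it itself.  */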
9646 /* Function supportable_narrowing_operation
9648 Check whether an operation represented by the code CODE is a
9649 narrowing operation that is supported by the target platform in
9650 vector form (i.e., when operating on arguments of type VECTYPE_IN
9651 and producing a result of type VECTYPE_OUT).
9653 Narrowing operations we currently support are NOP (CONVERT) and
9654 FIX_TRUNC. This function checks if these operations are supported by
9655 the target platform directly via vector tree-codes.
9657 Output:
9658 - CODE1 is the code of a vector operation to be used when
9659 vectorizing the operation, if available.
9660 - MULTI_STEP_CVT determines the number of required intermediate steps in
9661 case of multi-step conversion (like int->short->char - in that case
9662 MULTI_STEP_CVT will be 1).
9663 - INTERM_TYPES contains the intermediate type required to perform the
9664 narrowing operation (short in the above example). */
9666 bool
9667 supportable_narrowing_operation (enum tree_code code,
9668 tree vectype_out, tree vectype_in,
9669 enum tree_code *code1, int *multi_step_cvt,
9670 vec<tree> *interm_types)
9672 machine_mode vec_mode;
9673 enum insn_code icode1;
9674 optab optab1, interm_optab;
9675 tree vectype = vectype_in;
9676 tree narrow_vectype = vectype_out;
9677 enum tree_code c1;
9678 tree intermediate_type, prev_type;
9679 machine_mode intermediate_mode, prev_mode;
9680 int i;
9681 bool uns;
9683 *multi_step_cvt = 0;
9684 switch (code)
9686 CASE_CONVERT:
9687 c1 = VEC_PACK_TRUNC_EXPR;
9688 break;
9690 case FIX_TRUNC_EXPR:
9691 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9692 break;
9694 case FLOAT_EXPR:
9695 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9696 tree code and optabs used for computing the operation. */
9697 return false;
9699 default:
9700 gcc_unreachable ();
9703 if (code == FIX_TRUNC_EXPR)
9704 /* The signedness is determined from the output operand. */
9705 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9706 else
9707 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9709 if (!optab1)
9710 return false;
9712 vec_mode = TYPE_MODE (vectype);
9713 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9714 return false;
9716 *code1 = c1;
9718 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9719 /* For scalar masks we may have different boolean
9720 vector types sharing the same QImode. Thus we
9721 add an additional check on the number of elements. */
9722 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9723 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9724 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9726 /* Check if it's a multi-step conversion that can be done using intermediate
9727 types. */
9728 prev_mode = vec_mode;
9729 prev_type = vectype;
9730 if (code == FIX_TRUNC_EXPR)
9731 uns = TYPE_UNSIGNED (vectype_out);
9732 else
9733 uns = TYPE_UNSIGNED (vectype);
9735 /* For a multi-step FIX_TRUNC_EXPR prefer the signed float-to-integer
9736 conversion over the unsigned one, as unsigned FIX_TRUNC_EXPR is often
9737 more costly than signed. */
9738 if (code == FIX_TRUNC_EXPR && uns)
9740 enum insn_code icode2;
9742 intermediate_type
9743 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9744 interm_optab
9745 = optab_for_tree_code (c1, intermediate_type, optab_default);
9746 if (interm_optab != unknown_optab
9747 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9748 && insn_data[icode1].operand[0].mode
9749 == insn_data[icode2].operand[0].mode)
9751 uns = false;
9752 optab1 = interm_optab;
9753 icode1 = icode2;
9757 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9758 intermediate steps in the narrowing sequence. We try
9759 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9760 interm_types->create (MAX_INTERM_CVT_STEPS);
9761 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9763 intermediate_mode = insn_data[icode1].operand[0].mode;
9764 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9766 intermediate_type
9767 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9768 current_vector_size);
9769 if (intermediate_mode != TYPE_MODE (intermediate_type))
9770 return false;
9772 else
9773 intermediate_type
9774 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9775 interm_optab
9776 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9777 optab_default);
9778 if (!interm_optab
9779 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9780 || insn_data[icode1].operand[0].mode != intermediate_mode
9781 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9782 == CODE_FOR_nothing))
9783 break;
9785 interm_types->quick_push (intermediate_type);
9786 (*multi_step_cvt)++;
9788 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9789 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9790 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9791 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9793 prev_mode = intermediate_mode;
9794 prev_type = intermediate_type;
9795 optab1 = interm_optab;
9798 interm_types->release ();
9799 return false;
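/* A minimal caller sketch for supportable_narrowing_operation, the
   mirror of the widening query above, using the int -> char example
   from its comment and assuming 128-bit vectors (VECTYPE_IN = V4SI,
   VECTYPE_OUT = V16QI) on a target that implements the pack optabs:

     enum tree_code code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                          &code1, &multi_step_cvt,
                                          &interm_types))
       ... expect VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1 and one
           intermediate short vector type (int -> short -> char) ...  */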