gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
61 };
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
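/* Editorial sketch (not part of the original source): a typical use of
   record_stmt_cost while costing NCOPIES vector statements in the loop
   body during analysis; the variable names are illustrative only:

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   Passing a NULL cost vector instead hands the cost straight to the
   target's add_stmt_cost hook, as in the else branch above.  */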
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
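/* Editorial note (illustrative, not part of the original source): a store
   such as a[i] = x inside the loop has a vdef and is marked
   vect_used_in_scope above, while a computation whose only uses are in
   loop-exit PHIs is marked live and, unless it is a simple invariant
   computation, vect_used_only_live.  */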
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that it's of one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
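/* Editorial sketch (hedged, not part of the original source): for a masked
   store emitted by if-conversion, which in this GCC version has the shape

     IFN_MASK_STORE (ptr, align, mask, value);

   the switch above treats both MASK (argument 2) and VALUE (argument 3)
   as non-indexing uses, while the pointer and alignment arguments only
   feed the address computation.  */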
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
582 /* We are also not interested in uses on loop PHI backedges that are
583 inductions. Otherwise we'll needlessly vectorize the IV increment
584 and cause hybrid SLP for SLP inductions. Unless the PHI is live
585 of course. */
586 else if (gimple_code (stmt) == GIMPLE_PHI
587 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
588 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
589 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
590 == use))
592 if (dump_enabled_p ())
593 dump_printf_loc (MSG_NOTE, vect_location,
594 "induction value on backedge.\n");
595 return true;
599 vect_mark_relevant (worklist, def_stmt, relevant, false);
600 return true;
604 /* Function vect_mark_stmts_to_be_vectorized.
606 Not all stmts in the loop need to be vectorized. For example:
608 for i...
609 for j...
610 1. T0 = i + j
611 2. T1 = a[T0]
613 3. j = j + 1
615 Stmts 1 and 3 do not need to be vectorized, because loop control and
616 addressing of vectorized data-refs are handled differently.
618 This pass detects such stmts. */
620 bool
621 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
624 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
625 unsigned int nbbs = loop->num_nodes;
626 gimple_stmt_iterator si;
627 gimple *stmt;
628 unsigned int i;
629 stmt_vec_info stmt_vinfo;
630 basic_block bb;
631 gimple *phi;
632 bool live_p;
633 enum vect_relevant relevant;
635 if (dump_enabled_p ())
636 dump_printf_loc (MSG_NOTE, vect_location,
637 "=== vect_mark_stmts_to_be_vectorized ===\n");
639 auto_vec<gimple *, 64> worklist;
641 /* 1. Init worklist. */
642 for (i = 0; i < nbbs; i++)
644 bb = bbs[i];
645 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 phi = gsi_stmt (si);
648 if (dump_enabled_p ())
650 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
654 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
655 vect_mark_relevant (&worklist, phi, relevant, live_p);
657 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 stmt = gsi_stmt (si);
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
666 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
667 vect_mark_relevant (&worklist, stmt, relevant, live_p);
671 /* 2. Process_worklist */
672 while (worklist.length () > 0)
674 use_operand_p use_p;
675 ssa_op_iter iter;
677 stmt = worklist.pop ();
678 if (dump_enabled_p ())
680 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
681 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
684 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
685 (DEF_STMT) as relevant/irrelevant according to the relevance property
686 of STMT. */
687 stmt_vinfo = vinfo_for_stmt (stmt);
688 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
691 propagated as is to the DEF_STMTs of its USEs.
693 One exception is when STMT has been identified as defining a reduction
694 variable; in this case we set the relevance to vect_used_by_reduction.
695 This is because we distinguish between two kinds of relevant stmts -
696 those that are used by a reduction computation, and those that are
697 (also) used by a regular computation. This allows us later on to
698 identify stmts that are used solely by a reduction, and therefore the
699 order of the results that they produce does not have to be kept. */
701 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703 case vect_reduction_def:
704 gcc_assert (relevant != vect_unused_in_scope);
705 if (relevant != vect_unused_in_scope
706 && relevant != vect_used_in_scope
707 && relevant != vect_used_by_reduction
708 && relevant != vect_used_only_live)
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
712 "unsupported use of reduction.\n");
713 return false;
715 break;
717 case vect_nested_cycle:
718 if (relevant != vect_unused_in_scope
719 && relevant != vect_used_in_outer_by_reduction
720 && relevant != vect_used_in_outer)
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724 "unsupported use of nested cycle.\n");
726 return false;
728 break;
730 case vect_double_reduction_def:
731 if (relevant != vect_unused_in_scope
732 && relevant != vect_used_by_reduction
733 && relevant != vect_used_only_live)
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "unsupported use of double reduction.\n");
739 return false;
741 break;
743 default:
744 break;
747 if (is_pattern_stmt_p (stmt_vinfo))
749 /* Pattern statements are not inserted into the code, so
750 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
751 have to scan the RHS or function arguments instead. */
752 if (is_gimple_assign (stmt))
754 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
755 tree op = gimple_assign_rhs1 (stmt);
757 i = 1;
758 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
761 relevant, &worklist, false)
762 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
763 relevant, &worklist, false))
764 return false;
765 i = 2;
767 for (; i < gimple_num_ops (stmt); i++)
769 op = gimple_op (stmt, i);
770 if (TREE_CODE (op) == SSA_NAME
771 && !process_use (stmt, op, loop_vinfo, relevant,
772 &worklist, false))
773 return false;
776 else if (is_gimple_call (stmt))
778 for (i = 0; i < gimple_call_num_args (stmt); i++)
780 tree arg = gimple_call_arg (stmt, i);
781 if (!process_use (stmt, arg, loop_vinfo, relevant,
782 &worklist, false))
783 return false;
787 else
788 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790 tree op = USE_FROM_PTR (use_p);
791 if (!process_use (stmt, op, loop_vinfo, relevant,
792 &worklist, false))
793 return false;
796 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 gather_scatter_info gs_info;
799 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
800 gcc_unreachable ();
801 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
802 &worklist, true))
803 return false;
805 } /* while worklist */
807 return true;
811 /* Function vect_model_simple_cost.
813 Models cost for simple operations, i.e. those that only emit ncopies of a
814 single op. Right now, this does not account for multiple insns that could
815 be generated for the single vector op. We will handle that shortly. */
817 void
818 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
819 enum vect_def_type *dt,
820 int ndts,
821 stmt_vector_for_cost *prologue_cost_vec,
822 stmt_vector_for_cost *body_cost_vec)
824 int i;
825 int inside_cost = 0, prologue_cost = 0;
827 /* The SLP costs were already calculated during SLP tree build. */
828 if (PURE_SLP_STMT (stmt_info))
829 return;
831 /* Cost the "broadcast" of a scalar operand into a vector operand.
832 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833 cost model. */
834 for (i = 0; i < ndts; i++)
835 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 stmt_info, 0, vect_prologue);
839 /* Pass the inside-of-loop statements to the target-specific cost model. */
840 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 stmt_info, 0, vect_body);
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE, vect_location,
845 "vect_model_simple_cost: inside_cost = %d, "
846 "prologue_cost = %d .\n", inside_cost, prologue_cost);
850 /* Model cost for type demotion and promotion operations. PWR is normally
851 zero for single-step promotions and demotions. It will be one if
852 two-step promotion/demotion is required, and so on. Each additional
853 step doubles the number of instructions required. */
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 enum vect_def_type *dt, int pwr)
859 int i, tmp;
860 int inside_cost = 0, prologue_cost = 0;
861 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863 void *target_cost_data;
865 /* The SLP costs were already calculated during SLP tree build. */
866 if (PURE_SLP_STMT (stmt_info))
867 return;
869 if (loop_vinfo)
870 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
871 else
872 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
874 for (i = 0; i < pwr + 1; i++)
876 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
877 (i + 1) : i;
878 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
879 vec_promote_demote, stmt_info, 0,
880 vect_body);
883 /* FORNOW: Assuming maximum 2 args per stmts. */
884 for (i = 0; i < 2; i++)
885 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
886 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
887 stmt_info, 0, vect_prologue);
889 if (dump_enabled_p ())
890 dump_printf_loc (MSG_NOTE, vect_location,
891 "vect_model_promotion_demotion_cost: inside_cost = %d, "
892 "prologue_cost = %d .\n", inside_cost, prologue_cost);
895 /* Function vect_model_store_cost
897 Models cost for stores. In the case of grouped accesses, one access
898 has the overhead of the grouped access attributed to it. */
900 void
901 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
902 vect_memory_access_type memory_access_type,
903 enum vect_def_type dt, slp_tree slp_node,
904 stmt_vector_for_cost *prologue_cost_vec,
905 stmt_vector_for_cost *body_cost_vec)
907 unsigned int inside_cost = 0, prologue_cost = 0;
908 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
909 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
910 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
912 if (dt == vect_constant_def || dt == vect_external_def)
913 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
914 stmt_info, 0, vect_prologue);
916 /* Grouped stores update all elements in the group at once,
917 so we want the DR for the first statement. */
918 if (!slp_node && grouped_access_p)
920 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
921 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
924 /* True if we should include any once-per-group costs as well as
925 the cost of the statement itself. For SLP we only get called
926 once per group anyhow. */
927 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
929 /* We assume that the cost of a single store-lanes instruction is
930 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
931 access is instead being provided by a permute-and-store operation,
932 include the cost of the permutes. */
933 if (first_stmt_p
934 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
936 /* Uses high and low interleave or shuffle operations for each
937 needed permute. */
938 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
939 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
940 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
941 stmt_info, 0, vect_body);
943 if (dump_enabled_p ())
944 dump_printf_loc (MSG_NOTE, vect_location,
945 "vect_model_store_cost: strided group_size = %d .\n",
946 group_size);
949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
950 /* Costs of the stores. */
951 if (memory_access_type == VMAT_ELEMENTWISE
952 || memory_access_type == VMAT_GATHER_SCATTER)
953 /* N scalar stores plus extracting the elements. */
954 inside_cost += record_stmt_cost (body_cost_vec,
955 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
956 scalar_store, stmt_info, 0, vect_body);
957 else
958 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
960 if (memory_access_type == VMAT_ELEMENTWISE
961 || memory_access_type == VMAT_STRIDED_SLP)
962 inside_cost += record_stmt_cost (body_cost_vec,
963 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
964 vec_to_scalar, stmt_info, 0, vect_body);
966 if (dump_enabled_p ())
967 dump_printf_loc (MSG_NOTE, vect_location,
968 "vect_model_store_cost: inside_cost = %d, "
969 "prologue_cost = %d .\n", inside_cost, prologue_cost);
973 /* Calculate cost of DR's memory access. */
974 void
975 vect_get_store_cost (struct data_reference *dr, int ncopies,
976 unsigned int *inside_cost,
977 stmt_vector_for_cost *body_cost_vec)
979 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
980 gimple *stmt = DR_STMT (dr);
981 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
983 switch (alignment_support_scheme)
985 case dr_aligned:
987 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
988 vector_store, stmt_info, 0,
989 vect_body);
991 if (dump_enabled_p ())
992 dump_printf_loc (MSG_NOTE, vect_location,
993 "vect_model_store_cost: aligned.\n");
994 break;
997 case dr_unaligned_supported:
999 /* Here, we assign an additional cost for the unaligned store. */
1000 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1001 unaligned_store, stmt_info,
1002 DR_MISALIGNMENT (dr), vect_body);
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_NOTE, vect_location,
1005 "vect_model_store_cost: unaligned supported by "
1006 "hardware.\n");
1007 break;
1010 case dr_unaligned_unsupported:
1012 *inside_cost = VECT_MAX_COST;
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1016 "vect_model_store_cost: unsupported access.\n");
1017 break;
1020 default:
1021 gcc_unreachable ();
1026 /* Function vect_model_load_cost
1028 Models cost for loads. In the case of grouped accesses, one access has
1029 the overhead of the grouped access attributed to it. Since unaligned
1030 accesses are supported for loads, we also account for the costs of the
1031 access scheme chosen. */
1033 void
1034 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1035 vect_memory_access_type memory_access_type,
1036 slp_tree slp_node,
1037 stmt_vector_for_cost *prologue_cost_vec,
1038 stmt_vector_for_cost *body_cost_vec)
1040 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1041 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1042 unsigned int inside_cost = 0, prologue_cost = 0;
1043 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1045 /* Grouped loads read all elements in the group at once,
1046 so we want the DR for the first statement. */
1047 if (!slp_node && grouped_access_p)
1049 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1050 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1053 /* True if we should include any once-per-group costs as well as
1054 the cost of the statement itself. For SLP we only get called
1055 once per group anyhow. */
1056 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1058 /* We assume that the cost of a single load-lanes instruction is
1059 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1060 access is instead being provided by a load-and-permute operation,
1061 include the cost of the permutes. */
1062 if (first_stmt_p
1063 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1065 /* Uses even and odd extract operations or shuffle operations
1066 for each needed permute. */
1067 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1068 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1069 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1070 stmt_info, 0, vect_body);
1072 if (dump_enabled_p ())
1073 dump_printf_loc (MSG_NOTE, vect_location,
1074 "vect_model_load_cost: strided group_size = %d .\n",
1075 group_size);
1078 /* The loads themselves. */
1079 if (memory_access_type == VMAT_ELEMENTWISE
1080 || memory_access_type == VMAT_GATHER_SCATTER)
1082 /* N scalar loads plus gathering them into a vector. */
1083 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1084 inside_cost += record_stmt_cost (body_cost_vec,
1085 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1086 scalar_load, stmt_info, 0, vect_body);
1088 else
1089 vect_get_load_cost (dr, ncopies, first_stmt_p,
1090 &inside_cost, &prologue_cost,
1091 prologue_cost_vec, body_cost_vec, true);
1092 if (memory_access_type == VMAT_ELEMENTWISE
1093 || memory_access_type == VMAT_STRIDED_SLP)
1094 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1095 stmt_info, 0, vect_body);
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_NOTE, vect_location,
1099 "vect_model_load_cost: inside_cost = %d, "
1100 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1104 /* Calculate cost of DR's memory access. */
1105 void
1106 vect_get_load_cost (struct data_reference *dr, int ncopies,
1107 bool add_realign_cost, unsigned int *inside_cost,
1108 unsigned int *prologue_cost,
1109 stmt_vector_for_cost *prologue_cost_vec,
1110 stmt_vector_for_cost *body_cost_vec,
1111 bool record_prologue_costs)
1113 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1114 gimple *stmt = DR_STMT (dr);
1115 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1117 switch (alignment_support_scheme)
1119 case dr_aligned:
1121 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1122 stmt_info, 0, vect_body);
1124 if (dump_enabled_p ())
1125 dump_printf_loc (MSG_NOTE, vect_location,
1126 "vect_model_load_cost: aligned.\n");
1128 break;
1130 case dr_unaligned_supported:
1132 /* Here, we assign an additional cost for the unaligned load. */
1133 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1134 unaligned_load, stmt_info,
1135 DR_MISALIGNMENT (dr), vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: unaligned supported by "
1140 "hardware.\n");
1142 break;
1144 case dr_explicit_realign:
1146 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1147 vector_load, stmt_info, 0, vect_body);
1148 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1149 vec_perm, stmt_info, 0, vect_body);
1151 /* FIXME: If the misalignment remains fixed across the iterations of
1152 the containing loop, the following cost should be added to the
1153 prologue costs. */
1154 if (targetm.vectorize.builtin_mask_for_load)
1155 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1156 stmt_info, 0, vect_body);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "vect_model_load_cost: explicit realign\n");
1162 break;
1164 case dr_explicit_realign_optimized:
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: unaligned software "
1169 "pipelined.\n");
1171 /* Unaligned software pipeline has a load of an address, an initial
1172 load, and possibly a mask operation to "prime" the loop. However,
1173 if this is an access in a group of loads, which provide grouped
1174 access, then the above cost should only be considered for one
1175 access in the group. Inside the loop, there is a load op
1176 and a realignment op. */
1178 if (add_realign_cost && record_prologue_costs)
1180 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1181 vector_stmt, stmt_info,
1182 0, vect_prologue);
1183 if (targetm.vectorize.builtin_mask_for_load)
1184 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1185 vector_stmt, stmt_info,
1186 0, vect_prologue);
1189 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1190 stmt_info, 0, vect_body);
1191 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1192 stmt_info, 0, vect_body);
1194 if (dump_enabled_p ())
1195 dump_printf_loc (MSG_NOTE, vect_location,
1196 "vect_model_load_cost: explicit realign optimized"
1197 "\n");
1199 break;
1202 case dr_unaligned_unsupported:
1204 *inside_cost = VECT_MAX_COST;
1206 if (dump_enabled_p ())
1207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1208 "vect_model_load_cost: unsupported access.\n");
1209 break;
1212 default:
1213 gcc_unreachable ();
1217 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1218 the loop preheader for the vectorized stmt STMT. */
1220 static void
1221 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1223 if (gsi)
1224 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1225 else
1227 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1228 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1230 if (loop_vinfo)
1232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1233 basic_block new_bb;
1234 edge pe;
1236 if (nested_in_vect_loop_p (loop, stmt))
1237 loop = loop->inner;
1239 pe = loop_preheader_edge (loop);
1240 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1241 gcc_assert (!new_bb);
1243 else
1245 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1246 basic_block bb;
1247 gimple_stmt_iterator gsi_bb_start;
1249 gcc_assert (bb_vinfo);
1250 bb = BB_VINFO_BB (bb_vinfo);
1251 gsi_bb_start = gsi_after_labels (bb);
1252 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1256 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "created new init_stmt: ");
1260 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1264 /* Function vect_init_vector.
1266 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1267 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1268 vector type, a vector with all elements equal to VAL is created first.
1269 Place the initialization at BSI if it is not NULL. Otherwise, place the
1270 initialization at the loop preheader.
1271 Return the DEF of INIT_STMT.
1272 It will be used in the vectorization of STMT. */
1274 tree
1275 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1277 gimple *init_stmt;
1278 tree new_temp;
1280 /* We abuse this function to push something into an SSA name with initial value 'val'. */
1281 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1283 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1284 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1286 /* Scalar boolean value should be transformed into
1287 all zeros or all ones value before building a vector. */
1288 if (VECTOR_BOOLEAN_TYPE_P (type))
1290 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1291 tree false_val = build_zero_cst (TREE_TYPE (type));
1293 if (CONSTANT_CLASS_P (val))
1294 val = integer_zerop (val) ? false_val : true_val;
1295 else
1297 new_temp = make_ssa_name (TREE_TYPE (type));
1298 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1299 val, true_val, false_val);
1300 vect_init_vector_1 (stmt, init_stmt, gsi);
1301 val = new_temp;
1304 else if (CONSTANT_CLASS_P (val))
1305 val = fold_convert (TREE_TYPE (type), val);
1306 else
1308 new_temp = make_ssa_name (TREE_TYPE (type));
1309 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1310 init_stmt = gimple_build_assign (new_temp,
1311 fold_build1 (VIEW_CONVERT_EXPR,
1312 TREE_TYPE (type),
1313 val));
1314 else
1315 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1316 vect_init_vector_1 (stmt, init_stmt, gsi);
1317 val = new_temp;
1320 val = build_vector_from_val (type, val);
1323 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1324 init_stmt = gimple_build_assign (new_temp, val);
1325 vect_init_vector_1 (stmt, init_stmt, gsi);
1326 return new_temp;
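/* Editorial note (illustrative, not part of the original source): when
   vectorizing e.g. x[i] = y[i] + 3 with a hypothetical V4SI vector type,
   the invariant 3 reaches this function, is broadcast by
   build_vector_from_val, and a preheader init stmt of the form

     cst_N = { 3, 3, 3, 3 };

   is emitted (GSI being NULL in that case), with cst_N returned as the
   vector def.  */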
1329 /* Function vect_get_vec_def_for_operand_1.
1331 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1332 DT that will be used in the vectorized stmt. */
1334 tree
1335 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1337 tree vec_oprnd;
1338 gimple *vec_stmt;
1339 stmt_vec_info def_stmt_info = NULL;
1341 switch (dt)
1343 /* operand is a constant or a loop invariant. */
1344 case vect_constant_def:
1345 case vect_external_def:
1346 /* Code should use vect_get_vec_def_for_operand. */
1347 gcc_unreachable ();
1349 /* operand is defined inside the loop. */
1350 case vect_internal_def:
1352 /* Get the def from the vectorized stmt. */
1353 def_stmt_info = vinfo_for_stmt (def_stmt);
1355 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1356 /* Get vectorized pattern statement. */
1357 if (!vec_stmt
1358 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1359 && !STMT_VINFO_RELEVANT (def_stmt_info))
1360 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1361 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1362 gcc_assert (vec_stmt);
1363 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1364 vec_oprnd = PHI_RESULT (vec_stmt);
1365 else if (is_gimple_call (vec_stmt))
1366 vec_oprnd = gimple_call_lhs (vec_stmt);
1367 else
1368 vec_oprnd = gimple_assign_lhs (vec_stmt);
1369 return vec_oprnd;
1372 /* operand is defined by a loop header phi. */
1373 case vect_reduction_def:
1374 case vect_double_reduction_def:
1375 case vect_nested_cycle:
1376 case vect_induction_def:
1378 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1380 /* Get the def from the vectorized stmt. */
1381 def_stmt_info = vinfo_for_stmt (def_stmt);
1382 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1383 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1384 vec_oprnd = PHI_RESULT (vec_stmt);
1385 else
1386 vec_oprnd = gimple_get_lhs (vec_stmt);
1387 return vec_oprnd;
1390 default:
1391 gcc_unreachable ();
1396 /* Function vect_get_vec_def_for_operand.
1398 OP is an operand in STMT. This function returns a (vector) def that will be
1399 used in the vectorized stmt for STMT.
1401 In the case that OP is an SSA_NAME which is defined in the loop, then
1402 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1404 In case OP is an invariant or constant, a new stmt that creates a vector def
1405 needs to be introduced. VECTYPE may be used to specify a required type for
1406 vector invariant. */
1408 tree
1409 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1411 gimple *def_stmt;
1412 enum vect_def_type dt;
1413 bool is_simple_use;
1414 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1417 if (dump_enabled_p ())
1419 dump_printf_loc (MSG_NOTE, vect_location,
1420 "vect_get_vec_def_for_operand: ");
1421 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1422 dump_printf (MSG_NOTE, "\n");
1425 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1426 gcc_assert (is_simple_use);
1427 if (def_stmt && dump_enabled_p ())
1429 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1430 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1433 if (dt == vect_constant_def || dt == vect_external_def)
1435 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1436 tree vector_type;
1438 if (vectype)
1439 vector_type = vectype;
1440 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1441 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1442 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1443 else
1444 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1446 gcc_assert (vector_type);
1447 return vect_init_vector (stmt, op, vector_type, NULL);
1449 else
1450 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1454 /* Function vect_get_vec_def_for_stmt_copy
1456 Return a vector-def for an operand. This function is used when the
1457 vectorized stmt to be created (by the caller to this function) is a "copy"
1458 created in case the vectorized result cannot fit in one vector, and several
1459 copies of the vector-stmt are required. In this case the vector-def is
1460 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1461 of the stmt that defines VEC_OPRND.
1462 DT is the type of the vector def VEC_OPRND.
1464 Context:
1465 In case the vectorization factor (VF) is bigger than the number
1466 of elements that can fit in a vectype (nunits), we have to generate
1467 more than one vector stmt to vectorize the scalar stmt. This situation
1468 arises when there are multiple data-types operated upon in the loop; the
1469 smallest data-type determines the VF, and as a result, when vectorizing
1470 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1471 vector stmt (each computing a vector of 'nunits' results, and together
1472 computing 'VF' results in each iteration). This function is called when
1473 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1474 which VF=16 and nunits=4, so the number of copies required is 4):
1476 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1478 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1479 VS1.1: vx.1 = memref1 VS1.2
1480 VS1.2: vx.2 = memref2 VS1.3
1481 VS1.3: vx.3 = memref3
1483 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1484 VSnew.1: vz1 = vx.1 + ... VSnew.2
1485 VSnew.2: vz2 = vx.2 + ... VSnew.3
1486 VSnew.3: vz3 = vx.3 + ...
1488 The vectorization of S1 is explained in vectorizable_load.
1489 The vectorization of S2:
1490 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1491 the function 'vect_get_vec_def_for_operand' is called to
1492 get the relevant vector-def for each operand of S2. For operand x it
1493 returns the vector-def 'vx.0'.
1495 To create the remaining copies of the vector-stmt (VSnew.j), this
1496 function is called to get the relevant vector-def for each operand. It is
1497 obtained from the respective VS1.j stmt, which is recorded in the
1498 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1500 For example, to obtain the vector-def 'vx.1' in order to create the
1501 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1502 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1503 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1504 and return its def ('vx.1').
1505 Overall, to create the above sequence this function will be called 3 times:
1506 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1507 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1508 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1510 tree
1511 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1513 gimple *vec_stmt_for_operand;
1514 stmt_vec_info def_stmt_info;
1516 /* Do nothing; can reuse same def. */
1517 if (dt == vect_external_def || dt == vect_constant_def )
1518 return vec_oprnd;
1520 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1521 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1522 gcc_assert (def_stmt_info);
1523 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1524 gcc_assert (vec_stmt_for_operand);
1525 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1526 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1527 else
1528 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1529 return vec_oprnd;
1533 /* Get vectorized definitions for the operands to create a copy of an original
1534 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1536 void
1537 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1538 vec<tree> *vec_oprnds0,
1539 vec<tree> *vec_oprnds1)
1541 tree vec_oprnd = vec_oprnds0->pop ();
1543 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1544 vec_oprnds0->quick_push (vec_oprnd);
1546 if (vec_oprnds1 && vec_oprnds1->length ())
1548 vec_oprnd = vec_oprnds1->pop ();
1549 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1550 vec_oprnds1->quick_push (vec_oprnd);
1555 /* Get vectorized definitions for OP0 and OP1. */
1557 void
1558 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1559 vec<tree> *vec_oprnds0,
1560 vec<tree> *vec_oprnds1,
1561 slp_tree slp_node)
1563 if (slp_node)
1565 int nops = (op1 == NULL_TREE) ? 1 : 2;
1566 auto_vec<tree> ops (nops);
1567 auto_vec<vec<tree> > vec_defs (nops);
1569 ops.quick_push (op0);
1570 if (op1)
1571 ops.quick_push (op1);
1573 vect_get_slp_defs (ops, slp_node, &vec_defs);
1575 *vec_oprnds0 = vec_defs[0];
1576 if (op1)
1577 *vec_oprnds1 = vec_defs[1];
1579 else
1581 tree vec_oprnd;
1583 vec_oprnds0->create (1);
1584 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1585 vec_oprnds0->quick_push (vec_oprnd);
1587 if (op1)
1589 vec_oprnds1->create (1);
1590 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1591 vec_oprnds1->quick_push (vec_oprnd);
1597 /* Function vect_finish_stmt_generation.
1599 Insert a new stmt. */
1601 void
1602 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1603 gimple_stmt_iterator *gsi)
1605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1606 vec_info *vinfo = stmt_info->vinfo;
1608 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1610 if (!gsi_end_p (*gsi)
1611 && gimple_has_mem_ops (vec_stmt))
1613 gimple *at_stmt = gsi_stmt (*gsi);
1614 tree vuse = gimple_vuse (at_stmt);
1615 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1617 tree vdef = gimple_vdef (at_stmt);
1618 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1619 /* If we have an SSA vuse and insert a store, update virtual
1620 SSA form to avoid triggering the renamer. Do so only
1621 if we can easily see all uses - which is what almost always
1622 happens with the way vectorized stmts are inserted. */
1623 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1624 && ((is_gimple_assign (vec_stmt)
1625 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1626 || (is_gimple_call (vec_stmt)
1627 && !(gimple_call_flags (vec_stmt)
1628 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1630 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1631 gimple_set_vdef (vec_stmt, new_vdef);
1632 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1636 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1638 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1640 if (dump_enabled_p ())
1642 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1643 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1646 gimple_set_location (vec_stmt, gimple_location (stmt));
1648 /* While EH edges will generally prevent vectorization, stmt might
1649 e.g. be in a must-not-throw region. Ensure newly created stmts
1650 that could throw are part of the same region. */
1651 int lp_nr = lookup_stmt_eh_lp (stmt);
1652 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1653 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1656 /* We want to vectorize a call to combined function CFN with function
1657 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1658 as the types of all inputs. Check whether this is possible using
1659 an internal function, returning its code if so or IFN_LAST if not. */
1661 static internal_fn
1662 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1663 tree vectype_out, tree vectype_in)
1665 internal_fn ifn;
1666 if (internal_fn_p (cfn))
1667 ifn = as_internal_fn (cfn);
1668 else
1669 ifn = associated_internal_fn (fndecl);
1670 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1672 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1673 if (info.vectorizable)
1675 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1676 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1677 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1678 OPTIMIZE_FOR_SPEED))
1679 return ifn;
1682 return IFN_LAST;
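/* Editorial sketch (hedged, not part of the original source): for a call
   such as sqrtf (x), associated_internal_fn maps the builtin to IFN_SQRT;
   if the target implements the corresponding optab for the vector types,
   direct_internal_fn_supported_p returns true above and the call can be
   vectorized as an IFN_SQRT internal call.  */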
1686 static tree permute_vec_elements (tree, tree, tree, gimple *,
1687 gimple_stmt_iterator *);
1689 /* STMT is a non-strided load or store, meaning that it accesses
1690 elements with a known constant step. Return -1 if that step
1691 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1693 static int
1694 compare_step_with_zero (gimple *stmt)
1696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1697 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1698 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1699 size_zero_node);
1702 /* If the target supports a permute mask that reverses the elements in
1703 a vector of type VECTYPE, return that mask, otherwise return null. */
1705 static tree
1706 perm_mask_for_reverse (tree vectype)
1708 int i, nunits;
1709 unsigned char *sel;
1711 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1712 sel = XALLOCAVEC (unsigned char, nunits);
1714 for (i = 0; i < nunits; ++i)
1715 sel[i] = nunits - 1 - i;
1717 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1718 return NULL_TREE;
1719 return vect_gen_perm_mask_checked (vectype, sel);
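/* Editorial note (illustrative, not part of the original source): for a
   four-element vector the loop above builds the selector { 3, 2, 1, 0 },
   i.e. element I of the permuted vector is taken from element
   NUNITS - 1 - I of the input, which reverses the vector.  */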
1722 /* A subroutine of get_load_store_type, with a subset of the same
1723 arguments. Handle the case where STMT is part of a grouped load
1724 or store.
1726 For stores, the statements in the group are all consecutive
1727 and there is no gap at the end. For loads, the statements in the
1728 group might not be consecutive; there can be gaps between statements
1729 as well as at the end. */
1731 static bool
1732 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1733 vec_load_store_type vls_type,
1734 vect_memory_access_type *memory_access_type)
1736 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1737 vec_info *vinfo = stmt_info->vinfo;
1738 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1739 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1740 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1741 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1742 bool single_element_p = (stmt == first_stmt
1743 && !GROUP_NEXT_ELEMENT (stmt_info));
1744 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1745 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1747 /* True if the vectorized statements would access beyond the last
1748 statement in the group. */
1749 bool overrun_p = false;
1751 /* True if we can cope with such overrun by peeling for gaps, so that
1752 there is at least one final scalar iteration after the vector loop. */
1753 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1755 /* There can only be a gap at the end of the group if the stride is
1756 known at compile time. */
1757 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1759 /* Stores can't yet have gaps. */
1760 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1762 if (slp)
1764 if (STMT_VINFO_STRIDED_P (stmt_info))
1766 /* Try to use consecutive accesses of GROUP_SIZE elements,
1767 separated by the stride, until we have a complete vector.
1768 Fall back to scalar accesses if that isn't possible. */
1769 if (nunits % group_size == 0)
1770 *memory_access_type = VMAT_STRIDED_SLP;
1771 else
1772 *memory_access_type = VMAT_ELEMENTWISE;
1774 else
1776 overrun_p = loop_vinfo && gap != 0;
1777 if (overrun_p && vls_type != VLS_LOAD)
1779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1780 "Grouped store with gaps requires"
1781 " non-consecutive accesses\n");
1782 return false;
1784 /* If the access is aligned an overrun is fine. */
1785 if (overrun_p
1786 && aligned_access_p
1787 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1788 overrun_p = false;
1789 if (overrun_p && !can_overrun_p)
1791 if (dump_enabled_p ())
1792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1793 "Peeling for outer loop is not supported\n");
1794 return false;
1796 *memory_access_type = VMAT_CONTIGUOUS;
1799 else
1801 /* We can always handle this case using elementwise accesses,
1802 but see if something more efficient is available. */
1803 *memory_access_type = VMAT_ELEMENTWISE;
1805 /* If there is a gap at the end of the group then these optimizations
1806 would access excess elements in the last iteration. */
1807 bool would_overrun_p = (gap != 0);
1808 /* If the access is aligned an overrun is fine, but only if the
1809 overrun is not inside an unused vector (if the gap is as large
1810 or larger than a vector). */
1811 if (would_overrun_p
1812 && gap < nunits
1813 && aligned_access_p
1814 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1815 would_overrun_p = false;
1816 if (!STMT_VINFO_STRIDED_P (stmt_info)
1817 && (can_overrun_p || !would_overrun_p)
1818 && compare_step_with_zero (stmt) > 0)
1820 /* First try using LOAD/STORE_LANES. */
1821 if (vls_type == VLS_LOAD
1822 ? vect_load_lanes_supported (vectype, group_size)
1823 : vect_store_lanes_supported (vectype, group_size))
1825 *memory_access_type = VMAT_LOAD_STORE_LANES;
1826 overrun_p = would_overrun_p;
1829 /* If that fails, try using permuting loads. */
1830 if (*memory_access_type == VMAT_ELEMENTWISE
1831 && (vls_type == VLS_LOAD
1832 ? vect_grouped_load_supported (vectype, single_element_p,
1833 group_size)
1834 : vect_grouped_store_supported (vectype, group_size)))
1836 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1837 overrun_p = would_overrun_p;
1842 if (vls_type != VLS_LOAD && first_stmt == stmt)
1844 /* STMT is the leader of the group. Check the operands of all the
1845 stmts of the group. */
1846 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1847 while (next_stmt)
1849 gcc_assert (gimple_assign_single_p (next_stmt));
1850 tree op = gimple_assign_rhs1 (next_stmt);
1851 gimple *def_stmt;
1852 enum vect_def_type dt;
1853 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1855 if (dump_enabled_p ())
1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1857 "use not simple.\n");
1858 return false;
1860 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1864 if (overrun_p)
1866 gcc_assert (can_overrun_p);
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1869 "Data access with gaps requires scalar "
1870 "epilogue loop\n");
1871 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1874 return true;
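/* For illustration (made-up types): a grouped load that uses only the
   .x and .y fields of a three-field struct per iteration leaves a gap
   at the end of the group, so a contiguous vector load of the final
   group would also read the unused trailing field.  For loads such an
   overrun is tolerated above when the access is aligned or when the
   loop can peel for gaps (LOOP_VINFO_PEELING_FOR_GAPS); a grouped
   store with such a gap is not handled and is rejected.  */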
1877 /* A subroutine of get_load_store_type, with a subset of the same
1878 arguments. Handle the case where STMT is a load or store that
1879 accesses consecutive elements with a negative step. */
1881 static vect_memory_access_type
1882 get_negative_load_store_type (gimple *stmt, tree vectype,
1883 vec_load_store_type vls_type,
1884 unsigned int ncopies)
1886 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1887 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1888 dr_alignment_support alignment_support_scheme;
1890 if (ncopies > 1)
1892 if (dump_enabled_p ())
1893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1894 "multiple types with negative step.\n");
1895 return VMAT_ELEMENTWISE;
1898 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1899 if (alignment_support_scheme != dr_aligned
1900 && alignment_support_scheme != dr_unaligned_supported)
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1904 "negative step but alignment required.\n");
1905 return VMAT_ELEMENTWISE;
1908 if (vls_type == VLS_STORE_INVARIANT)
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_NOTE, vect_location,
1912 "negative step with invariant source;"
1913 " no permute needed.\n");
1914 return VMAT_CONTIGUOUS_DOWN;
1917 if (!perm_mask_for_reverse (vectype))
1919 if (dump_enabled_p ())
1920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1921 "negative step and reversing not supported.\n");
1922 return VMAT_ELEMENTWISE;
1925 return VMAT_CONTIGUOUS_REVERSE;
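/* For illustration (made-up access): a load from a[n - 1 - i] touches
   consecutive elements with a negative step.  With a single copy,
   adequate alignment and a target that can reverse a vector
   (perm_mask_for_reverse), it is classified VMAT_CONTIGUOUS_REVERSE:
   a contiguous load of the lower addresses followed by a reversing
   VEC_PERM_EXPR.  A store of an invariant value instead gets
   VMAT_CONTIGUOUS_DOWN, since no permute is needed.  */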
1928 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1929 if there is a memory access type that the vectorized form can use,
1930 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1931 or scatters, fill in GS_INFO accordingly.
1933 SLP says whether we're performing SLP rather than loop vectorization.
1934 VECTYPE is the vector type that the vectorized statements will use.
1935 NCOPIES is the number of vector statements that will be needed. */
1937 static bool
1938 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1939 vec_load_store_type vls_type, unsigned int ncopies,
1940 vect_memory_access_type *memory_access_type,
1941 gather_scatter_info *gs_info)
1943 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1944 vec_info *vinfo = stmt_info->vinfo;
1945 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1946 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1948 *memory_access_type = VMAT_GATHER_SCATTER;
1949 gimple *def_stmt;
1950 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1951 gcc_unreachable ();
1952 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1953 &gs_info->offset_dt,
1954 &gs_info->offset_vectype))
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1958 "%s index use not simple.\n",
1959 vls_type == VLS_LOAD ? "gather" : "scatter");
1960 return false;
1963 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1965 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1966 memory_access_type))
1967 return false;
1969 else if (STMT_VINFO_STRIDED_P (stmt_info))
1971 gcc_assert (!slp);
1972 *memory_access_type = VMAT_ELEMENTWISE;
1974 else
1976 int cmp = compare_step_with_zero (stmt);
1977 if (cmp < 0)
1978 *memory_access_type = get_negative_load_store_type
1979 (stmt, vectype, vls_type, ncopies);
1980 else if (cmp == 0)
1982 gcc_assert (vls_type == VLS_LOAD);
1983 *memory_access_type = VMAT_INVARIANT;
1985 else
1986 *memory_access_type = VMAT_CONTIGUOUS;
1989 /* FIXME: At the moment the cost model seems to underestimate the
1990 cost of using elementwise accesses. This check preserves the
1991 traditional behavior until that can be fixed. */
1992 if (*memory_access_type == VMAT_ELEMENTWISE
1993 && !STMT_VINFO_STRIDED_P (stmt_info))
1995 if (dump_enabled_p ())
1996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1997 "not falling back to elementwise accesses\n");
1998 return false;
2000 return true;
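/* For illustration (made-up accesses) of the non-grouped, non-strided
   classification above: a load from a[i] (positive step) becomes
   VMAT_CONTIGUOUS, a load whose address is loop-invariant (step zero)
   becomes VMAT_INVARIANT, and a load from a[n - i] (negative step) is
   handed to get_negative_load_store_type, which typically yields
   VMAT_CONTIGUOUS_REVERSE or falls back to VMAT_ELEMENTWISE.  */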
2003 /* Function vectorizable_mask_load_store.
2005 Check if STMT performs a conditional load or store that can be vectorized.
2006 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2007 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2010 static bool
2011 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2012 gimple **vec_stmt, slp_tree slp_node)
2014 tree vec_dest = NULL;
2015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2016 stmt_vec_info prev_stmt_info;
2017 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2018 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2019 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2020 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2022 tree rhs_vectype = NULL_TREE;
2023 tree mask_vectype;
2024 tree elem_type;
2025 gimple *new_stmt;
2026 tree dummy;
2027 tree dataref_ptr = NULL_TREE;
2028 gimple *ptr_incr;
2029 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2030 int ncopies;
2031 int i, j;
2032 bool inv_p;
2033 gather_scatter_info gs_info;
2034 vec_load_store_type vls_type;
2035 tree mask;
2036 gimple *def_stmt;
2037 enum vect_def_type dt;
2039 if (slp_node != NULL)
2040 return false;
2042 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2043 gcc_assert (ncopies >= 1);
2045 mask = gimple_call_arg (stmt, 2);
2047 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2048 return false;
2050 /* FORNOW. This restriction should be relaxed. */
2051 if (nested_in_vect_loop && ncopies > 1)
2053 if (dump_enabled_p ())
2054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2055 "multiple types in nested loop.");
2056 return false;
2059 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2060 return false;
2062 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2063 && ! vec_stmt)
2064 return false;
2066 if (!STMT_VINFO_DATA_REF (stmt_info))
2067 return false;
2069 elem_type = TREE_TYPE (vectype);
2071 if (TREE_CODE (mask) != SSA_NAME)
2072 return false;
2074 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2075 return false;
2077 if (!mask_vectype)
2078 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2080 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2081 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2082 return false;
2084 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2086 tree rhs = gimple_call_arg (stmt, 3);
2087 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2088 return false;
2089 if (dt == vect_constant_def || dt == vect_external_def)
2090 vls_type = VLS_STORE_INVARIANT;
2091 else
2092 vls_type = VLS_STORE;
2094 else
2095 vls_type = VLS_LOAD;
2097 vect_memory_access_type memory_access_type;
2098 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2099 &memory_access_type, &gs_info))
2100 return false;
2102 if (memory_access_type == VMAT_GATHER_SCATTER)
2104 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2105 tree masktype
2106 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2107 if (TREE_CODE (masktype) == INTEGER_TYPE)
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2111 "masked gather with integer mask not supported.");
2112 return false;
2115 else if (memory_access_type != VMAT_CONTIGUOUS)
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2119 "unsupported access type for masked %s.\n",
2120 vls_type == VLS_LOAD ? "load" : "store");
2121 return false;
2123 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2124 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2125 TYPE_MODE (mask_vectype),
2126 vls_type == VLS_LOAD)
2127 || (rhs_vectype
2128 && !useless_type_conversion_p (vectype, rhs_vectype)))
2129 return false;
2131 if (!vec_stmt) /* transformation not required. */
2133 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2134 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2135 if (vls_type == VLS_LOAD)
2136 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2137 NULL, NULL, NULL);
2138 else
2139 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2140 dt, NULL, NULL, NULL);
2141 return true;
2143 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2145 /* Transform. */
2147 if (memory_access_type == VMAT_GATHER_SCATTER)
2149 tree vec_oprnd0 = NULL_TREE, op;
2150 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2151 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2152 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2153 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2154 tree mask_perm_mask = NULL_TREE;
2155 edge pe = loop_preheader_edge (loop);
2156 gimple_seq seq;
2157 basic_block new_bb;
2158 enum { NARROW, NONE, WIDEN } modifier;
2159 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2161 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2162 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2163 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2164 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2165 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2166 scaletype = TREE_VALUE (arglist);
2167 gcc_checking_assert (types_compatible_p (srctype, rettype)
2168 && types_compatible_p (srctype, masktype));
2170 if (nunits == gather_off_nunits)
2171 modifier = NONE;
2172 else if (nunits == gather_off_nunits / 2)
2174 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2175 modifier = WIDEN;
2177 for (i = 0; i < gather_off_nunits; ++i)
2178 sel[i] = i | nunits;
2180 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2182 else if (nunits == gather_off_nunits * 2)
2184 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2185 modifier = NARROW;
2187 for (i = 0; i < nunits; ++i)
2188 sel[i] = i < gather_off_nunits
2189 ? i : i + nunits - gather_off_nunits;
2191 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2192 ncopies *= 2;
2193 for (i = 0; i < nunits; ++i)
2194 sel[i] = i | gather_off_nunits;
2195 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2197 else
2198 gcc_unreachable ();
2200 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2202 ptr = fold_convert (ptrtype, gs_info.base);
2203 if (!is_gimple_min_invariant (ptr))
2205 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2206 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2207 gcc_assert (!new_bb);
2210 scale = build_int_cst (scaletype, gs_info.scale);
2212 prev_stmt_info = NULL;
2213 for (j = 0; j < ncopies; ++j)
2215 if (modifier == WIDEN && (j & 1))
2216 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2217 perm_mask, stmt, gsi);
2218 else if (j == 0)
2219 op = vec_oprnd0
2220 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2221 else
2222 op = vec_oprnd0
2223 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2225 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2227 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2228 == TYPE_VECTOR_SUBPARTS (idxtype));
2229 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2230 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2231 new_stmt
2232 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2233 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2234 op = var;
2237 if (mask_perm_mask && (j & 1))
2238 mask_op = permute_vec_elements (mask_op, mask_op,
2239 mask_perm_mask, stmt, gsi);
2240 else
2242 if (j == 0)
2243 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2244 else
2246 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2247 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2250 mask_op = vec_mask;
2251 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2253 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2254 == TYPE_VECTOR_SUBPARTS (masktype));
2255 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2256 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2257 new_stmt
2258 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2260 mask_op = var;
2264 new_stmt
2265 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2266 scale);
2268 if (!useless_type_conversion_p (vectype, rettype))
2270 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2271 == TYPE_VECTOR_SUBPARTS (rettype));
2272 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2273 gimple_call_set_lhs (new_stmt, op);
2274 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2275 var = make_ssa_name (vec_dest);
2276 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2277 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2279 else
2281 var = make_ssa_name (vec_dest, new_stmt);
2282 gimple_call_set_lhs (new_stmt, var);
2285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2287 if (modifier == NARROW)
2289 if ((j & 1) == 0)
2291 prev_res = var;
2292 continue;
2294 var = permute_vec_elements (prev_res, var,
2295 perm_mask, stmt, gsi);
2296 new_stmt = SSA_NAME_DEF_STMT (var);
2299 if (prev_stmt_info == NULL)
2300 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2301 else
2302 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2303 prev_stmt_info = vinfo_for_stmt (new_stmt);
2306 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2307 from the IL. */
2308 if (STMT_VINFO_RELATED_STMT (stmt_info))
2310 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2311 stmt_info = vinfo_for_stmt (stmt);
2313 tree lhs = gimple_call_lhs (stmt);
2314 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2315 set_vinfo_for_stmt (new_stmt, stmt_info);
2316 set_vinfo_for_stmt (stmt, NULL);
2317 STMT_VINFO_STMT (stmt_info) = new_stmt;
2318 gsi_replace (gsi, new_stmt, true);
2319 return true;
2321 else if (vls_type != VLS_LOAD)
2323 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2324 prev_stmt_info = NULL;
2325 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2326 for (i = 0; i < ncopies; i++)
2328 unsigned align, misalign;
2330 if (i == 0)
2332 tree rhs = gimple_call_arg (stmt, 3);
2333 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2334 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2335 /* We should have caught mismatched types earlier. */
2336 gcc_assert (useless_type_conversion_p (vectype,
2337 TREE_TYPE (vec_rhs)));
2338 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2339 NULL_TREE, &dummy, gsi,
2340 &ptr_incr, false, &inv_p);
2341 gcc_assert (!inv_p);
2343 else
2345 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2346 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2347 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2348 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2349 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2350 TYPE_SIZE_UNIT (vectype));
2353 align = TYPE_ALIGN_UNIT (vectype);
2354 if (aligned_access_p (dr))
2355 misalign = 0;
2356 else if (DR_MISALIGNMENT (dr) == -1)
2358 align = TYPE_ALIGN_UNIT (elem_type);
2359 misalign = 0;
2361 else
2362 misalign = DR_MISALIGNMENT (dr);
2363 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2364 misalign);
2365 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2366 misalign ? least_bit_hwi (misalign) : align);
2367 new_stmt
2368 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2369 ptr, vec_mask, vec_rhs);
2370 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2371 if (i == 0)
2372 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2373 else
2374 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2375 prev_stmt_info = vinfo_for_stmt (new_stmt);
2378 else
2380 tree vec_mask = NULL_TREE;
2381 prev_stmt_info = NULL;
2382 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2383 for (i = 0; i < ncopies; i++)
2385 unsigned align, misalign;
2387 if (i == 0)
2389 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2390 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2391 NULL_TREE, &dummy, gsi,
2392 &ptr_incr, false, &inv_p);
2393 gcc_assert (!inv_p);
2395 else
2397 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2398 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2399 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2400 TYPE_SIZE_UNIT (vectype));
2403 align = TYPE_ALIGN_UNIT (vectype);
2404 if (aligned_access_p (dr))
2405 misalign = 0;
2406 else if (DR_MISALIGNMENT (dr) == -1)
2408 align = TYPE_ALIGN_UNIT (elem_type);
2409 misalign = 0;
2411 else
2412 misalign = DR_MISALIGNMENT (dr);
2413 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2414 misalign);
2415 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2416 misalign ? least_bit_hwi (misalign) : align);
2417 new_stmt
2418 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2419 ptr, vec_mask);
2420 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2422 if (i == 0)
2423 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2424 else
2425 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2426 prev_stmt_info = vinfo_for_stmt (new_stmt);
2430 if (vls_type == VLS_LOAD)
2432 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2433 from the IL. */
2434 if (STMT_VINFO_RELATED_STMT (stmt_info))
2436 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2437 stmt_info = vinfo_for_stmt (stmt);
2439 tree lhs = gimple_call_lhs (stmt);
2440 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2441 set_vinfo_for_stmt (new_stmt, stmt_info);
2442 set_vinfo_for_stmt (stmt, NULL);
2443 STMT_VINFO_STMT (stmt_info) = new_stmt;
2444 gsi_replace (gsi, new_stmt, true);
2447 return true;
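/* As a sketch of the contiguous case above (made-up GIMPLE names):
   a scalar conditional load

     _1 = MASK_LOAD (&a[i], align, mask_2);

   is replaced, per vector copy, by

     vect_1 = MASK_LOAD (vectp_a, align, vect_mask);

   with the data pointer bumped by the vector size between copies;
   the scalar call is finally turned into a harmless zero assignment
   so that it disappears from the IL even with -fno-tree-dce.  */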
2450 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2452 static bool
2453 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2454 gimple **vec_stmt, slp_tree slp_node,
2455 tree vectype_in, enum vect_def_type *dt)
2457 tree op, vectype;
2458 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2459 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2460 unsigned ncopies, nunits;
2462 op = gimple_call_arg (stmt, 0);
2463 vectype = STMT_VINFO_VECTYPE (stmt_info);
2464 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2466 /* Multiple types in SLP are handled by creating the appropriate number of
2467 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2468 case of SLP. */
2469 if (slp_node)
2470 ncopies = 1;
2471 else
2472 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2474 gcc_assert (ncopies >= 1);
2476 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2477 if (! char_vectype)
2478 return false;
2480 unsigned char *elts
2481 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2482 unsigned char *elt = elts;
2483 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2484 for (unsigned i = 0; i < nunits; ++i)
2485 for (unsigned j = 0; j < word_bytes; ++j)
2486 *elt++ = (i + 1) * word_bytes - j - 1;
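  /* For example, for a bswap32 on a V4SI vector (so CHAR_VECTYPE is
     V16QI, NUNITS is 4 and WORD_BYTES is 4) the selector is
     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 },
     i.e. a byte reversal within each 32-bit word.  */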
2488 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2489 return false;
2491 if (! vec_stmt)
2493 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2496 "\n");
2497 if (! PURE_SLP_STMT (stmt_info))
2499 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2500 1, vector_stmt, stmt_info, 0, vect_prologue);
2501 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2502 ncopies, vec_perm, stmt_info, 0, vect_body);
2504 return true;
2507 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2508 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2509 telts[i] = build_int_cst (char_type_node, elts[i]);
2510 tree bswap_vconst = build_vector (char_vectype, telts);
2512 /* Transform. */
2513 vec<tree> vec_oprnds = vNULL;
2514 gimple *new_stmt = NULL;
2515 stmt_vec_info prev_stmt_info = NULL;
2516 for (unsigned j = 0; j < ncopies; j++)
2518 /* Handle uses. */
2519 if (j == 0)
2520 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2521 else
2522 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2524 /* Arguments are ready. Create the new vector stmt. */
2525 unsigned i;
2526 tree vop;
2527 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2529 tree tem = make_ssa_name (char_vectype);
2530 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2531 char_vectype, vop));
2532 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2533 tree tem2 = make_ssa_name (char_vectype);
2534 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2535 tem, tem, bswap_vconst);
2536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2537 tem = make_ssa_name (vectype);
2538 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2539 vectype, tem2));
2540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2541 if (slp_node)
2542 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2545 if (slp_node)
2546 continue;
2548 if (j == 0)
2549 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2550 else
2551 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2553 prev_stmt_info = vinfo_for_stmt (new_stmt);
2556 vec_oprnds.release ();
2557 return true;
2560 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2561 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2562 in a single step. On success, store the binary pack code in
2563 *CONVERT_CODE. */
2565 static bool
2566 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2567 tree_code *convert_code)
2569 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2570 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2571 return false;
2573 tree_code code;
2574 int multi_step_cvt = 0;
2575 auto_vec <tree, 8> interm_types;
2576 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2577 &code, &multi_step_cvt,
2578 &interm_types)
2579 || multi_step_cvt)
2580 return false;
2582 *convert_code = code;
2583 return true;
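/* For example, narrowing a vector of 64-bit integers to a vector of
   32-bit integers can usually be done in one step, in which case
   supportable_narrowing_operation reports a single pack operation
   (such as VEC_PACK_TRUNC_EXPR) with MULTI_STEP_CVT == 0 and this
   function succeeds; narrowing 64-bit to 8-bit elements needs several
   steps and is rejected here.  */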
2586 /* Function vectorizable_call.
2588 Check if GS performs a function call that can be vectorized.
2589 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2590 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2593 static bool
2594 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2595 slp_tree slp_node)
2597 gcall *stmt;
2598 tree vec_dest;
2599 tree scalar_dest;
2600 tree op, type;
2601 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2602 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2603 tree vectype_out, vectype_in;
2604 int nunits_in;
2605 int nunits_out;
2606 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2607 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2608 vec_info *vinfo = stmt_info->vinfo;
2609 tree fndecl, new_temp, rhs_type;
2610 gimple *def_stmt;
2611 enum vect_def_type dt[3]
2612 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2613 int ndts = 3;
2614 gimple *new_stmt = NULL;
2615 int ncopies, j;
2616 vec<tree> vargs = vNULL;
2617 enum { NARROW, NONE, WIDEN } modifier;
2618 size_t i, nargs;
2619 tree lhs;
2621 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2622 return false;
2624 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2625 && ! vec_stmt)
2626 return false;
2628 /* Is GS a vectorizable call? */
2629 stmt = dyn_cast <gcall *> (gs);
2630 if (!stmt)
2631 return false;
2633 if (gimple_call_internal_p (stmt)
2634 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2635 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2636 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2637 slp_node);
2639 if (gimple_call_lhs (stmt) == NULL_TREE
2640 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2641 return false;
2643 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2645 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2647 /* Process function arguments. */
2648 rhs_type = NULL_TREE;
2649 vectype_in = NULL_TREE;
2650 nargs = gimple_call_num_args (stmt);
2652 /* Bail out if the function has more than three arguments; we do not have
2653 interesting builtin functions to vectorize with more than two arguments
2654 except for fma. Calls with no arguments are not vectorizable either. */
2655 if (nargs == 0 || nargs > 3)
2656 return false;
2658 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2659 if (gimple_call_internal_p (stmt)
2660 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2662 nargs = 0;
2663 rhs_type = unsigned_type_node;
2666 for (i = 0; i < nargs; i++)
2668 tree opvectype;
2670 op = gimple_call_arg (stmt, i);
2672 /* We can only handle calls with arguments of the same type. */
2673 if (rhs_type
2674 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2676 if (dump_enabled_p ())
2677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2678 "argument types differ.\n");
2679 return false;
2681 if (!rhs_type)
2682 rhs_type = TREE_TYPE (op);
2684 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2686 if (dump_enabled_p ())
2687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2688 "use not simple.\n");
2689 return false;
2692 if (!vectype_in)
2693 vectype_in = opvectype;
2694 else if (opvectype
2695 && opvectype != vectype_in)
2697 if (dump_enabled_p ())
2698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2699 "argument vector types differ.\n");
2700 return false;
2703 /* If all arguments are external or constant defs use a vector type with
2704 the same size as the output vector type. */
2705 if (!vectype_in)
2706 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2707 if (vec_stmt)
2708 gcc_assert (vectype_in);
2709 if (!vectype_in)
2711 if (dump_enabled_p ())
2713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2714 "no vectype for scalar type ");
2715 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2716 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2719 return false;
2722 /* FORNOW */
2723 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2724 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2725 if (nunits_in == nunits_out / 2)
2726 modifier = NARROW;
2727 else if (nunits_out == nunits_in)
2728 modifier = NONE;
2729 else if (nunits_out == nunits_in / 2)
2730 modifier = WIDEN;
2731 else
2732 return false;
2734 /* We only handle functions that do not read or clobber memory. */
2735 if (gimple_vuse (stmt))
2737 if (dump_enabled_p ())
2738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2739 "function reads from or writes to memory.\n");
2740 return false;
2743 /* For now, we only vectorize functions if a target specific builtin
2744 is available. TODO -- in some cases, it might be profitable to
2745 insert the calls for pieces of the vector, in order to be able
2746 to vectorize other operations in the loop. */
2747 fndecl = NULL_TREE;
2748 internal_fn ifn = IFN_LAST;
2749 combined_fn cfn = gimple_call_combined_fn (stmt);
2750 tree callee = gimple_call_fndecl (stmt);
2752 /* First try using an internal function. */
2753 tree_code convert_code = ERROR_MARK;
2754 if (cfn != CFN_LAST
2755 && (modifier == NONE
2756 || (modifier == NARROW
2757 && simple_integer_narrowing (vectype_out, vectype_in,
2758 &convert_code))))
2759 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2760 vectype_in);
2762 /* If that fails, try asking for a target-specific built-in function. */
2763 if (ifn == IFN_LAST)
2765 if (cfn != CFN_LAST)
2766 fndecl = targetm.vectorize.builtin_vectorized_function
2767 (cfn, vectype_out, vectype_in);
2768 else
2769 fndecl = targetm.vectorize.builtin_md_vectorized_function
2770 (callee, vectype_out, vectype_in);
2773 if (ifn == IFN_LAST && !fndecl)
2775 if (cfn == CFN_GOMP_SIMD_LANE
2776 && !slp_node
2777 && loop_vinfo
2778 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2779 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2780 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2781 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2783 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2784 { 0, 1, 2, ... vf - 1 } vector. */
2785 gcc_assert (nargs == 0);
2787 else if (modifier == NONE
2788 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2789 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2790 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2791 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2792 vectype_in, dt);
2793 else
2795 if (dump_enabled_p ())
2796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2797 "function is not vectorizable.\n");
2798 return false;
2802 if (slp_node)
2803 ncopies = 1;
2804 else if (modifier == NARROW && ifn == IFN_LAST)
2805 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2806 else
2807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2809 /* Sanity check: make sure that at least one copy of the vectorized stmt
2810 needs to be generated. */
2811 gcc_assert (ncopies >= 1);
2813 if (!vec_stmt) /* transformation not required. */
2815 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2816 if (dump_enabled_p ())
2817 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2818 "\n");
2819 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2820 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2821 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2822 vec_promote_demote, stmt_info, 0, vect_body);
2824 return true;
2827 /* Transform. */
2829 if (dump_enabled_p ())
2830 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2832 /* Handle def. */
2833 scalar_dest = gimple_call_lhs (stmt);
2834 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2836 prev_stmt_info = NULL;
2837 if (modifier == NONE || ifn != IFN_LAST)
2839 tree prev_res = NULL_TREE;
2840 for (j = 0; j < ncopies; ++j)
2842 /* Build argument list for the vectorized call. */
2843 if (j == 0)
2844 vargs.create (nargs);
2845 else
2846 vargs.truncate (0);
2848 if (slp_node)
2850 auto_vec<vec<tree> > vec_defs (nargs);
2851 vec<tree> vec_oprnds0;
2853 for (i = 0; i < nargs; i++)
2854 vargs.quick_push (gimple_call_arg (stmt, i));
2855 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2856 vec_oprnds0 = vec_defs[0];
2858 /* Arguments are ready. Create the new vector stmt. */
2859 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2861 size_t k;
2862 for (k = 0; k < nargs; k++)
2864 vec<tree> vec_oprndsk = vec_defs[k];
2865 vargs[k] = vec_oprndsk[i];
2867 if (modifier == NARROW)
2869 tree half_res = make_ssa_name (vectype_in);
2870 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2871 gimple_call_set_lhs (new_stmt, half_res);
2872 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2873 if ((i & 1) == 0)
2875 prev_res = half_res;
2876 continue;
2878 new_temp = make_ssa_name (vec_dest);
2879 new_stmt = gimple_build_assign (new_temp, convert_code,
2880 prev_res, half_res);
2882 else
2884 if (ifn != IFN_LAST)
2885 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2886 else
2887 new_stmt = gimple_build_call_vec (fndecl, vargs);
2888 new_temp = make_ssa_name (vec_dest, new_stmt);
2889 gimple_call_set_lhs (new_stmt, new_temp);
2891 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2892 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2895 for (i = 0; i < nargs; i++)
2897 vec<tree> vec_oprndsi = vec_defs[i];
2898 vec_oprndsi.release ();
2900 continue;
2903 for (i = 0; i < nargs; i++)
2905 op = gimple_call_arg (stmt, i);
2906 if (j == 0)
2907 vec_oprnd0
2908 = vect_get_vec_def_for_operand (op, stmt);
2909 else
2911 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2912 vec_oprnd0
2913 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2916 vargs.quick_push (vec_oprnd0);
2919 if (gimple_call_internal_p (stmt)
2920 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2922 tree *v = XALLOCAVEC (tree, nunits_out);
2923 int k;
2924 for (k = 0; k < nunits_out; ++k)
2925 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2926 tree cst = build_vector (vectype_out, v);
2927 tree new_var
2928 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2929 gimple *init_stmt = gimple_build_assign (new_var, cst);
2930 vect_init_vector_1 (stmt, init_stmt, NULL);
2931 new_temp = make_ssa_name (vec_dest);
2932 new_stmt = gimple_build_assign (new_temp, new_var);
2934 else if (modifier == NARROW)
2936 tree half_res = make_ssa_name (vectype_in);
2937 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2938 gimple_call_set_lhs (new_stmt, half_res);
2939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2940 if ((j & 1) == 0)
2942 prev_res = half_res;
2943 continue;
2945 new_temp = make_ssa_name (vec_dest);
2946 new_stmt = gimple_build_assign (new_temp, convert_code,
2947 prev_res, half_res);
2949 else
2951 if (ifn != IFN_LAST)
2952 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2953 else
2954 new_stmt = gimple_build_call_vec (fndecl, vargs);
2955 new_temp = make_ssa_name (vec_dest, new_stmt);
2956 gimple_call_set_lhs (new_stmt, new_temp);
2958 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2960 if (j == (modifier == NARROW ? 1 : 0))
2961 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2962 else
2963 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2965 prev_stmt_info = vinfo_for_stmt (new_stmt);
2968 else if (modifier == NARROW)
2970 for (j = 0; j < ncopies; ++j)
2972 /* Build argument list for the vectorized call. */
2973 if (j == 0)
2974 vargs.create (nargs * 2);
2975 else
2976 vargs.truncate (0);
2978 if (slp_node)
2980 auto_vec<vec<tree> > vec_defs (nargs);
2981 vec<tree> vec_oprnds0;
2983 for (i = 0; i < nargs; i++)
2984 vargs.quick_push (gimple_call_arg (stmt, i));
2985 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2986 vec_oprnds0 = vec_defs[0];
2988 /* Arguments are ready. Create the new vector stmt. */
2989 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2991 size_t k;
2992 vargs.truncate (0);
2993 for (k = 0; k < nargs; k++)
2995 vec<tree> vec_oprndsk = vec_defs[k];
2996 vargs.quick_push (vec_oprndsk[i]);
2997 vargs.quick_push (vec_oprndsk[i + 1]);
2999 if (ifn != IFN_LAST)
3000 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
3001 else
3002 new_stmt = gimple_build_call_vec (fndecl, vargs);
3003 new_temp = make_ssa_name (vec_dest, new_stmt);
3004 gimple_call_set_lhs (new_stmt, new_temp);
3005 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3006 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3009 for (i = 0; i < nargs; i++)
3011 vec<tree> vec_oprndsi = vec_defs[i];
3012 vec_oprndsi.release ();
3014 continue;
3017 for (i = 0; i < nargs; i++)
3019 op = gimple_call_arg (stmt, i);
3020 if (j == 0)
3022 vec_oprnd0
3023 = vect_get_vec_def_for_operand (op, stmt);
3024 vec_oprnd1
3025 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3027 else
3029 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3030 vec_oprnd0
3031 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3032 vec_oprnd1
3033 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3036 vargs.quick_push (vec_oprnd0);
3037 vargs.quick_push (vec_oprnd1);
3040 new_stmt = gimple_build_call_vec (fndecl, vargs);
3041 new_temp = make_ssa_name (vec_dest, new_stmt);
3042 gimple_call_set_lhs (new_stmt, new_temp);
3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3045 if (j == 0)
3046 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3047 else
3048 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3050 prev_stmt_info = vinfo_for_stmt (new_stmt);
3053 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3055 else
3056 /* No current target implements this case. */
3057 return false;
3059 vargs.release ();
3061 /* The call in STMT might prevent it from being removed in DCE.
3062 We cannot remove it here, however, due to the way the SSA name
3063 it defines is mapped to the new definition. So just replace
3064 the RHS of the statement with something harmless. */
3066 if (slp_node)
3067 return true;
3069 type = TREE_TYPE (scalar_dest);
3070 if (is_pattern_stmt_p (stmt_info))
3071 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3072 else
3073 lhs = gimple_call_lhs (stmt);
3075 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3076 set_vinfo_for_stmt (new_stmt, stmt_info);
3077 set_vinfo_for_stmt (stmt, NULL);
3078 STMT_VINFO_STMT (stmt_info) = new_stmt;
3079 gsi_replace (gsi, new_stmt, false);
3081 return true;
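/* As an illustration of the above (one possible outcome, depending on
   target support): a call to sqrtf in the loop body maps to the
   combined function CFN_SQRT; if the target supports a vector square
   root for VECTYPE_OUT, the scalar call is replaced by an IFN_SQRT
   internal call on the vector operands, otherwise by the target
   builtin returned by builtin_vectorized_function (if any).  In both
   cases the scalar call's LHS is then assigned zero, as above, so
   that the dead call can be cleaned up later.  */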
3085 struct simd_call_arg_info
3087 tree vectype;
3088 tree op;
3089 HOST_WIDE_INT linear_step;
3090 enum vect_def_type dt;
3091 unsigned int align;
3092 bool simd_lane_linear;
3095 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3096 is linear within simd lane (but not within whole loop), note it in
3097 *ARGINFO. */
3099 static void
3100 vect_simd_lane_linear (tree op, struct loop *loop,
3101 struct simd_call_arg_info *arginfo)
3103 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3105 if (!is_gimple_assign (def_stmt)
3106 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3107 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3108 return;
3110 tree base = gimple_assign_rhs1 (def_stmt);
3111 HOST_WIDE_INT linear_step = 0;
3112 tree v = gimple_assign_rhs2 (def_stmt);
3113 while (TREE_CODE (v) == SSA_NAME)
3115 tree t;
3116 def_stmt = SSA_NAME_DEF_STMT (v);
3117 if (is_gimple_assign (def_stmt))
3118 switch (gimple_assign_rhs_code (def_stmt))
3120 case PLUS_EXPR:
3121 t = gimple_assign_rhs2 (def_stmt);
3122 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3123 return;
3124 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3125 v = gimple_assign_rhs1 (def_stmt);
3126 continue;
3127 case MULT_EXPR:
3128 t = gimple_assign_rhs2 (def_stmt);
3129 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3130 return;
3131 linear_step = tree_to_shwi (t);
3132 v = gimple_assign_rhs1 (def_stmt);
3133 continue;
3134 CASE_CONVERT:
3135 t = gimple_assign_rhs1 (def_stmt);
3136 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3137 || (TYPE_PRECISION (TREE_TYPE (v))
3138 < TYPE_PRECISION (TREE_TYPE (t))))
3139 return;
3140 if (!linear_step)
3141 linear_step = 1;
3142 v = t;
3143 continue;
3144 default:
3145 return;
3147 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3148 && loop->simduid
3149 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3150 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3151 == loop->simduid))
3153 if (!linear_step)
3154 linear_step = 1;
3155 arginfo->linear_step = linear_step;
3156 arginfo->op = base;
3157 arginfo->simd_lane_linear = true;
3158 return;
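/* For illustration (made-up SSA names): given

     _3 = <IFN_GOMP_SIMD_LANE call on the loop's simduid>;
     _4 = _3 * 8;
     p_5 = &a + _4;

   a call with OP == p_5 walks back to base &a with linear_step 8,
   records them in *ARGINFO and sets simd_lane_linear.  */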
3163 /* Function vectorizable_simd_clone_call.
3165 Check if STMT performs a function call that can be vectorized
3166 by calling a simd clone of the function.
3167 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3168 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3169 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3171 static bool
3172 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3173 gimple **vec_stmt, slp_tree slp_node)
3175 tree vec_dest;
3176 tree scalar_dest;
3177 tree op, type;
3178 tree vec_oprnd0 = NULL_TREE;
3179 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3180 tree vectype;
3181 unsigned int nunits;
3182 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3183 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3184 vec_info *vinfo = stmt_info->vinfo;
3185 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3186 tree fndecl, new_temp;
3187 gimple *def_stmt;
3188 gimple *new_stmt = NULL;
3189 int ncopies, j;
3190 auto_vec<simd_call_arg_info> arginfo;
3191 vec<tree> vargs = vNULL;
3192 size_t i, nargs;
3193 tree lhs, rtype, ratype;
3194 vec<constructor_elt, va_gc> *ret_ctor_elts;
3196 /* Is STMT a vectorizable call? */
3197 if (!is_gimple_call (stmt))
3198 return false;
3200 fndecl = gimple_call_fndecl (stmt);
3201 if (fndecl == NULL_TREE)
3202 return false;
3204 struct cgraph_node *node = cgraph_node::get (fndecl);
3205 if (node == NULL || node->simd_clones == NULL)
3206 return false;
3208 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3209 return false;
3211 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3212 && ! vec_stmt)
3213 return false;
3215 if (gimple_call_lhs (stmt)
3216 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3217 return false;
3219 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3221 vectype = STMT_VINFO_VECTYPE (stmt_info);
3223 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3224 return false;
3226 /* FORNOW */
3227 if (slp_node)
3228 return false;
3230 /* Process function arguments. */
3231 nargs = gimple_call_num_args (stmt);
3233 /* Bail out if the function has zero arguments. */
3234 if (nargs == 0)
3235 return false;
3237 arginfo.reserve (nargs, true);
3239 for (i = 0; i < nargs; i++)
3241 simd_call_arg_info thisarginfo;
3242 affine_iv iv;
3244 thisarginfo.linear_step = 0;
3245 thisarginfo.align = 0;
3246 thisarginfo.op = NULL_TREE;
3247 thisarginfo.simd_lane_linear = false;
3249 op = gimple_call_arg (stmt, i);
3250 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3251 &thisarginfo.vectype)
3252 || thisarginfo.dt == vect_uninitialized_def)
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3256 "use not simple.\n");
3257 return false;
3260 if (thisarginfo.dt == vect_constant_def
3261 || thisarginfo.dt == vect_external_def)
3262 gcc_assert (thisarginfo.vectype == NULL_TREE);
3263 else
3264 gcc_assert (thisarginfo.vectype != NULL_TREE);
3266 /* For linear arguments, the analyze phase should have saved
3267 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3268 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3269 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3271 gcc_assert (vec_stmt);
3272 thisarginfo.linear_step
3273 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3274 thisarginfo.op
3275 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3276 thisarginfo.simd_lane_linear
3277 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3278 == boolean_true_node);
3279 /* If the loop has been peeled for alignment, we need to adjust it. */
3280 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3281 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3282 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3284 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3285 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3286 tree opt = TREE_TYPE (thisarginfo.op);
3287 bias = fold_convert (TREE_TYPE (step), bias);
3288 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3289 thisarginfo.op
3290 = fold_build2 (POINTER_TYPE_P (opt)
3291 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3292 thisarginfo.op, bias);
3295 else if (!vec_stmt
3296 && thisarginfo.dt != vect_constant_def
3297 && thisarginfo.dt != vect_external_def
3298 && loop_vinfo
3299 && TREE_CODE (op) == SSA_NAME
3300 && simple_iv (loop, loop_containing_stmt (stmt), op,
3301 &iv, false)
3302 && tree_fits_shwi_p (iv.step))
3304 thisarginfo.linear_step = tree_to_shwi (iv.step);
3305 thisarginfo.op = iv.base;
3307 else if ((thisarginfo.dt == vect_constant_def
3308 || thisarginfo.dt == vect_external_def)
3309 && POINTER_TYPE_P (TREE_TYPE (op)))
3310 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3311 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3312 linear too. */
3313 if (POINTER_TYPE_P (TREE_TYPE (op))
3314 && !thisarginfo.linear_step
3315 && !vec_stmt
3316 && thisarginfo.dt != vect_constant_def
3317 && thisarginfo.dt != vect_external_def
3318 && loop_vinfo
3319 && !slp_node
3320 && TREE_CODE (op) == SSA_NAME)
3321 vect_simd_lane_linear (op, loop, &thisarginfo);
3323 arginfo.quick_push (thisarginfo);
3326 unsigned int badness = 0;
3327 struct cgraph_node *bestn = NULL;
3328 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3329 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3330 else
3331 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3332 n = n->simdclone->next_clone)
3334 unsigned int this_badness = 0;
3335 if (n->simdclone->simdlen
3336 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3337 || n->simdclone->nargs != nargs)
3338 continue;
3339 if (n->simdclone->simdlen
3340 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3341 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3342 - exact_log2 (n->simdclone->simdlen)) * 1024;
3343 if (n->simdclone->inbranch)
3344 this_badness += 2048;
3345 int target_badness = targetm.simd_clone.usable (n);
3346 if (target_badness < 0)
3347 continue;
3348 this_badness += target_badness * 512;
3349 /* FORNOW: Have to add code to add the mask argument. */
3350 if (n->simdclone->inbranch)
3351 continue;
3352 for (i = 0; i < nargs; i++)
3354 switch (n->simdclone->args[i].arg_type)
3356 case SIMD_CLONE_ARG_TYPE_VECTOR:
3357 if (!useless_type_conversion_p
3358 (n->simdclone->args[i].orig_type,
3359 TREE_TYPE (gimple_call_arg (stmt, i))))
3360 i = -1;
3361 else if (arginfo[i].dt == vect_constant_def
3362 || arginfo[i].dt == vect_external_def
3363 || arginfo[i].linear_step)
3364 this_badness += 64;
3365 break;
3366 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3367 if (arginfo[i].dt != vect_constant_def
3368 && arginfo[i].dt != vect_external_def)
3369 i = -1;
3370 break;
3371 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3372 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3373 if (arginfo[i].dt == vect_constant_def
3374 || arginfo[i].dt == vect_external_def
3375 || (arginfo[i].linear_step
3376 != n->simdclone->args[i].linear_step))
3377 i = -1;
3378 break;
3379 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3380 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3381 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3382 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3383 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3384 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3385 /* FORNOW */
3386 i = -1;
3387 break;
3388 case SIMD_CLONE_ARG_TYPE_MASK:
3389 gcc_unreachable ();
3391 if (i == (size_t) -1)
3392 break;
3393 if (n->simdclone->args[i].alignment > arginfo[i].align)
3395 i = -1;
3396 break;
3398 if (arginfo[i].align)
3399 this_badness += (exact_log2 (arginfo[i].align)
3400 - exact_log2 (n->simdclone->args[i].alignment));
3402 if (i == (size_t) -1)
3403 continue;
3404 if (bestn == NULL || this_badness < badness)
3406 bestn = n;
3407 badness = this_badness;
3411 if (bestn == NULL)
3412 return false;
3414 for (i = 0; i < nargs; i++)
3415 if ((arginfo[i].dt == vect_constant_def
3416 || arginfo[i].dt == vect_external_def)
3417 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3419 arginfo[i].vectype
3420 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3421 i)));
3422 if (arginfo[i].vectype == NULL
3423 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3424 > bestn->simdclone->simdlen))
3425 return false;
3428 fndecl = bestn->decl;
3429 nunits = bestn->simdclone->simdlen;
3430 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3432 /* If the function isn't const, only allow it in simd loops where the user
3433 has asserted that at least nunits consecutive iterations can be
3434 performed using SIMD instructions. */
3435 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3436 && gimple_vuse (stmt))
3437 return false;
3439 /* Sanity check: make sure that at least one copy of the vectorized stmt
3440 needs to be generated. */
3441 gcc_assert (ncopies >= 1);
3443 if (!vec_stmt) /* transformation not required. */
3445 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3446 for (i = 0; i < nargs; i++)
3447 if ((bestn->simdclone->args[i].arg_type
3448 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3449 || (bestn->simdclone->args[i].arg_type
3450 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3452 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3453 + 1);
3454 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3455 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3456 ? size_type_node : TREE_TYPE (arginfo[i].op);
3457 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3458 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3459 tree sll = arginfo[i].simd_lane_linear
3460 ? boolean_true_node : boolean_false_node;
3461 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3463 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3464 if (dump_enabled_p ())
3465 dump_printf_loc (MSG_NOTE, vect_location,
3466 "=== vectorizable_simd_clone_call ===\n");
3467 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3468 return true;
3471 /* Transform. */
3473 if (dump_enabled_p ())
3474 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3476 /* Handle def. */
3477 scalar_dest = gimple_call_lhs (stmt);
3478 vec_dest = NULL_TREE;
3479 rtype = NULL_TREE;
3480 ratype = NULL_TREE;
3481 if (scalar_dest)
3483 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3484 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3485 if (TREE_CODE (rtype) == ARRAY_TYPE)
3487 ratype = rtype;
3488 rtype = TREE_TYPE (ratype);
3492 prev_stmt_info = NULL;
3493 for (j = 0; j < ncopies; ++j)
3495 /* Build argument list for the vectorized call. */
3496 if (j == 0)
3497 vargs.create (nargs);
3498 else
3499 vargs.truncate (0);
3501 for (i = 0; i < nargs; i++)
3503 unsigned int k, l, m, o;
3504 tree atype;
3505 op = gimple_call_arg (stmt, i);
3506 switch (bestn->simdclone->args[i].arg_type)
3508 case SIMD_CLONE_ARG_TYPE_VECTOR:
3509 atype = bestn->simdclone->args[i].vector_type;
3510 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3511 for (m = j * o; m < (j + 1) * o; m++)
3513 if (TYPE_VECTOR_SUBPARTS (atype)
3514 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3516 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3517 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3518 / TYPE_VECTOR_SUBPARTS (atype));
3519 gcc_assert ((k & (k - 1)) == 0);
3520 if (m == 0)
3521 vec_oprnd0
3522 = vect_get_vec_def_for_operand (op, stmt);
3523 else
3525 vec_oprnd0 = arginfo[i].op;
3526 if ((m & (k - 1)) == 0)
3527 vec_oprnd0
3528 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3529 vec_oprnd0);
3531 arginfo[i].op = vec_oprnd0;
3532 vec_oprnd0
3533 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3534 bitsize_int (prec),
3535 bitsize_int ((m & (k - 1)) * prec));
3536 new_stmt
3537 = gimple_build_assign (make_ssa_name (atype),
3538 vec_oprnd0);
3539 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3540 vargs.safe_push (gimple_assign_lhs (new_stmt));
3542 else
3544 k = (TYPE_VECTOR_SUBPARTS (atype)
3545 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3546 gcc_assert ((k & (k - 1)) == 0);
3547 vec<constructor_elt, va_gc> *ctor_elts;
3548 if (k != 1)
3549 vec_alloc (ctor_elts, k);
3550 else
3551 ctor_elts = NULL;
3552 for (l = 0; l < k; l++)
3554 if (m == 0 && l == 0)
3555 vec_oprnd0
3556 = vect_get_vec_def_for_operand (op, stmt);
3557 else
3558 vec_oprnd0
3559 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3560 arginfo[i].op);
3561 arginfo[i].op = vec_oprnd0;
3562 if (k == 1)
3563 break;
3564 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3565 vec_oprnd0);
3567 if (k == 1)
3568 vargs.safe_push (vec_oprnd0);
3569 else
3571 vec_oprnd0 = build_constructor (atype, ctor_elts);
3572 new_stmt
3573 = gimple_build_assign (make_ssa_name (atype),
3574 vec_oprnd0);
3575 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3576 vargs.safe_push (gimple_assign_lhs (new_stmt));
3580 break;
3581 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3582 vargs.safe_push (op);
3583 break;
3584 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3585 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3586 if (j == 0)
3588 gimple_seq stmts;
3589 arginfo[i].op
3590 = force_gimple_operand (arginfo[i].op, &stmts, true,
3591 NULL_TREE);
3592 if (stmts != NULL)
3594 basic_block new_bb;
3595 edge pe = loop_preheader_edge (loop);
3596 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3597 gcc_assert (!new_bb);
3599 if (arginfo[i].simd_lane_linear)
3601 vargs.safe_push (arginfo[i].op);
3602 break;
3604 tree phi_res = copy_ssa_name (op);
3605 gphi *new_phi = create_phi_node (phi_res, loop->header);
3606 set_vinfo_for_stmt (new_phi,
3607 new_stmt_vec_info (new_phi, loop_vinfo));
3608 add_phi_arg (new_phi, arginfo[i].op,
3609 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3610 enum tree_code code
3611 = POINTER_TYPE_P (TREE_TYPE (op))
3612 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3613 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3614 ? sizetype : TREE_TYPE (op);
3615 widest_int cst
3616 = wi::mul (bestn->simdclone->args[i].linear_step,
3617 ncopies * nunits);
3618 tree tcst = wide_int_to_tree (type, cst);
3619 tree phi_arg = copy_ssa_name (op);
3620 new_stmt
3621 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3622 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3623 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3624 set_vinfo_for_stmt (new_stmt,
3625 new_stmt_vec_info (new_stmt, loop_vinfo));
3626 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3627 UNKNOWN_LOCATION);
3628 arginfo[i].op = phi_res;
3629 vargs.safe_push (phi_res);
3631 else
3633 enum tree_code code
3634 = POINTER_TYPE_P (TREE_TYPE (op))
3635 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3636 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3637 ? sizetype : TREE_TYPE (op);
3638 widest_int cst
3639 = wi::mul (bestn->simdclone->args[i].linear_step,
3640 j * nunits);
3641 tree tcst = wide_int_to_tree (type, cst);
3642 new_temp = make_ssa_name (TREE_TYPE (op));
3643 new_stmt = gimple_build_assign (new_temp, code,
3644 arginfo[i].op, tcst);
3645 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3646 vargs.safe_push (new_temp);
3648 break;
3649 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3650 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3651 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3652 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3653 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3654 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3655 default:
3656 gcc_unreachable ();
3660 new_stmt = gimple_build_call_vec (fndecl, vargs);
3661 if (vec_dest)
3663 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3664 if (ratype)
3665 new_temp = create_tmp_var (ratype);
3666 else if (TYPE_VECTOR_SUBPARTS (vectype)
3667 == TYPE_VECTOR_SUBPARTS (rtype))
3668 new_temp = make_ssa_name (vec_dest, new_stmt);
3669 else
3670 new_temp = make_ssa_name (rtype, new_stmt);
3671 gimple_call_set_lhs (new_stmt, new_temp);
3673 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3675 if (vec_dest)
3677 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3679 unsigned int k, l;
3680 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3681 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3682 gcc_assert ((k & (k - 1)) == 0);
3683 for (l = 0; l < k; l++)
3685 tree t;
3686 if (ratype)
3688 t = build_fold_addr_expr (new_temp);
3689 t = build2 (MEM_REF, vectype, t,
3690 build_int_cst (TREE_TYPE (t),
3691 l * prec / BITS_PER_UNIT));
3693 else
3694 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3695 bitsize_int (prec), bitsize_int (l * prec));
3696 new_stmt
3697 = gimple_build_assign (make_ssa_name (vectype), t);
3698 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3699 if (j == 0 && l == 0)
3700 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3701 else
3702 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3704 prev_stmt_info = vinfo_for_stmt (new_stmt);
3707 if (ratype)
3709 tree clobber = build_constructor (ratype, NULL);
3710 TREE_THIS_VOLATILE (clobber) = 1;
3711 new_stmt = gimple_build_assign (new_temp, clobber);
3712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3714 continue;
3716 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3718 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3719 / TYPE_VECTOR_SUBPARTS (rtype));
3720 gcc_assert ((k & (k - 1)) == 0);
3721 if ((j & (k - 1)) == 0)
3722 vec_alloc (ret_ctor_elts, k);
3723 if (ratype)
3725 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3726 for (m = 0; m < o; m++)
3728 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3729 size_int (m), NULL_TREE, NULL_TREE);
3730 new_stmt
3731 = gimple_build_assign (make_ssa_name (rtype), tem);
3732 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3733 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3734 gimple_assign_lhs (new_stmt));
3736 tree clobber = build_constructor (ratype, NULL);
3737 TREE_THIS_VOLATILE (clobber) = 1;
3738 new_stmt = gimple_build_assign (new_temp, clobber);
3739 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3741 else
3742 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3743 if ((j & (k - 1)) != k - 1)
3744 continue;
3745 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3746 new_stmt
3747 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3750 if ((unsigned) j == k - 1)
3751 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3752 else
3753 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3755 prev_stmt_info = vinfo_for_stmt (new_stmt);
3756 continue;
3758 else if (ratype)
3760 tree t = build_fold_addr_expr (new_temp);
3761 t = build2 (MEM_REF, vectype, t,
3762 build_int_cst (TREE_TYPE (t), 0));
3763 new_stmt
3764 = gimple_build_assign (make_ssa_name (vec_dest), t);
3765 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3766 tree clobber = build_constructor (ratype, NULL);
3767 TREE_THIS_VOLATILE (clobber) = 1;
3768 vect_finish_stmt_generation (stmt,
3769 gimple_build_assign (new_temp,
3770 clobber), gsi);
3774 if (j == 0)
3775 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3776 else
3777 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3779 prev_stmt_info = vinfo_for_stmt (new_stmt);
3782 vargs.release ();
3784 /* The call in STMT might prevent it from being removed in DCE.
3785 We however cannot remove it here, due to the way the SSA name
3786 it defines is mapped to the new definition. So just replace
3787 the rhs of the statement with something harmless. */
3789 if (slp_node)
3790 return true;
3792 if (scalar_dest)
3794 type = TREE_TYPE (scalar_dest);
3795 if (is_pattern_stmt_p (stmt_info))
3796 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3797 else
3798 lhs = gimple_call_lhs (stmt);
3799 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3801 else
3802 new_stmt = gimple_build_nop ();
3803 set_vinfo_for_stmt (new_stmt, stmt_info);
3804 set_vinfo_for_stmt (stmt, NULL);
3805 STMT_VINFO_STMT (stmt_info) = new_stmt;
3806 gsi_replace (gsi, new_stmt, true);
3807 unlink_stmt_vdef (stmt);
3809 return true;
3813 /* Function vect_gen_widened_results_half
3815 Create a vector stmt whose code, number of arguments, and result
3816 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3817 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3818 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3819 needs to be created (DECL is a function-decl of a target builtin).
3820 STMT is the original scalar stmt that we are vectorizing. */
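/* As a rough illustration (the actual codes and modes depend on the
   target): for a widening multiply of two V8HI operands producing V4SI
   results, this helper is typically invoked twice, e.g. once with
   VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR, each call
   building one V4SI half of the widened result.  */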
3822 static gimple *
3823 vect_gen_widened_results_half (enum tree_code code,
3824 tree decl,
3825 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3826 tree vec_dest, gimple_stmt_iterator *gsi,
3827 gimple *stmt)
3829 gimple *new_stmt;
3830 tree new_temp;
3832 /* Generate half of the widened result: */
3833 if (code == CALL_EXPR)
3835 /* Target specific support */
3836 if (op_type == binary_op)
3837 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3838 else
3839 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3840 new_temp = make_ssa_name (vec_dest, new_stmt);
3841 gimple_call_set_lhs (new_stmt, new_temp);
3843 else
3845 /* Generic support */
3846 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3847 if (op_type != binary_op)
3848 vec_oprnd1 = NULL;
3849 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3850 new_temp = make_ssa_name (vec_dest, new_stmt);
3851 gimple_assign_set_lhs (new_stmt, new_temp);
3853 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3855 return new_stmt;
3859 /* Get vectorized definitions for loop-based vectorization. For the first
3860 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3861 the scalar operand), and for the rest we get a copy with
3862 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3863 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3864 The vectors are collected into VEC_OPRNDS. */
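/* For instance, when vectorizable_conversion needs a two-step narrowing
   (one intermediate type) it calls this with
   MULTI_STEP_CVT == vect_pow2 (1) - 1 == 1, so the recursion collects
   four vector defs into VEC_OPRNDS, which the demotion code then combines
   pairwise down to a single result vector.  */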
3866 static void
3867 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3868 vec<tree> *vec_oprnds, int multi_step_cvt)
3870 tree vec_oprnd;
3872 /* Get first vector operand. */
3873 /* All the vector operands except the very first one (that is the scalar oprnd)
3874 are stmt copies. */
3875 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3876 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3877 else
3878 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3880 vec_oprnds->quick_push (vec_oprnd);
3882 /* Get second vector operand. */
3883 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3884 vec_oprnds->quick_push (vec_oprnd);
3886 *oprnd = vec_oprnd;
3888 /* For conversion in multiple steps, continue to get operands
3889 recursively. */
3890 if (multi_step_cvt)
3891 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3895 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3896 For multi-step conversions store the resulting vectors and call the function
3897 recursively. */
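/* A rough example: demoting int elements to short with VEC_PACK_TRUNC_EXPR
   packs each pair (VOP0, VOP1) of V4SI operands into a single V8HI vector,
   roughly

     vect_x = VEC_PACK_TRUNC_EXPR <vect_y0, vect_y1>;

   and for a multi-step conversion the recursion repeats this on the packed
   results.  */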
3899 static void
3900 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3901 int multi_step_cvt, gimple *stmt,
3902 vec<tree> vec_dsts,
3903 gimple_stmt_iterator *gsi,
3904 slp_tree slp_node, enum tree_code code,
3905 stmt_vec_info *prev_stmt_info)
3907 unsigned int i;
3908 tree vop0, vop1, new_tmp, vec_dest;
3909 gimple *new_stmt;
3910 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3912 vec_dest = vec_dsts.pop ();
3914 for (i = 0; i < vec_oprnds->length (); i += 2)
3916 /* Create demotion operation. */
3917 vop0 = (*vec_oprnds)[i];
3918 vop1 = (*vec_oprnds)[i + 1];
3919 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3920 new_tmp = make_ssa_name (vec_dest, new_stmt);
3921 gimple_assign_set_lhs (new_stmt, new_tmp);
3922 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3924 if (multi_step_cvt)
3925 /* Store the resulting vector for next recursive call. */
3926 (*vec_oprnds)[i/2] = new_tmp;
3927 else
3929 /* This is the last step of the conversion sequence. Store the
3930 vectors in SLP_NODE or in the vector info of the scalar statement
3931 (or in the STMT_VINFO_RELATED_STMT chain). */
3932 if (slp_node)
3933 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3934 else
3936 if (!*prev_stmt_info)
3937 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3938 else
3939 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3941 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3946 /* For multi-step demotion operations we first generate demotion operations
3947 from the source type to the intermediate types, and then combine the
3948 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3949 type. */
3950 if (multi_step_cvt)
3952 /* At each level of recursion we have half of the operands we had at the
3953 previous level. */
3954 vec_oprnds->truncate ((i+1)/2);
3955 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3956 stmt, vec_dsts, gsi, slp_node,
3957 VEC_PACK_TRUNC_EXPR,
3958 prev_stmt_info);
3961 vec_dsts.quick_push (vec_dest);
3965 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3966 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3967 the resulting vectors and call the function recursively. */
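/* A rough example: promoting short elements to int unpacks each V8HI
   operand into two V4SI halves, roughly

     vect_x_lo = [vec_unpack_lo_expr] vect_y;
     vect_x_hi = [vec_unpack_hi_expr] vect_y;

   so on return VEC_OPRNDS0 holds twice as many (wider-element) vectors.  */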
3969 static void
3970 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3971 vec<tree> *vec_oprnds1,
3972 gimple *stmt, tree vec_dest,
3973 gimple_stmt_iterator *gsi,
3974 enum tree_code code1,
3975 enum tree_code code2, tree decl1,
3976 tree decl2, int op_type)
3978 int i;
3979 tree vop0, vop1, new_tmp1, new_tmp2;
3980 gimple *new_stmt1, *new_stmt2;
3981 vec<tree> vec_tmp = vNULL;
3983 vec_tmp.create (vec_oprnds0->length () * 2);
3984 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3986 if (op_type == binary_op)
3987 vop1 = (*vec_oprnds1)[i];
3988 else
3989 vop1 = NULL_TREE;
3991 /* Generate the two halves of the promotion operation. */
3992 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3993 op_type, vec_dest, gsi, stmt);
3994 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3995 op_type, vec_dest, gsi, stmt);
3996 if (is_gimple_call (new_stmt1))
3998 new_tmp1 = gimple_call_lhs (new_stmt1);
3999 new_tmp2 = gimple_call_lhs (new_stmt2);
4001 else
4003 new_tmp1 = gimple_assign_lhs (new_stmt1);
4004 new_tmp2 = gimple_assign_lhs (new_stmt2);
4007 /* Store the results for the next step. */
4008 vec_tmp.quick_push (new_tmp1);
4009 vec_tmp.quick_push (new_tmp2);
4012 vec_oprnds0->release ();
4013 *vec_oprnds0 = vec_tmp;
4017 /* Check if STMT performs a conversion operation that can be vectorized.
4018 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4019 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4020 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4022 static bool
4023 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4024 gimple **vec_stmt, slp_tree slp_node)
4026 tree vec_dest;
4027 tree scalar_dest;
4028 tree op0, op1 = NULL_TREE;
4029 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4032 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4033 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4034 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4035 tree new_temp;
4036 gimple *def_stmt;
4037 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4038 int ndts = 2;
4039 gimple *new_stmt = NULL;
4040 stmt_vec_info prev_stmt_info;
4041 int nunits_in;
4042 int nunits_out;
4043 tree vectype_out, vectype_in;
4044 int ncopies, i, j;
4045 tree lhs_type, rhs_type;
4046 enum { NARROW, NONE, WIDEN } modifier;
4047 vec<tree> vec_oprnds0 = vNULL;
4048 vec<tree> vec_oprnds1 = vNULL;
4049 tree vop0;
4050 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4051 vec_info *vinfo = stmt_info->vinfo;
4052 int multi_step_cvt = 0;
4053 vec<tree> interm_types = vNULL;
4054 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4055 int op_type;
4056 machine_mode rhs_mode;
4057 unsigned short fltsz;
4059 /* Is STMT a vectorizable conversion? */
4061 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4062 return false;
4064 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4065 && ! vec_stmt)
4066 return false;
4068 if (!is_gimple_assign (stmt))
4069 return false;
4071 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4072 return false;
4074 code = gimple_assign_rhs_code (stmt);
4075 if (!CONVERT_EXPR_CODE_P (code)
4076 && code != FIX_TRUNC_EXPR
4077 && code != FLOAT_EXPR
4078 && code != WIDEN_MULT_EXPR
4079 && code != WIDEN_LSHIFT_EXPR)
4080 return false;
4082 op_type = TREE_CODE_LENGTH (code);
4084 /* Check types of lhs and rhs. */
4085 scalar_dest = gimple_assign_lhs (stmt);
4086 lhs_type = TREE_TYPE (scalar_dest);
4087 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4089 op0 = gimple_assign_rhs1 (stmt);
4090 rhs_type = TREE_TYPE (op0);
4092 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4093 && !((INTEGRAL_TYPE_P (lhs_type)
4094 && INTEGRAL_TYPE_P (rhs_type))
4095 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4096 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4097 return false;
4099 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4100 && ((INTEGRAL_TYPE_P (lhs_type)
4101 && (TYPE_PRECISION (lhs_type)
4102 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4103 || (INTEGRAL_TYPE_P (rhs_type)
4104 && (TYPE_PRECISION (rhs_type)
4105 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4107 if (dump_enabled_p ())
4108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4109 "type conversion to/from bit-precision unsupported."
4110 "\n");
4111 return false;
4114 /* Check the operands of the operation. */
4115 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4117 if (dump_enabled_p ())
4118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4119 "use not simple.\n");
4120 return false;
4122 if (op_type == binary_op)
4124 bool ok;
4126 op1 = gimple_assign_rhs2 (stmt);
4127 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4128 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4129 OP1. */
4130 if (CONSTANT_CLASS_P (op0))
4131 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4132 else
4133 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4135 if (!ok)
4137 if (dump_enabled_p ())
4138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4139 "use not simple.\n");
4140 return false;
4144 /* If op0 is an external or constant def, use a vector type of
4145 the same size as the output vector type. */
4146 if (!vectype_in)
4147 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4148 if (vec_stmt)
4149 gcc_assert (vectype_in);
4150 if (!vectype_in)
4152 if (dump_enabled_p ())
4154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4155 "no vectype for scalar type ");
4156 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4157 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4160 return false;
4163 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4164 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4166 if (dump_enabled_p ())
4168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4169 "can't convert between boolean and non "
4170 "boolean vectors");
4171 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4172 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4175 return false;
4178 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4179 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4180 if (nunits_in < nunits_out)
4181 modifier = NARROW;
4182 else if (nunits_out == nunits_in)
4183 modifier = NONE;
4184 else
4185 modifier = WIDEN;
4187 /* Multiple types in SLP are handled by creating the appropriate number of
4188 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4189 case of SLP. */
4190 if (slp_node)
4191 ncopies = 1;
4192 else if (modifier == NARROW)
4193 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4194 else
4195 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4197 /* Sanity check: make sure that at least one copy of the vectorized stmt
4198 needs to be generated. */
4199 gcc_assert (ncopies >= 1);
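/* For instance, on a target with 128-bit vectors, a short-to-int conversion
   with VF == 16 has vectype_in V8HI and vectype_out V4SI, so modifier is
   WIDEN and ncopies == 16 / 8 == 2; the reverse int-to-short conversion has
   modifier NARROW and likewise ncopies == 16 / 8 == 2.  */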
4201 /* Supportable by target? */
4202 switch (modifier)
4204 case NONE:
4205 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4206 return false;
4207 if (supportable_convert_operation (code, vectype_out, vectype_in,
4208 &decl1, &code1))
4209 break;
4210 /* FALLTHRU */
4211 unsupported:
4212 if (dump_enabled_p ())
4213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4214 "conversion not supported by target.\n");
4215 return false;
4217 case WIDEN:
4218 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4219 &code1, &code2, &multi_step_cvt,
4220 &interm_types))
4222 /* Binary widening operation can only be supported directly by the
4223 architecture. */
4224 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4225 break;
4228 if (code != FLOAT_EXPR
4229 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4230 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4231 goto unsupported;
4233 rhs_mode = TYPE_MODE (rhs_type);
4234 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4235 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4236 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4237 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4239 cvt_type
4240 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4241 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4242 if (cvt_type == NULL_TREE)
4243 goto unsupported;
4245 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4247 if (!supportable_convert_operation (code, vectype_out,
4248 cvt_type, &decl1, &codecvt1))
4249 goto unsupported;
4251 else if (!supportable_widening_operation (code, stmt, vectype_out,
4252 cvt_type, &codecvt1,
4253 &codecvt2, &multi_step_cvt,
4254 &interm_types))
4255 continue;
4256 else
4257 gcc_assert (multi_step_cvt == 0);
4259 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4260 vectype_in, &code1, &code2,
4261 &multi_step_cvt, &interm_types))
4262 break;
4265 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4266 goto unsupported;
4268 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4269 codecvt2 = ERROR_MARK;
4270 else
4272 multi_step_cvt++;
4273 interm_types.safe_push (cvt_type);
4274 cvt_type = NULL_TREE;
4276 break;
4278 case NARROW:
4279 gcc_assert (op_type == unary_op);
4280 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4281 &code1, &multi_step_cvt,
4282 &interm_types))
4283 break;
4285 if (code != FIX_TRUNC_EXPR
4286 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4287 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4288 goto unsupported;
4290 rhs_mode = TYPE_MODE (rhs_type);
4291 cvt_type
4292 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4293 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4294 if (cvt_type == NULL_TREE)
4295 goto unsupported;
4296 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4297 &decl1, &codecvt1))
4298 goto unsupported;
4299 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4300 &code1, &multi_step_cvt,
4301 &interm_types))
4302 break;
4303 goto unsupported;
4305 default:
4306 gcc_unreachable ();
4309 if (!vec_stmt) /* transformation not required. */
4311 if (dump_enabled_p ())
4312 dump_printf_loc (MSG_NOTE, vect_location,
4313 "=== vectorizable_conversion ===\n");
4314 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4316 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4317 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4319 else if (modifier == NARROW)
4321 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4322 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4324 else
4326 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4327 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4329 interm_types.release ();
4330 return true;
4333 /* Transform. */
4334 if (dump_enabled_p ())
4335 dump_printf_loc (MSG_NOTE, vect_location,
4336 "transform conversion. ncopies = %d.\n", ncopies);
4338 if (op_type == binary_op)
4340 if (CONSTANT_CLASS_P (op0))
4341 op0 = fold_convert (TREE_TYPE (op1), op0);
4342 else if (CONSTANT_CLASS_P (op1))
4343 op1 = fold_convert (TREE_TYPE (op0), op1);
4346 /* In case of multi-step conversion, we first generate conversion operations
4347 to the intermediate types, and then from those types to the final one.
4348 We create vector destinations for the intermediate types (TYPES) received
4349 from supportable_*_operation, and store them in the correct order
4350 for future use in vect_create_vectorized_*_stmts (). */
4351 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4352 vec_dest = vect_create_destination_var (scalar_dest,
4353 (cvt_type && modifier == WIDEN)
4354 ? cvt_type : vectype_out);
4355 vec_dsts.quick_push (vec_dest);
4357 if (multi_step_cvt)
4359 for (i = interm_types.length () - 1;
4360 interm_types.iterate (i, &intermediate_type); i--)
4362 vec_dest = vect_create_destination_var (scalar_dest,
4363 intermediate_type);
4364 vec_dsts.quick_push (vec_dest);
4368 if (cvt_type)
4369 vec_dest = vect_create_destination_var (scalar_dest,
4370 modifier == WIDEN
4371 ? vectype_out : cvt_type);
4373 if (!slp_node)
4375 if (modifier == WIDEN)
4377 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4378 if (op_type == binary_op)
4379 vec_oprnds1.create (1);
4381 else if (modifier == NARROW)
4382 vec_oprnds0.create (
4383 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4385 else if (code == WIDEN_LSHIFT_EXPR)
4386 vec_oprnds1.create (slp_node->vec_stmts_size);
4388 last_oprnd = op0;
4389 prev_stmt_info = NULL;
4390 switch (modifier)
4392 case NONE:
4393 for (j = 0; j < ncopies; j++)
4395 if (j == 0)
4396 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4397 else
4398 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4400 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4402 /* Arguments are ready, create the new vector stmt. */
4403 if (code1 == CALL_EXPR)
4405 new_stmt = gimple_build_call (decl1, 1, vop0);
4406 new_temp = make_ssa_name (vec_dest, new_stmt);
4407 gimple_call_set_lhs (new_stmt, new_temp);
4409 else
4411 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4412 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4413 new_temp = make_ssa_name (vec_dest, new_stmt);
4414 gimple_assign_set_lhs (new_stmt, new_temp);
4417 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4418 if (slp_node)
4419 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4420 else
4422 if (!prev_stmt_info)
4423 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4424 else
4425 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4426 prev_stmt_info = vinfo_for_stmt (new_stmt);
4430 break;
4432 case WIDEN:
4433 /* In case the vectorization factor (VF) is bigger than the number
4434 of elements that we can fit in a vectype (nunits), we have to
4435 generate more than one vector stmt, i.e., we need to "unroll"
4436 the vector stmt by a factor of VF/nunits. */
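/* E.g. for a short-to-int conversion the stmts generated per copy are
   roughly

     vect_x_lo = [vec_unpack_lo_expr] vect_y;
     vect_x_hi = [vec_unpack_hi_expr] vect_y;

   and when CVT_TYPE is set (a FLOAT_EXPR that first needs an integer
   widening) an extra conversion stmt is emitted for each half below.  */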
4437 for (j = 0; j < ncopies; j++)
4439 /* Handle uses. */
4440 if (j == 0)
4442 if (slp_node)
4444 if (code == WIDEN_LSHIFT_EXPR)
4446 unsigned int k;
4448 vec_oprnd1 = op1;
4449 /* Store vec_oprnd1 for every vector stmt to be created
4450 for SLP_NODE. We check during the analysis that all
4451 the shift arguments are the same. */
4452 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4453 vec_oprnds1.quick_push (vec_oprnd1);
4455 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4456 slp_node);
4458 else
4459 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4460 &vec_oprnds1, slp_node);
4462 else
4464 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4465 vec_oprnds0.quick_push (vec_oprnd0);
4466 if (op_type == binary_op)
4468 if (code == WIDEN_LSHIFT_EXPR)
4469 vec_oprnd1 = op1;
4470 else
4471 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4472 vec_oprnds1.quick_push (vec_oprnd1);
4476 else
4478 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4479 vec_oprnds0.truncate (0);
4480 vec_oprnds0.quick_push (vec_oprnd0);
4481 if (op_type == binary_op)
4483 if (code == WIDEN_LSHIFT_EXPR)
4484 vec_oprnd1 = op1;
4485 else
4486 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4487 vec_oprnd1);
4488 vec_oprnds1.truncate (0);
4489 vec_oprnds1.quick_push (vec_oprnd1);
4493 /* Arguments are ready. Create the new vector stmts. */
4494 for (i = multi_step_cvt; i >= 0; i--)
4496 tree this_dest = vec_dsts[i];
4497 enum tree_code c1 = code1, c2 = code2;
4498 if (i == 0 && codecvt2 != ERROR_MARK)
4500 c1 = codecvt1;
4501 c2 = codecvt2;
4503 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4504 &vec_oprnds1,
4505 stmt, this_dest, gsi,
4506 c1, c2, decl1, decl2,
4507 op_type);
4510 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4512 if (cvt_type)
4514 if (codecvt1 == CALL_EXPR)
4516 new_stmt = gimple_build_call (decl1, 1, vop0);
4517 new_temp = make_ssa_name (vec_dest, new_stmt);
4518 gimple_call_set_lhs (new_stmt, new_temp);
4520 else
4522 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4523 new_temp = make_ssa_name (vec_dest);
4524 new_stmt = gimple_build_assign (new_temp, codecvt1,
4525 vop0);
4528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4530 else
4531 new_stmt = SSA_NAME_DEF_STMT (vop0);
4533 if (slp_node)
4534 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4535 else
4537 if (!prev_stmt_info)
4538 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4539 else
4540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4541 prev_stmt_info = vinfo_for_stmt (new_stmt);
4546 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4547 break;
4549 case NARROW:
4550 /* In case the vectorization factor (VF) is bigger than the number
4551 of elements that we can fit in a vectype (nunits), we have to
4552 generate more than one vector stmt, i.e., we need to "unroll"
4553 the vector stmt by a factor of VF/nunits. */
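/* E.g. for an int-to-short conversion each pair of input vectors is packed
   roughly as

     vect_x = VEC_PACK_TRUNC_EXPR <vect_y0, vect_y1>;

   preceded, when CVT_TYPE is set (a FIX_TRUNC_EXPR that first needs a
   same-width integer conversion), by one conversion stmt per operand.  */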
4554 for (j = 0; j < ncopies; j++)
4556 /* Handle uses. */
4557 if (slp_node)
4558 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4559 slp_node);
4560 else
4562 vec_oprnds0.truncate (0);
4563 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4564 vect_pow2 (multi_step_cvt) - 1);
4567 /* Arguments are ready. Create the new vector stmts. */
4568 if (cvt_type)
4569 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4571 if (codecvt1 == CALL_EXPR)
4573 new_stmt = gimple_build_call (decl1, 1, vop0);
4574 new_temp = make_ssa_name (vec_dest, new_stmt);
4575 gimple_call_set_lhs (new_stmt, new_temp);
4577 else
4579 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4580 new_temp = make_ssa_name (vec_dest);
4581 new_stmt = gimple_build_assign (new_temp, codecvt1,
4582 vop0);
4585 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4586 vec_oprnds0[i] = new_temp;
4589 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4590 stmt, vec_dsts, gsi,
4591 slp_node, code1,
4592 &prev_stmt_info);
4595 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4596 break;
4599 vec_oprnds0.release ();
4600 vec_oprnds1.release ();
4601 interm_types.release ();
4603 return true;
4607 /* Function vectorizable_assignment.
4609 Check if STMT performs an assignment (copy) that can be vectorized.
4610 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4611 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4612 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4614 static bool
4615 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4616 gimple **vec_stmt, slp_tree slp_node)
4618 tree vec_dest;
4619 tree scalar_dest;
4620 tree op;
4621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4622 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4623 tree new_temp;
4624 gimple *def_stmt;
4625 enum vect_def_type dt[1] = {vect_unknown_def_type};
4626 int ndts = 1;
4627 int ncopies;
4628 int i, j;
4629 vec<tree> vec_oprnds = vNULL;
4630 tree vop;
4631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4632 vec_info *vinfo = stmt_info->vinfo;
4633 gimple *new_stmt = NULL;
4634 stmt_vec_info prev_stmt_info = NULL;
4635 enum tree_code code;
4636 tree vectype_in;
4638 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4639 return false;
4641 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4642 && ! vec_stmt)
4643 return false;
4645 /* Is this a vectorizable assignment? */
4646 if (!is_gimple_assign (stmt))
4647 return false;
4649 scalar_dest = gimple_assign_lhs (stmt);
4650 if (TREE_CODE (scalar_dest) != SSA_NAME)
4651 return false;
4653 code = gimple_assign_rhs_code (stmt);
4654 if (gimple_assign_single_p (stmt)
4655 || code == PAREN_EXPR
4656 || CONVERT_EXPR_CODE_P (code))
4657 op = gimple_assign_rhs1 (stmt);
4658 else
4659 return false;
4661 if (code == VIEW_CONVERT_EXPR)
4662 op = TREE_OPERAND (op, 0);
4664 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4665 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4667 /* Multiple types in SLP are handled by creating the appropriate number of
4668 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4669 case of SLP. */
4670 if (slp_node)
4671 ncopies = 1;
4672 else
4673 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4675 gcc_assert (ncopies >= 1);
4677 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4681 "use not simple.\n");
4682 return false;
4685 /* We can handle NOP_EXPR conversions that do not change the number
4686 of elements or the vector size. */
4687 if ((CONVERT_EXPR_CODE_P (code)
4688 || code == VIEW_CONVERT_EXPR)
4689 && (!vectype_in
4690 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4691 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4692 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4693 return false;
4695 /* We do not handle bit-precision changes. */
4696 if ((CONVERT_EXPR_CODE_P (code)
4697 || code == VIEW_CONVERT_EXPR)
4698 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4699 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4700 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4701 || ((TYPE_PRECISION (TREE_TYPE (op))
4702 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4703 /* But a conversion that does not change the bit-pattern is ok. */
4704 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4705 > TYPE_PRECISION (TREE_TYPE (op)))
4706 && TYPE_UNSIGNED (TREE_TYPE (op)))
4707 /* Conversion between boolean types of different sizes is
4708 a simple assignment in case their vectypes are the same
4709 boolean vectors. */
4710 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4711 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4713 if (dump_enabled_p ())
4714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4715 "type conversion to/from bit-precision "
4716 "unsupported.\n");
4717 return false;
4720 if (!vec_stmt) /* transformation not required. */
4722 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4723 if (dump_enabled_p ())
4724 dump_printf_loc (MSG_NOTE, vect_location,
4725 "=== vectorizable_assignment ===\n");
4726 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4727 return true;
4730 /* Transform. */
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4734 /* Handle def. */
4735 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4737 /* Handle use. */
4738 for (j = 0; j < ncopies; j++)
4740 /* Handle uses. */
4741 if (j == 0)
4742 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4743 else
4744 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4746 /* Arguments are ready. Create the new vector stmt. */
4747 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4749 if (CONVERT_EXPR_CODE_P (code)
4750 || code == VIEW_CONVERT_EXPR)
4751 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4752 new_stmt = gimple_build_assign (vec_dest, vop);
4753 new_temp = make_ssa_name (vec_dest, new_stmt);
4754 gimple_assign_set_lhs (new_stmt, new_temp);
4755 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4756 if (slp_node)
4757 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4760 if (slp_node)
4761 continue;
4763 if (j == 0)
4764 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4765 else
4766 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4768 prev_stmt_info = vinfo_for_stmt (new_stmt);
4771 vec_oprnds.release ();
4772 return true;
4776 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4777 either as a shift by a scalar or by a vector. */
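/* A minimal usage sketch (the caller and the type ITYPE here are
   hypothetical):

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ...synthesize the shift-based replacement...

   The scalar-count optab is preferred and the vector-count optab is only
   tried as a fallback, mirroring the choice made in vectorizable_shift
   below.  */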
4779 bool
4780 vect_supportable_shift (enum tree_code code, tree scalar_type)
4783 machine_mode vec_mode;
4784 optab optab;
4785 int icode;
4786 tree vectype;
4788 vectype = get_vectype_for_scalar_type (scalar_type);
4789 if (!vectype)
4790 return false;
4792 optab = optab_for_tree_code (code, vectype, optab_scalar);
4793 if (!optab
4794 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4796 optab = optab_for_tree_code (code, vectype, optab_vector);
4797 if (!optab
4798 || (optab_handler (optab, TYPE_MODE (vectype))
4799 == CODE_FOR_nothing))
4800 return false;
4803 vec_mode = TYPE_MODE (vectype);
4804 icode = (int) optab_handler (optab, vec_mode);
4805 if (icode == CODE_FOR_nothing)
4806 return false;
4808 return true;
4812 /* Function vectorizable_shift.
4814 Check if STMT performs a shift operation that can be vectorized.
4815 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4816 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4817 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4819 static bool
4820 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4821 gimple **vec_stmt, slp_tree slp_node)
4823 tree vec_dest;
4824 tree scalar_dest;
4825 tree op0, op1 = NULL;
4826 tree vec_oprnd1 = NULL_TREE;
4827 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4828 tree vectype;
4829 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4830 enum tree_code code;
4831 machine_mode vec_mode;
4832 tree new_temp;
4833 optab optab;
4834 int icode;
4835 machine_mode optab_op2_mode;
4836 gimple *def_stmt;
4837 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4838 int ndts = 2;
4839 gimple *new_stmt = NULL;
4840 stmt_vec_info prev_stmt_info;
4841 int nunits_in;
4842 int nunits_out;
4843 tree vectype_out;
4844 tree op1_vectype;
4845 int ncopies;
4846 int j, i;
4847 vec<tree> vec_oprnds0 = vNULL;
4848 vec<tree> vec_oprnds1 = vNULL;
4849 tree vop0, vop1;
4850 unsigned int k;
4851 bool scalar_shift_arg = true;
4852 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4853 vec_info *vinfo = stmt_info->vinfo;
4854 int vf;
4856 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4857 return false;
4859 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4860 && ! vec_stmt)
4861 return false;
4863 /* Is STMT a vectorizable shift/rotate operation? */
4864 if (!is_gimple_assign (stmt))
4865 return false;
4867 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4868 return false;
4870 code = gimple_assign_rhs_code (stmt);
4872 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4873 || code == RROTATE_EXPR))
4874 return false;
4876 scalar_dest = gimple_assign_lhs (stmt);
4877 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4878 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4879 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4881 if (dump_enabled_p ())
4882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4883 "bit-precision shifts not supported.\n");
4884 return false;
4887 op0 = gimple_assign_rhs1 (stmt);
4888 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4890 if (dump_enabled_p ())
4891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4892 "use not simple.\n");
4893 return false;
4895 /* If op0 is an external or constant def use a vector type with
4896 the same size as the output vector type. */
4897 if (!vectype)
4898 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4899 if (vec_stmt)
4900 gcc_assert (vectype);
4901 if (!vectype)
4903 if (dump_enabled_p ())
4904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4905 "no vectype for scalar type\n");
4906 return false;
4909 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4910 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4911 if (nunits_out != nunits_in)
4912 return false;
4914 op1 = gimple_assign_rhs2 (stmt);
4915 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4917 if (dump_enabled_p ())
4918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4919 "use not simple.\n");
4920 return false;
4923 if (loop_vinfo)
4924 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4925 else
4926 vf = 1;
4928 /* Multiple types in SLP are handled by creating the appropriate number of
4929 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4930 case of SLP. */
4931 if (slp_node)
4932 ncopies = 1;
4933 else
4934 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4936 gcc_assert (ncopies >= 1);
4938 /* Determine whether the shift amount is a vector or a scalar. If the
4939 shift/rotate amount is a vector, use the vector/vector shift optabs. */
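/* E.g. a[i] << 3, or a[i] << n with a loop-invariant N, can keep a scalar
   shift amount, whereas a[i] << b[i] needs the vector/vector optab because
   every lane may shift by a different amount.  */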
4941 if ((dt[1] == vect_internal_def
4942 || dt[1] == vect_induction_def)
4943 && !slp_node)
4944 scalar_shift_arg = false;
4945 else if (dt[1] == vect_constant_def
4946 || dt[1] == vect_external_def
4947 || dt[1] == vect_internal_def)
4949 /* In SLP, we need to check whether the shift count is the same in
4950 all the stmts; in loops, if it is a constant or invariant, it is
4951 always a scalar shift. */
4952 if (slp_node)
4954 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4955 gimple *slpstmt;
4957 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4958 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4959 scalar_shift_arg = false;
4962 /* If the shift amount is computed by a pattern stmt, we cannot
4963 use the scalar amount directly; thus give up and use a vector
4964 shift. */
4965 if (dt[1] == vect_internal_def)
4967 gimple *def = SSA_NAME_DEF_STMT (op1);
4968 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4969 scalar_shift_arg = false;
4972 else
4974 if (dump_enabled_p ())
4975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4976 "operand mode requires invariant argument.\n");
4977 return false;
4980 /* Vector shifted by vector. */
4981 if (!scalar_shift_arg)
4983 optab = optab_for_tree_code (code, vectype, optab_vector);
4984 if (dump_enabled_p ())
4985 dump_printf_loc (MSG_NOTE, vect_location,
4986 "vector/vector shift/rotate found.\n");
4988 if (!op1_vectype)
4989 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4990 if (op1_vectype == NULL_TREE
4991 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4993 if (dump_enabled_p ())
4994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4995 "unusable type for last operand in"
4996 " vector/vector shift/rotate.\n");
4997 return false;
5000 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5001 then see if it has a vector-shifted-by-vector insn. */
5002 else
5004 optab = optab_for_tree_code (code, vectype, optab_scalar);
5005 if (optab
5006 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE, vect_location,
5010 "vector/scalar shift/rotate found.\n");
5012 else
5014 optab = optab_for_tree_code (code, vectype, optab_vector);
5015 if (optab
5016 && (optab_handler (optab, TYPE_MODE (vectype))
5017 != CODE_FOR_nothing))
5019 scalar_shift_arg = false;
5021 if (dump_enabled_p ())
5022 dump_printf_loc (MSG_NOTE, vect_location,
5023 "vector/vector shift/rotate found.\n");
5025 /* Unlike the other binary operators, shifts/rotates have an
5026 int rhs instead of one of the same type as the lhs, so
5027 make sure the scalar is of the right type if we are
5028 dealing with vectors of long long/long/short/char. */
5029 if (dt[1] == vect_constant_def)
5030 op1 = fold_convert (TREE_TYPE (vectype), op1);
5031 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5032 TREE_TYPE (op1)))
5034 if (slp_node
5035 && TYPE_MODE (TREE_TYPE (vectype))
5036 != TYPE_MODE (TREE_TYPE (op1)))
5038 if (dump_enabled_p ())
5039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5040 "unusable type for last operand in"
5041 " vector/vector shift/rotate.\n");
5042 return false;
5044 if (vec_stmt && !slp_node)
5046 op1 = fold_convert (TREE_TYPE (vectype), op1);
5047 op1 = vect_init_vector (stmt, op1,
5048 TREE_TYPE (vectype), NULL);
5055 /* Supportable by target? */
5056 if (!optab)
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5060 "no optab.\n");
5061 return false;
5063 vec_mode = TYPE_MODE (vectype);
5064 icode = (int) optab_handler (optab, vec_mode);
5065 if (icode == CODE_FOR_nothing)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "op not supported by target.\n");
5070 /* Check only during analysis. */
5071 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5072 || (vf < vect_min_worthwhile_factor (code)
5073 && !vec_stmt))
5074 return false;
5075 if (dump_enabled_p ())
5076 dump_printf_loc (MSG_NOTE, vect_location,
5077 "proceeding using word mode.\n");
5080 /* Worthwhile without SIMD support? Check only during analysis. */
5081 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5082 && vf < vect_min_worthwhile_factor (code)
5083 && !vec_stmt)
5085 if (dump_enabled_p ())
5086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5087 "not worthwhile without SIMD support.\n");
5088 return false;
5091 if (!vec_stmt) /* transformation not required. */
5093 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5094 if (dump_enabled_p ())
5095 dump_printf_loc (MSG_NOTE, vect_location,
5096 "=== vectorizable_shift ===\n");
5097 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5098 return true;
5101 /* Transform. */
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_NOTE, vect_location,
5105 "transform binary/unary operation.\n");
5107 /* Handle def. */
5108 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5110 prev_stmt_info = NULL;
5111 for (j = 0; j < ncopies; j++)
5113 /* Handle uses. */
5114 if (j == 0)
5116 if (scalar_shift_arg)
5118 /* Vector shl and shr insn patterns can be defined with scalar
5119 operand 2 (the shift operand). In this case, use the constant or
5120 loop-invariant op1 directly, without extending it to a vector mode
5121 first. */
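/* E.g. when the target's vector shift insn takes an immediate or scalar
   count, for a[i] >> 5 the constant 5 is used directly as operand 1 of
   every generated vector stmt instead of being broadcast into a vector.  */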
5122 optab_op2_mode = insn_data[icode].operand[2].mode;
5123 if (!VECTOR_MODE_P (optab_op2_mode))
5125 if (dump_enabled_p ())
5126 dump_printf_loc (MSG_NOTE, vect_location,
5127 "operand 1 using scalar mode.\n");
5128 vec_oprnd1 = op1;
5129 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5130 vec_oprnds1.quick_push (vec_oprnd1);
5131 if (slp_node)
5133 /* Store vec_oprnd1 for every vector stmt to be created
5134 for SLP_NODE. We check during the analysis that all
5135 the shift arguments are the same.
5136 TODO: Allow different constants for different vector
5137 stmts generated for an SLP instance. */
5138 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5139 vec_oprnds1.quick_push (vec_oprnd1);
5144 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5145 (a special case for certain kinds of vector shifts); otherwise,
5146 operand 1 should be of a vector type (the usual case). */
5147 if (vec_oprnd1)
5148 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5149 slp_node);
5150 else
5151 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5152 slp_node);
5154 else
5155 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5157 /* Arguments are ready. Create the new vector stmt. */
5158 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5160 vop1 = vec_oprnds1[i];
5161 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5162 new_temp = make_ssa_name (vec_dest, new_stmt);
5163 gimple_assign_set_lhs (new_stmt, new_temp);
5164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5165 if (slp_node)
5166 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5169 if (slp_node)
5170 continue;
5172 if (j == 0)
5173 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5174 else
5175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5176 prev_stmt_info = vinfo_for_stmt (new_stmt);
5179 vec_oprnds0.release ();
5180 vec_oprnds1.release ();
5182 return true;
5186 /* Function vectorizable_operation.
5188 Check if STMT performs a binary, unary or ternary operation that can
5189 be vectorized.
5190 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5191 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5192 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5194 static bool
5195 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5196 gimple **vec_stmt, slp_tree slp_node)
5198 tree vec_dest;
5199 tree scalar_dest;
5200 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5202 tree vectype;
5203 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5204 enum tree_code code;
5205 machine_mode vec_mode;
5206 tree new_temp;
5207 int op_type;
5208 optab optab;
5209 bool target_support_p;
5210 gimple *def_stmt;
5211 enum vect_def_type dt[3]
5212 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5213 int ndts = 3;
5214 gimple *new_stmt = NULL;
5215 stmt_vec_info prev_stmt_info;
5216 int nunits_in;
5217 int nunits_out;
5218 tree vectype_out;
5219 int ncopies;
5220 int j, i;
5221 vec<tree> vec_oprnds0 = vNULL;
5222 vec<tree> vec_oprnds1 = vNULL;
5223 vec<tree> vec_oprnds2 = vNULL;
5224 tree vop0, vop1, vop2;
5225 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5226 vec_info *vinfo = stmt_info->vinfo;
5227 int vf;
5229 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5230 return false;
5232 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5233 && ! vec_stmt)
5234 return false;
5236 /* Is STMT a vectorizable binary/unary operation? */
5237 if (!is_gimple_assign (stmt))
5238 return false;
5240 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5241 return false;
5243 code = gimple_assign_rhs_code (stmt);
5245 /* For pointer addition, we should use the normal plus for
5246 the vector addition. */
5247 if (code == POINTER_PLUS_EXPR)
5248 code = PLUS_EXPR;
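/* E.g. an address computation such as q_i = p_i + 4 is vectorized with a
   plain PLUS_EXPR on the vectors of pointer-sized values, since
   POINTER_PLUS_EXPR has no vector form.  */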
5250 /* Support only unary, binary and ternary operations. */
5251 op_type = TREE_CODE_LENGTH (code);
5252 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5254 if (dump_enabled_p ())
5255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5256 "num. args = %d (not unary/binary/ternary op).\n",
5257 op_type);
5258 return false;
5261 scalar_dest = gimple_assign_lhs (stmt);
5262 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5264 /* Most operations cannot handle bit-precision types without extra
5265 truncations. */
5266 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5267 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5268 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5269 /* Exceptions are bitwise binary operations. */
5270 && code != BIT_IOR_EXPR
5271 && code != BIT_XOR_EXPR
5272 && code != BIT_AND_EXPR)
5274 if (dump_enabled_p ())
5275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5276 "bit-precision arithmetic not supported.\n");
5277 return false;
5280 op0 = gimple_assign_rhs1 (stmt);
5281 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5283 if (dump_enabled_p ())
5284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5285 "use not simple.\n");
5286 return false;
5288 /* If op0 is an external or constant def use a vector type with
5289 the same size as the output vector type. */
5290 if (!vectype)
5292 /* For a boolean type we cannot determine the vectype from an
5293 invariant value (we don't know whether it is a vector
5294 of booleans or a vector of integers). We use the output
5295 vectype because operations on booleans don't change
5296 the type. */
5297 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5299 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5301 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "not supported operation on bool value.\n");
5304 return false;
5306 vectype = vectype_out;
5308 else
5309 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5311 if (vec_stmt)
5312 gcc_assert (vectype);
5313 if (!vectype)
5315 if (dump_enabled_p ())
5317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5318 "no vectype for scalar type ");
5319 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5320 TREE_TYPE (op0));
5321 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5324 return false;
5327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5328 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5329 if (nunits_out != nunits_in)
5330 return false;
5332 if (op_type == binary_op || op_type == ternary_op)
5334 op1 = gimple_assign_rhs2 (stmt);
5335 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5339 "use not simple.\n");
5340 return false;
5343 if (op_type == ternary_op)
5345 op2 = gimple_assign_rhs3 (stmt);
5346 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5348 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5350 "use not simple.\n");
5351 return false;
5355 if (loop_vinfo)
5356 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5357 else
5358 vf = 1;
5360 /* Multiple types in SLP are handled by creating the appropriate number of
5361 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5362 case of SLP. */
5363 if (slp_node)
5364 ncopies = 1;
5365 else
5366 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5368 gcc_assert (ncopies >= 1);
5370 /* Shifts are handled in vectorizable_shift (). */
5371 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5372 || code == RROTATE_EXPR)
5373 return false;
5375 /* Supportable by target? */
5377 vec_mode = TYPE_MODE (vectype);
5378 if (code == MULT_HIGHPART_EXPR)
5379 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5380 else
5382 optab = optab_for_tree_code (code, vectype, optab_default);
5383 if (!optab)
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5387 "no optab.\n");
5388 return false;
5390 target_support_p = (optab_handler (optab, vec_mode)
5391 != CODE_FOR_nothing);
5394 if (!target_support_p)
5396 if (dump_enabled_p ())
5397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5398 "op not supported by target.\n");
5399 /* Check only during analysis. */
5400 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5401 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5402 return false;
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_NOTE, vect_location,
5405 "proceeding using word mode.\n");
5408 /* Worthwhile without SIMD support? Check only during analysis. */
5409 if (!VECTOR_MODE_P (vec_mode)
5410 && !vec_stmt
5411 && vf < vect_min_worthwhile_factor (code))
5413 if (dump_enabled_p ())
5414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5415 "not worthwhile without SIMD support.\n");
5416 return false;
5419 if (!vec_stmt) /* transformation not required. */
5421 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_NOTE, vect_location,
5424 "=== vectorizable_operation ===\n");
5425 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5426 return true;
5429 /* Transform. */
5431 if (dump_enabled_p ())
5432 dump_printf_loc (MSG_NOTE, vect_location,
5433 "transform binary/unary operation.\n");
5435 /* Handle def. */
5436 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5438 /* In case the vectorization factor (VF) is bigger than the number
5439 of elements that we can fit in a vectype (nunits), we have to generate
5440 more than one vector stmt - i.e - we need to "unroll" the
5441 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5442 from one copy of the vector stmt to the next, in the field
5443 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5444 stages to find the correct vector defs to be used when vectorizing
5445 stmts that use the defs of the current stmt. The example below
5446 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5447 we need to create 4 vectorized stmts):
5449 before vectorization:
5450 RELATED_STMT VEC_STMT
5451 S1: x = memref - -
5452 S2: z = x + 1 - -
5454 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5455 there):
5456 RELATED_STMT VEC_STMT
5457 VS1_0: vx0 = memref0 VS1_1 -
5458 VS1_1: vx1 = memref1 VS1_2 -
5459 VS1_2: vx2 = memref2 VS1_3 -
5460 VS1_3: vx3 = memref3 - -
5461 S1: x = load - VS1_0
5462 S2: z = x + 1 - -
5464 step2: vectorize stmt S2 (done here):
5465 To vectorize stmt S2 we first need to find the relevant vector
5466 def for the first operand 'x'. This is, as usual, obtained from
5467 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5468 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5469 relevant vector def 'vx0'. Having found 'vx0' we can generate
5470 the vector stmt VS2_0, and as usual, record it in the
5471 STMT_VINFO_VEC_STMT of stmt S2.
5472 When creating the second copy (VS2_1), we obtain the relevant vector
5473 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5474 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5475 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5476 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5477 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5478 chain of stmts and pointers:
5479 RELATED_STMT VEC_STMT
5480 VS1_0: vx0 = memref0 VS1_1 -
5481 VS1_1: vx1 = memref1 VS1_2 -
5482 VS1_2: vx2 = memref2 VS1_3 -
5483 VS1_3: vx3 = memref3 - -
5484 S1: x = load - VS1_0
5485 VS2_0: vz0 = vx0 + v1 VS2_1 -
5486 VS2_1: vz1 = vx1 + v1 VS2_2 -
5487 VS2_2: vz2 = vx2 + v1 VS2_3 -
5488 VS2_3: vz3 = vx3 + v1 - -
5489 S2: z = x + 1 - VS2_0 */
5491 prev_stmt_info = NULL;
5492 for (j = 0; j < ncopies; j++)
5494 /* Handle uses. */
5495 if (j == 0)
5497 if (op_type == binary_op || op_type == ternary_op)
5498 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5499 slp_node);
5500 else
5501 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5502 slp_node);
5503 if (op_type == ternary_op)
5504 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5505 slp_node);
5507 else
5509 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5510 if (op_type == ternary_op)
5512 tree vec_oprnd = vec_oprnds2.pop ();
5513 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5514 vec_oprnd));
5518 /* Arguments are ready. Create the new vector stmt. */
5519 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5521 vop1 = ((op_type == binary_op || op_type == ternary_op)
5522 ? vec_oprnds1[i] : NULL_TREE);
5523 vop2 = ((op_type == ternary_op)
5524 ? vec_oprnds2[i] : NULL_TREE);
5525 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5526 new_temp = make_ssa_name (vec_dest, new_stmt);
5527 gimple_assign_set_lhs (new_stmt, new_temp);
5528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5529 if (slp_node)
5530 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5533 if (slp_node)
5534 continue;
5536 if (j == 0)
5537 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5538 else
5539 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5540 prev_stmt_info = vinfo_for_stmt (new_stmt);
5543 vec_oprnds0.release ();
5544 vec_oprnds1.release ();
5545 vec_oprnds2.release ();
5547 return true;
5550 /* A helper function to ensure data reference DR's base alignment
5551 for STMT_INFO. */
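/* E.g. if the base is a file-scope array with only the default alignment
   while the chosen vectype requires a larger one, the declaration's
   alignment is raised (through its symtab node when the decl is in the
   symbol table) so the vectorized accesses can be treated as aligned.  */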
5553 static void
5554 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5556 if (!dr->aux)
5557 return;
5559 if (DR_VECT_AUX (dr)->base_misaligned)
5561 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5562 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5564 if (decl_in_symtab_p (base_decl))
5565 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5566 else
5568 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5569 DECL_USER_ALIGN (base_decl) = 1;
5571 DR_VECT_AUX (dr)->base_misaligned = false;
5576 /* Function get_group_alias_ptr_type.
5578 Return the alias type for the group starting at FIRST_STMT. */
5580 static tree
5581 get_group_alias_ptr_type (gimple *first_stmt)
5583 struct data_reference *first_dr, *next_dr;
5584 gimple *next_stmt;
5586 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5587 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5588 while (next_stmt)
5590 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5591 if (get_alias_set (DR_REF (first_dr))
5592 != get_alias_set (DR_REF (next_dr)))
5594 if (dump_enabled_p ())
5595 dump_printf_loc (MSG_NOTE, vect_location,
5596 "conflicting alias set types.\n");
5597 return ptr_type_node;
5599 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5601 return reference_alias_ptr_type (DR_REF (first_dr));
5605 /* Function vectorizable_store.
5607    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5608    can be vectorized.
5609    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5610    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5611 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5613 static bool
5614 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5615 slp_tree slp_node)
5617 tree scalar_dest;
5618 tree data_ref;
5619 tree op;
5620 tree vec_oprnd = NULL_TREE;
5621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5622 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5623 tree elem_type;
5624 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5625 struct loop *loop = NULL;
5626 machine_mode vec_mode;
5627 tree dummy;
5628 enum dr_alignment_support alignment_support_scheme;
5629 gimple *def_stmt;
5630 enum vect_def_type dt;
5631 stmt_vec_info prev_stmt_info = NULL;
5632 tree dataref_ptr = NULL_TREE;
5633 tree dataref_offset = NULL_TREE;
5634 gimple *ptr_incr = NULL;
5635 int ncopies;
5636 int j;
5637 gimple *next_stmt, *first_stmt;
5638 bool grouped_store;
5639 unsigned int group_size, i;
5640 vec<tree> oprnds = vNULL;
5641 vec<tree> result_chain = vNULL;
5642 bool inv_p;
5643 tree offset = NULL_TREE;
5644 vec<tree> vec_oprnds = vNULL;
5645 bool slp = (slp_node != NULL);
5646 unsigned int vec_num;
5647 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5648 vec_info *vinfo = stmt_info->vinfo;
5649 tree aggr_type;
5650 gather_scatter_info gs_info;
5651 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5652 gimple *new_stmt;
5653 int vf;
5654 vec_load_store_type vls_type;
5655 tree ref_type;
5657 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5658 return false;
5660 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5661 && ! vec_stmt)
5662 return false;
5664 /* Is vectorizable store? */
5666 if (!is_gimple_assign (stmt))
5667 return false;
5669 scalar_dest = gimple_assign_lhs (stmt);
5670 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5671 && is_pattern_stmt_p (stmt_info))
5672 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5673 if (TREE_CODE (scalar_dest) != ARRAY_REF
5674 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5675 && TREE_CODE (scalar_dest) != INDIRECT_REF
5676 && TREE_CODE (scalar_dest) != COMPONENT_REF
5677 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5678 && TREE_CODE (scalar_dest) != REALPART_EXPR
5679 && TREE_CODE (scalar_dest) != MEM_REF)
5680 return false;
5682 /* Cannot have hybrid store SLP -- that would mean storing to the
5683 same location twice. */
5684 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5686 gcc_assert (gimple_assign_single_p (stmt));
5688 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5689 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5691 if (loop_vinfo)
5693 loop = LOOP_VINFO_LOOP (loop_vinfo);
5694 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5696 else
5697 vf = 1;
5699 /* Multiple types in SLP are handled by creating the appropriate number of
5700 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5701 case of SLP. */
5702 if (slp)
5703 ncopies = 1;
5704 else
5705 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5707 gcc_assert (ncopies >= 1);
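  /* E.g. (illustrative): with a vectorization factor of 8 and a
     4-element vector type, NCOPIES is 8 / 4 = 2, i.e. two vector
     stores are generated for the scalar store.  */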
5709 /* FORNOW. This restriction should be relaxed. */
5710 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5712 if (dump_enabled_p ())
5713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5714 "multiple types in nested loop.\n");
5715 return false;
5718 op = gimple_assign_rhs1 (stmt);
5720 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5722 if (dump_enabled_p ())
5723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5724 "use not simple.\n");
5725 return false;
5728 if (dt == vect_constant_def || dt == vect_external_def)
5729 vls_type = VLS_STORE_INVARIANT;
5730 else
5731 vls_type = VLS_STORE;
5733 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5734 return false;
5736 elem_type = TREE_TYPE (vectype);
5737 vec_mode = TYPE_MODE (vectype);
5739 /* FORNOW. In some cases can vectorize even if data-type not supported
5740 (e.g. - array initialization with 0). */
5741 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5742 return false;
5744 if (!STMT_VINFO_DATA_REF (stmt_info))
5745 return false;
5747 vect_memory_access_type memory_access_type;
5748 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5749 &memory_access_type, &gs_info))
5750 return false;
5752 if (!vec_stmt) /* transformation not required. */
5754 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5755 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5756 /* The SLP costs are calculated during SLP analysis. */
5757 if (!PURE_SLP_STMT (stmt_info))
5758 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5759 NULL, NULL, NULL);
5760 return true;
5762 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5764 /* Transform. */
5766 ensure_base_align (stmt_info, dr);
5768 if (memory_access_type == VMAT_GATHER_SCATTER)
5770 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5771 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5772 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5773 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5774 edge pe = loop_preheader_edge (loop);
5775 gimple_seq seq;
5776 basic_block new_bb;
5777 enum { NARROW, NONE, WIDEN } modifier;
5778 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
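      /* The scatter builtin's offset vector may have a different number
	 of elements than the data vector.  When the two counts differ
	 by a factor of two, build a permutation mask that extracts the
	 upper half of the wider operand for the odd-numbered copies;
	 when the data vector is the wider one (NARROW) the number of
	 copies is doubled.  */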
5780 if (nunits == (unsigned int) scatter_off_nunits)
5781 modifier = NONE;
5782 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5784 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5785 modifier = WIDEN;
5787 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5788 sel[i] = i | nunits;
5790 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5791 gcc_assert (perm_mask != NULL_TREE);
5793 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5795 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5796 modifier = NARROW;
5798 for (i = 0; i < (unsigned int) nunits; ++i)
5799 sel[i] = i | scatter_off_nunits;
5801 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5802 gcc_assert (perm_mask != NULL_TREE);
5803 ncopies *= 2;
5805 else
5806 gcc_unreachable ();
5808 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5809 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5810 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5811 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5812 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5813 scaletype = TREE_VALUE (arglist);
5815 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5816 && TREE_CODE (rettype) == VOID_TYPE);
5818 ptr = fold_convert (ptrtype, gs_info.base);
5819 if (!is_gimple_min_invariant (ptr))
5821 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5822 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5823 gcc_assert (!new_bb);
5826 /* Currently we support only unconditional scatter stores,
5827 so mask should be all ones. */
5828 mask = build_int_cst (masktype, -1);
5829 mask = vect_init_vector (stmt, mask, masktype, NULL);
5831 scale = build_int_cst (scaletype, gs_info.scale);
5833 prev_stmt_info = NULL;
5834 for (j = 0; j < ncopies; ++j)
5836 if (j == 0)
5838 src = vec_oprnd1
5839 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5840 op = vec_oprnd0
5841 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5843 else if (modifier != NONE && (j & 1))
5845 if (modifier == WIDEN)
5847 src = vec_oprnd1
5848 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5849 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5850 stmt, gsi);
5852 else if (modifier == NARROW)
5854 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5855 stmt, gsi);
5856 op = vec_oprnd0
5857 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5858 vec_oprnd0);
5860 else
5861 gcc_unreachable ();
5863 else
5865 src = vec_oprnd1
5866 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5867 op = vec_oprnd0
5868 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5869 vec_oprnd0);
5872 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5874 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5875 == TYPE_VECTOR_SUBPARTS (srctype));
5876 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5877 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5878 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5879 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5880 src = var;
5883 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5885 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5886 == TYPE_VECTOR_SUBPARTS (idxtype));
5887 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5888 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5889 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5890 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5891 op = var;
5894 new_stmt
5895 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5897 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5899 if (prev_stmt_info == NULL)
5900 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5901 else
5902 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5903 prev_stmt_info = vinfo_for_stmt (new_stmt);
5905 return true;
5908 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5909 if (grouped_store)
5911 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5912 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5913 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5915 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5917 /* FORNOW */
5918 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5920 /* We vectorize all the stmts of the interleaving group when we
5921 reach the last stmt in the group. */
5922 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5923 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5924 && !slp)
5926 *vec_stmt = NULL;
5927 return true;
5930 if (slp)
5932 grouped_store = false;
5933 /* VEC_NUM is the number of vect stmts to be created for this
5934 group. */
5935 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5936 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5937 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5938 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5939 op = gimple_assign_rhs1 (first_stmt);
5941 else
5942 /* VEC_NUM is the number of vect stmts to be created for this
5943 group. */
5944 vec_num = group_size;
5946 ref_type = get_group_alias_ptr_type (first_stmt);
5948 else
5950 first_stmt = stmt;
5951 first_dr = dr;
5952 group_size = vec_num = 1;
5953 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5956 if (dump_enabled_p ())
5957 dump_printf_loc (MSG_NOTE, vect_location,
5958 "transform store. ncopies = %d\n", ncopies);
5960 if (memory_access_type == VMAT_ELEMENTWISE
5961 || memory_access_type == VMAT_STRIDED_SLP)
5963 gimple_stmt_iterator incr_gsi;
5964 bool insert_after;
5965 gimple *incr;
5966 tree offvar;
5967 tree ivstep;
5968 tree running_off;
5969 gimple_seq stmts = NULL;
5970 tree stride_base, stride_step, alias_off;
5971 tree vec_oprnd;
5972 unsigned int g;
5974 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5976 stride_base
5977 = fold_build_pointer_plus
5978 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5979 size_binop (PLUS_EXPR,
5980 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5981 convert_to_ptrofftype (DR_INIT (first_dr))));
5982 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5984 /* For a store with loop-invariant (but other than power-of-2)
5985 stride (i.e. not a grouped access) like so:
5987 for (i = 0; i < n; i += stride)
5988 array[i] = ...;
5990 we generate a new induction variable and new stores from
5991 the components of the (vectorized) rhs:
5993 for (j = 0; ; j += VF*stride)
5994 vectemp = ...;
5995 tmp1 = vectemp[0];
5996 array[j] = tmp1;
5997 tmp2 = vectemp[1];
5998 array[j + stride] = tmp2;
6002 unsigned nstores = nunits;
6003 unsigned lnel = 1;
6004 tree ltype = elem_type;
6005 tree lvectype = vectype;
6006 if (slp)
6008 if (group_size < nunits
6009 && nunits % group_size == 0)
6011 nstores = nunits / group_size;
6012 lnel = group_size;
6013 ltype = build_vector_type (elem_type, group_size);
6014 lvectype = vectype;
6016              /* First check whether the vec_extract optab supports extraction
6017                 of the vector elts directly; if not, try the fallbacks below.  */
6018 machine_mode elmode = TYPE_MODE (elem_type);
6019 machine_mode vmode = mode_for_vector (elmode, group_size);
6020 if (! VECTOR_MODE_P (vmode)
6021 || (convert_optab_handler (vec_extract_optab,
6022 TYPE_MODE (vectype), vmode)
6023 == CODE_FOR_nothing))
6025 /* Try to avoid emitting an extract of vector elements
6026 by performing the extracts using an integer type of the
6027 same size, extracting from a vector of those and then
6028 re-interpreting it as the original vector type if
6029 supported. */
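	      /* E.g. (illustrative): interleaved pairs of 16-bit elements
		 taken out of an 8-element vector can be stored as four
		 32-bit integers extracted from the same register viewed
		 as a 4-element integer vector.  */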
6030 unsigned lsize
6031 = group_size * GET_MODE_BITSIZE (elmode);
6032 elmode = mode_for_size (lsize, MODE_INT, 0);
6033 vmode = mode_for_vector (elmode, nunits / group_size);
6034 /* If we can't construct such a vector fall back to
6035 element extracts from the original vector type and
6036 element size stores. */
6037 if (VECTOR_MODE_P (vmode)
6038 && (convert_optab_handler (vec_extract_optab,
6039 vmode, elmode)
6040 != CODE_FOR_nothing))
6042 nstores = nunits / group_size;
6043 lnel = group_size;
6044 ltype = build_nonstandard_integer_type (lsize, 1);
6045 lvectype = build_vector_type (ltype, nstores);
6047 /* Else fall back to vector extraction anyway.
6048 Fewer stores are more important than avoiding spilling
6049 of the vector we extract from. Compared to the
6050 construction case in vectorizable_load no store-forwarding
6051 issue exists here for reasonable archs. */
6054 else if (group_size >= nunits
6055 && group_size % nunits == 0)
6057 nstores = 1;
6058 lnel = nunits;
6059 ltype = vectype;
6060 lvectype = vectype;
6062 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6063 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6066 ivstep = stride_step;
6067 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6068 build_int_cst (TREE_TYPE (ivstep), vf));
6070 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6072 create_iv (stride_base, ivstep, NULL,
6073 loop, &incr_gsi, insert_after,
6074 &offvar, NULL);
6075 incr = gsi_stmt (incr_gsi);
6076 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6078 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6079 if (stmts)
6080 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6082 prev_stmt_info = NULL;
6083 alias_off = build_int_cst (ref_type, 0);
6084 next_stmt = first_stmt;
6085 for (g = 0; g < group_size; g++)
6087 running_off = offvar;
6088 if (g)
6090 tree size = TYPE_SIZE_UNIT (ltype);
6091 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6092 size);
6093 tree newoff = copy_ssa_name (running_off, NULL);
6094 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6095 running_off, pos);
6096 vect_finish_stmt_generation (stmt, incr, gsi);
6097 running_off = newoff;
6099 unsigned int group_el = 0;
6100 unsigned HOST_WIDE_INT
6101 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6102 for (j = 0; j < ncopies; j++)
6104              /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
6105 and first_stmt == stmt. */
6106 if (j == 0)
6108 if (slp)
6110 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6111 slp_node);
6112 vec_oprnd = vec_oprnds[0];
6114 else
6116 gcc_assert (gimple_assign_single_p (next_stmt));
6117 op = gimple_assign_rhs1 (next_stmt);
6118 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6121 else
6123 if (slp)
6124 vec_oprnd = vec_oprnds[j];
6125 else
6127 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6128 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6131 /* Pun the vector to extract from if necessary. */
6132 if (lvectype != vectype)
6134 tree tem = make_ssa_name (lvectype);
6135 gimple *pun
6136 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6137 lvectype, vec_oprnd));
6138 vect_finish_stmt_generation (stmt, pun, gsi);
6139 vec_oprnd = tem;
6141 for (i = 0; i < nstores; i++)
6143 tree newref, newoff;
6144 gimple *incr, *assign;
6145 tree size = TYPE_SIZE (ltype);
6146 /* Extract the i'th component. */
6147 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6148 bitsize_int (i), size);
6149 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6150 size, pos);
6152 elem = force_gimple_operand_gsi (gsi, elem, true,
6153 NULL_TREE, true,
6154 GSI_SAME_STMT);
6156 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6157 group_el * elsz);
6158 newref = build2 (MEM_REF, ltype,
6159 running_off, this_off);
6161 /* And store it to *running_off. */
6162 assign = gimple_build_assign (newref, elem);
6163 vect_finish_stmt_generation (stmt, assign, gsi);
6165 group_el += lnel;
6166 if (! slp
6167 || group_el == group_size)
6169 newoff = copy_ssa_name (running_off, NULL);
6170 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6171 running_off, stride_step);
6172 vect_finish_stmt_generation (stmt, incr, gsi);
6174 running_off = newoff;
6175 group_el = 0;
6177 if (g == group_size - 1
6178 && !slp)
6180 if (j == 0 && i == 0)
6181 STMT_VINFO_VEC_STMT (stmt_info)
6182 = *vec_stmt = assign;
6183 else
6184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6185 prev_stmt_info = vinfo_for_stmt (assign);
6189 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6190 if (slp)
6191 break;
6194 vec_oprnds.release ();
6195 return true;
6198 auto_vec<tree> dr_chain (group_size);
6199 oprnds.create (group_size);
6201 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6202 gcc_assert (alignment_support_scheme);
6203 /* Targets with store-lane instructions must not require explicit
6204 realignment. */
6205 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6206 || alignment_support_scheme == dr_aligned
6207 || alignment_support_scheme == dr_unaligned_supported);
6209 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6210 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6211 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6213 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6214 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6215 else
6216 aggr_type = vectype;
6218 /* In case the vectorization factor (VF) is bigger than the number
6219 of elements that we can fit in a vectype (nunits), we have to generate
6220      more than one vector stmt - i.e., we need to "unroll" the
6221 vector stmt by a factor VF/nunits. For more details see documentation in
6222 vect_get_vec_def_for_copy_stmt. */
6224 /* In case of interleaving (non-unit grouped access):
6226 S1: &base + 2 = x2
6227 S2: &base = x0
6228 S3: &base + 1 = x1
6229 S4: &base + 3 = x3
6231 We create vectorized stores starting from base address (the access of the
6232 first stmt in the chain (S2 in the above example), when the last store stmt
6233 of the chain (S4) is reached:
6235 VS1: &base = vx2
6236 VS2: &base + vec_size*1 = vx0
6237 VS3: &base + vec_size*2 = vx1
6238 VS4: &base + vec_size*3 = vx3
6240 Then permutation statements are generated:
6242 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6243 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6246 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6247 (the order of the data-refs in the output of vect_permute_store_chain
6248 corresponds to the order of scalar stmts in the interleaving chain - see
6249 the documentation of vect_permute_store_chain()).
6251 In case of both multiple types and interleaving, above vector stores and
6252 permutation stmts are created for every copy. The result vector stmts are
6253 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6254 STMT_VINFO_RELATED_STMT for the next copies.
6257 prev_stmt_info = NULL;
6258 for (j = 0; j < ncopies; j++)
6261 if (j == 0)
6263 if (slp)
6265 /* Get vectorized arguments for SLP_NODE. */
6266 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6267 NULL, slp_node);
6269 vec_oprnd = vec_oprnds[0];
6271 else
6273 /* For interleaved stores we collect vectorized defs for all the
6274 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6275 used as an input to vect_permute_store_chain(), and OPRNDS as
6276 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6278 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6279 OPRNDS are of size 1. */
6280 next_stmt = first_stmt;
6281 for (i = 0; i < group_size; i++)
6283 /* Since gaps are not supported for interleaved stores,
6284 GROUP_SIZE is the exact number of stmts in the chain.
6285                 Therefore, NEXT_STMT can't be NULL.  If there is no
6286                 interleaving, GROUP_SIZE is 1, and only one
6287 iteration of the loop will be executed. */
6288 gcc_assert (next_stmt
6289 && gimple_assign_single_p (next_stmt));
6290 op = gimple_assign_rhs1 (next_stmt);
6292 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6293 dr_chain.quick_push (vec_oprnd);
6294 oprnds.quick_push (vec_oprnd);
6295 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6299          /* We should have caught mismatched types earlier.  */
6300 gcc_assert (useless_type_conversion_p (vectype,
6301 TREE_TYPE (vec_oprnd)));
6302 bool simd_lane_access_p
6303 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6304 if (simd_lane_access_p
6305 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6306 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6307 && integer_zerop (DR_OFFSET (first_dr))
6308 && integer_zerop (DR_INIT (first_dr))
6309 && alias_sets_conflict_p (get_alias_set (aggr_type),
6310 get_alias_set (TREE_TYPE (ref_type))))
6312 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6313 dataref_offset = build_int_cst (ref_type, 0);
6314 inv_p = false;
6316 else
6317 dataref_ptr
6318 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6319 simd_lane_access_p ? loop : NULL,
6320 offset, &dummy, gsi, &ptr_incr,
6321 simd_lane_access_p, &inv_p);
6322 gcc_assert (bb_vinfo || !inv_p);
6324 else
6326 /* For interleaved stores we created vectorized defs for all the
6327 defs stored in OPRNDS in the previous iteration (previous copy).
6328 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6329 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6330 next copy.
6331 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6332 OPRNDS are of size 1. */
6333 for (i = 0; i < group_size; i++)
6335 op = oprnds[i];
6336 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6337 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6338 dr_chain[i] = vec_oprnd;
6339 oprnds[i] = vec_oprnd;
6341 if (dataref_offset)
6342 dataref_offset
6343 = int_const_binop (PLUS_EXPR, dataref_offset,
6344 TYPE_SIZE_UNIT (aggr_type));
6345 else
6346 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6347 TYPE_SIZE_UNIT (aggr_type));
6350 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6352 tree vec_array;
6354 /* Combine all the vectors into an array. */
6355 vec_array = create_vector_array (vectype, vec_num);
6356 for (i = 0; i < vec_num; i++)
6358 vec_oprnd = dr_chain[i];
6359 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6362 /* Emit:
6363 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6364 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6365 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6366 gimple_call_set_lhs (new_stmt, data_ref);
6367 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6369 else
6371 new_stmt = NULL;
6372 if (grouped_store)
6374 if (j == 0)
6375 result_chain.create (group_size);
6376 /* Permute. */
6377 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6378 &result_chain);
6381 next_stmt = first_stmt;
6382 for (i = 0; i < vec_num; i++)
6384 unsigned align, misalign;
6386 if (i > 0)
6387 /* Bump the vector pointer. */
6388 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6389 stmt, NULL_TREE);
6391 if (slp)
6392 vec_oprnd = vec_oprnds[i];
6393 else if (grouped_store)
6394 /* For grouped stores vectorized defs are interleaved in
6395 vect_permute_store_chain(). */
6396 vec_oprnd = result_chain[i];
6398 data_ref = fold_build2 (MEM_REF, vectype,
6399 dataref_ptr,
6400 dataref_offset
6401 ? dataref_offset
6402 : build_int_cst (ref_type, 0));
6403 align = TYPE_ALIGN_UNIT (vectype);
6404 if (aligned_access_p (first_dr))
6405 misalign = 0;
6406 else if (DR_MISALIGNMENT (first_dr) == -1)
6408 align = dr_alignment (vect_dr_behavior (first_dr));
6409 misalign = 0;
6410 TREE_TYPE (data_ref)
6411 = build_aligned_type (TREE_TYPE (data_ref),
6412 align * BITS_PER_UNIT);
6414 else
6416 TREE_TYPE (data_ref)
6417 = build_aligned_type (TREE_TYPE (data_ref),
6418 TYPE_ALIGN (elem_type));
6419 misalign = DR_MISALIGNMENT (first_dr);
6421 if (dataref_offset == NULL_TREE
6422 && TREE_CODE (dataref_ptr) == SSA_NAME)
6423 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6424 misalign);
6426 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6428 tree perm_mask = perm_mask_for_reverse (vectype);
6429 tree perm_dest
6430 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6431 vectype);
6432 tree new_temp = make_ssa_name (perm_dest);
6434 /* Generate the permute statement. */
6435 gimple *perm_stmt
6436 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6437 vec_oprnd, perm_mask);
6438 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6440 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6441 vec_oprnd = new_temp;
6444 /* Arguments are ready. Create the new vector stmt. */
6445 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6448 if (slp)
6449 continue;
6451 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6452 if (!next_stmt)
6453 break;
6456 if (!slp)
6458 if (j == 0)
6459 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6460 else
6461 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6462 prev_stmt_info = vinfo_for_stmt (new_stmt);
6466 oprnds.release ();
6467 result_chain.release ();
6468 vec_oprnds.release ();
6470 return true;
6473 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6474 VECTOR_CST mask. No checks are made that the target platform supports the
6475 mask, so callers may wish to test can_vec_perm_p separately, or use
6476 vect_gen_perm_mask_checked. */
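/* For example (illustrative): for a 4-element vector type and
   SEL = {3, 2, 1, 0} this builds the constant mask {3, 2, 1, 0}, which,
   used in a VEC_PERM_EXPR with the same vector as both inputs, reverses
   the element order.  */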
6478 tree
6479 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6481 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6482 int i, nunits;
6484 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6486 mask_elt_type = lang_hooks.types.type_for_mode
6487 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6488 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6490 mask_elts = XALLOCAVEC (tree, nunits);
6491 for (i = nunits - 1; i >= 0; i--)
6492 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6493 mask_vec = build_vector (mask_type, mask_elts);
6495 return mask_vec;
6498 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6499 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6501 tree
6502 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6504 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6505 return vect_gen_perm_mask_any (vectype, sel);
6508 /* Given vector variables X and Y that were generated for the scalar
6509    STMT, generate instructions to permute the vector elements of X and Y
6510    using the permutation mask MASK_VEC, insert them at *GSI and return the
6511    permuted vector variable.  */
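/* (Illustrative note: the scatter-store and gather-load code in this
   file uses this helper both to form the permuted offset/data operands
   for the target builtin calls and, in the narrowing case, to combine
   two builtin results into a single vector.)  */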
6513 static tree
6514 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6515 gimple_stmt_iterator *gsi)
6517 tree vectype = TREE_TYPE (x);
6518 tree perm_dest, data_ref;
6519 gimple *perm_stmt;
6521 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6522 data_ref = make_ssa_name (perm_dest);
6524 /* Generate the permute statement. */
6525 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6526 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6528 return data_ref;
6531 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6532    inserting them on the loop's preheader edge.  Returns true if we
6533    were successful in doing so (and thus STMT itself can then be moved),
6534    otherwise returns false.  */
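/* E.g. (illustrative): if STMT is an invariant load 'tmp_3 = *p_5' and
   p_5 is defined inside LOOP by 'p_5 = base_1 + off_2' whose own
   operands are defined outside LOOP, the definition of p_5 is moved to
   the preheader, after which the caller can move STMT as well.  */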
6536 static bool
6537 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6539 ssa_op_iter i;
6540 tree op;
6541 bool any = false;
6543 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6545 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6546 if (!gimple_nop_p (def_stmt)
6547 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6549 /* Make sure we don't need to recurse. While we could do
6550 so in simple cases when there are more complex use webs
6551 we don't have an easy way to preserve stmt order to fulfil
6552 dependencies within them. */
6553 tree op2;
6554 ssa_op_iter i2;
6555 if (gimple_code (def_stmt) == GIMPLE_PHI)
6556 return false;
6557 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6559 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6560 if (!gimple_nop_p (def_stmt2)
6561 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6562 return false;
6564 any = true;
6568 if (!any)
6569 return true;
6571 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6573 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6574 if (!gimple_nop_p (def_stmt)
6575 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6577 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6578 gsi_remove (&gsi, false);
6579 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6583 return true;
6586 /* vectorizable_load.
6588    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6589    can be vectorized.
6590    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6591    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6592 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6594 static bool
6595 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6596 slp_tree slp_node, slp_instance slp_node_instance)
6598 tree scalar_dest;
6599 tree vec_dest = NULL;
6600 tree data_ref = NULL;
6601 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6602 stmt_vec_info prev_stmt_info;
6603 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6604 struct loop *loop = NULL;
6605 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6606 bool nested_in_vect_loop = false;
6607 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6608 tree elem_type;
6609 tree new_temp;
6610 machine_mode mode;
6611 gimple *new_stmt = NULL;
6612 tree dummy;
6613 enum dr_alignment_support alignment_support_scheme;
6614 tree dataref_ptr = NULL_TREE;
6615 tree dataref_offset = NULL_TREE;
6616 gimple *ptr_incr = NULL;
6617 int ncopies;
6618 int i, j, group_size, group_gap_adj;
6619 tree msq = NULL_TREE, lsq;
6620 tree offset = NULL_TREE;
6621 tree byte_offset = NULL_TREE;
6622 tree realignment_token = NULL_TREE;
6623 gphi *phi = NULL;
6624 vec<tree> dr_chain = vNULL;
6625 bool grouped_load = false;
6626 gimple *first_stmt;
6627 gimple *first_stmt_for_drptr = NULL;
6628 bool inv_p;
6629 bool compute_in_loop = false;
6630 struct loop *at_loop;
6631 int vec_num;
6632 bool slp = (slp_node != NULL);
6633 bool slp_perm = false;
6634 enum tree_code code;
6635 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6636 int vf;
6637 tree aggr_type;
6638 gather_scatter_info gs_info;
6639 vec_info *vinfo = stmt_info->vinfo;
6640 tree ref_type;
6642 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6643 return false;
6645 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6646 && ! vec_stmt)
6647 return false;
6649 /* Is vectorizable load? */
6650 if (!is_gimple_assign (stmt))
6651 return false;
6653 scalar_dest = gimple_assign_lhs (stmt);
6654 if (TREE_CODE (scalar_dest) != SSA_NAME)
6655 return false;
6657 code = gimple_assign_rhs_code (stmt);
6658 if (code != ARRAY_REF
6659 && code != BIT_FIELD_REF
6660 && code != INDIRECT_REF
6661 && code != COMPONENT_REF
6662 && code != IMAGPART_EXPR
6663 && code != REALPART_EXPR
6664 && code != MEM_REF
6665 && TREE_CODE_CLASS (code) != tcc_declaration)
6666 return false;
6668 if (!STMT_VINFO_DATA_REF (stmt_info))
6669 return false;
6671 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6672 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6674 if (loop_vinfo)
6676 loop = LOOP_VINFO_LOOP (loop_vinfo);
6677 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6678 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6680 else
6681 vf = 1;
6683 /* Multiple types in SLP are handled by creating the appropriate number of
6684 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6685 case of SLP. */
6686 if (slp)
6687 ncopies = 1;
6688 else
6689 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6691 gcc_assert (ncopies >= 1);
6693 /* FORNOW. This restriction should be relaxed. */
6694 if (nested_in_vect_loop && ncopies > 1)
6696 if (dump_enabled_p ())
6697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6698 "multiple types in nested loop.\n");
6699 return false;
6702 /* Invalidate assumptions made by dependence analysis when vectorization
6703 on the unrolled body effectively re-orders stmts. */
6704 if (ncopies > 1
6705 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6706 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6707 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6709 if (dump_enabled_p ())
6710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6711 "cannot perform implicit CSE when unrolling "
6712 "with negative dependence distance\n");
6713 return false;
6716 elem_type = TREE_TYPE (vectype);
6717 mode = TYPE_MODE (vectype);
6719 /* FORNOW. In some cases can vectorize even if data-type not supported
6720 (e.g. - data copies). */
6721 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6723 if (dump_enabled_p ())
6724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6725 "Aligned load, but unsupported type.\n");
6726 return false;
6729 /* Check if the load is a part of an interleaving chain. */
6730 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6732 grouped_load = true;
6733 /* FORNOW */
6734 gcc_assert (!nested_in_vect_loop);
6735 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6737 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6738 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6740 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6741 slp_perm = true;
6743 /* Invalidate assumptions made by dependence analysis when vectorization
6744 on the unrolled body effectively re-orders stmts. */
6745 if (!PURE_SLP_STMT (stmt_info)
6746 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6747 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6748 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6750 if (dump_enabled_p ())
6751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6752 "cannot perform implicit CSE when performing "
6753 "group loads with negative dependence distance\n");
6754 return false;
6757      /* Similarly, when the stmt is a load that is both part of an SLP
6758         instance and a loop-vectorized stmt via the same-dr mechanism,
6759         we have to give up.
6760 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6761 && (STMT_SLP_TYPE (stmt_info)
6762 != STMT_SLP_TYPE (vinfo_for_stmt
6763 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6765 if (dump_enabled_p ())
6766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6767 "conflicting SLP types for CSEd load\n");
6768 return false;
6772 vect_memory_access_type memory_access_type;
6773 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6774 &memory_access_type, &gs_info))
6775 return false;
6777 if (!vec_stmt) /* transformation not required. */
6779 if (!slp)
6780 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6781 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6782 /* The SLP costs are calculated during SLP analysis. */
6783 if (!PURE_SLP_STMT (stmt_info))
6784 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6785 NULL, NULL, NULL);
6786 return true;
6789 if (!slp)
6790 gcc_assert (memory_access_type
6791 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6793 if (dump_enabled_p ())
6794 dump_printf_loc (MSG_NOTE, vect_location,
6795 "transform load. ncopies = %d\n", ncopies);
6797 /* Transform. */
6799 ensure_base_align (stmt_info, dr);
6801 if (memory_access_type == VMAT_GATHER_SCATTER)
6803 tree vec_oprnd0 = NULL_TREE, op;
6804 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6805 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6806 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6807 edge pe = loop_preheader_edge (loop);
6808 gimple_seq seq;
6809 basic_block new_bb;
6810 enum { NARROW, NONE, WIDEN } modifier;
6811 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6813 if (nunits == gather_off_nunits)
6814 modifier = NONE;
6815 else if (nunits == gather_off_nunits / 2)
6817 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6818 modifier = WIDEN;
6820 for (i = 0; i < gather_off_nunits; ++i)
6821 sel[i] = i | nunits;
6823 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6825 else if (nunits == gather_off_nunits * 2)
6827 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6828 modifier = NARROW;
6830 for (i = 0; i < nunits; ++i)
6831 sel[i] = i < gather_off_nunits
6832 ? i : i + nunits - gather_off_nunits;
6834 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6835 ncopies *= 2;
6837 else
6838 gcc_unreachable ();
6840 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6841 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6842 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6843 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6844 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6845 scaletype = TREE_VALUE (arglist);
6846 gcc_checking_assert (types_compatible_p (srctype, rettype));
6848 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6850 ptr = fold_convert (ptrtype, gs_info.base);
6851 if (!is_gimple_min_invariant (ptr))
6853 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6854 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6855 gcc_assert (!new_bb);
6858 /* Currently we support only unconditional gather loads,
6859 so mask should be all ones. */
6860 if (TREE_CODE (masktype) == INTEGER_TYPE)
6861 mask = build_int_cst (masktype, -1);
6862 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6864 mask = build_int_cst (TREE_TYPE (masktype), -1);
6865 mask = build_vector_from_val (masktype, mask);
6866 mask = vect_init_vector (stmt, mask, masktype, NULL);
6868 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6870 REAL_VALUE_TYPE r;
6871 long tmp[6];
6872 for (j = 0; j < 6; ++j)
6873 tmp[j] = -1;
6874 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6875 mask = build_real (TREE_TYPE (masktype), r);
6876 mask = build_vector_from_val (masktype, mask);
6877 mask = vect_init_vector (stmt, mask, masktype, NULL);
6879 else
6880 gcc_unreachable ();
6882 scale = build_int_cst (scaletype, gs_info.scale);
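      /* Build the MERGE operand of the gather builtin: the value used
	 for inactive lanes.  Since only unconditional gathers are
	 handled here the mask is all ones and this value is never
	 observed, so an all-zero vector of the builtin's return element
	 type is used.  */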
6884 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6885 merge = build_int_cst (TREE_TYPE (rettype), 0);
6886 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6888 REAL_VALUE_TYPE r;
6889 long tmp[6];
6890 for (j = 0; j < 6; ++j)
6891 tmp[j] = 0;
6892 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6893 merge = build_real (TREE_TYPE (rettype), r);
6895 else
6896 gcc_unreachable ();
6897 merge = build_vector_from_val (rettype, merge);
6898 merge = vect_init_vector (stmt, merge, rettype, NULL);
6900 prev_stmt_info = NULL;
6901 for (j = 0; j < ncopies; ++j)
6903 if (modifier == WIDEN && (j & 1))
6904 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6905 perm_mask, stmt, gsi);
6906 else if (j == 0)
6907 op = vec_oprnd0
6908 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6909 else
6910 op = vec_oprnd0
6911 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6913 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6915 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6916 == TYPE_VECTOR_SUBPARTS (idxtype));
6917 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6918 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6919 new_stmt
6920 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6922 op = var;
6925 new_stmt
6926 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6928 if (!useless_type_conversion_p (vectype, rettype))
6930 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6931 == TYPE_VECTOR_SUBPARTS (rettype));
6932 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6933 gimple_call_set_lhs (new_stmt, op);
6934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6935 var = make_ssa_name (vec_dest);
6936 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6937 new_stmt
6938 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6940 else
6942 var = make_ssa_name (vec_dest, new_stmt);
6943 gimple_call_set_lhs (new_stmt, var);
6946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6948 if (modifier == NARROW)
6950 if ((j & 1) == 0)
6952 prev_res = var;
6953 continue;
6955 var = permute_vec_elements (prev_res, var,
6956 perm_mask, stmt, gsi);
6957 new_stmt = SSA_NAME_DEF_STMT (var);
6960 if (prev_stmt_info == NULL)
6961 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6962 else
6963 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6964 prev_stmt_info = vinfo_for_stmt (new_stmt);
6966 return true;
6969 if (memory_access_type == VMAT_ELEMENTWISE
6970 || memory_access_type == VMAT_STRIDED_SLP)
6972 gimple_stmt_iterator incr_gsi;
6973 bool insert_after;
6974 gimple *incr;
6975 tree offvar;
6976 tree ivstep;
6977 tree running_off;
6978 vec<constructor_elt, va_gc> *v = NULL;
6979 gimple_seq stmts = NULL;
6980 tree stride_base, stride_step, alias_off;
6982 gcc_assert (!nested_in_vect_loop);
6984 if (slp && grouped_load)
6986 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6987 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6988 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6989 ref_type = get_group_alias_ptr_type (first_stmt);
6991 else
6993 first_stmt = stmt;
6994 first_dr = dr;
6995 group_size = 1;
6996 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6999 stride_base
7000 = fold_build_pointer_plus
7001 (DR_BASE_ADDRESS (first_dr),
7002 size_binop (PLUS_EXPR,
7003 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7004 convert_to_ptrofftype (DR_INIT (first_dr))));
7005 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7007 /* For a load with loop-invariant (but other than power-of-2)
7008 stride (i.e. not a grouped access) like so:
7010 for (i = 0; i < n; i += stride)
7011 ... = array[i];
7013 we generate a new induction variable and new accesses to
7014 form a new vector (or vectors, depending on ncopies):
7016 for (j = 0; ; j += VF*stride)
7017 tmp1 = array[j];
7018 tmp2 = array[j + stride];
7020 vectemp = {tmp1, tmp2, ...}
7023 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7024 build_int_cst (TREE_TYPE (stride_step), vf));
7026 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7028 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7029 loop, &incr_gsi, insert_after,
7030 &offvar, NULL);
7031 incr = gsi_stmt (incr_gsi);
7032 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7034 stride_step = force_gimple_operand (unshare_expr (stride_step),
7035 &stmts, true, NULL_TREE);
7036 if (stmts)
7037 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7039 prev_stmt_info = NULL;
7040 running_off = offvar;
7041 alias_off = build_int_cst (ref_type, 0);
7042 int nloads = nunits;
7043 int lnel = 1;
7044 tree ltype = TREE_TYPE (vectype);
7045 tree lvectype = vectype;
7046 auto_vec<tree> dr_chain;
7047 if (memory_access_type == VMAT_STRIDED_SLP)
7049 if (group_size < nunits)
7051 /* First check if vec_init optab supports construction from
7052 vector elts directly. */
7053 machine_mode elmode = TYPE_MODE (TREE_TYPE (vectype));
7054 machine_mode vmode = mode_for_vector (elmode, group_size);
7055 if (VECTOR_MODE_P (vmode)
7056 && (convert_optab_handler (vec_init_optab,
7057 TYPE_MODE (vectype), vmode)
7058 != CODE_FOR_nothing))
7060 nloads = nunits / group_size;
7061 lnel = group_size;
7062 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7064 else
7066 /* Otherwise avoid emitting a constructor of vector elements
7067 by performing the loads using an integer type of the same
7068 size, constructing a vector of those and then
7069 re-interpreting it as the original vector type.
7070 This avoids a huge runtime penalty due to the general
7071 inability to perform store forwarding from smaller stores
7072 to a larger load. */
7073 unsigned lsize
7074 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7075 elmode = mode_for_size (lsize, MODE_INT, 0);
7076 vmode = mode_for_vector (elmode, nunits / group_size);
7077 /* If we can't construct such a vector fall back to
7078 element loads of the original vector type. */
7079 if (VECTOR_MODE_P (vmode)
7080 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7081 != CODE_FOR_nothing))
7083 nloads = nunits / group_size;
7084 lnel = group_size;
7085 ltype = build_nonstandard_integer_type (lsize, 1);
7086 lvectype = build_vector_type (ltype, nloads);
7090 else
7092 nloads = 1;
7093 lnel = nunits;
7094 ltype = vectype;
7096 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7098 if (slp)
7100 /* For SLP permutation support we need to load the whole group,
7101 not only the number of vector stmts the permutation result
7102 fits in. */
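	  /* E.g. (illustrative): a group of 3 loads with VF == 4 and
	     4-element vectors needs (3*4 + 3) / 4 == 3 vectors so that
	     the whole group is available to the permutation.  */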
7103 if (slp_perm)
7105 ncopies = (group_size * vf + nunits - 1) / nunits;
7106 dr_chain.create (ncopies);
7108 else
7109 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7111 int group_el = 0;
7112 unsigned HOST_WIDE_INT
7113 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7114 for (j = 0; j < ncopies; j++)
7116 if (nloads > 1)
7117 vec_alloc (v, nloads);
7118 for (i = 0; i < nloads; i++)
7120 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7121 group_el * elsz);
7122 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7123 build2 (MEM_REF, ltype,
7124 running_off, this_off));
7125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7126 if (nloads > 1)
7127 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7128 gimple_assign_lhs (new_stmt));
7130 group_el += lnel;
7131 if (! slp
7132 || group_el == group_size)
7134 tree newoff = copy_ssa_name (running_off);
7135 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7136 running_off, stride_step);
7137 vect_finish_stmt_generation (stmt, incr, gsi);
7139 running_off = newoff;
7140 group_el = 0;
7143 if (nloads > 1)
7145 tree vec_inv = build_constructor (lvectype, v);
7146 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7147 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7148 if (lvectype != vectype)
7150 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7151 VIEW_CONVERT_EXPR,
7152 build1 (VIEW_CONVERT_EXPR,
7153 vectype, new_temp));
7154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7158 if (slp)
7160 if (slp_perm)
7161 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7162 else
7163 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7165 else
7167 if (j == 0)
7168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7169 else
7170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7171 prev_stmt_info = vinfo_for_stmt (new_stmt);
7174 if (slp_perm)
7176 unsigned n_perms;
7177 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7178 slp_node_instance, false, &n_perms);
7180 return true;
7183 if (grouped_load)
7185 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7186 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7187 int group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
7188 /* For SLP vectorization we directly vectorize a subchain
7189 without permutation. */
7190 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7191 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7192 /* For BB vectorization always use the first stmt to base
7193 the data ref pointer on. */
7194 if (bb_vinfo)
7195 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7197 /* Check if the chain of loads is already vectorized. */
7198 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7199 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7200 ??? But we can only do so if there is exactly one
7201 as we have no way to get at the rest. Leave the CSE
7202 opportunity alone.
7203 ??? With the group load eventually participating
7204 in multiple different permutations (having multiple
7205 slp nodes which refer to the same group) the CSE
7206 is even wrong code. See PR56270. */
7207 && !slp)
7209 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7210 return true;
7212 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7213 group_gap_adj = 0;
7215 /* VEC_NUM is the number of vect stmts to be created for this group. */
7216 if (slp)
7218 grouped_load = false;
7219 /* For SLP permutation support we need to load the whole group,
7220 not only the number of vector stmts the permutation result
7221 fits in. */
7222 if (slp_perm)
7224 vec_num = (group_size * vf + nunits - 1) / nunits;
7225 group_gap_adj = vf * group_size - nunits * vec_num;
7227 else
7229 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7230 group_gap_adj = group_gap;
7233 else
7234 vec_num = group_size;
7236 ref_type = get_group_alias_ptr_type (first_stmt);
7238 else
7240 first_stmt = stmt;
7241 first_dr = dr;
7242 group_size = vec_num = 1;
7243 group_gap_adj = 0;
7244 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7247 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7248 gcc_assert (alignment_support_scheme);
7249 /* Targets with load-lane instructions must not require explicit
7250 realignment. */
7251 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7252 || alignment_support_scheme == dr_aligned
7253 || alignment_support_scheme == dr_unaligned_supported);
7255 /* In case the vectorization factor (VF) is bigger than the number
7256 of elements that we can fit in a vectype (nunits), we have to generate
7257      more than one vector stmt - i.e., we need to "unroll" the
7258 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7259 from one copy of the vector stmt to the next, in the field
7260 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7261 stages to find the correct vector defs to be used when vectorizing
7262 stmts that use the defs of the current stmt. The example below
7263 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7264 need to create 4 vectorized stmts):
7266 before vectorization:
7267 RELATED_STMT VEC_STMT
7268 S1: x = memref - -
7269 S2: z = x + 1 - -
7271 step 1: vectorize stmt S1:
7272 We first create the vector stmt VS1_0, and, as usual, record a
7273 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7274 Next, we create the vector stmt VS1_1, and record a pointer to
7275 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7276 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7277 stmts and pointers:
7278 RELATED_STMT VEC_STMT
7279 VS1_0: vx0 = memref0 VS1_1 -
7280 VS1_1: vx1 = memref1 VS1_2 -
7281 VS1_2: vx2 = memref2 VS1_3 -
7282 VS1_3: vx3 = memref3 - -
7283 S1: x = load - VS1_0
7284 S2: z = x + 1 - -
7286 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7287 information we recorded in RELATED_STMT field is used to vectorize
7288 stmt S2. */
7290 /* In case of interleaving (non-unit grouped access):
7292 S1: x2 = &base + 2
7293 S2: x0 = &base
7294 S3: x1 = &base + 1
7295 S4: x3 = &base + 3
7297 Vectorized loads are created in the order of memory accesses
7298 starting from the access of the first stmt of the chain:
7300 VS1: vx0 = &base
7301 VS2: vx1 = &base + vec_size*1
7302      VS3: vx2 = &base + vec_size*2
7303      VS4: vx3 = &base + vec_size*3
7305 Then permutation statements are generated:
7307 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7308 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7311 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7312 (the order of the data-refs in the output of vect_permute_load_chain
7313 corresponds to the order of scalar stmts in the interleaving chain - see
7314 the documentation of vect_permute_load_chain()).
7315 The generation of permutation stmts and recording them in
7316 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7318 In case of both multiple types and interleaving, the vector loads and
7319 permutation stmts above are created for every copy. The result vector
7320 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7321 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7323 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7324 on a target that supports unaligned accesses (dr_unaligned_supported)
7325 we generate the following code:
7326 p = initial_addr;
7327 indx = 0;
7328 loop {
7329 p = p + indx * vectype_size;
7330 vec_dest = *(p);
7331 indx = indx + 1;
7334 Otherwise, the data reference is potentially unaligned on a target that
7335 does not support unaligned accesses (dr_explicit_realign_optimized) -
7336 then generate the following code, in which the data in each iteration is
7337 obtained by two vector loads, one from the previous iteration, and one
7338 from the current iteration:
7339 p1 = initial_addr;
7340 msq_init = *(floor(p1))
7341 p2 = initial_addr + VS - 1;
7342 realignment_token = call target_builtin;
7343 indx = 0;
7344 loop {
7345 p2 = p2 + indx * vectype_size
7346 lsq = *(floor(p2))
7347 vec_dest = realign_load (msq, lsq, realignment_token)
7348 indx = indx + 1;
7349 msq = lsq;
7350 } */
7352 /* If the misalignment remains the same throughout the execution of the
7353 loop, we can create the init_addr and permutation mask at the loop
7354 preheader. Otherwise, it needs to be created inside the loop.
7355 This can only occur when vectorizing memory accesses in the inner-loop
7356 nested within an outer-loop that is being vectorized. */
7358 if (nested_in_vect_loop
7359 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7361 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7362 compute_in_loop = true;
7365 if ((alignment_support_scheme == dr_explicit_realign_optimized
7366 || alignment_support_scheme == dr_explicit_realign)
7367 && !compute_in_loop)
7369 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7370 alignment_support_scheme, NULL_TREE,
7371 &at_loop);
7372 if (alignment_support_scheme == dr_explicit_realign_optimized)
7374 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7375 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7376 size_one_node);
7379 else
7380 at_loop = loop;
7382 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7383 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7385 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7386 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7387 else
7388 aggr_type = vectype;
7390 prev_stmt_info = NULL;
7391 int group_elt = 0;
7392 for (j = 0; j < ncopies; j++)
7394 /* 1. Create the vector or array pointer update chain. */
7395 if (j == 0)
7397 bool simd_lane_access_p
7398 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7399 if (simd_lane_access_p
7400 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7401 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7402 && integer_zerop (DR_OFFSET (first_dr))
7403 && integer_zerop (DR_INIT (first_dr))
7404 && alias_sets_conflict_p (get_alias_set (aggr_type),
7405 get_alias_set (TREE_TYPE (ref_type)))
7406 && (alignment_support_scheme == dr_aligned
7407 || alignment_support_scheme == dr_unaligned_supported))
7409 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7410 dataref_offset = build_int_cst (ref_type, 0);
7411 inv_p = false;
7413 else if (first_stmt_for_drptr
7414 && first_stmt != first_stmt_for_drptr)
7416 dataref_ptr
7417 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7418 at_loop, offset, &dummy, gsi,
7419 &ptr_incr, simd_lane_access_p,
7420 &inv_p, byte_offset);
7421 /* Adjust the pointer by the difference to first_stmt. */
7422 data_reference_p ptrdr
7423 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7424 tree diff = fold_convert (sizetype,
7425 size_binop (MINUS_EXPR,
7426 DR_INIT (first_dr),
7427 DR_INIT (ptrdr)));
7428 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7429 stmt, diff);
7431 else
7432 dataref_ptr
7433 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7434 offset, &dummy, gsi, &ptr_incr,
7435 simd_lane_access_p, &inv_p,
7436 byte_offset);
7438 else if (dataref_offset)
7439 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7440 TYPE_SIZE_UNIT (aggr_type));
7441 else
7442 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7443 TYPE_SIZE_UNIT (aggr_type));
7445 if (grouped_load || slp_perm)
7446 dr_chain.create (vec_num);
7448 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7450 tree vec_array;
7452 vec_array = create_vector_array (vectype, vec_num);
7454 /* Emit:
7455 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7456 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7457 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7458 gimple_call_set_lhs (new_stmt, vec_array);
7459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7461 /* Extract each vector into an SSA_NAME. */
7462 for (i = 0; i < vec_num; i++)
7464 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7465 vec_array, i);
7466 dr_chain.quick_push (new_temp);
7469 /* Record the mapping between SSA_NAMEs and statements. */
7470 vect_record_grouped_load_vectors (stmt, dr_chain);
7472 else
7474 for (i = 0; i < vec_num; i++)
7476 if (i > 0)
7477 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7478 stmt, NULL_TREE);
7480 /* 2. Create the vector-load in the loop. */
7481 switch (alignment_support_scheme)
7483 case dr_aligned:
7484 case dr_unaligned_supported:
7486 unsigned int align, misalign;
7488 data_ref
7489 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7490 dataref_offset
7491 ? dataref_offset
7492 : build_int_cst (ref_type, 0));
7493 align = TYPE_ALIGN_UNIT (vectype);
7494 if (alignment_support_scheme == dr_aligned)
7496 gcc_assert (aligned_access_p (first_dr));
7497 misalign = 0;
7499 else if (DR_MISALIGNMENT (first_dr) == -1)
7501 align = dr_alignment (vect_dr_behavior (first_dr));
7502 misalign = 0;
7503 TREE_TYPE (data_ref)
7504 = build_aligned_type (TREE_TYPE (data_ref),
7505 align * BITS_PER_UNIT);
7507 else
7509 TREE_TYPE (data_ref)
7510 = build_aligned_type (TREE_TYPE (data_ref),
7511 TYPE_ALIGN (elem_type));
7512 misalign = DR_MISALIGNMENT (first_dr);
7514 if (dataref_offset == NULL_TREE
7515 && TREE_CODE (dataref_ptr) == SSA_NAME)
7516 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7517 align, misalign);
7518 break;
7520 case dr_explicit_realign:
7522 tree ptr, bump;
7524 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7526 if (compute_in_loop)
7527 msq = vect_setup_realignment (first_stmt, gsi,
7528 &realignment_token,
7529 dr_explicit_realign,
7530 dataref_ptr, NULL);
7532 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7533 ptr = copy_ssa_name (dataref_ptr);
7534 else
7535 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7536 new_stmt = gimple_build_assign
7537 (ptr, BIT_AND_EXPR, dataref_ptr,
7538 build_int_cst
7539 (TREE_TYPE (dataref_ptr),
7540 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7542 data_ref
7543 = build2 (MEM_REF, vectype, ptr,
7544 build_int_cst (ref_type, 0));
7545 vec_dest = vect_create_destination_var (scalar_dest,
7546 vectype);
7547 new_stmt = gimple_build_assign (vec_dest, data_ref);
7548 new_temp = make_ssa_name (vec_dest, new_stmt);
7549 gimple_assign_set_lhs (new_stmt, new_temp);
7550 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7551 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7553 msq = new_temp;
7555 bump = size_binop (MULT_EXPR, vs,
7556 TYPE_SIZE_UNIT (elem_type));
7557 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7558 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7559 new_stmt = gimple_build_assign
7560 (NULL_TREE, BIT_AND_EXPR, ptr,
7561 build_int_cst
7562 (TREE_TYPE (ptr),
7563 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7564 ptr = copy_ssa_name (ptr, new_stmt);
7565 gimple_assign_set_lhs (new_stmt, ptr);
7566 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7567 data_ref
7568 = build2 (MEM_REF, vectype, ptr,
7569 build_int_cst (ref_type, 0));
7570 break;
7572 case dr_explicit_realign_optimized:
7573 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7574 new_temp = copy_ssa_name (dataref_ptr);
7575 else
7576 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7577 new_stmt = gimple_build_assign
7578 (new_temp, BIT_AND_EXPR, dataref_ptr,
7579 build_int_cst
7580 (TREE_TYPE (dataref_ptr),
7581 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7582 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7583 data_ref
7584 = build2 (MEM_REF, vectype, new_temp,
7585 build_int_cst (ref_type, 0));
7586 break;
7587 default:
7588 gcc_unreachable ();
7590 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7591 new_stmt = gimple_build_assign (vec_dest, data_ref);
7592 new_temp = make_ssa_name (vec_dest, new_stmt);
7593 gimple_assign_set_lhs (new_stmt, new_temp);
7594 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7596 /* 3. Handle explicit realignment if necessary/supported.
7597 Create in loop:
7598 vec_dest = realign_load (msq, lsq, realignment_token) */
7599 if (alignment_support_scheme == dr_explicit_realign_optimized
7600 || alignment_support_scheme == dr_explicit_realign)
7602 lsq = gimple_assign_lhs (new_stmt);
7603 if (!realignment_token)
7604 realignment_token = dataref_ptr;
7605 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7606 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7607 msq, lsq, realignment_token);
7608 new_temp = make_ssa_name (vec_dest, new_stmt);
7609 gimple_assign_set_lhs (new_stmt, new_temp);
7610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7612 if (alignment_support_scheme == dr_explicit_realign_optimized)
7614 gcc_assert (phi);
7615 if (i == vec_num - 1 && j == ncopies - 1)
7616 add_phi_arg (phi, lsq,
7617 loop_latch_edge (containing_loop),
7618 UNKNOWN_LOCATION);
7619 msq = lsq;
7623 /* 4. Handle invariant-load. */
7624 if (inv_p && !bb_vinfo)
7626 gcc_assert (!grouped_load);
7627 /* If we have versioned for aliasing or the loop doesn't
7628 have any data dependencies that would preclude this,
7629 then we are sure this is a loop invariant load and
7630 thus we can insert it on the preheader edge. */
7631 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7632 && !nested_in_vect_loop
7633 && hoist_defs_of_uses (stmt, loop))
7635 if (dump_enabled_p ())
7637 dump_printf_loc (MSG_NOTE, vect_location,
7638 "hoisting out of the vectorized "
7639 "loop: ");
7640 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7642 tree tem = copy_ssa_name (scalar_dest);
7643 gsi_insert_on_edge_immediate
7644 (loop_preheader_edge (loop),
7645 gimple_build_assign (tem,
7646 unshare_expr
7647 (gimple_assign_rhs1 (stmt))));
7648 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7649 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7650 set_vinfo_for_stmt (new_stmt,
7651 new_stmt_vec_info (new_stmt, vinfo));
7653 else
7655 gimple_stmt_iterator gsi2 = *gsi;
7656 gsi_next (&gsi2);
7657 new_temp = vect_init_vector (stmt, scalar_dest,
7658 vectype, &gsi2);
7659 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7663 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7665 tree perm_mask = perm_mask_for_reverse (vectype);
7666 new_temp = permute_vec_elements (new_temp, new_temp,
7667 perm_mask, stmt, gsi);
7668 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7671 /* Collect vector loads and later create their permutation in
7672 vect_transform_grouped_load (). */
7673 if (grouped_load || slp_perm)
7674 dr_chain.quick_push (new_temp);
7676 /* Store vector loads in the corresponding SLP_NODE. */
7677 if (slp && !slp_perm)
7678 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7680 /* With an SLP permutation we load the gaps as well; without
7681 one we need to skip the gaps once we have fully loaded
7682 all the elements. group_gap_adj is GROUP_SIZE here. */
7683 group_elt += nunits;
7684 if (group_gap_adj != 0 && ! slp_perm
7685 && group_elt == group_size - group_gap_adj)
7687 bool ovf;
7688 tree bump
7689 = wide_int_to_tree (sizetype,
7690 wi::smul (TYPE_SIZE_UNIT (elem_type),
7691 group_gap_adj, &ovf));
7692 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7693 stmt, bump);
7694 group_elt = 0;
7697 /* Bump the vector pointer to account for a gap or for excess
7698 elements loaded for a permuted SLP load. */
7699 if (group_gap_adj != 0 && slp_perm)
7701 bool ovf;
7702 tree bump
7703 = wide_int_to_tree (sizetype,
7704 wi::smul (TYPE_SIZE_UNIT (elem_type),
7705 group_gap_adj, &ovf));
7706 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7707 stmt, bump);
7711 if (slp && !slp_perm)
7712 continue;
7714 if (slp_perm)
7716 unsigned n_perms;
7717 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7718 slp_node_instance, false,
7719 &n_perms))
7721 dr_chain.release ();
7722 return false;
7725 else
7727 if (grouped_load)
7729 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7730 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7731 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7733 else
7735 if (j == 0)
7736 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7737 else
7738 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7739 prev_stmt_info = vinfo_for_stmt (new_stmt);
7742 dr_chain.release ();
7745 return true;
7748 /* Function vect_is_simple_cond.
7750 Input:
7751 VINFO - the vect info of the loop or basic block that is being vectorized.
7752 COND - Condition that is checked for simple use.
7754 Output:
7755 *COMP_VECTYPE - the vector type for the comparison.
7756 *DTS - The def types for the arguments of the comparison.
7758 Returns whether a COND can be vectorized. Checks whether the
7759 condition operands are supportable using vect_is_simple_use. */
7761 static bool
7762 vect_is_simple_cond (tree cond, vec_info *vinfo,
7763 tree *comp_vectype, enum vect_def_type *dts)
7765 tree lhs, rhs;
7766 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7768 /* Mask case. */
7769 if (TREE_CODE (cond) == SSA_NAME
7770 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7772 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7773 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7774 &dts[0], comp_vectype)
7775 || !*comp_vectype
7776 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7777 return false;
7778 return true;
7781 if (!COMPARISON_CLASS_P (cond))
7782 return false;
7784 lhs = TREE_OPERAND (cond, 0);
7785 rhs = TREE_OPERAND (cond, 1);
7787 if (TREE_CODE (lhs) == SSA_NAME)
7789 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7790 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7791 return false;
7793 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7794 || TREE_CODE (lhs) == FIXED_CST)
7795 dts[0] = vect_constant_def;
7796 else
7797 return false;
7799 if (TREE_CODE (rhs) == SSA_NAME)
7801 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7802 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7803 return false;
7805 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7806 || TREE_CODE (rhs) == FIXED_CST)
7807 dts[1] = vect_constant_def;
7808 else
7809 return false;
7811 if (vectype1 && vectype2
7812 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7813 return false;
7815 *comp_vectype = vectype1 ? vectype1 : vectype2;
7816 return true;
7819 /* vectorizable_condition.
7821 Check if STMT is a conditional modify expression that can be vectorized.
7822 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7823 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7824 at GSI.
7826 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7827 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7828 the else clause if it is 2).
7830 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
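/* An illustrative sketch of the transformation (names are hypothetical):
   the scalar statement
     S:  x = a < b ? c : d;
   is replaced by a vector statement of the form
     VS: vx = VEC_COND_EXPR <va < vb, vc, vd>;
   where va, vb, vc, vd are the vectorized defs of a, b, c, d and the
   comparison and selection are performed element-wise. */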
7832 bool
7833 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7834 gimple **vec_stmt, tree reduc_def, int reduc_index,
7835 slp_tree slp_node)
7837 tree scalar_dest = NULL_TREE;
7838 tree vec_dest = NULL_TREE;
7839 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7840 tree then_clause, else_clause;
7841 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7842 tree comp_vectype = NULL_TREE;
7843 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7844 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7845 tree vec_compare;
7846 tree new_temp;
7847 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7848 enum vect_def_type dts[4]
7849 = {vect_unknown_def_type, vect_unknown_def_type,
7850 vect_unknown_def_type, vect_unknown_def_type};
7851 int ndts = 4;
7852 int ncopies;
7853 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7854 stmt_vec_info prev_stmt_info = NULL;
7855 int i, j;
7856 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7857 vec<tree> vec_oprnds0 = vNULL;
7858 vec<tree> vec_oprnds1 = vNULL;
7859 vec<tree> vec_oprnds2 = vNULL;
7860 vec<tree> vec_oprnds3 = vNULL;
7861 tree vec_cmp_type;
7862 bool masked = false;
7864 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7865 return false;
7867 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7869 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7870 return false;
7872 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7873 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7874 && reduc_def))
7875 return false;
7877 /* FORNOW: not yet supported. */
7878 if (STMT_VINFO_LIVE_P (stmt_info))
7880 if (dump_enabled_p ())
7881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7882 "value used after loop.\n");
7883 return false;
7887 /* Is vectorizable conditional operation? */
7888 if (!is_gimple_assign (stmt))
7889 return false;
7891 code = gimple_assign_rhs_code (stmt);
7893 if (code != COND_EXPR)
7894 return false;
7896 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7897 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7898 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7900 if (slp_node)
7901 ncopies = 1;
7902 else
7903 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7905 gcc_assert (ncopies >= 1);
7906 if (reduc_index && ncopies > 1)
7907 return false; /* FORNOW */
7909 cond_expr = gimple_assign_rhs1 (stmt);
7910 then_clause = gimple_assign_rhs2 (stmt);
7911 else_clause = gimple_assign_rhs3 (stmt);
7913 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7914 &comp_vectype, &dts[0])
7915 || !comp_vectype)
7916 return false;
7918 gimple *def_stmt;
7919 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7920 &vectype1))
7921 return false;
7922 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7923 &vectype2))
7924 return false;
7926 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7927 return false;
7929 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7930 return false;
7932 masked = !COMPARISON_CLASS_P (cond_expr);
7933 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7935 if (vec_cmp_type == NULL_TREE)
7936 return false;
7938 cond_code = TREE_CODE (cond_expr);
7939 if (!masked)
7941 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7942 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7945 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7947 /* Boolean values may have another representation in vectors
7948 and therefore we prefer bit operations over comparison for
7949 them (which also works for scalar masks). We store opcodes
7950 to use in bitop1 and bitop2. The statement is vectorized as
7951 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7952 depending on bitop1 and bitop2 arity. */
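/* For example, for boolean (mask) operands the GT_EXPR condition a > b is
   computed as a & ~b: bitop1 = BIT_NOT_EXPR is applied to the second
   operand and bitop2 = BIT_AND_EXPR combines the result with the first,
   avoiding a comparison of mask vectors that the target may not support. */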
7953 switch (cond_code)
7955 case GT_EXPR:
7956 bitop1 = BIT_NOT_EXPR;
7957 bitop2 = BIT_AND_EXPR;
7958 break;
7959 case GE_EXPR:
7960 bitop1 = BIT_NOT_EXPR;
7961 bitop2 = BIT_IOR_EXPR;
7962 break;
7963 case LT_EXPR:
7964 bitop1 = BIT_NOT_EXPR;
7965 bitop2 = BIT_AND_EXPR;
7966 std::swap (cond_expr0, cond_expr1);
7967 break;
7968 case LE_EXPR:
7969 bitop1 = BIT_NOT_EXPR;
7970 bitop2 = BIT_IOR_EXPR;
7971 std::swap (cond_expr0, cond_expr1);
7972 break;
7973 case NE_EXPR:
7974 bitop1 = BIT_XOR_EXPR;
7975 break;
7976 case EQ_EXPR:
7977 bitop1 = BIT_XOR_EXPR;
7978 bitop2 = BIT_NOT_EXPR;
7979 break;
7980 default:
7981 return false;
7983 cond_code = SSA_NAME;
7986 if (!vec_stmt)
7988 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7989 if (bitop1 != NOP_EXPR)
7991 machine_mode mode = TYPE_MODE (comp_vectype);
7992 optab optab;
7994 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7995 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7996 return false;
7998 if (bitop2 != NOP_EXPR)
8000 optab = optab_for_tree_code (bitop2, comp_vectype,
8001 optab_default);
8002 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8003 return false;
8006 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8007 cond_code))
8009 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8010 return true;
8012 return false;
8015 /* Transform. */
8017 if (!slp_node)
8019 vec_oprnds0.create (1);
8020 vec_oprnds1.create (1);
8021 vec_oprnds2.create (1);
8022 vec_oprnds3.create (1);
8025 /* Handle def. */
8026 scalar_dest = gimple_assign_lhs (stmt);
8027 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8029 /* Handle cond expr. */
8030 for (j = 0; j < ncopies; j++)
8032 gassign *new_stmt = NULL;
8033 if (j == 0)
8035 if (slp_node)
8037 auto_vec<tree, 4> ops;
8038 auto_vec<vec<tree>, 4> vec_defs;
8040 if (masked)
8041 ops.safe_push (cond_expr);
8042 else
8044 ops.safe_push (cond_expr0);
8045 ops.safe_push (cond_expr1);
8047 ops.safe_push (then_clause);
8048 ops.safe_push (else_clause);
8049 vect_get_slp_defs (ops, slp_node, &vec_defs);
8050 vec_oprnds3 = vec_defs.pop ();
8051 vec_oprnds2 = vec_defs.pop ();
8052 if (!masked)
8053 vec_oprnds1 = vec_defs.pop ();
8054 vec_oprnds0 = vec_defs.pop ();
8056 else
8058 gimple *gtemp;
8059 if (masked)
8061 vec_cond_lhs
8062 = vect_get_vec_def_for_operand (cond_expr, stmt,
8063 comp_vectype);
8064 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8065 &gtemp, &dts[0]);
8067 else
8069 vec_cond_lhs
8070 = vect_get_vec_def_for_operand (cond_expr0,
8071 stmt, comp_vectype);
8072 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8074 vec_cond_rhs
8075 = vect_get_vec_def_for_operand (cond_expr1,
8076 stmt, comp_vectype);
8077 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8079 if (reduc_index == 1)
8080 vec_then_clause = reduc_def;
8081 else
8083 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8084 stmt);
8085 vect_is_simple_use (then_clause, loop_vinfo,
8086 &gtemp, &dts[2]);
8088 if (reduc_index == 2)
8089 vec_else_clause = reduc_def;
8090 else
8092 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8093 stmt);
8094 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8098 else
8100 vec_cond_lhs
8101 = vect_get_vec_def_for_stmt_copy (dts[0],
8102 vec_oprnds0.pop ());
8103 if (!masked)
8104 vec_cond_rhs
8105 = vect_get_vec_def_for_stmt_copy (dts[1],
8106 vec_oprnds1.pop ());
8108 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8109 vec_oprnds2.pop ());
8110 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8111 vec_oprnds3.pop ());
8114 if (!slp_node)
8116 vec_oprnds0.quick_push (vec_cond_lhs);
8117 if (!masked)
8118 vec_oprnds1.quick_push (vec_cond_rhs);
8119 vec_oprnds2.quick_push (vec_then_clause);
8120 vec_oprnds3.quick_push (vec_else_clause);
8123 /* Arguments are ready. Create the new vector stmt. */
8124 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8126 vec_then_clause = vec_oprnds2[i];
8127 vec_else_clause = vec_oprnds3[i];
8129 if (masked)
8130 vec_compare = vec_cond_lhs;
8131 else
8133 vec_cond_rhs = vec_oprnds1[i];
8134 if (bitop1 == NOP_EXPR)
8135 vec_compare = build2 (cond_code, vec_cmp_type,
8136 vec_cond_lhs, vec_cond_rhs);
8137 else
8139 new_temp = make_ssa_name (vec_cmp_type);
8140 if (bitop1 == BIT_NOT_EXPR)
8141 new_stmt = gimple_build_assign (new_temp, bitop1,
8142 vec_cond_rhs);
8143 else
8144 new_stmt
8145 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8146 vec_cond_rhs);
8147 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8148 if (bitop2 == NOP_EXPR)
8149 vec_compare = new_temp;
8150 else if (bitop2 == BIT_NOT_EXPR)
8152 /* Instead of doing ~x ? y : z do x ? z : y. */
8153 vec_compare = new_temp;
8154 std::swap (vec_then_clause, vec_else_clause);
8156 else
8158 vec_compare = make_ssa_name (vec_cmp_type);
8159 new_stmt
8160 = gimple_build_assign (vec_compare, bitop2,
8161 vec_cond_lhs, new_temp);
8162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8166 new_temp = make_ssa_name (vec_dest);
8167 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8168 vec_compare, vec_then_clause,
8169 vec_else_clause);
8170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8171 if (slp_node)
8172 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8175 if (slp_node)
8176 continue;
8178 if (j == 0)
8179 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8180 else
8181 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8183 prev_stmt_info = vinfo_for_stmt (new_stmt);
8186 vec_oprnds0.release ();
8187 vec_oprnds1.release ();
8188 vec_oprnds2.release ();
8189 vec_oprnds3.release ();
8191 return true;
8194 /* vectorizable_comparison.
8196 Check if STMT is a comparison expression that can be vectorized.
8197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8198 comparison, put it in VEC_STMT, and insert it at GSI.
8200 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
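/* An illustrative sketch (hypothetical names): a scalar mask-producing
   comparison
     S:  b = x < y;
   is vectorized as
     VS: vb = vx < vy;
   where the result vectype of vb is a vector boolean (mask) type, i.e.
   VECTOR_BOOLEAN_TYPE_P holds for it. */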
8202 static bool
8203 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8204 gimple **vec_stmt, tree reduc_def,
8205 slp_tree slp_node)
8207 tree lhs, rhs1, rhs2;
8208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8209 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8210 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8211 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8212 tree new_temp;
8213 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8214 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8215 int ndts = 2;
8216 unsigned nunits;
8217 int ncopies;
8218 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8219 stmt_vec_info prev_stmt_info = NULL;
8220 int i, j;
8221 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8222 vec<tree> vec_oprnds0 = vNULL;
8223 vec<tree> vec_oprnds1 = vNULL;
8224 gimple *def_stmt;
8225 tree mask_type;
8226 tree mask;
8228 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8229 return false;
8231 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8232 return false;
8234 mask_type = vectype;
8235 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8237 if (slp_node)
8238 ncopies = 1;
8239 else
8240 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8242 gcc_assert (ncopies >= 1);
8243 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8244 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8245 && reduc_def))
8246 return false;
8248 if (STMT_VINFO_LIVE_P (stmt_info))
8250 if (dump_enabled_p ())
8251 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8252 "value used after loop.\n");
8253 return false;
8256 if (!is_gimple_assign (stmt))
8257 return false;
8259 code = gimple_assign_rhs_code (stmt);
8261 if (TREE_CODE_CLASS (code) != tcc_comparison)
8262 return false;
8264 rhs1 = gimple_assign_rhs1 (stmt);
8265 rhs2 = gimple_assign_rhs2 (stmt);
8267 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8268 &dts[0], &vectype1))
8269 return false;
8271 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8272 &dts[1], &vectype2))
8273 return false;
8275 if (vectype1 && vectype2
8276 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8277 return false;
8279 vectype = vectype1 ? vectype1 : vectype2;
8281 /* Invariant comparison. */
8282 if (!vectype)
8284 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8285 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8286 return false;
8288 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8289 return false;
8291 /* Can't compare mask and non-mask types. */
8292 if (vectype1 && vectype2
8293 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8294 return false;
8296 /* Boolean values may have another representation in vectors
8297 and therefore we prefer bit operations over comparison for
8298 them (which also works for scalar masks). We store opcodes
8299 to use in bitop1 and bitop2. The statement is vectorized as
8300 BITOP2 (rhs1 BITOP1 rhs2) or
8301 rhs1 BITOP2 (BITOP1 rhs2)
8302 depending on bitop1 and bitop2 arity. */
8303 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8305 if (code == GT_EXPR)
8307 bitop1 = BIT_NOT_EXPR;
8308 bitop2 = BIT_AND_EXPR;
8310 else if (code == GE_EXPR)
8312 bitop1 = BIT_NOT_EXPR;
8313 bitop2 = BIT_IOR_EXPR;
8315 else if (code == LT_EXPR)
8317 bitop1 = BIT_NOT_EXPR;
8318 bitop2 = BIT_AND_EXPR;
8319 std::swap (rhs1, rhs2);
8320 std::swap (dts[0], dts[1]);
8322 else if (code == LE_EXPR)
8324 bitop1 = BIT_NOT_EXPR;
8325 bitop2 = BIT_IOR_EXPR;
8326 std::swap (rhs1, rhs2);
8327 std::swap (dts[0], dts[1]);
8329 else
8331 bitop1 = BIT_XOR_EXPR;
8332 if (code == EQ_EXPR)
8333 bitop2 = BIT_NOT_EXPR;
8337 if (!vec_stmt)
8339 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8340 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8341 dts, ndts, NULL, NULL);
8342 if (bitop1 == NOP_EXPR)
8343 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8344 else
8346 machine_mode mode = TYPE_MODE (vectype);
8347 optab optab;
8349 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8350 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8351 return false;
8353 if (bitop2 != NOP_EXPR)
8355 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8356 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8357 return false;
8359 return true;
8363 /* Transform. */
8364 if (!slp_node)
8366 vec_oprnds0.create (1);
8367 vec_oprnds1.create (1);
8370 /* Handle def. */
8371 lhs = gimple_assign_lhs (stmt);
8372 mask = vect_create_destination_var (lhs, mask_type);
8374 /* Handle cmp expr. */
8375 for (j = 0; j < ncopies; j++)
8377 gassign *new_stmt = NULL;
8378 if (j == 0)
8380 if (slp_node)
8382 auto_vec<tree, 2> ops;
8383 auto_vec<vec<tree>, 2> vec_defs;
8385 ops.safe_push (rhs1);
8386 ops.safe_push (rhs2);
8387 vect_get_slp_defs (ops, slp_node, &vec_defs);
8388 vec_oprnds1 = vec_defs.pop ();
8389 vec_oprnds0 = vec_defs.pop ();
8391 else
8393 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8394 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8397 else
8399 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8400 vec_oprnds0.pop ());
8401 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8402 vec_oprnds1.pop ());
8405 if (!slp_node)
8407 vec_oprnds0.quick_push (vec_rhs1);
8408 vec_oprnds1.quick_push (vec_rhs2);
8411 /* Arguments are ready. Create the new vector stmt. */
8412 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8414 vec_rhs2 = vec_oprnds1[i];
8416 new_temp = make_ssa_name (mask);
8417 if (bitop1 == NOP_EXPR)
8419 new_stmt = gimple_build_assign (new_temp, code,
8420 vec_rhs1, vec_rhs2);
8421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8423 else
8425 if (bitop1 == BIT_NOT_EXPR)
8426 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8427 else
8428 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8429 vec_rhs2);
8430 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8431 if (bitop2 != NOP_EXPR)
8433 tree res = make_ssa_name (mask);
8434 if (bitop2 == BIT_NOT_EXPR)
8435 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8436 else
8437 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8438 new_temp);
8439 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8442 if (slp_node)
8443 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8446 if (slp_node)
8447 continue;
8449 if (j == 0)
8450 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8451 else
8452 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8454 prev_stmt_info = vinfo_for_stmt (new_stmt);
8457 vec_oprnds0.release ();
8458 vec_oprnds1.release ();
8460 return true;
8463 /* Make sure the statement is vectorizable. */
8465 bool
8466 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8467 slp_instance node_instance)
8469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8470 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8471 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8472 bool ok;
8473 gimple *pattern_stmt;
8474 gimple_seq pattern_def_seq;
8476 if (dump_enabled_p ())
8478 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8479 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8482 if (gimple_has_volatile_ops (stmt))
8484 if (dump_enabled_p ())
8485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8486 "not vectorized: stmt has volatile operands\n");
8488 return false;
8491 /* Skip stmts that do not need to be vectorized. In loops this is expected
8492 to include:
8493 - the COND_EXPR which is the loop exit condition
8494 - any LABEL_EXPRs in the loop
8495 - computations that are used only for array indexing or loop control.
8496 In basic blocks we only analyze statements that are a part of some SLP
8497 instance; therefore, all the statements are relevant.
8499 A pattern statement needs to be analyzed instead of the original statement
8500 if the original statement is not relevant. Otherwise, we analyze both
8501 statements. In basic blocks we are called from some SLP instance
8502 traversal; don't analyze pattern stmts instead, since the pattern stmts
8503 will already be part of the SLP instance. */
8505 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8506 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8507 && !STMT_VINFO_LIVE_P (stmt_info))
8509 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8510 && pattern_stmt
8511 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8512 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8514 /* Analyze PATTERN_STMT instead of the original stmt. */
8515 stmt = pattern_stmt;
8516 stmt_info = vinfo_for_stmt (pattern_stmt);
8517 if (dump_enabled_p ())
8519 dump_printf_loc (MSG_NOTE, vect_location,
8520 "==> examining pattern statement: ");
8521 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8524 else
8526 if (dump_enabled_p ())
8527 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8529 return true;
8532 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8533 && node == NULL
8534 && pattern_stmt
8535 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8536 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8538 /* Analyze PATTERN_STMT too. */
8539 if (dump_enabled_p ())
8541 dump_printf_loc (MSG_NOTE, vect_location,
8542 "==> examining pattern statement: ");
8543 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8546 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8547 node_instance))
8548 return false;
8551 if (is_pattern_stmt_p (stmt_info)
8552 && node == NULL
8553 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8555 gimple_stmt_iterator si;
8557 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8559 gimple *pattern_def_stmt = gsi_stmt (si);
8560 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8561 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8563 /* Analyze def stmt of STMT if it's a pattern stmt. */
8564 if (dump_enabled_p ())
8566 dump_printf_loc (MSG_NOTE, vect_location,
8567 "==> examining pattern def statement: ");
8568 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8571 if (!vect_analyze_stmt (pattern_def_stmt,
8572 need_to_vectorize, node, node_instance))
8573 return false;
8578 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8580 case vect_internal_def:
8581 break;
8583 case vect_reduction_def:
8584 case vect_nested_cycle:
8585 gcc_assert (!bb_vinfo
8586 && (relevance == vect_used_in_outer
8587 || relevance == vect_used_in_outer_by_reduction
8588 || relevance == vect_used_by_reduction
8589 || relevance == vect_unused_in_scope
8590 || relevance == vect_used_only_live));
8591 break;
8593 case vect_induction_def:
8594 gcc_assert (!bb_vinfo);
8595 break;
8597 case vect_constant_def:
8598 case vect_external_def:
8599 case vect_unknown_def_type:
8600 default:
8601 gcc_unreachable ();
8604 if (STMT_VINFO_RELEVANT_P (stmt_info))
8606 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8607 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8608 || (is_gimple_call (stmt)
8609 && gimple_call_lhs (stmt) == NULL_TREE));
8610 *need_to_vectorize = true;
8613 if (PURE_SLP_STMT (stmt_info) && !node)
8615 dump_printf_loc (MSG_NOTE, vect_location,
8616 "handled only by SLP analysis\n");
8617 return true;
8620 ok = true;
8621 if (!bb_vinfo
8622 && (STMT_VINFO_RELEVANT_P (stmt_info)
8623 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8624 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8625 || vectorizable_conversion (stmt, NULL, NULL, node)
8626 || vectorizable_shift (stmt, NULL, NULL, node)
8627 || vectorizable_operation (stmt, NULL, NULL, node)
8628 || vectorizable_assignment (stmt, NULL, NULL, node)
8629 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8630 || vectorizable_call (stmt, NULL, NULL, node)
8631 || vectorizable_store (stmt, NULL, NULL, node)
8632 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8633 || vectorizable_induction (stmt, NULL, NULL, node)
8634 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8635 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8636 else
8638 if (bb_vinfo)
8639 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8640 || vectorizable_conversion (stmt, NULL, NULL, node)
8641 || vectorizable_shift (stmt, NULL, NULL, node)
8642 || vectorizable_operation (stmt, NULL, NULL, node)
8643 || vectorizable_assignment (stmt, NULL, NULL, node)
8644 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8645 || vectorizable_call (stmt, NULL, NULL, node)
8646 || vectorizable_store (stmt, NULL, NULL, node)
8647 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8648 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8651 if (!ok)
8653 if (dump_enabled_p ())
8655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8656 "not vectorized: relevant stmt not ");
8657 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8658 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8661 return false;
8664 if (bb_vinfo)
8665 return true;
8667 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
8668 need extra handling, except for vectorizable reductions. */
8669 if (STMT_VINFO_LIVE_P (stmt_info)
8670 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8671 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8673 if (!ok)
8675 if (dump_enabled_p ())
8677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8678 "not vectorized: live stmt not ");
8679 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8680 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8683 return false;
8686 return true;
8690 /* Function vect_transform_stmt.
8692 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8694 bool
8695 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8696 bool *grouped_store, slp_tree slp_node,
8697 slp_instance slp_node_instance)
8699 bool is_store = false;
8700 gimple *vec_stmt = NULL;
8701 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8702 bool done;
8704 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8705 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8707 switch (STMT_VINFO_TYPE (stmt_info))
8709 case type_demotion_vec_info_type:
8710 case type_promotion_vec_info_type:
8711 case type_conversion_vec_info_type:
8712 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8713 gcc_assert (done);
8714 break;
8716 case induc_vec_info_type:
8717 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8718 gcc_assert (done);
8719 break;
8721 case shift_vec_info_type:
8722 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8723 gcc_assert (done);
8724 break;
8726 case op_vec_info_type:
8727 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8728 gcc_assert (done);
8729 break;
8731 case assignment_vec_info_type:
8732 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8733 gcc_assert (done);
8734 break;
8736 case load_vec_info_type:
8737 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8738 slp_node_instance);
8739 gcc_assert (done);
8740 break;
8742 case store_vec_info_type:
8743 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8744 gcc_assert (done);
8745 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8747 /* In case of interleaving, the whole chain is vectorized when the
8748 last store in the chain is reached. Store stmts before the last
8749 one are skipped, and their vec_stmt_info shouldn't be freed
8750 meanwhile. */
8751 *grouped_store = true;
8752 if (STMT_VINFO_VEC_STMT (stmt_info))
8753 is_store = true;
8755 else
8756 is_store = true;
8757 break;
8759 case condition_vec_info_type:
8760 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8761 gcc_assert (done);
8762 break;
8764 case comparison_vec_info_type:
8765 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8766 gcc_assert (done);
8767 break;
8769 case call_vec_info_type:
8770 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8771 stmt = gsi_stmt (*gsi);
8772 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8773 is_store = true;
8774 break;
8776 case call_simd_clone_vec_info_type:
8777 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8778 stmt = gsi_stmt (*gsi);
8779 break;
8781 case reduc_vec_info_type:
8782 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8783 slp_node_instance);
8784 gcc_assert (done);
8785 break;
8787 default:
8788 if (!STMT_VINFO_LIVE_P (stmt_info))
8790 if (dump_enabled_p ())
8791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8792 "stmt not supported.\n");
8793 gcc_unreachable ();
8797 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8798 This would break hybrid SLP vectorization. */
8799 if (slp_node)
8800 gcc_assert (!vec_stmt
8801 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8803 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8804 is being vectorized, but outside the immediately enclosing loop. */
8805 if (vec_stmt
8806 && STMT_VINFO_LOOP_VINFO (stmt_info)
8807 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8808 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8809 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8810 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8811 || STMT_VINFO_RELEVANT (stmt_info) ==
8812 vect_used_in_outer_by_reduction))
8814 struct loop *innerloop = LOOP_VINFO_LOOP (
8815 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8816 imm_use_iterator imm_iter;
8817 use_operand_p use_p;
8818 tree scalar_dest;
8819 gimple *exit_phi;
8821 if (dump_enabled_p ())
8822 dump_printf_loc (MSG_NOTE, vect_location,
8823 "Record the vdef for outer-loop vectorization.\n");
8825 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8826 (to be used when vectorizing outer-loop stmts that use the DEF of
8827 STMT). */
8828 if (gimple_code (stmt) == GIMPLE_PHI)
8829 scalar_dest = PHI_RESULT (stmt);
8830 else
8831 scalar_dest = gimple_assign_lhs (stmt);
8833 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8835 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8837 exit_phi = USE_STMT (use_p);
8838 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8843 /* Handle stmts whose DEF is used outside the loop-nest that is
8844 being vectorized. */
8845 if (slp_node)
8847 gimple *slp_stmt;
8848 int i;
8849 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8850 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8852 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8853 if (STMT_VINFO_LIVE_P (slp_stmt_info))
8855 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8856 &vec_stmt);
8857 gcc_assert (done);
8861 else if (STMT_VINFO_LIVE_P (stmt_info)
8862 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8864 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8865 gcc_assert (done);
8868 if (vec_stmt)
8869 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8871 return is_store;
8875 /* Remove a group of stores (for SLP or interleaving), free their
8876 stmt_vec_info. */
8878 void
8879 vect_remove_stores (gimple *first_stmt)
8881 gimple *next = first_stmt;
8882 gimple *tmp;
8883 gimple_stmt_iterator next_si;
8885 while (next)
8887 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8889 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8890 if (is_pattern_stmt_p (stmt_info))
8891 next = STMT_VINFO_RELATED_STMT (stmt_info);
8892 /* Free the attached stmt_vec_info and remove the stmt. */
8893 next_si = gsi_for_stmt (next);
8894 unlink_stmt_vdef (next);
8895 gsi_remove (&next_si, true);
8896 release_defs (next);
8897 free_stmt_vec_info (next);
8898 next = tmp;
8903 /* Function new_stmt_vec_info.
8905 Create and initialize a new stmt_vec_info struct for STMT. */
8907 stmt_vec_info
8908 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8910 stmt_vec_info res;
8911 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8913 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8914 STMT_VINFO_STMT (res) = stmt;
8915 res->vinfo = vinfo;
8916 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8917 STMT_VINFO_LIVE_P (res) = false;
8918 STMT_VINFO_VECTYPE (res) = NULL;
8919 STMT_VINFO_VEC_STMT (res) = NULL;
8920 STMT_VINFO_VECTORIZABLE (res) = true;
8921 STMT_VINFO_IN_PATTERN_P (res) = false;
8922 STMT_VINFO_RELATED_STMT (res) = NULL;
8923 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8924 STMT_VINFO_DATA_REF (res) = NULL;
8925 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8926 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8928 if (gimple_code (stmt) == GIMPLE_PHI
8929 && is_loop_header_bb_p (gimple_bb (stmt)))
8930 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8931 else
8932 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8934 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8935 STMT_SLP_TYPE (res) = loop_vect;
8936 STMT_VINFO_NUM_SLP_USES (res) = 0;
8938 GROUP_FIRST_ELEMENT (res) = NULL;
8939 GROUP_NEXT_ELEMENT (res) = NULL;
8940 GROUP_SIZE (res) = 0;
8941 GROUP_STORE_COUNT (res) = 0;
8942 GROUP_GAP (res) = 0;
8943 GROUP_SAME_DR_STMT (res) = NULL;
8945 return res;
8949 /* Create a vector for stmt_vec_info. */
8951 void
8952 init_stmt_vec_info_vec (void)
8954 gcc_assert (!stmt_vec_info_vec.exists ());
8955 stmt_vec_info_vec.create (50);
8959 /* Free the vector of stmt_vec_info. */
8961 void
8962 free_stmt_vec_info_vec (void)
8964 unsigned int i;
8965 stmt_vec_info info;
8966 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8967 if (info != NULL)
8968 free_stmt_vec_info (STMT_VINFO_STMT (info));
8969 gcc_assert (stmt_vec_info_vec.exists ());
8970 stmt_vec_info_vec.release ();
8974 /* Free stmt vectorization related info. */
8976 void
8977 free_stmt_vec_info (gimple *stmt)
8979 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8981 if (!stmt_info)
8982 return;
8984 /* Check if this statement has a related "pattern stmt"
8985 (introduced by the vectorizer during the pattern recognition
8986 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8987 too. */
8988 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8990 stmt_vec_info patt_info
8991 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8992 if (patt_info)
8994 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8995 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8996 gimple_set_bb (patt_stmt, NULL);
8997 tree lhs = gimple_get_lhs (patt_stmt);
8998 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8999 release_ssa_name (lhs);
9000 if (seq)
9002 gimple_stmt_iterator si;
9003 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9005 gimple *seq_stmt = gsi_stmt (si);
9006 gimple_set_bb (seq_stmt, NULL);
9007 lhs = gimple_get_lhs (seq_stmt);
9008 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9009 release_ssa_name (lhs);
9010 free_stmt_vec_info (seq_stmt);
9013 free_stmt_vec_info (patt_stmt);
9017 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9018 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9019 set_vinfo_for_stmt (stmt, NULL);
9020 free (stmt_info);
9024 /* Function get_vectype_for_scalar_type_and_size.
9026 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9027 by the target. */
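/* For example (assuming a target with 16-byte vectors), SCALAR_TYPE int
   (SImode, 4 bytes) with SIZE 16 yields a vector of 4 ints; with SIZE 0
   the target's preferred SIMD mode for SImode is used instead. */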
9029 static tree
9030 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9032 tree orig_scalar_type = scalar_type;
9033 machine_mode inner_mode = TYPE_MODE (scalar_type);
9034 machine_mode simd_mode;
9035 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9036 int nunits;
9037 tree vectype;
9039 if (nbytes == 0)
9040 return NULL_TREE;
9042 if (GET_MODE_CLASS (inner_mode) != MODE_INT
9043 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
9044 return NULL_TREE;
9046 /* For vector types of elements whose mode precision doesn't
9047 match their type's precision we use an element type of mode
9048 precision. The vectorization routines will have to make sure
9049 they support the proper result truncation/extension.
9050 We also make sure to build vector types with INTEGER_TYPE
9051 component type only. */
9052 if (INTEGRAL_TYPE_P (scalar_type)
9053 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9054 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9055 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9056 TYPE_UNSIGNED (scalar_type));
9058 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9059 When the component mode passes the above test simply use a type
9060 corresponding to that mode. The theory is that any use that
9061 would cause problems with this will disable vectorization anyway. */
9062 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9063 && !INTEGRAL_TYPE_P (scalar_type))
9064 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9066 /* We can't build a vector type of elements with alignment bigger than
9067 their size. */
9068 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9069 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9070 TYPE_UNSIGNED (scalar_type));
9072 /* If we fell back to using the mode, fail if there was
9073 no scalar type for it. */
9074 if (scalar_type == NULL_TREE)
9075 return NULL_TREE;
9077 /* If no size was supplied, use the mode the target prefers. Otherwise
9078 look up a vector mode of the specified size. */
9079 if (size == 0)
9080 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9081 else
9082 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9083 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9084 if (nunits <= 1)
9085 return NULL_TREE;
9087 vectype = build_vector_type (scalar_type, nunits);
9089 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9090 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9091 return NULL_TREE;
9093 /* Re-attach the address-space qualifier if we canonicalized the scalar
9094 type. */
9095 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9096 return build_qualified_type
9097 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9099 return vectype;
9102 unsigned int current_vector_size;
9104 /* Function get_vectype_for_scalar_type.
9106 Returns the vector type corresponding to SCALAR_TYPE as supported
9107 by the target. */
9109 tree
9110 get_vectype_for_scalar_type (tree scalar_type)
9112 tree vectype;
9113 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9114 current_vector_size);
9115 if (vectype
9116 && current_vector_size == 0)
9117 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9118 return vectype;
9121 /* Function get_mask_type_for_scalar_type.
9123 Returns the mask type corresponding to the result of a comparison
9124 of vectors of the specified SCALAR_TYPE, as supported by the target. */
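/* For example, with current_vector_size == 16 and SCALAR_TYPE int the
   value vectors have 4 elements, so the returned mask type is a boolean
   vector of 4 elements occupying 16 bytes (the exact layout is up to
   build_truth_vector_type and the target). */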
9126 tree
9127 get_mask_type_for_scalar_type (tree scalar_type)
9129 tree vectype = get_vectype_for_scalar_type (scalar_type);
9131 if (!vectype)
9132 return NULL;
9134 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9135 current_vector_size);
9138 /* Function get_same_sized_vectype
9140 Returns a vector type corresponding to SCALAR_TYPE with the same
9141 size as VECTOR_TYPE, if supported by the target. */
9143 tree
9144 get_same_sized_vectype (tree scalar_type, tree vector_type)
9146 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9147 return build_same_sized_truth_vector_type (vector_type);
9149 return get_vectype_for_scalar_type_and_size
9150 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9153 /* Function vect_is_simple_use.
9155 Input:
9156 VINFO - the vect info of the loop or basic block that is being vectorized.
9157 OPERAND - operand in the loop or bb.
9158 Output:
9159 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9160 DT - the type of definition
9162 Returns whether a stmt with OPERAND can be vectorized.
9163 For loops, supportable operands are constants, loop invariants, and operands
9164 that are defined by the current iteration of the loop. Unsupportable
9165 operands are those that are defined by a previous iteration of the loop (as
9166 is the case in reduction/induction computations).
9167 For basic blocks, supportable operands are constants and bb invariants.
9168 For now, operands defined outside the basic block are not supported. */
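/* An illustrative classification (hypothetical operands): in
     for (i = 0; i < n; i++)  a[i] = b[i] * x + 3;
   the operand 3 gets DT = vect_constant_def, the loop-invariant x gets
   vect_external_def, and the value loaded from b[i], defined by a stmt
   inside the loop, gets vect_internal_def. */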
9170 bool
9171 vect_is_simple_use (tree operand, vec_info *vinfo,
9172 gimple **def_stmt, enum vect_def_type *dt)
9174 *def_stmt = NULL;
9175 *dt = vect_unknown_def_type;
9177 if (dump_enabled_p ())
9179 dump_printf_loc (MSG_NOTE, vect_location,
9180 "vect_is_simple_use: operand ");
9181 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9182 dump_printf (MSG_NOTE, "\n");
9185 if (CONSTANT_CLASS_P (operand))
9187 *dt = vect_constant_def;
9188 return true;
9191 if (is_gimple_min_invariant (operand))
9193 *dt = vect_external_def;
9194 return true;
9197 if (TREE_CODE (operand) != SSA_NAME)
9199 if (dump_enabled_p ())
9200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9201 "not ssa-name.\n");
9202 return false;
9205 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9207 *dt = vect_external_def;
9208 return true;
9211 *def_stmt = SSA_NAME_DEF_STMT (operand);
9212 if (dump_enabled_p ())
9214 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9215 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9218 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9219 *dt = vect_external_def;
9220 else
9222 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9223 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9226 if (dump_enabled_p ())
9228 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9229 switch (*dt)
9231 case vect_uninitialized_def:
9232 dump_printf (MSG_NOTE, "uninitialized\n");
9233 break;
9234 case vect_constant_def:
9235 dump_printf (MSG_NOTE, "constant\n");
9236 break;
9237 case vect_external_def:
9238 dump_printf (MSG_NOTE, "external\n");
9239 break;
9240 case vect_internal_def:
9241 dump_printf (MSG_NOTE, "internal\n");
9242 break;
9243 case vect_induction_def:
9244 dump_printf (MSG_NOTE, "induction\n");
9245 break;
9246 case vect_reduction_def:
9247 dump_printf (MSG_NOTE, "reduction\n");
9248 break;
9249 case vect_double_reduction_def:
9250 dump_printf (MSG_NOTE, "double reduction\n");
9251 break;
9252 case vect_nested_cycle:
9253 dump_printf (MSG_NOTE, "nested cycle\n");
9254 break;
9255 case vect_unknown_def_type:
9256 dump_printf (MSG_NOTE, "unknown\n");
9257 break;
9261 if (*dt == vect_unknown_def_type)
9263 if (dump_enabled_p ())
9264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9265 "Unsupported pattern.\n");
9266 return false;
9269 switch (gimple_code (*def_stmt))
9271 case GIMPLE_PHI:
9272 case GIMPLE_ASSIGN:
9273 case GIMPLE_CALL:
9274 break;
9275 default:
9276 if (dump_enabled_p ())
9277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9278 "unsupported defining stmt:\n");
9279 return false;
9282 return true;
9285 /* Function vect_is_simple_use.
9287 Same as vect_is_simple_use but also determines the vector operand
9288 type of OPERAND and stores it to *VECTYPE. If the definition of
9289 OPERAND is vect_uninitialized_def, vect_constant_def or
9290 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9291 is responsible for computing the best suited vector type for the
9292 scalar operand. */
9294 bool
9295 vect_is_simple_use (tree operand, vec_info *vinfo,
9296 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9298 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9299 return false;
9301 /* Now get a vector type if the def is internal, otherwise supply
9302 NULL_TREE and leave it up to the caller to figure out a proper
9303 type for the use stmt. */
9304 if (*dt == vect_internal_def
9305 || *dt == vect_induction_def
9306 || *dt == vect_reduction_def
9307 || *dt == vect_double_reduction_def
9308 || *dt == vect_nested_cycle)
9310 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9312 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9313 && !STMT_VINFO_RELEVANT (stmt_info)
9314 && !STMT_VINFO_LIVE_P (stmt_info))
9315 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9317 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9318 gcc_assert (*vectype != NULL_TREE);
9320 else if (*dt == vect_uninitialized_def
9321 || *dt == vect_constant_def
9322 || *dt == vect_external_def)
9323 *vectype = NULL_TREE;
9324 else
9325 gcc_unreachable ();
9327 return true;
9331 /* Function supportable_widening_operation
9333 Check whether an operation represented by the code CODE is a
9334 widening operation that is supported by the target platform in
9335 vector form (i.e., when operating on arguments of type VECTYPE_IN
9336 producing a result of type VECTYPE_OUT).
9338 Widening operations we currently support are NOP (CONVERT), FLOAT
9339 and WIDEN_MULT. This function checks if these operations are supported
9340 by the target platform either directly (via vector tree-codes), or via
9341 target builtins.
9343 Output:
9344 - CODE1 and CODE2 are codes of vector operations to be used when
9345 vectorizing the operation, if available.
9346 - MULTI_STEP_CVT determines the number of required intermediate steps in
9347 case of multi-step conversion (like char->short->int - in that case
9348 MULTI_STEP_CVT will be 1).
9349 - INTERM_TYPES contains the intermediate types required to perform the
9350 widening operation (a short vector in the above example). */
9352 bool
9353 supportable_widening_operation (enum tree_code code, gimple *stmt,
9354 tree vectype_out, tree vectype_in,
9355 enum tree_code *code1, enum tree_code *code2,
9356 int *multi_step_cvt,
9357 vec<tree> *interm_types)
9359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9360 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9361 struct loop *vect_loop = NULL;
9362 machine_mode vec_mode;
9363 enum insn_code icode1, icode2;
9364 optab optab1, optab2;
9365 tree vectype = vectype_in;
9366 tree wide_vectype = vectype_out;
9367 enum tree_code c1, c2;
9368 int i;
9369 tree prev_type, intermediate_type;
9370 machine_mode intermediate_mode, prev_mode;
9371 optab optab3, optab4;
9373 *multi_step_cvt = 0;
9374 if (loop_info)
9375 vect_loop = LOOP_VINFO_LOOP (loop_info);
9377 switch (code)
9379 case WIDEN_MULT_EXPR:
9380 /* The result of a vectorized widening operation usually requires
9381 two vectors (because the widened results do not fit into one vector).
9382 The vector results would normally be expected to be generated
9383 in the same order as in the original scalar computation,
9384 i.e. if 8 results are generated in each vector iteration, they are
9385 to be organized as follows:
9386 vect1: [res1,res2,res3,res4],
9387 vect2: [res5,res6,res7,res8].
9389 However, in the special case that the result of the widening
9390 operation is used in a reduction computation only, the order doesn't
9391 matter (because when vectorizing a reduction we change the order of
9392 the computation). Some targets can take advantage of this and
9393 generate more efficient code. For example, targets like Altivec
9394 that support widen_mult using a sequence of {mult_even,mult_odd}
9395 generate the following vectors:
9396 vect1: [res1,res3,res5,res7],
9397 vect2: [res2,res4,res6,res8].
9399 When vectorizing outer-loops, we execute the inner-loop sequentially
9400 (each vectorized inner-loop iteration contributes to VF outer-loop
9401 iterations in parallel). We therefore don't allow the order of the
9402 computation in the inner-loop to be changed during outer-loop
9403 vectorization. */
9404 /* TODO: Another case in which order doesn't *really* matter is when we
9405 widen and then contract again, e.g. (short)((int)x * y >> 8).
9406 Normally, pack_trunc performs an even/odd permute, whereas the
9407 repack from an even/odd expansion would be an interleave, which
9408 would be significantly simpler for e.g. AVX2. */
9409 /* In any case, in order to avoid duplicating the code below, recurse
9410 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9411 are properly set up for the caller. If we fail, we'll continue with
9412 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9413 if (vect_loop
9414 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9415 && !nested_in_vect_loop_p (vect_loop, stmt)
9416 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9417 stmt, vectype_out, vectype_in,
9418 code1, code2, multi_step_cvt,
9419 interm_types))
9421 /* Elements in a vector with the vect_used_by_reduction property cannot
9422 be reordered if the use chain with this property does not have the
9423 same operation. One such example is s += a * b, where elements
9424 in a and b cannot be reordered. Here we check if the vector defined
9425 by STMT is only directly used in the reduction statement. */
9426 tree lhs = gimple_assign_lhs (stmt);
9427 use_operand_p dummy;
9428 gimple *use_stmt;
9429 stmt_vec_info use_stmt_info = NULL;
9430 if (single_imm_use (lhs, &dummy, &use_stmt)
9431 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9432 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9433 return true;
9435 c1 = VEC_WIDEN_MULT_LO_EXPR;
9436 c2 = VEC_WIDEN_MULT_HI_EXPR;
9437 break;
9439 case DOT_PROD_EXPR:
9440 c1 = DOT_PROD_EXPR;
9441 c2 = DOT_PROD_EXPR;
9442 break;
9444 case SAD_EXPR:
9445 c1 = SAD_EXPR;
9446 c2 = SAD_EXPR;
9447 break;
9449 case VEC_WIDEN_MULT_EVEN_EXPR:
9450 /* Support the recursion induced just above. */
9451 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9452 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9453 break;
9455 case WIDEN_LSHIFT_EXPR:
9456 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9457 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9458 break;
9460 CASE_CONVERT:
9461 c1 = VEC_UNPACK_LO_EXPR;
9462 c2 = VEC_UNPACK_HI_EXPR;
9463 break;
9465 case FLOAT_EXPR:
9466 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9467 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9468 break;
9470 case FIX_TRUNC_EXPR:
9471 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9472 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9473 computing the operation. */
9474 return false;
9476 default:
9477 gcc_unreachable ();
9480 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9481 std::swap (c1, c2);
9483 if (code == FIX_TRUNC_EXPR)
9485 /* The signedness is determined from the output operand. */
9486 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9487 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9489 else
9491 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9492 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9495 if (!optab1 || !optab2)
9496 return false;
9498 vec_mode = TYPE_MODE (vectype);
9499 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9500 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9501 return false;
9503 *code1 = c1;
9504 *code2 = c2;
9506 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9507 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9508 /* For scalar masks we may have different boolean
9509 vector types having the same QImode. Thus we
9510 add an additional check on the number of elements. */
9511 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9512 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9513 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9515 /* Check if it's a multi-step conversion that can be done using intermediate
9516 types. */
9518 prev_type = vectype;
9519 prev_mode = vec_mode;
9521 if (!CONVERT_EXPR_CODE_P (code))
9522 return false;
9524 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9525 intermediate steps in the promotion sequence. We try
9526 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9527 not. */
9528 interm_types->create (MAX_INTERM_CVT_STEPS);
9529 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9531 intermediate_mode = insn_data[icode1].operand[0].mode;
9532 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9534 intermediate_type
9535 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9536 current_vector_size);
9537 if (intermediate_mode != TYPE_MODE (intermediate_type))
9538 return false;
9540 else
9541 intermediate_type
9542 = lang_hooks.types.type_for_mode (intermediate_mode,
9543 TYPE_UNSIGNED (prev_type));
9545 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9546 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9548 if (!optab3 || !optab4
9549 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9550 || insn_data[icode1].operand[0].mode != intermediate_mode
9551 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9552 || insn_data[icode2].operand[0].mode != intermediate_mode
9553 || ((icode1 = optab_handler (optab3, intermediate_mode))
9554 == CODE_FOR_nothing)
9555 || ((icode2 = optab_handler (optab4, intermediate_mode))
9556 == CODE_FOR_nothing))
9557 break;
9559 interm_types->quick_push (intermediate_type);
9560 (*multi_step_cvt)++;
9562 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9563 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9564 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9565 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9566 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9568 prev_type = intermediate_type;
9569 prev_mode = intermediate_mode;
9572 interm_types->release ();
9573 return false;
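/* A minimal usage sketch, not part of the original sources: querying whether
   a char -> int widening conversion is supported, possibly through an
   intermediate short vector.  STMT and the two vector types are assumed to
   be supplied by the caller and to have stmt_vec_info attached; the helper
   name widening_conversion_supported_p is hypothetical.  */

static bool
widening_conversion_supported_p (gimple *stmt, tree char_vectype,
				 tree int_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  auto_vec<tree> interm_types;

  if (!supportable_widening_operation (NOP_EXPR, stmt, int_vectype,
				       char_vectype, &code1, &code2,
				       &multi_step_cvt, &interm_types))
    return false;

  /* For char -> short -> int, MULTI_STEP_CVT is 1 and INTERM_TYPES holds
     the short vector type used for the intermediate step.  */
  return true;
}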
9577 /* Function supportable_narrowing_operation
9579 Check whether an operation represented by the code CODE is a
9580 narrowing operation that is supported by the target platform in
9581 vector form (i.e., when operating on arguments of type VECTYPE_IN
9582 and producing a result of type VECTYPE_OUT).
9584 Narrowing operations we currently support are NOP (CONVERT) and
9585 FIX_TRUNC. This function checks if these operations are supported by
9586 the target platform directly via vector tree-codes.
9588 Output:
9589 - CODE1 is the code of a vector operation to be used when
9590 vectorizing the operation, if available.
9591 - MULTI_STEP_CVT determines the number of required intermediate steps in
9592 case of multi-step conversion (like int->short->char - in that case
9593 MULTI_STEP_CVT will be 1).
9594 - INTERM_TYPES contains the intermediate types required to perform the
9595 narrowing operation (a short vector in the above example). */
9597 bool
9598 supportable_narrowing_operation (enum tree_code code,
9599 tree vectype_out, tree vectype_in,
9600 enum tree_code *code1, int *multi_step_cvt,
9601 vec<tree> *interm_types)
9603 machine_mode vec_mode;
9604 enum insn_code icode1;
9605 optab optab1, interm_optab;
9606 tree vectype = vectype_in;
9607 tree narrow_vectype = vectype_out;
9608 enum tree_code c1;
9609 tree intermediate_type, prev_type;
9610 machine_mode intermediate_mode, prev_mode;
9611 int i;
9612 bool uns;
9614 *multi_step_cvt = 0;
9615 switch (code)
9617 CASE_CONVERT:
9618 c1 = VEC_PACK_TRUNC_EXPR;
9619 break;
9621 case FIX_TRUNC_EXPR:
9622 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9623 break;
9625 case FLOAT_EXPR:
9626 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9627 tree code and optabs used for computing the operation. */
9628 return false;
9630 default:
9631 gcc_unreachable ();
9634 if (code == FIX_TRUNC_EXPR)
9635 /* The signedness is determined from the output operand. */
9636 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9637 else
9638 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9640 if (!optab1)
9641 return false;
9643 vec_mode = TYPE_MODE (vectype);
9644 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9645 return false;
9647 *code1 = c1;
9649 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9650 /* For scalar masks we may have different boolean
9651 vector types having the same QImode. Thus we
9652 add an additional check on the number of elements. */
9653 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9654 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9655 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9657 /* Check if it's a multi-step conversion that can be done using intermediate
9658 types. */
9659 prev_mode = vec_mode;
9660 prev_type = vectype;
9661 if (code == FIX_TRUNC_EXPR)
9662 uns = TYPE_UNSIGNED (vectype_out);
9663 else
9664 uns = TYPE_UNSIGNED (vectype);
9666 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
9667 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
9668 more costly than signed. */
9669 if (code == FIX_TRUNC_EXPR && uns)
9671 enum insn_code icode2;
9673 intermediate_type
9674 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9675 interm_optab
9676 = optab_for_tree_code (c1, intermediate_type, optab_default);
9677 if (interm_optab != unknown_optab
9678 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9679 && insn_data[icode1].operand[0].mode
9680 == insn_data[icode2].operand[0].mode)
9682 uns = false;
9683 optab1 = interm_optab;
9684 icode1 = icode2;
9688 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9689 intermediate steps in the narrowing sequence. We try
9690 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9691 interm_types->create (MAX_INTERM_CVT_STEPS);
9692 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9694 intermediate_mode = insn_data[icode1].operand[0].mode;
9695 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9697 intermediate_type
9698 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9699 current_vector_size);
9700 if (intermediate_mode != TYPE_MODE (intermediate_type))
9701 return false;
9703 else
9704 intermediate_type
9705 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9706 interm_optab
9707 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9708 optab_default);
9709 if (!interm_optab
9710 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9711 || insn_data[icode1].operand[0].mode != intermediate_mode
9712 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9713 == CODE_FOR_nothing))
9714 break;
9716 interm_types->quick_push (intermediate_type);
9717 (*multi_step_cvt)++;
9719 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9720 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9721 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9722 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9724 prev_mode = intermediate_mode;
9725 prev_type = intermediate_type;
9726 optab1 = interm_optab;
9729 interm_types->release ();
9730 return false;
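/* A minimal usage sketch, not part of the original sources: querying whether
   an int -> char narrowing conversion is supported and how many intermediate
   pack steps it needs.  The vector types are assumed to be supplied by the
   caller; the helper name narrowing_conversion_supported_p is hypothetical.  */

static bool
narrowing_conversion_supported_p (tree int_vectype, tree char_vectype,
				  int *steps)
{
  enum tree_code code1;
  auto_vec<tree> interm_types;

  if (!supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
					&code1, steps, &interm_types))
    return false;

  /* For int -> short -> char, *STEPS is 1 and INTERM_TYPES holds the short
     vector type used for the intermediate VEC_PACK_TRUNC_EXPR step.  */
  return true;
}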