Use ssizetype selectors for autovectorised VEC_PERM_EXPRs
[official-gcc.git] / gcc / tree-vect-stmts.c
blob 8d1dc0425ccd639982448e82213b09ded23a8147
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
57 /* Says whether a statement is a load, a store of a vectorized statement
58 result, or a store of an invariant value. */
59 enum vec_load_store_type {
60 VLS_LOAD,
61 VLS_STORE,
62 VLS_STORE_INVARIANT
65 /* Return the vectorized type for the given statement. */
67 tree
68 stmt_vectype (struct _stmt_vec_info *stmt_info)
70 return STMT_VINFO_VECTYPE (stmt_info);
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75 bool
76 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
81 struct loop* loop;
83 if (!loop_vinfo)
84 return false;
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
88 return (bb->loop_father == loop->inner);
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
95 unsigned
96 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
97 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
98 int misalign, enum vect_cost_model_location where)
100 if ((kind == vector_load || kind == unaligned_load)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_gather_load;
103 if ((kind == vector_store || kind == unaligned_store)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
105 kind = vector_scatter_store;
106 if (body_cost_vec)
108 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
109 stmt_info_for_cost si = { count, kind,
110 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
111 misalign };
112 body_cost_vec->safe_push (si);
113 return (unsigned)
114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 else
117 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
118 count, kind, stmt_info, misalign, where);
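/* Editorial sketch, not part of the original file: a typical caller of
   record_stmt_cost splits costs between the loop prologue and the loop
   body, for example one scalar_to_vec broadcast up front plus NCOPIES
   vector_stmt operations per iteration (compare vect_model_simple_cost
   below).  The wrapper name is hypothetical.  */

static void
example_record_simple_costs (stmt_vec_info stmt_info, int ncopies,
			     stmt_vector_for_cost *prologue_cost_vec,
			     stmt_vector_for_cost *body_cost_vec)
{
  /* One-off cost of broadcasting an invariant operand.  */
  unsigned prologue_cost
    = record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
			stmt_info, 0, vect_prologue);
  /* Per-iteration cost of the vector statements themselves.  */
  unsigned inside_cost
    = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
			stmt_info, 0, vect_body);
  (void) prologue_cost;
  (void) inside_cost;
}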
121 /* Return a variable of type ELEM_TYPE[NELEMS]. */
123 static tree
124 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
126 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
127 "vect_array");
130 /* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
135 static tree
136 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
137 tree array, unsigned HOST_WIDE_INT n)
139 tree vect_type, vect, vect_name, array_ref;
140 gimple *new_stmt;
142 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
143 vect_type = TREE_TYPE (TREE_TYPE (array));
144 vect = vect_create_destination_var (scalar_dest, vect_type);
145 array_ref = build4 (ARRAY_REF, vect_type, array,
146 build_int_cst (size_type_node, n),
147 NULL_TREE, NULL_TREE);
149 new_stmt = gimple_build_assign (vect, array_ref);
150 vect_name = make_ssa_name (vect, new_stmt);
151 gimple_assign_set_lhs (new_stmt, vect_name);
152 vect_finish_stmt_generation (stmt, new_stmt, gsi);
154 return vect_name;
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
161 static void
162 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
163 tree array, unsigned HOST_WIDE_INT n)
165 tree array_ref;
166 gimple *new_stmt;
168 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
172 new_stmt = gimple_build_assign (array_ref, vect);
173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
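/* Editorial sketch, not part of the original file: the helpers above
   move vectors through a temporary array, which is how load/store-lanes
   style accesses are expanded.  STMT, GSI, SCALAR_DEST, VECTYPE and VEC
   are assumed to be provided by the caller; the function name is
   hypothetical.  */

static void
example_vector_array_roundtrip (gimple *stmt, gimple_stmt_iterator *gsi,
				tree scalar_dest, tree vectype, tree vec)
{
  /* An array that holds two vectors of type VECTYPE.  */
  tree array = create_vector_array (vectype, 2);
  /* Store VEC into slot 0, then read it back as a fresh SSA name.  */
  write_vector_array (stmt, gsi, vec, array, 0);
  tree reloaded = read_vector_array (stmt, gsi, scalar_dest, array, 0);
  (void) reloaded;
}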
176 /* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
178 (and its group). */
180 static tree
181 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
183 tree mem_ref;
185 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
186 /* Arrays have the same alignment as their type. */
187 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
188 return mem_ref;
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
199 enum vect_relevant relevant, bool live_p)
201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
202 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
203 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 gimple *pattern_stmt;
206 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: ", relevant, live_p);
210 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215 may have their own uses that are not in any pattern, in such cases the
216 stmt itself should be marked. */
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
224 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE, vect_location,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info = vinfo_for_stmt (pattern_stmt);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 stmt = pattern_stmt;
237 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
238 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
239 STMT_VINFO_RELEVANT (stmt_info) = relevant;
241 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "already marked relevant/live.\n");
247 return;
250 worklist->safe_push (stmt);
254 /* Function is_simple_and_all_uses_invariant
256 Return true if STMT is simple and all uses of it are invariant. */
258 bool
259 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
261 tree op;
262 gimple *def_stmt;
263 ssa_op_iter iter;
265 if (!is_gimple_assign (stmt))
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
315 *relevant = vect_used_in_scope;
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE, vect_location,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant = vect_used_in_scope;
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
348 *live_p = true;
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
362 return (*live_p || *relevant);
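/* Editorial example: in a source loop such as

     for (i = 0; i < n; i++)
       {
	 tmp = a[i] * 3;	<-- becomes relevant transitively (feeds b[i])
	 b[i] = tmp;		<-- has a vdef, marked vect_used_in_scope here
	 last = a[i];		<-- used after the loop, marked live
       }
     use (last);

   the store to b[i] is relevant because it alters memory, and the
   definition of 'last' is live because its value escapes the loop
   through the loop-closed exit phi.  */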
366 /* Function exist_non_indexing_operands_for_use_p
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
371 static bool
372 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
383 /* STMT has a data_ref. FORNOW this means that it's one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
391 for array indexing.
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
396 if (!gimple_assign_copy_p (stmt))
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
400 switch (gimple_call_internal_fn (stmt))
402 case IFN_MASK_STORE:
403 operand = gimple_call_arg (stmt, 3);
404 if (operand == use)
405 return true;
406 /* FALLTHRU */
407 case IFN_MASK_LOAD:
408 operand = gimple_call_arg (stmt, 2);
409 if (operand == use)
410 return true;
411 break;
412 default:
413 break;
415 return false;
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
420 operand = gimple_assign_rhs1 (stmt);
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
424 if (operand == use)
425 return true;
427 return false;
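/* Editorial example: for a store such as

     a[i_7] = x_5;

   the use of i_7 only feeds the address computation of the ARRAY_REF,
   so this function returns false for i_7, while it returns true for
   x_5, the value actually being stored.  */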
432 /* Function process_use.
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT cause it had already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
456 Return true if everything is as expected. Return false otherwise. */
458 static bool
459 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
460 enum vect_relevant relevant, vec<gimple *> *worklist,
461 bool force)
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
467 gimple *def_stmt;
468 enum vect_def_type dt;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
548 case vect_used_in_scope:
549 break;
551 default:
552 gcc_unreachable ();
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
569 switch (relevant)
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
577 case vect_used_by_reduction:
578 case vect_used_only_live:
579 relevant = vect_used_in_outer_by_reduction;
580 break;
582 case vect_used_in_scope:
583 relevant = vect_used_in_outer;
584 break;
586 default:
587 gcc_unreachable ();
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
607 vect_mark_relevant (worklist, def_stmt, relevant, false);
608 return true;
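/* Editorial example for case 3b above: if STMT is in the outer loop
   with RELEVANT == vect_used_in_scope and its operand is defined by
   DEF_STMT in the inner loop, DEF_STMT is pushed onto the worklist as
   vect_used_in_outer; a vect_used_by_reduction or vect_used_only_live
   use is instead promoted to vect_used_in_outer_by_reduction.  */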
612 /* Function vect_mark_stmts_to_be_vectorized.
614 Not all stmts in the loop need to be vectorized. For example:
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
621 3. j = j + 1
623 Stmt 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
626 This pass detects such stmts. */
628 bool
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
635 gimple *stmt;
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
639 gimple *phi;
640 bool live_p;
641 enum vect_relevant relevant;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec<gimple *, 64> worklist;
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
655 phi = gsi_stmt (si);
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, phi, relevant, live_p);
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
667 stmt = gsi_stmt (si);
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
674 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
675 vect_mark_relevant (&worklist, stmt, relevant, live_p);
679 /* 2. Process_worklist */
680 while (worklist.length () > 0)
682 use_operand_p use_p;
683 ssa_op_iter iter;
685 stmt = worklist.pop ();
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 of STMT. */
695 stmt_vinfo = vinfo_for_stmt (stmt);
696 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
701 One exception is when STMT has been identified as defining a reduction
702 variable; in this case we set the relevance to vect_used_by_reduction.
703 This is because we distinguish between two kinds of relevant stmts -
704 those that are used by a reduction computation, and those that are
705 (also) used by a regular computation. This allows us later on to
706 identify stmts that are used solely by a reduction, and therefore the
707 order of the results that they produce does not have to be kept. */
709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
711 case vect_reduction_def:
712 gcc_assert (relevant != vect_unused_in_scope);
713 if (relevant != vect_unused_in_scope
714 && relevant != vect_used_in_scope
715 && relevant != vect_used_by_reduction
716 && relevant != vect_used_only_live)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of reduction.\n");
721 return false;
723 break;
725 case vect_nested_cycle:
726 if (relevant != vect_unused_in_scope
727 && relevant != vect_used_in_outer_by_reduction
728 && relevant != vect_used_in_outer)
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732 "unsupported use of nested cycle.\n");
734 return false;
736 break;
738 case vect_double_reduction_def:
739 if (relevant != vect_unused_in_scope
740 && relevant != vect_used_by_reduction
741 && relevant != vect_used_only_live)
743 if (dump_enabled_p ())
744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
745 "unsupported use of double reduction.\n");
747 return false;
749 break;
751 default:
752 break;
755 if (is_pattern_stmt_p (stmt_vinfo))
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt))
762 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
763 tree op = gimple_assign_rhs1 (stmt);
765 i = 1;
766 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
768 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
769 relevant, &worklist, false)
770 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
771 relevant, &worklist, false))
772 return false;
773 i = 2;
775 for (; i < gimple_num_ops (stmt); i++)
777 op = gimple_op (stmt, i);
778 if (TREE_CODE (op) == SSA_NAME
779 && !process_use (stmt, op, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
784 else if (is_gimple_call (stmt))
786 for (i = 0; i < gimple_call_num_args (stmt); i++)
788 tree arg = gimple_call_arg (stmt, i);
789 if (!process_use (stmt, arg, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
795 else
796 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
798 tree op = USE_FROM_PTR (use_p);
799 if (!process_use (stmt, op, loop_vinfo, relevant,
800 &worklist, false))
801 return false;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
810 &worklist, true))
811 return false;
813 } /* while worklist */
815 return true;
819 /* Function vect_model_simple_cost.
821 Models cost for simple operations, i.e. those that only emit ncopies of a
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
825 void
826 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
827 enum vect_def_type *dt,
828 int ndts,
829 stmt_vector_for_cost *prologue_cost_vec,
830 stmt_vector_for_cost *body_cost_vec)
832 int i;
833 int inside_cost = 0, prologue_cost = 0;
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info))
837 return;
839 /* Cost the "broadcast" of a scalar operand into a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
841 cost model. */
842 for (i = 0; i < ndts; i++)
843 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
844 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
845 stmt_info, 0, vect_prologue);
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
848 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
849 stmt_info, 0, vect_body);
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_NOTE, vect_location,
853 "vect_model_simple_cost: inside_cost = %d, "
854 "prologue_cost = %d .\n", inside_cost, prologue_cost);
858 /* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
863 static void
864 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
865 enum vect_def_type *dt, int pwr)
867 int i, tmp;
868 int inside_cost = 0, prologue_cost = 0;
869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
870 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
871 void *target_cost_data;
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info))
875 return;
877 if (loop_vinfo)
878 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
879 else
880 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
882 for (i = 0; i < pwr + 1; i++)
884 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
885 (i + 1) : i;
886 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
887 vec_promote_demote, stmt_info, 0,
888 vect_body);
891 /* FORNOW: Assuming maximum 2 args per stmt. */
892 for (i = 0; i < 2; i++)
893 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
894 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
895 stmt_info, 0, vect_prologue);
897 if (dump_enabled_p ())
898 dump_printf_loc (MSG_NOTE, vect_location,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
900 "prologue_cost = %d .\n", inside_cost, prologue_cost);
903 /* Function vect_model_store_cost
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
908 void
909 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
910 vect_memory_access_type memory_access_type,
911 enum vect_def_type dt, slp_tree slp_node,
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
917 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
918 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920 if (dt == vect_constant_def || dt == vect_external_def)
921 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
922 stmt_info, 0, vect_prologue);
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node && grouped_access_p)
928 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
929 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
937 /* We assume that the cost of a single store-lanes instruction is
938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
939 access is instead being provided by a permute-and-store operation,
940 include the cost of the permutes. */
941 if (first_stmt_p
942 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
944 /* Uses high and low interleave or shuffle operations for each
945 needed permute. */
946 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
947 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE, vect_location,
953 "vect_model_store_cost: strided group_size = %d .\n",
954 group_size);
957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
958 /* Costs of the stores. */
959 if (memory_access_type == VMAT_ELEMENTWISE
960 || memory_access_type == VMAT_GATHER_SCATTER)
961 /* N scalar stores plus extracting the elements. */
962 inside_cost += record_stmt_cost (body_cost_vec,
963 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
964 scalar_store, stmt_info, 0, vect_body);
965 else
966 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
968 if (memory_access_type == VMAT_ELEMENTWISE
969 || memory_access_type == VMAT_STRIDED_SLP)
970 inside_cost += record_stmt_cost (body_cost_vec,
971 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
972 vec_to_scalar, stmt_info, 0, vect_body);
974 if (dump_enabled_p ())
975 dump_printf_loc (MSG_NOTE, vect_location,
976 "vect_model_store_cost: inside_cost = %d, "
977 "prologue_cost = %d .\n", inside_cost, prologue_cost);
981 /* Calculate cost of DR's memory access. */
982 void
983 vect_get_store_cost (struct data_reference *dr, int ncopies,
984 unsigned int *inside_cost,
985 stmt_vector_for_cost *body_cost_vec)
987 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
988 gimple *stmt = DR_STMT (dr);
989 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
991 switch (alignment_support_scheme)
993 case dr_aligned:
995 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
996 vector_store, stmt_info, 0,
997 vect_body);
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE, vect_location,
1001 "vect_model_store_cost: aligned.\n");
1002 break;
1005 case dr_unaligned_supported:
1007 /* Here, we assign an additional cost for the unaligned store. */
1008 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1009 unaligned_store, stmt_info,
1010 DR_MISALIGNMENT (dr), vect_body);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: unaligned supported by "
1014 "hardware.\n");
1015 break;
1018 case dr_unaligned_unsupported:
1020 *inside_cost = VECT_MAX_COST;
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1024 "vect_model_store_cost: unsupported access.\n");
1025 break;
1028 default:
1029 gcc_unreachable ();
1034 /* Function vect_model_load_cost
1036 Models cost for loads. In the case of grouped accesses, one access has
1037 the overhead of the grouped access attributed to it. Since unaligned
1038 accesses are supported for loads, we also account for the costs of the
1039 access scheme chosen. */
1041 void
1042 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1043 vect_memory_access_type memory_access_type,
1044 slp_tree slp_node,
1045 stmt_vector_for_cost *prologue_cost_vec,
1046 stmt_vector_for_cost *body_cost_vec)
1048 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1049 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1050 unsigned int inside_cost = 0, prologue_cost = 0;
1051 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1053 /* Grouped loads read all elements in the group at once,
1054 so we want the DR for the first statement. */
1055 if (!slp_node && grouped_access_p)
1057 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1058 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1061 /* True if we should include any once-per-group costs as well as
1062 the cost of the statement itself. For SLP we only get called
1063 once per group anyhow. */
1064 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1066 /* We assume that the cost of a single load-lanes instruction is
1067 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1068 access is instead being provided by a load-and-permute operation,
1069 include the cost of the permutes. */
1070 if (first_stmt_p
1071 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1073 /* Uses even and odd extract operations or shuffle operations
1074 for each needed permute. */
1075 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1076 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1077 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1078 stmt_info, 0, vect_body);
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_NOTE, vect_location,
1082 "vect_model_load_cost: strided group_size = %d .\n",
1083 group_size);
1086 /* The loads themselves. */
1087 if (memory_access_type == VMAT_ELEMENTWISE
1088 || memory_access_type == VMAT_GATHER_SCATTER)
1090 /* N scalar loads plus gathering them into a vector. */
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1092 inside_cost += record_stmt_cost (body_cost_vec,
1093 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1094 scalar_load, stmt_info, 0, vect_body);
1096 else
1097 vect_get_load_cost (dr, ncopies, first_stmt_p,
1098 &inside_cost, &prologue_cost,
1099 prologue_cost_vec, body_cost_vec, true);
1100 if (memory_access_type == VMAT_ELEMENTWISE
1101 || memory_access_type == VMAT_STRIDED_SLP)
1102 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1103 stmt_info, 0, vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1112 /* Calculate cost of DR's memory access. */
1113 void
1114 vect_get_load_cost (struct data_reference *dr, int ncopies,
1115 bool add_realign_cost, unsigned int *inside_cost,
1116 unsigned int *prologue_cost,
1117 stmt_vector_for_cost *prologue_cost_vec,
1118 stmt_vector_for_cost *body_cost_vec,
1119 bool record_prologue_costs)
1121 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1122 gimple *stmt = DR_STMT (dr);
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1125 switch (alignment_support_scheme)
1127 case dr_aligned:
1129 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1130 stmt_info, 0, vect_body);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE, vect_location,
1134 "vect_model_load_cost: aligned.\n");
1136 break;
1138 case dr_unaligned_supported:
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1142 unaligned_load, stmt_info,
1143 DR_MISALIGNMENT (dr), vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned supported by "
1148 "hardware.\n");
1150 break;
1152 case dr_explicit_realign:
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1155 vector_load, stmt_info, 0, vect_body);
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1157 vec_perm, stmt_info, 0, vect_body);
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1161 prologue costs. */
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: explicit realign\n");
1170 break;
1172 case dr_explicit_realign_optimized:
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: unaligned software "
1177 "pipelined.\n");
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1186 if (add_realign_cost && record_prologue_costs)
1188 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1189 vector_stmt, stmt_info,
1190 0, vect_prologue);
1191 if (targetm.vectorize.builtin_mask_for_load)
1192 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1193 vector_stmt, stmt_info,
1194 0, vect_prologue);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1198 stmt_info, 0, vect_body);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: explicit realign optimized"
1205 "\n");
1207 break;
1210 case dr_unaligned_unsupported:
1212 *inside_cost = VECT_MAX_COST;
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1216 "vect_model_load_cost: unsupported access.\n");
1217 break;
1220 default:
1221 gcc_unreachable ();
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
1228 static void
1229 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1231 if (gsi)
1232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1233 else
1235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1238 if (loop_vinfo)
1240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1241 basic_block new_bb;
1242 edge pe;
1244 if (nested_in_vect_loop_p (loop, stmt))
1245 loop = loop->inner;
1247 pe = loop_preheader_edge (loop);
1248 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1249 gcc_assert (!new_bb);
1251 else
1253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1254 basic_block bb;
1255 gimple_stmt_iterator gsi_bb_start;
1257 gcc_assert (bb_vinfo);
1258 bb = BB_VINFO_BB (bb_vinfo);
1259 gsi_bb_start = gsi_after_labels (bb);
1260 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1264 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1272 /* Function vect_init_vector.
1274 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1275 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1276 vector type a vector with all elements equal to VAL is created first.
1277 Place the initialization at BSI if it is not NULL. Otherwise, place the
1278 initialization at the loop preheader.
1279 Return the DEF of INIT_STMT.
1280 It will be used in the vectorization of STMT. */
1282 tree
1283 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1285 gimple *init_stmt;
1286 tree new_temp;
1288 /* We abuse this function to push something to an SSA name with initial 'val'. */
1289 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1291 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1292 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1294 /* Scalar boolean value should be transformed into
1295 all zeros or all ones value before building a vector. */
1296 if (VECTOR_BOOLEAN_TYPE_P (type))
1298 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1299 tree false_val = build_zero_cst (TREE_TYPE (type));
1301 if (CONSTANT_CLASS_P (val))
1302 val = integer_zerop (val) ? false_val : true_val;
1303 else
1305 new_temp = make_ssa_name (TREE_TYPE (type));
1306 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1307 val, true_val, false_val);
1308 vect_init_vector_1 (stmt, init_stmt, gsi);
1309 val = new_temp;
1312 else if (CONSTANT_CLASS_P (val))
1313 val = fold_convert (TREE_TYPE (type), val);
1314 else
1316 new_temp = make_ssa_name (TREE_TYPE (type));
1317 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1318 init_stmt = gimple_build_assign (new_temp,
1319 fold_build1 (VIEW_CONVERT_EXPR,
1320 TREE_TYPE (type),
1321 val));
1322 else
1323 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1324 vect_init_vector_1 (stmt, init_stmt, gsi);
1325 val = new_temp;
1328 val = build_vector_from_val (type, val);
1331 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1332 init_stmt = gimple_build_assign (new_temp, val);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 return new_temp;
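/* Editorial sketch, not part of the original file: splatting an
   invariant scalar operand OP into a vector of type VECTYPE for the
   vectorized form of STMT, as vect_get_vec_def_for_operand does below
   for constant and external defs.  Passing a NULL iterator places the
   init statement on the loop preheader edge.  The wrapper name is
   hypothetical.  */

static tree
example_splat_invariant (gimple *stmt, tree op, tree vectype)
{
  return vect_init_vector (stmt, op, vectype, NULL);
}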
1337 /* Function vect_get_vec_def_for_operand_1.
1339 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1340 DT that will be used in the vectorized stmt. */
1342 tree
1343 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1345 tree vec_oprnd;
1346 gimple *vec_stmt;
1347 stmt_vec_info def_stmt_info = NULL;
1349 switch (dt)
1351 /* operand is a constant or a loop invariant. */
1352 case vect_constant_def:
1353 case vect_external_def:
1354 /* Code should use vect_get_vec_def_for_operand. */
1355 gcc_unreachable ();
1357 /* operand is defined inside the loop. */
1358 case vect_internal_def:
1360 /* Get the def from the vectorized stmt. */
1361 def_stmt_info = vinfo_for_stmt (def_stmt);
1363 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1364 /* Get vectorized pattern statement. */
1365 if (!vec_stmt
1366 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1367 && !STMT_VINFO_RELEVANT (def_stmt_info))
1368 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1369 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1370 gcc_assert (vec_stmt);
1371 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt);
1373 else if (is_gimple_call (vec_stmt))
1374 vec_oprnd = gimple_call_lhs (vec_stmt);
1375 else
1376 vec_oprnd = gimple_assign_lhs (vec_stmt);
1377 return vec_oprnd;
1380 /* operand is defined by a loop header phi. */
1381 case vect_reduction_def:
1382 case vect_double_reduction_def:
1383 case vect_nested_cycle:
1384 case vect_induction_def:
1386 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1388 /* Get the def from the vectorized stmt. */
1389 def_stmt_info = vinfo_for_stmt (def_stmt);
1390 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1391 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1392 vec_oprnd = PHI_RESULT (vec_stmt);
1393 else
1394 vec_oprnd = gimple_get_lhs (vec_stmt);
1395 return vec_oprnd;
1398 default:
1399 gcc_unreachable ();
1404 /* Function vect_get_vec_def_for_operand.
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 vector invariant. */
1416 tree
1417 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1419 gimple *def_stmt;
1420 enum vect_def_type dt;
1421 bool is_simple_use;
1422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1425 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1430 dump_printf (MSG_NOTE, "\n");
1433 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1434 gcc_assert (is_simple_use);
1435 if (def_stmt && dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1441 if (dt == vect_constant_def || dt == vect_external_def)
1443 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1444 tree vector_type;
1446 if (vectype)
1447 vector_type = vectype;
1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1450 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1451 else
1452 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1454 gcc_assert (vector_type);
1455 return vect_init_vector (stmt, op, vector_type, NULL);
1457 else
1458 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
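/* Editorial sketch, not part of the original file: the usual calling
   pattern when a scalar stmt needs NCOPIES vector copies.  The first
   copy gets its operand from vect_get_vec_def_for_operand; each later
   copy chains through vect_get_vec_def_for_stmt_copy, exactly as in
   the vx.0 .. vx.3 sequence above.  The function name is
   hypothetical.  */

static void
example_defs_for_copies (tree op, gimple *stmt, enum vect_def_type dt,
			 int ncopies)
{
  tree vec_oprnd = NULL_TREE;
  for (int j = 0; j < ncopies; j++)
    {
      if (j == 0)
	vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL_TREE);
      else
	vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
      /* ... emit the J'th copy of the vector stmt using VEC_OPRND ...  */
    }
  (void) vec_oprnd;
}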
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1549 tree vec_oprnd = vec_oprnds0->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1563 /* Get vectorized definitions for OP0 and OP1. */
1565 void
1566 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
1569 slp_tree slp_node)
1571 if (slp_node)
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
1577 ops.quick_push (op0);
1578 if (op1)
1579 ops.quick_push (op1);
1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
1583 *vec_oprnds0 = vec_defs[0];
1584 if (op1)
1585 *vec_oprnds1 = vec_defs[1];
1587 else
1589 tree vec_oprnd;
1591 vec_oprnds0->create (1);
1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1593 vec_oprnds0->quick_push (vec_oprnd);
1595 if (op1)
1597 vec_oprnds1->create (1);
1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1599 vec_oprnds1->quick_push (vec_oprnd);
1605 /* Function vect_finish_stmt_generation.
1607 Insert a new stmt. */
1609 void
1610 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1611 gimple_stmt_iterator *gsi)
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1614 vec_info *vinfo = stmt_info->vinfo;
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1621 gimple *at_stmt = gsi_stmt (*gsi);
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1648 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1654 gimple_set_location (vec_stmt, gimple_location (stmt));
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1664 /* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
1669 static internal_fn
1670 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
1687 return ifn;
1690 return IFN_LAST;
1694 static tree permute_vec_elements (tree, tree, tree, gimple *,
1695 gimple_stmt_iterator *);
1697 /* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1701 static int
1702 compare_step_with_zero (gimple *stmt)
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1713 static tree
1714 perm_mask_for_reverse (tree vectype)
1716 int i, nunits;
1718 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1720 /* The encoding has a single stepped pattern. */
1721 vec_perm_builder sel (nunits, 1, 3);
1722 for (i = 0; i < 3; ++i)
1723 sel.quick_push (nunits - 1 - i);
1725 vec_perm_indices indices (sel, 1, nunits);
1726 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1727 return NULL_TREE;
1728 return vect_gen_perm_mask_checked (vectype, indices);
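/* Editorial worked example: for NUNITS == 8 the three elements pushed
   above are 7, 6, 5; because the encoding is a single stepped pattern,
   vec_perm_indices extends it to the full reversal selector
   { 7, 6, 5, 4, 3, 2, 1, 0 }.  */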
1731 /* A subroutine of get_load_store_type, with a subset of the same
1732 arguments. Handle the case where STMT is part of a grouped load
1733 or store.
1735 For stores, the statements in the group are all consecutive
1736 and there is no gap at the end. For loads, the statements in the
1737 group might not be consecutive; there can be gaps between statements
1738 as well as at the end. */
1740 static bool
1741 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1742 vec_load_store_type vls_type,
1743 vect_memory_access_type *memory_access_type)
1745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1746 vec_info *vinfo = stmt_info->vinfo;
1747 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1748 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1749 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1750 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1751 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1752 bool single_element_p = (stmt == first_stmt
1753 && !GROUP_NEXT_ELEMENT (stmt_info));
1754 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1755 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1757 /* True if the vectorized statements would access beyond the last
1758 statement in the group. */
1759 bool overrun_p = false;
1761 /* True if we can cope with such overrun by peeling for gaps, so that
1762 there is at least one final scalar iteration after the vector loop. */
1763 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1765 /* There can only be a gap at the end of the group if the stride is
1766 known at compile time. */
1767 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1769 /* Stores can't yet have gaps. */
1770 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1772 if (slp)
1774 if (STMT_VINFO_STRIDED_P (stmt_info))
1776 /* Try to use consecutive accesses of GROUP_SIZE elements,
1777 separated by the stride, until we have a complete vector.
1778 Fall back to scalar accesses if that isn't possible. */
1779 if (nunits % group_size == 0)
1780 *memory_access_type = VMAT_STRIDED_SLP;
1781 else
1782 *memory_access_type = VMAT_ELEMENTWISE;
1784 else
1786 overrun_p = loop_vinfo && gap != 0;
1787 if (overrun_p && vls_type != VLS_LOAD)
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1790 "Grouped store with gaps requires"
1791 " non-consecutive accesses\n");
1792 return false;
1794 /* An overrun is fine if the trailing elements are smaller
1795 than the alignment boundary B. Every vector access will
1796 be a multiple of B and so we are guaranteed to access a
1797 non-gap element in the same B-sized block. */
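/* Editor's example (illustrative only): with a known alignment of
   16 bytes and 4-byte scalar elements, B spans 4 elements, so a
   trailing gap of at most 3 elements still leaves at least one
   non-gap element in every 16-byte block that a vector access can
   touch.  */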
1798 if (overrun_p
1799 && gap < (vect_known_alignment_in_bytes (first_dr)
1800 / vect_get_scalar_dr_size (first_dr)))
1801 overrun_p = false;
1802 if (overrun_p && !can_overrun_p)
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "Peeling for outer loop is not supported\n");
1807 return false;
1809 *memory_access_type = VMAT_CONTIGUOUS;
1812 else
1814 /* We can always handle this case using elementwise accesses,
1815 but see if something more efficient is available. */
1816 *memory_access_type = VMAT_ELEMENTWISE;
1818 /* If there is a gap at the end of the group then these optimizations
1819 would access excess elements in the last iteration. */
1820 bool would_overrun_p = (gap != 0);
1821 /* An overrun is fine if the trailing elements are smaller than the
1822 alignment boundary B. Every vector access will be a multiple of B
1823 and so we are guaranteed to access a non-gap element in the
1824 same B-sized block. */
1825 if (would_overrun_p
1826 && gap < (vect_known_alignment_in_bytes (first_dr)
1827 / vect_get_scalar_dr_size (first_dr)))
1828 would_overrun_p = false;
1830 if (!STMT_VINFO_STRIDED_P (stmt_info)
1831 && (can_overrun_p || !would_overrun_p)
1832 && compare_step_with_zero (stmt) > 0)
1834 /* First try using LOAD/STORE_LANES. */
1835 if (vls_type == VLS_LOAD
1836 ? vect_load_lanes_supported (vectype, group_size)
1837 : vect_store_lanes_supported (vectype, group_size))
1839 *memory_access_type = VMAT_LOAD_STORE_LANES;
1840 overrun_p = would_overrun_p;
1843 /* If that fails, try using permuting loads or stores. */
1844 if (*memory_access_type == VMAT_ELEMENTWISE
1845 && (vls_type == VLS_LOAD
1846 ? vect_grouped_load_supported (vectype, single_element_p,
1847 group_size)
1848 : vect_grouped_store_supported (vectype, group_size)))
1850 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1851 overrun_p = would_overrun_p;
1856 if (vls_type != VLS_LOAD && first_stmt == stmt)
1858 /* STMT is the leader of the group. Check the operands of all the
1859 stmts of the group. */
1860 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1861 while (next_stmt)
1863 gcc_assert (gimple_assign_single_p (next_stmt));
1864 tree op = gimple_assign_rhs1 (next_stmt);
1865 gimple *def_stmt;
1866 enum vect_def_type dt;
1867 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1871 "use not simple.\n");
1872 return false;
1874 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1878 if (overrun_p)
1880 gcc_assert (can_overrun_p);
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1883 "Data access with gaps requires scalar "
1884 "epilogue loop\n");
1885 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1888 return true;
1891 /* A subroutine of get_load_store_type, with a subset of the same
1892 arguments. Handle the case where STMT is a load or store that
1893 accesses consecutive elements with a negative step. */
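/* Editor's note (illustrative example): a load of a[i], a[i-1], a[i-2], ...
   has a step of minus one element.  The preferred strategy below is to
   load the underlying contiguous block and reverse it with the mask from
   perm_mask_for_reverse (VMAT_CONTIGUOUS_REVERSE); when that mask or the
   required alignment support is unavailable, or more than one vector copy
   is needed, we fall back to VMAT_ELEMENTWISE.  */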
1895 static vect_memory_access_type
1896 get_negative_load_store_type (gimple *stmt, tree vectype,
1897 vec_load_store_type vls_type,
1898 unsigned int ncopies)
1900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1901 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1902 dr_alignment_support alignment_support_scheme;
1904 if (ncopies > 1)
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "multiple types with negative step.\n");
1909 return VMAT_ELEMENTWISE;
1912 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1913 if (alignment_support_scheme != dr_aligned
1914 && alignment_support_scheme != dr_unaligned_supported)
1916 if (dump_enabled_p ())
1917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1918 "negative step but alignment required.\n");
1919 return VMAT_ELEMENTWISE;
1922 if (vls_type == VLS_STORE_INVARIANT)
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_NOTE, vect_location,
1926 "negative step with invariant source;"
1927 " no permute needed.\n");
1928 return VMAT_CONTIGUOUS_DOWN;
1931 if (!perm_mask_for_reverse (vectype))
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "negative step and reversing not supported.\n");
1936 return VMAT_ELEMENTWISE;
1939 return VMAT_CONTIGUOUS_REVERSE;
1942 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1943 if there is a memory access type that the vectorized form can use,
1944 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1945 or scatters, fill in GS_INFO accordingly.
1947 SLP says whether we're performing SLP rather than loop vectorization.
1948 VECTYPE is the vector type that the vectorized statements will use.
1949 NCOPIES is the number of vector statements that will be needed. */
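/* Editor's example (illustrative only): a non-grouped load from a[3 * i]
   is strided and classified as VMAT_ELEMENTWISE below, a load from
   a[n - i] has a negative step and is passed to
   get_negative_load_store_type, and a plain a[i] access with a positive
   step becomes VMAT_CONTIGUOUS.  */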
1951 static bool
1952 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1953 vec_load_store_type vls_type, unsigned int ncopies,
1954 vect_memory_access_type *memory_access_type,
1955 gather_scatter_info *gs_info)
1957 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1958 vec_info *vinfo = stmt_info->vinfo;
1959 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1960 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1962 *memory_access_type = VMAT_GATHER_SCATTER;
1963 gimple *def_stmt;
1964 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1965 gcc_unreachable ();
1966 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1967 &gs_info->offset_dt,
1968 &gs_info->offset_vectype))
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "%s index use not simple.\n",
1973 vls_type == VLS_LOAD ? "gather" : "scatter");
1974 return false;
1977 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1979 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1980 memory_access_type))
1981 return false;
1983 else if (STMT_VINFO_STRIDED_P (stmt_info))
1985 gcc_assert (!slp);
1986 *memory_access_type = VMAT_ELEMENTWISE;
1988 else
1990 int cmp = compare_step_with_zero (stmt);
1991 if (cmp < 0)
1992 *memory_access_type = get_negative_load_store_type
1993 (stmt, vectype, vls_type, ncopies);
1994 else if (cmp == 0)
1996 gcc_assert (vls_type == VLS_LOAD);
1997 *memory_access_type = VMAT_INVARIANT;
1999 else
2000 *memory_access_type = VMAT_CONTIGUOUS;
2003 /* FIXME: At the moment the cost model seems to underestimate the
2004 cost of using elementwise accesses. This check preserves the
2005 traditional behavior until that can be fixed. */
2006 if (*memory_access_type == VMAT_ELEMENTWISE
2007 && !STMT_VINFO_STRIDED_P (stmt_info))
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2011 "not falling back to elementwise accesses\n");
2012 return false;
2014 return true;
2017 /* Function vectorizable_mask_load_store.
2019 Check if STMT performs a conditional load or store that can be vectorized.
2020 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2021 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2022 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2024 static bool
2025 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2026 gimple **vec_stmt, slp_tree slp_node)
2028 tree vec_dest = NULL;
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 stmt_vec_info prev_stmt_info;
2031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2032 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2033 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2034 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2036 tree rhs_vectype = NULL_TREE;
2037 tree mask_vectype;
2038 tree elem_type;
2039 gimple *new_stmt;
2040 tree dummy;
2041 tree dataref_ptr = NULL_TREE;
2042 gimple *ptr_incr;
2043 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2044 int ncopies;
2045 int i, j;
2046 bool inv_p;
2047 gather_scatter_info gs_info;
2048 vec_load_store_type vls_type;
2049 tree mask;
2050 gimple *def_stmt;
2051 enum vect_def_type dt;
2053 if (slp_node != NULL)
2054 return false;
2056 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2057 gcc_assert (ncopies >= 1);
2059 mask = gimple_call_arg (stmt, 2);
2061 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2062 return false;
2064 /* FORNOW. This restriction should be relaxed. */
2065 if (nested_in_vect_loop && ncopies > 1)
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2069 "multiple types in nested loop.");
2070 return false;
2073 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2074 return false;
2076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2077 && ! vec_stmt)
2078 return false;
2080 if (!STMT_VINFO_DATA_REF (stmt_info))
2081 return false;
2083 elem_type = TREE_TYPE (vectype);
2085 if (TREE_CODE (mask) != SSA_NAME)
2086 return false;
2088 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2089 return false;
2091 if (!mask_vectype)
2092 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2094 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2095 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2096 return false;
2098 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2100 tree rhs = gimple_call_arg (stmt, 3);
2101 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2102 return false;
2103 if (dt == vect_constant_def || dt == vect_external_def)
2104 vls_type = VLS_STORE_INVARIANT;
2105 else
2106 vls_type = VLS_STORE;
2108 else
2109 vls_type = VLS_LOAD;
2111 vect_memory_access_type memory_access_type;
2112 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2113 &memory_access_type, &gs_info))
2114 return false;
2116 if (memory_access_type == VMAT_GATHER_SCATTER)
2118 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2119 tree masktype
2120 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2121 if (TREE_CODE (masktype) == INTEGER_TYPE)
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2125 "masked gather with integer mask not supported.");
2126 return false;
2129 else if (memory_access_type != VMAT_CONTIGUOUS)
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "unsupported access type for masked %s.\n",
2134 vls_type == VLS_LOAD ? "load" : "store");
2135 return false;
2137 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2138 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2139 TYPE_MODE (mask_vectype),
2140 vls_type == VLS_LOAD)
2141 || (rhs_vectype
2142 && !useless_type_conversion_p (vectype, rhs_vectype)))
2143 return false;
2145 if (!vec_stmt) /* transformation not required. */
2147 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2148 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2149 if (vls_type == VLS_LOAD)
2150 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2151 NULL, NULL, NULL);
2152 else
2153 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2154 dt, NULL, NULL, NULL);
2155 return true;
2157 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2159 /* Transform. */
2161 if (memory_access_type == VMAT_GATHER_SCATTER)
2163 tree vec_oprnd0 = NULL_TREE, op;
2164 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2165 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2166 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2167 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2168 tree mask_perm_mask = NULL_TREE;
2169 edge pe = loop_preheader_edge (loop);
2170 gimple_seq seq;
2171 basic_block new_bb;
2172 enum { NARROW, NONE, WIDEN } modifier;
2173 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2175 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2176 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2177 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2178 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2179 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2180 scaletype = TREE_VALUE (arglist);
2181 gcc_checking_assert (types_compatible_p (srctype, rettype)
2182 && types_compatible_p (srctype, masktype));
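/* Editor's example (illustrative only): if the data vector has
   nunits == 4 but the gather offset vector has 8 elements, the WIDEN
   path below builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 } so that
   odd copies can pull the upper half of the offsets to the front;
   with the ratio reversed (nunits == 8, 4 offsets) the NARROW path
   instead merges the results of two gathers into one vector.  */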
2184 if (nunits == gather_off_nunits)
2185 modifier = NONE;
2186 else if (nunits == gather_off_nunits / 2)
2188 modifier = WIDEN;
2190 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
2191 for (i = 0; i < gather_off_nunits; ++i)
2192 sel.quick_push (i | nunits);
2194 vec_perm_indices indices (sel, 1, gather_off_nunits);
2195 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2196 indices);
2198 else if (nunits == gather_off_nunits * 2)
2200 modifier = NARROW;
2202 vec_perm_builder sel (nunits, nunits, 1);
2203 sel.quick_grow (nunits);
2204 for (i = 0; i < nunits; ++i)
2205 sel[i] = i < gather_off_nunits
2206 ? i : i + nunits - gather_off_nunits;
2207 vec_perm_indices indices (sel, 2, nunits);
2208 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2210 ncopies *= 2;
2212 for (i = 0; i < nunits; ++i)
2213 sel[i] = i | gather_off_nunits;
2214 indices.new_vector (sel, 2, gather_off_nunits);
2215 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2217 else
2218 gcc_unreachable ();
2220 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2222 ptr = fold_convert (ptrtype, gs_info.base);
2223 if (!is_gimple_min_invariant (ptr))
2225 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2226 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2227 gcc_assert (!new_bb);
2230 scale = build_int_cst (scaletype, gs_info.scale);
2232 prev_stmt_info = NULL;
2233 for (j = 0; j < ncopies; ++j)
2235 if (modifier == WIDEN && (j & 1))
2236 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2237 perm_mask, stmt, gsi);
2238 else if (j == 0)
2239 op = vec_oprnd0
2240 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2241 else
2242 op = vec_oprnd0
2243 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2245 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2248 == TYPE_VECTOR_SUBPARTS (idxtype));
2249 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2250 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2251 new_stmt
2252 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2253 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2254 op = var;
2257 if (mask_perm_mask && (j & 1))
2258 mask_op = permute_vec_elements (mask_op, mask_op,
2259 mask_perm_mask, stmt, gsi);
2260 else
2262 if (j == 0)
2263 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2264 else
2266 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2267 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2270 mask_op = vec_mask;
2271 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2273 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2274 == TYPE_VECTOR_SUBPARTS (masktype));
2275 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2276 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2277 new_stmt
2278 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2280 mask_op = var;
2284 new_stmt
2285 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2286 scale);
2288 if (!useless_type_conversion_p (vectype, rettype))
2290 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2291 == TYPE_VECTOR_SUBPARTS (rettype));
2292 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2293 gimple_call_set_lhs (new_stmt, op);
2294 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2295 var = make_ssa_name (vec_dest);
2296 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2297 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2299 else
2301 var = make_ssa_name (vec_dest, new_stmt);
2302 gimple_call_set_lhs (new_stmt, var);
2305 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2307 if (modifier == NARROW)
2309 if ((j & 1) == 0)
2311 prev_res = var;
2312 continue;
2314 var = permute_vec_elements (prev_res, var,
2315 perm_mask, stmt, gsi);
2316 new_stmt = SSA_NAME_DEF_STMT (var);
2319 if (prev_stmt_info == NULL)
2320 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2321 else
2322 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2323 prev_stmt_info = vinfo_for_stmt (new_stmt);
2326 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2327 from the IL. */
2328 if (STMT_VINFO_RELATED_STMT (stmt_info))
2330 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2331 stmt_info = vinfo_for_stmt (stmt);
2333 tree lhs = gimple_call_lhs (stmt);
2334 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2335 set_vinfo_for_stmt (new_stmt, stmt_info);
2336 set_vinfo_for_stmt (stmt, NULL);
2337 STMT_VINFO_STMT (stmt_info) = new_stmt;
2338 gsi_replace (gsi, new_stmt, true);
2339 return true;
2341 else if (vls_type != VLS_LOAD)
2343 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2344 prev_stmt_info = NULL;
2345 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2346 for (i = 0; i < ncopies; i++)
2348 unsigned align, misalign;
2350 if (i == 0)
2352 tree rhs = gimple_call_arg (stmt, 3);
2353 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2354 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2355 mask_vectype);
2356 /* We should have caught mismatched types earlier. */
2357 gcc_assert (useless_type_conversion_p (vectype,
2358 TREE_TYPE (vec_rhs)));
2359 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2360 NULL_TREE, &dummy, gsi,
2361 &ptr_incr, false, &inv_p);
2362 gcc_assert (!inv_p);
2364 else
2366 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2367 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2368 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2369 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2370 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2371 TYPE_SIZE_UNIT (vectype));
2374 align = DR_TARGET_ALIGNMENT (dr);
2375 if (aligned_access_p (dr))
2376 misalign = 0;
2377 else if (DR_MISALIGNMENT (dr) == -1)
2379 align = TYPE_ALIGN_UNIT (elem_type);
2380 misalign = 0;
2382 else
2383 misalign = DR_MISALIGNMENT (dr);
2384 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2385 misalign);
2386 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2387 misalign ? least_bit_hwi (misalign) : align);
2388 gcall *call
2389 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2390 ptr, vec_mask, vec_rhs);
2391 gimple_call_set_nothrow (call, true);
2392 new_stmt = call;
2393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2394 if (i == 0)
2395 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2396 else
2397 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2398 prev_stmt_info = vinfo_for_stmt (new_stmt);
2401 else
2403 tree vec_mask = NULL_TREE;
2404 prev_stmt_info = NULL;
2405 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2406 for (i = 0; i < ncopies; i++)
2408 unsigned align, misalign;
2410 if (i == 0)
2412 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2413 mask_vectype);
2414 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2415 NULL_TREE, &dummy, gsi,
2416 &ptr_incr, false, &inv_p);
2417 gcc_assert (!inv_p);
2419 else
2421 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2422 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2423 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2424 TYPE_SIZE_UNIT (vectype));
2427 align = DR_TARGET_ALIGNMENT (dr);
2428 if (aligned_access_p (dr))
2429 misalign = 0;
2430 else if (DR_MISALIGNMENT (dr) == -1)
2432 align = TYPE_ALIGN_UNIT (elem_type);
2433 misalign = 0;
2435 else
2436 misalign = DR_MISALIGNMENT (dr);
2437 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2438 misalign);
2439 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2440 misalign ? least_bit_hwi (misalign) : align);
2441 gcall *call
2442 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2443 ptr, vec_mask);
2444 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2445 gimple_call_set_nothrow (call, true);
2446 vect_finish_stmt_generation (stmt, call, gsi);
2447 if (i == 0)
2448 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2449 else
2450 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2451 prev_stmt_info = vinfo_for_stmt (call);
2455 if (vls_type == VLS_LOAD)
2457 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2458 from the IL. */
2459 if (STMT_VINFO_RELATED_STMT (stmt_info))
2461 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2462 stmt_info = vinfo_for_stmt (stmt);
2464 tree lhs = gimple_call_lhs (stmt);
2465 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2466 set_vinfo_for_stmt (new_stmt, stmt_info);
2467 set_vinfo_for_stmt (stmt, NULL);
2468 STMT_VINFO_STMT (stmt_info) = new_stmt;
2469 gsi_replace (gsi, new_stmt, true);
2472 return true;
2475 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2477 static bool
2478 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2479 gimple **vec_stmt, slp_tree slp_node,
2480 tree vectype_in, enum vect_def_type *dt)
2482 tree op, vectype;
2483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2484 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2485 unsigned ncopies, nunits;
2487 op = gimple_call_arg (stmt, 0);
2488 vectype = STMT_VINFO_VECTYPE (stmt_info);
2489 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2491 /* Multiple types in SLP are handled by creating the appropriate number of
2492 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2493 case of SLP. */
2494 if (slp_node)
2495 ncopies = 1;
2496 else
2497 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2499 gcc_assert (ncopies >= 1);
2501 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2502 if (! char_vectype)
2503 return false;
2505 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2506 unsigned word_bytes = num_bytes / nunits;
2508 /* The encoding uses one stepped pattern for each byte in the word. */
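/* Editor's example (illustrative only): for a bswap32 on a 16-byte
   vector of 4-byte words (num_bytes == 16, word_bytes == 4) the loop
   below pushes { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 } and the
   stepped encoding completes the selector with 15, 14, 13, 12,
   i.e. a byte reversal within every 32-bit word.  */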
2509 vec_perm_builder elts (num_bytes, word_bytes, 3);
2510 for (unsigned i = 0; i < 3; ++i)
2511 for (unsigned j = 0; j < word_bytes; ++j)
2512 elts.quick_push ((i + 1) * word_bytes - j - 1);
2514 vec_perm_indices indices (elts, 1, num_bytes);
2515 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2516 return false;
2518 if (! vec_stmt)
2520 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2523 "\n");
2524 if (! PURE_SLP_STMT (stmt_info))
2526 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2527 1, vector_stmt, stmt_info, 0, vect_prologue);
2528 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2529 ncopies, vec_perm, stmt_info, 0, vect_body);
2531 return true;
2534 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2536 /* Transform. */
2537 vec<tree> vec_oprnds = vNULL;
2538 gimple *new_stmt = NULL;
2539 stmt_vec_info prev_stmt_info = NULL;
2540 for (unsigned j = 0; j < ncopies; j++)
2542 /* Handle uses. */
2543 if (j == 0)
2544 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2545 else
2546 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2548 /* Arguments are ready. Create the new vector stmt. */
2549 unsigned i;
2550 tree vop;
2551 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2553 tree tem = make_ssa_name (char_vectype);
2554 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2555 char_vectype, vop));
2556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2557 tree tem2 = make_ssa_name (char_vectype);
2558 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2559 tem, tem, bswap_vconst);
2560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2561 tem = make_ssa_name (vectype);
2562 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2563 vectype, tem2));
2564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2565 if (slp_node)
2566 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2569 if (slp_node)
2570 continue;
2572 if (j == 0)
2573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2574 else
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2577 prev_stmt_info = vinfo_for_stmt (new_stmt);
2580 vec_oprnds.release ();
2581 return true;
2584 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2585 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2586 in a single step. On success, store the binary pack code in
2587 *CONVERT_CODE. */
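/* Editor's example (illustrative only): packing a pair of vectors of
   32-bit integers into one vector of 16-bit integers is a single-step
   narrowing (typically VEC_PACK_TRUNC_EXPR); a 32-bit to 8-bit
   narrowing needs an intermediate step, so multi_step_cvt is nonzero
   and the function below returns false.  */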
2589 static bool
2590 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2591 tree_code *convert_code)
2593 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2594 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2595 return false;
2597 tree_code code;
2598 int multi_step_cvt = 0;
2599 auto_vec <tree, 8> interm_types;
2600 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2601 &code, &multi_step_cvt,
2602 &interm_types)
2603 || multi_step_cvt)
2604 return false;
2606 *convert_code = code;
2607 return true;
2610 /* Function vectorizable_call.
2612 Check if GS performs a function call that can be vectorized.
2613 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2614 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2615 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2617 static bool
2618 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2619 slp_tree slp_node)
2621 gcall *stmt;
2622 tree vec_dest;
2623 tree scalar_dest;
2624 tree op, type;
2625 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2626 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2627 tree vectype_out, vectype_in;
2628 int nunits_in;
2629 int nunits_out;
2630 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2632 vec_info *vinfo = stmt_info->vinfo;
2633 tree fndecl, new_temp, rhs_type;
2634 gimple *def_stmt;
2635 enum vect_def_type dt[3]
2636 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2637 int ndts = 3;
2638 gimple *new_stmt = NULL;
2639 int ncopies, j;
2640 vec<tree> vargs = vNULL;
2641 enum { NARROW, NONE, WIDEN } modifier;
2642 size_t i, nargs;
2643 tree lhs;
2645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2646 return false;
2648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2649 && ! vec_stmt)
2650 return false;
2652 /* Is GS a vectorizable call? */
2653 stmt = dyn_cast <gcall *> (gs);
2654 if (!stmt)
2655 return false;
2657 if (gimple_call_internal_p (stmt)
2658 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2659 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2660 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2661 slp_node);
2663 if (gimple_call_lhs (stmt) == NULL_TREE
2664 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2665 return false;
2667 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2669 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2671 /* Process function arguments. */
2672 rhs_type = NULL_TREE;
2673 vectype_in = NULL_TREE;
2674 nargs = gimple_call_num_args (stmt);
2676 /* Bail out if the function has more than three arguments; we do not have
2677 interesting builtin functions to vectorize with more than two arguments
2678 except for fma. Calls with no arguments are not handled either. */
2679 if (nargs == 0 || nargs > 3)
2680 return false;
2682 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2683 if (gimple_call_internal_p (stmt)
2684 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2686 nargs = 0;
2687 rhs_type = unsigned_type_node;
2690 for (i = 0; i < nargs; i++)
2692 tree opvectype;
2694 op = gimple_call_arg (stmt, i);
2696 /* We can only handle calls with arguments of the same type. */
2697 if (rhs_type
2698 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2700 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2702 "argument types differ.\n");
2703 return false;
2705 if (!rhs_type)
2706 rhs_type = TREE_TYPE (op);
2708 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2712 "use not simple.\n");
2713 return false;
2716 if (!vectype_in)
2717 vectype_in = opvectype;
2718 else if (opvectype
2719 && opvectype != vectype_in)
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2723 "argument vector types differ.\n");
2724 return false;
2727 /* If all arguments are external or constant defs use a vector type with
2728 the same size as the output vector type. */
2729 if (!vectype_in)
2730 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2731 if (vec_stmt)
2732 gcc_assert (vectype_in);
2733 if (!vectype_in)
2735 if (dump_enabled_p ())
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2738 "no vectype for scalar type ");
2739 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2740 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2743 return false;
2746 /* FORNOW */
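/* Editor's example (illustrative only): with vectype_in == V4SI and
   vectype_out == V8HI we get nunits_in == 4 and nunits_out == 8, so
   the call is classified as NARROW; equal element counts give NONE
   and the opposite 2:1 ratio gives WIDEN.  */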
2747 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2748 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2749 if (nunits_in == nunits_out / 2)
2750 modifier = NARROW;
2751 else if (nunits_out == nunits_in)
2752 modifier = NONE;
2753 else if (nunits_out == nunits_in / 2)
2754 modifier = WIDEN;
2755 else
2756 return false;
2758 /* We only handle functions that do not read or clobber memory. */
2759 if (gimple_vuse (stmt))
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763 "function reads from or writes to memory.\n");
2764 return false;
2767 /* For now, we only vectorize functions if a target specific builtin
2768 is available. TODO -- in some cases, it might be profitable to
2769 insert the calls for pieces of the vector, in order to be able
2770 to vectorize other operations in the loop. */
2771 fndecl = NULL_TREE;
2772 internal_fn ifn = IFN_LAST;
2773 combined_fn cfn = gimple_call_combined_fn (stmt);
2774 tree callee = gimple_call_fndecl (stmt);
2776 /* First try using an internal function. */
2777 tree_code convert_code = ERROR_MARK;
2778 if (cfn != CFN_LAST
2779 && (modifier == NONE
2780 || (modifier == NARROW
2781 && simple_integer_narrowing (vectype_out, vectype_in,
2782 &convert_code))))
2783 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2784 vectype_in);
2786 /* If that fails, try asking for a target-specific built-in function. */
2787 if (ifn == IFN_LAST)
2789 if (cfn != CFN_LAST)
2790 fndecl = targetm.vectorize.builtin_vectorized_function
2791 (cfn, vectype_out, vectype_in);
2792 else
2793 fndecl = targetm.vectorize.builtin_md_vectorized_function
2794 (callee, vectype_out, vectype_in);
2797 if (ifn == IFN_LAST && !fndecl)
2799 if (cfn == CFN_GOMP_SIMD_LANE
2800 && !slp_node
2801 && loop_vinfo
2802 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2803 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2804 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2805 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2807 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2808 { 0, 1, 2, ... vf - 1 } vector. */
2809 gcc_assert (nargs == 0);
2811 else if (modifier == NONE
2812 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2813 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2814 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2815 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2816 vectype_in, dt);
2817 else
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2821 "function is not vectorizable.\n");
2822 return false;
2826 if (slp_node)
2827 ncopies = 1;
2828 else if (modifier == NARROW && ifn == IFN_LAST)
2829 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2830 else
2831 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2833 /* Sanity check: make sure that at least one copy of the vectorized stmt
2834 needs to be generated. */
2835 gcc_assert (ncopies >= 1);
2837 if (!vec_stmt) /* transformation not required. */
2839 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2840 if (dump_enabled_p ())
2841 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2842 "\n");
2843 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2844 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2845 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2846 vec_promote_demote, stmt_info, 0, vect_body);
2848 return true;
2851 /* Transform. */
2853 if (dump_enabled_p ())
2854 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2856 /* Handle def. */
2857 scalar_dest = gimple_call_lhs (stmt);
2858 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2860 prev_stmt_info = NULL;
2861 if (modifier == NONE || ifn != IFN_LAST)
2863 tree prev_res = NULL_TREE;
2864 for (j = 0; j < ncopies; ++j)
2866 /* Build argument list for the vectorized call. */
2867 if (j == 0)
2868 vargs.create (nargs);
2869 else
2870 vargs.truncate (0);
2872 if (slp_node)
2874 auto_vec<vec<tree> > vec_defs (nargs);
2875 vec<tree> vec_oprnds0;
2877 for (i = 0; i < nargs; i++)
2878 vargs.quick_push (gimple_call_arg (stmt, i));
2879 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2880 vec_oprnds0 = vec_defs[0];
2882 /* Arguments are ready. Create the new vector stmt. */
2883 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2885 size_t k;
2886 for (k = 0; k < nargs; k++)
2888 vec<tree> vec_oprndsk = vec_defs[k];
2889 vargs[k] = vec_oprndsk[i];
2891 if (modifier == NARROW)
2893 tree half_res = make_ssa_name (vectype_in);
2894 gcall *call
2895 = gimple_build_call_internal_vec (ifn, vargs);
2896 gimple_call_set_lhs (call, half_res);
2897 gimple_call_set_nothrow (call, true);
2898 new_stmt = call;
2899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2900 if ((i & 1) == 0)
2902 prev_res = half_res;
2903 continue;
2905 new_temp = make_ssa_name (vec_dest);
2906 new_stmt = gimple_build_assign (new_temp, convert_code,
2907 prev_res, half_res);
2909 else
2911 gcall *call;
2912 if (ifn != IFN_LAST)
2913 call = gimple_build_call_internal_vec (ifn, vargs);
2914 else
2915 call = gimple_build_call_vec (fndecl, vargs);
2916 new_temp = make_ssa_name (vec_dest, call);
2917 gimple_call_set_lhs (call, new_temp);
2918 gimple_call_set_nothrow (call, true);
2919 new_stmt = call;
2921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2922 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2925 for (i = 0; i < nargs; i++)
2927 vec<tree> vec_oprndsi = vec_defs[i];
2928 vec_oprndsi.release ();
2930 continue;
2933 for (i = 0; i < nargs; i++)
2935 op = gimple_call_arg (stmt, i);
2936 if (j == 0)
2937 vec_oprnd0
2938 = vect_get_vec_def_for_operand (op, stmt);
2939 else
2941 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2942 vec_oprnd0
2943 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2946 vargs.quick_push (vec_oprnd0);
2949 if (gimple_call_internal_p (stmt)
2950 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2952 tree_vector_builder v (vectype_out, 1, 3);
2953 for (int k = 0; k < 3; ++k)
2954 v.quick_push (build_int_cst (unsigned_type_node,
2955 j * nunits_out + k));
2956 tree cst = v.build ();
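/* Editor's note: for nunits_out == 4 and copy j == 1 the three elements
   encoded above are 4, 5 and 6, and the stepped pattern completes the
   constant as { 4, 5, 6, 7 }, the lane numbers produced by this copy of
   the IFN_GOMP_SIMD_LANE result.  */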
2957 tree new_var
2958 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2959 gimple *init_stmt = gimple_build_assign (new_var, cst);
2960 vect_init_vector_1 (stmt, init_stmt, NULL);
2961 new_temp = make_ssa_name (vec_dest);
2962 new_stmt = gimple_build_assign (new_temp, new_var);
2964 else if (modifier == NARROW)
2966 tree half_res = make_ssa_name (vectype_in);
2967 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2968 gimple_call_set_lhs (call, half_res);
2969 gimple_call_set_nothrow (call, true);
2970 new_stmt = call;
2971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2972 if ((j & 1) == 0)
2974 prev_res = half_res;
2975 continue;
2977 new_temp = make_ssa_name (vec_dest);
2978 new_stmt = gimple_build_assign (new_temp, convert_code,
2979 prev_res, half_res);
2981 else
2983 gcall *call;
2984 if (ifn != IFN_LAST)
2985 call = gimple_build_call_internal_vec (ifn, vargs);
2986 else
2987 call = gimple_build_call_vec (fndecl, vargs);
2988 new_temp = make_ssa_name (vec_dest, new_stmt);
2989 gimple_call_set_lhs (call, new_temp);
2990 gimple_call_set_nothrow (call, true);
2991 new_stmt = call;
2993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2995 if (j == (modifier == NARROW ? 1 : 0))
2996 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2997 else
2998 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3000 prev_stmt_info = vinfo_for_stmt (new_stmt);
3003 else if (modifier == NARROW)
3005 for (j = 0; j < ncopies; ++j)
3007 /* Build argument list for the vectorized call. */
3008 if (j == 0)
3009 vargs.create (nargs * 2);
3010 else
3011 vargs.truncate (0);
3013 if (slp_node)
3015 auto_vec<vec<tree> > vec_defs (nargs);
3016 vec<tree> vec_oprnds0;
3018 for (i = 0; i < nargs; i++)
3019 vargs.quick_push (gimple_call_arg (stmt, i));
3020 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3021 vec_oprnds0 = vec_defs[0];
3023 /* Arguments are ready. Create the new vector stmt. */
3024 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3026 size_t k;
3027 vargs.truncate (0);
3028 for (k = 0; k < nargs; k++)
3030 vec<tree> vec_oprndsk = vec_defs[k];
3031 vargs.quick_push (vec_oprndsk[i]);
3032 vargs.quick_push (vec_oprndsk[i + 1]);
3034 gcall *call;
3035 if (ifn != IFN_LAST)
3036 call = gimple_build_call_internal_vec (ifn, vargs);
3037 else
3038 call = gimple_build_call_vec (fndecl, vargs);
3039 new_temp = make_ssa_name (vec_dest, call);
3040 gimple_call_set_lhs (call, new_temp);
3041 gimple_call_set_nothrow (call, true);
3042 new_stmt = call;
3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3044 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3047 for (i = 0; i < nargs; i++)
3049 vec<tree> vec_oprndsi = vec_defs[i];
3050 vec_oprndsi.release ();
3052 continue;
3055 for (i = 0; i < nargs; i++)
3057 op = gimple_call_arg (stmt, i);
3058 if (j == 0)
3060 vec_oprnd0
3061 = vect_get_vec_def_for_operand (op, stmt);
3062 vec_oprnd1
3063 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3065 else
3067 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3068 vec_oprnd0
3069 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3070 vec_oprnd1
3071 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3074 vargs.quick_push (vec_oprnd0);
3075 vargs.quick_push (vec_oprnd1);
3078 new_stmt = gimple_build_call_vec (fndecl, vargs);
3079 new_temp = make_ssa_name (vec_dest, new_stmt);
3080 gimple_call_set_lhs (new_stmt, new_temp);
3081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3083 if (j == 0)
3084 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3085 else
3086 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3088 prev_stmt_info = vinfo_for_stmt (new_stmt);
3091 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3093 else
3094 /* No current target implements this case. */
3095 return false;
3097 vargs.release ();
3099 /* The call in STMT might prevent it from being removed in dce.
3100 However, we cannot remove it here because of the way the ssa name
3101 it defines is mapped to the new definition. So just replace the
3102 rhs of the statement with something harmless. */
3104 if (slp_node)
3105 return true;
3107 type = TREE_TYPE (scalar_dest);
3108 if (is_pattern_stmt_p (stmt_info))
3109 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3110 else
3111 lhs = gimple_call_lhs (stmt);
3113 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3114 set_vinfo_for_stmt (new_stmt, stmt_info);
3115 set_vinfo_for_stmt (stmt, NULL);
3116 STMT_VINFO_STMT (stmt_info) = new_stmt;
3117 gsi_replace (gsi, new_stmt, false);
3119 return true;
3123 struct simd_call_arg_info
3125 tree vectype;
3126 tree op;
3127 HOST_WIDE_INT linear_step;
3128 enum vect_def_type dt;
3129 unsigned int align;
3130 bool simd_lane_linear;
3133 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3134 is linear within simd lane (but not within whole loop), note it in
3135 *ARGINFO. */
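/* Editor's example (illustrative only, hypothetical names): an argument
   computed as

       p = invariant_base + (sizetype) (GOMP_SIMD_LANE (simduid) * 4)

   is linear within a simd lane with step 4; the walk below records the
   invariant base and that step in *ARGINFO.  */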
3137 static void
3138 vect_simd_lane_linear (tree op, struct loop *loop,
3139 struct simd_call_arg_info *arginfo)
3141 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3143 if (!is_gimple_assign (def_stmt)
3144 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3145 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3146 return;
3148 tree base = gimple_assign_rhs1 (def_stmt);
3149 HOST_WIDE_INT linear_step = 0;
3150 tree v = gimple_assign_rhs2 (def_stmt);
3151 while (TREE_CODE (v) == SSA_NAME)
3153 tree t;
3154 def_stmt = SSA_NAME_DEF_STMT (v);
3155 if (is_gimple_assign (def_stmt))
3156 switch (gimple_assign_rhs_code (def_stmt))
3158 case PLUS_EXPR:
3159 t = gimple_assign_rhs2 (def_stmt);
3160 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3161 return;
3162 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3163 v = gimple_assign_rhs1 (def_stmt);
3164 continue;
3165 case MULT_EXPR:
3166 t = gimple_assign_rhs2 (def_stmt);
3167 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3168 return;
3169 linear_step = tree_to_shwi (t);
3170 v = gimple_assign_rhs1 (def_stmt);
3171 continue;
3172 CASE_CONVERT:
3173 t = gimple_assign_rhs1 (def_stmt);
3174 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3175 || (TYPE_PRECISION (TREE_TYPE (v))
3176 < TYPE_PRECISION (TREE_TYPE (t))))
3177 return;
3178 if (!linear_step)
3179 linear_step = 1;
3180 v = t;
3181 continue;
3182 default:
3183 return;
3185 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3186 && loop->simduid
3187 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3188 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3189 == loop->simduid))
3191 if (!linear_step)
3192 linear_step = 1;
3193 arginfo->linear_step = linear_step;
3194 arginfo->op = base;
3195 arginfo->simd_lane_linear = true;
3196 return;
3201 /* Function vectorizable_simd_clone_call.
3203 Check if STMT performs a function call that can be vectorized
3204 by calling a simd clone of the function.
3205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3206 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3209 static bool
3210 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3211 gimple **vec_stmt, slp_tree slp_node)
3213 tree vec_dest;
3214 tree scalar_dest;
3215 tree op, type;
3216 tree vec_oprnd0 = NULL_TREE;
3217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3218 tree vectype;
3219 unsigned int nunits;
3220 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3221 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3222 vec_info *vinfo = stmt_info->vinfo;
3223 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3224 tree fndecl, new_temp;
3225 gimple *def_stmt;
3226 gimple *new_stmt = NULL;
3227 int ncopies, j;
3228 auto_vec<simd_call_arg_info> arginfo;
3229 vec<tree> vargs = vNULL;
3230 size_t i, nargs;
3231 tree lhs, rtype, ratype;
3232 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3234 /* Is STMT a vectorizable call? */
3235 if (!is_gimple_call (stmt))
3236 return false;
3238 fndecl = gimple_call_fndecl (stmt);
3239 if (fndecl == NULL_TREE)
3240 return false;
3242 struct cgraph_node *node = cgraph_node::get (fndecl);
3243 if (node == NULL || node->simd_clones == NULL)
3244 return false;
3246 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3247 return false;
3249 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3250 && ! vec_stmt)
3251 return false;
3253 if (gimple_call_lhs (stmt)
3254 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3255 return false;
3257 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3259 vectype = STMT_VINFO_VECTYPE (stmt_info);
3261 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3262 return false;
3264 /* FORNOW */
3265 if (slp_node)
3266 return false;
3268 /* Process function arguments. */
3269 nargs = gimple_call_num_args (stmt);
3271 /* Bail out if the function has zero arguments. */
3272 if (nargs == 0)
3273 return false;
3275 arginfo.reserve (nargs, true);
3277 for (i = 0; i < nargs; i++)
3279 simd_call_arg_info thisarginfo;
3280 affine_iv iv;
3282 thisarginfo.linear_step = 0;
3283 thisarginfo.align = 0;
3284 thisarginfo.op = NULL_TREE;
3285 thisarginfo.simd_lane_linear = false;
3287 op = gimple_call_arg (stmt, i);
3288 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3289 &thisarginfo.vectype)
3290 || thisarginfo.dt == vect_uninitialized_def)
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "use not simple.\n");
3295 return false;
3298 if (thisarginfo.dt == vect_constant_def
3299 || thisarginfo.dt == vect_external_def)
3300 gcc_assert (thisarginfo.vectype == NULL_TREE);
3301 else
3302 gcc_assert (thisarginfo.vectype != NULL_TREE);
3304 /* For linear arguments, the analyze phase should have saved
3305 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3306 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3307 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3309 gcc_assert (vec_stmt);
3310 thisarginfo.linear_step
3311 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3312 thisarginfo.op
3313 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3314 thisarginfo.simd_lane_linear
3315 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3316 == boolean_true_node);
3317 /* If the loop has been peeled for alignment, we need to adjust it. */
3318 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3319 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3320 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3322 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3323 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3324 tree opt = TREE_TYPE (thisarginfo.op);
3325 bias = fold_convert (TREE_TYPE (step), bias);
3326 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3327 thisarginfo.op
3328 = fold_build2 (POINTER_TYPE_P (opt)
3329 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3330 thisarginfo.op, bias);
3333 else if (!vec_stmt
3334 && thisarginfo.dt != vect_constant_def
3335 && thisarginfo.dt != vect_external_def
3336 && loop_vinfo
3337 && TREE_CODE (op) == SSA_NAME
3338 && simple_iv (loop, loop_containing_stmt (stmt), op,
3339 &iv, false)
3340 && tree_fits_shwi_p (iv.step))
3342 thisarginfo.linear_step = tree_to_shwi (iv.step);
3343 thisarginfo.op = iv.base;
3345 else if ((thisarginfo.dt == vect_constant_def
3346 || thisarginfo.dt == vect_external_def)
3347 && POINTER_TYPE_P (TREE_TYPE (op)))
3348 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3349 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3350 linear too. */
3351 if (POINTER_TYPE_P (TREE_TYPE (op))
3352 && !thisarginfo.linear_step
3353 && !vec_stmt
3354 && thisarginfo.dt != vect_constant_def
3355 && thisarginfo.dt != vect_external_def
3356 && loop_vinfo
3357 && !slp_node
3358 && TREE_CODE (op) == SSA_NAME)
3359 vect_simd_lane_linear (op, loop, &thisarginfo);
3361 arginfo.quick_push (thisarginfo);
3364 unsigned int badness = 0;
3365 struct cgraph_node *bestn = NULL;
3366 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3367 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3368 else
3369 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3370 n = n->simdclone->next_clone)
3372 unsigned int this_badness = 0;
3373 if (n->simdclone->simdlen
3374 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3375 || n->simdclone->nargs != nargs)
3376 continue;
3377 if (n->simdclone->simdlen
3378 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3379 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3380 - exact_log2 (n->simdclone->simdlen)) * 1024;
3381 if (n->simdclone->inbranch)
3382 this_badness += 2048;
3383 int target_badness = targetm.simd_clone.usable (n);
3384 if (target_badness < 0)
3385 continue;
3386 this_badness += target_badness * 512;
3387 /* FORNOW: Have to add code to add the mask argument. */
3388 if (n->simdclone->inbranch)
3389 continue;
3390 for (i = 0; i < nargs; i++)
3392 switch (n->simdclone->args[i].arg_type)
3394 case SIMD_CLONE_ARG_TYPE_VECTOR:
3395 if (!useless_type_conversion_p
3396 (n->simdclone->args[i].orig_type,
3397 TREE_TYPE (gimple_call_arg (stmt, i))))
3398 i = -1;
3399 else if (arginfo[i].dt == vect_constant_def
3400 || arginfo[i].dt == vect_external_def
3401 || arginfo[i].linear_step)
3402 this_badness += 64;
3403 break;
3404 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3405 if (arginfo[i].dt != vect_constant_def
3406 && arginfo[i].dt != vect_external_def)
3407 i = -1;
3408 break;
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3411 if (arginfo[i].dt == vect_constant_def
3412 || arginfo[i].dt == vect_external_def
3413 || (arginfo[i].linear_step
3414 != n->simdclone->args[i].linear_step))
3415 i = -1;
3416 break;
3417 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3418 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3419 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3420 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3421 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3422 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3423 /* FORNOW */
3424 i = -1;
3425 break;
3426 case SIMD_CLONE_ARG_TYPE_MASK:
3427 gcc_unreachable ();
3429 if (i == (size_t) -1)
3430 break;
3431 if (n->simdclone->args[i].alignment > arginfo[i].align)
3433 i = -1;
3434 break;
3436 if (arginfo[i].align)
3437 this_badness += (exact_log2 (arginfo[i].align)
3438 - exact_log2 (n->simdclone->args[i].alignment));
3440 if (i == (size_t) -1)
3441 continue;
3442 if (bestn == NULL || this_badness < badness)
3444 bestn = n;
3445 badness = this_badness;
3449 if (bestn == NULL)
3450 return false;
3452 for (i = 0; i < nargs; i++)
3453 if ((arginfo[i].dt == vect_constant_def
3454 || arginfo[i].dt == vect_external_def)
3455 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3457 arginfo[i].vectype
3458 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3459 i)));
3460 if (arginfo[i].vectype == NULL
3461 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3462 > bestn->simdclone->simdlen))
3463 return false;
3466 fndecl = bestn->decl;
3467 nunits = bestn->simdclone->simdlen;
3468 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3470 /* If the function isn't const, only allow it in simd loops where the user
3471 has asserted that at least nunits consecutive iterations can be
3472 performed using SIMD instructions. */
3473 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3474 && gimple_vuse (stmt))
3475 return false;
3477 /* Sanity check: make sure that at least one copy of the vectorized stmt
3478 needs to be generated. */
3479 gcc_assert (ncopies >= 1);
3481 if (!vec_stmt) /* transformation not required. */
3483 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3484 for (i = 0; i < nargs; i++)
3485 if ((bestn->simdclone->args[i].arg_type
3486 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3487 || (bestn->simdclone->args[i].arg_type
3488 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3490 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3491 + 1);
3492 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3493 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3494 ? size_type_node : TREE_TYPE (arginfo[i].op);
3495 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3496 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3497 tree sll = arginfo[i].simd_lane_linear
3498 ? boolean_true_node : boolean_false_node;
3499 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3501 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3502 if (dump_enabled_p ())
3503 dump_printf_loc (MSG_NOTE, vect_location,
3504 "=== vectorizable_simd_clone_call ===\n");
3505 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3506 return true;
3509 /* Transform. */
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3514 /* Handle def. */
3515 scalar_dest = gimple_call_lhs (stmt);
3516 vec_dest = NULL_TREE;
3517 rtype = NULL_TREE;
3518 ratype = NULL_TREE;
3519 if (scalar_dest)
3521 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3522 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3523 if (TREE_CODE (rtype) == ARRAY_TYPE)
3525 ratype = rtype;
3526 rtype = TREE_TYPE (ratype);
3530 prev_stmt_info = NULL;
3531 for (j = 0; j < ncopies; ++j)
3533 /* Build argument list for the vectorized call. */
3534 if (j == 0)
3535 vargs.create (nargs);
3536 else
3537 vargs.truncate (0);
3539 for (i = 0; i < nargs; i++)
3541 unsigned int k, l, m, o;
3542 tree atype;
3543 op = gimple_call_arg (stmt, i);
3544 switch (bestn->simdclone->args[i].arg_type)
3546 case SIMD_CLONE_ARG_TYPE_VECTOR:
3547 atype = bestn->simdclone->args[i].vector_type;
3548 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3549 for (m = j * o; m < (j + 1) * o; m++)
3551 if (TYPE_VECTOR_SUBPARTS (atype)
3552 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3554 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3555 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3556 / TYPE_VECTOR_SUBPARTS (atype));
3557 gcc_assert ((k & (k - 1)) == 0);
3558 if (m == 0)
3559 vec_oprnd0
3560 = vect_get_vec_def_for_operand (op, stmt);
3561 else
3563 vec_oprnd0 = arginfo[i].op;
3564 if ((m & (k - 1)) == 0)
3565 vec_oprnd0
3566 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3567 vec_oprnd0);
3569 arginfo[i].op = vec_oprnd0;
3570 vec_oprnd0
3571 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3572 bitsize_int (prec),
3573 bitsize_int ((m & (k - 1)) * prec));
3574 new_stmt
3575 = gimple_build_assign (make_ssa_name (atype),
3576 vec_oprnd0);
3577 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3578 vargs.safe_push (gimple_assign_lhs (new_stmt));
3580 else
3582 k = (TYPE_VECTOR_SUBPARTS (atype)
3583 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3584 gcc_assert ((k & (k - 1)) == 0);
3585 vec<constructor_elt, va_gc> *ctor_elts;
3586 if (k != 1)
3587 vec_alloc (ctor_elts, k);
3588 else
3589 ctor_elts = NULL;
3590 for (l = 0; l < k; l++)
3592 if (m == 0 && l == 0)
3593 vec_oprnd0
3594 = vect_get_vec_def_for_operand (op, stmt);
3595 else
3596 vec_oprnd0
3597 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3598 arginfo[i].op);
3599 arginfo[i].op = vec_oprnd0;
3600 if (k == 1)
3601 break;
3602 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3603 vec_oprnd0);
3605 if (k == 1)
3606 vargs.safe_push (vec_oprnd0);
3607 else
3609 vec_oprnd0 = build_constructor (atype, ctor_elts);
3610 new_stmt
3611 = gimple_build_assign (make_ssa_name (atype),
3612 vec_oprnd0);
3613 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3614 vargs.safe_push (gimple_assign_lhs (new_stmt));
3618 break;
3619 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3620 vargs.safe_push (op);
3621 break;
3622 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3623 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3624 if (j == 0)
3626 gimple_seq stmts;
3627 arginfo[i].op
3628 = force_gimple_operand (arginfo[i].op, &stmts, true,
3629 NULL_TREE);
3630 if (stmts != NULL)
3632 basic_block new_bb;
3633 edge pe = loop_preheader_edge (loop);
3634 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3635 gcc_assert (!new_bb);
3637 if (arginfo[i].simd_lane_linear)
3639 vargs.safe_push (arginfo[i].op);
3640 break;
3642 tree phi_res = copy_ssa_name (op);
3643 gphi *new_phi = create_phi_node (phi_res, loop->header);
3644 set_vinfo_for_stmt (new_phi,
3645 new_stmt_vec_info (new_phi, loop_vinfo));
3646 add_phi_arg (new_phi, arginfo[i].op,
3647 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3648 enum tree_code code
3649 = POINTER_TYPE_P (TREE_TYPE (op))
3650 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3651 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3652 ? sizetype : TREE_TYPE (op);
3653 widest_int cst
3654 = wi::mul (bestn->simdclone->args[i].linear_step,
3655 ncopies * nunits);
3656 tree tcst = wide_int_to_tree (type, cst);
3657 tree phi_arg = copy_ssa_name (op);
3658 new_stmt
3659 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3660 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3661 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3662 set_vinfo_for_stmt (new_stmt,
3663 new_stmt_vec_info (new_stmt, loop_vinfo));
3664 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3665 UNKNOWN_LOCATION);
3666 arginfo[i].op = phi_res;
3667 vargs.safe_push (phi_res);
3669 else
3671 enum tree_code code
3672 = POINTER_TYPE_P (TREE_TYPE (op))
3673 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3674 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3675 ? sizetype : TREE_TYPE (op);
3676 widest_int cst
3677 = wi::mul (bestn->simdclone->args[i].linear_step,
3678 j * nunits);
3679 tree tcst = wide_int_to_tree (type, cst);
3680 new_temp = make_ssa_name (TREE_TYPE (op));
3681 new_stmt = gimple_build_assign (new_temp, code,
3682 arginfo[i].op, tcst);
3683 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3684 vargs.safe_push (new_temp);
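/* Illustration (hedged): for a linear argument with step 1, nunits 4 and
   ncopies 2, the j == 0 path above creates a loop-header PHI that starts
   at the original value and is bumped by 1 * 2 * 4 == 8 on the latch
   edge, while the j == 1 copy passes PHI + 1 * 1 * 4 to the clone.  */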
3686 break;
3687 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3688 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3689 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3690 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3691 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3692 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3693 default:
3694 gcc_unreachable ();
3698 new_stmt = gimple_build_call_vec (fndecl, vargs);
3699 if (vec_dest)
3701 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3702 if (ratype)
3703 new_temp = create_tmp_var (ratype);
3704 else if (TYPE_VECTOR_SUBPARTS (vectype)
3705 == TYPE_VECTOR_SUBPARTS (rtype))
3706 new_temp = make_ssa_name (vec_dest, new_stmt);
3707 else
3708 new_temp = make_ssa_name (rtype, new_stmt);
3709 gimple_call_set_lhs (new_stmt, new_temp);
3711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3713 if (vec_dest)
3715 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3717 unsigned int k, l;
3718 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3719 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3720 gcc_assert ((k & (k - 1)) == 0);
3721 for (l = 0; l < k; l++)
3723 tree t;
3724 if (ratype)
3726 t = build_fold_addr_expr (new_temp);
3727 t = build2 (MEM_REF, vectype, t,
3728 build_int_cst (TREE_TYPE (t),
3729 l * prec / BITS_PER_UNIT));
3731 else
3732 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3733 bitsize_int (prec), bitsize_int (l * prec));
3734 new_stmt
3735 = gimple_build_assign (make_ssa_name (vectype), t);
3736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3737 if (j == 0 && l == 0)
3738 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3739 else
3740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3742 prev_stmt_info = vinfo_for_stmt (new_stmt);
3745 if (ratype)
3747 tree clobber = build_constructor (ratype, NULL);
3748 TREE_THIS_VOLATILE (clobber) = 1;
3749 new_stmt = gimple_build_assign (new_temp, clobber);
3750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3752 continue;
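/* Illustration (hedged): if the clone hands its result back in an array
   RATYPE holding two VECTYPE-sized vectors, so that k == 2, the two
   pieces are read back above with MEM_REFs at byte offsets 0 and prec/8;
   without RATYPE the pieces are taken with BIT_FIELD_REFs instead.  */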
3754 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3756 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3757 / TYPE_VECTOR_SUBPARTS (rtype));
3758 gcc_assert ((k & (k - 1)) == 0);
3759 if ((j & (k - 1)) == 0)
3760 vec_alloc (ret_ctor_elts, k);
3761 if (ratype)
3763 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3764 for (m = 0; m < o; m++)
3766 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3767 size_int (m), NULL_TREE, NULL_TREE);
3768 new_stmt
3769 = gimple_build_assign (make_ssa_name (rtype), tem);
3770 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3771 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3772 gimple_assign_lhs (new_stmt));
3774 tree clobber = build_constructor (ratype, NULL);
3775 TREE_THIS_VOLATILE (clobber) = 1;
3776 new_stmt = gimple_build_assign (new_temp, clobber);
3777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3779 else
3780 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3781 if ((j & (k - 1)) != k - 1)
3782 continue;
3783 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3784 new_stmt
3785 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3786 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3788 if ((unsigned) j == k - 1)
3789 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3790 else
3791 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3793 prev_stmt_info = vinfo_for_stmt (new_stmt);
3794 continue;
3796 else if (ratype)
3798 tree t = build_fold_addr_expr (new_temp);
3799 t = build2 (MEM_REF, vectype, t,
3800 build_int_cst (TREE_TYPE (t), 0));
3801 new_stmt
3802 = gimple_build_assign (make_ssa_name (vec_dest), t);
3803 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3804 tree clobber = build_constructor (ratype, NULL);
3805 TREE_THIS_VOLATILE (clobber) = 1;
3806 vect_finish_stmt_generation (stmt,
3807 gimple_build_assign (new_temp,
3808 clobber), gsi);
3812 if (j == 0)
3813 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3814 else
3815 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3817 prev_stmt_info = vinfo_for_stmt (new_stmt);
3820 vargs.release ();
3822 /* The call in STMT might prevent it from being removed in DCE.
3823 We however cannot remove it here, due to the way the SSA name
3824 it defines is mapped to the new definition. So just replace
3825 the rhs of the statement with something harmless. */
3827 if (slp_node)
3828 return true;
3830 if (scalar_dest)
3832 type = TREE_TYPE (scalar_dest);
3833 if (is_pattern_stmt_p (stmt_info))
3834 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3835 else
3836 lhs = gimple_call_lhs (stmt);
3837 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3839 else
3840 new_stmt = gimple_build_nop ();
3841 set_vinfo_for_stmt (new_stmt, stmt_info);
3842 set_vinfo_for_stmt (stmt, NULL);
3843 STMT_VINFO_STMT (stmt_info) = new_stmt;
3844 gsi_replace (gsi, new_stmt, true);
3845 unlink_stmt_vdef (stmt);
3847 return true;
3851 /* Function vect_gen_widened_results_half
3853 Create a vector stmt whose code, number of arguments, and result
3854 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3855 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3856 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3857 needs to be created (DECL is a function-decl of a target-builtin).
3858 STMT is the original scalar stmt that we are vectorizing. */
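/* Illustrative note (not from the sources): for a widening multiply of
   two V8HI operands this helper is typically invoked twice, once with
   CODE == VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR
   (or the EVEN/ODD variants, depending on the target), each call
   producing one V4SI half of the result.  */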
3860 static gimple *
3861 vect_gen_widened_results_half (enum tree_code code,
3862 tree decl,
3863 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3864 tree vec_dest, gimple_stmt_iterator *gsi,
3865 gimple *stmt)
3867 gimple *new_stmt;
3868 tree new_temp;
3870 /* Generate half of the widened result: */
3871 if (code == CALL_EXPR)
3873 /* Target specific support */
3874 if (op_type == binary_op)
3875 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3876 else
3877 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3878 new_temp = make_ssa_name (vec_dest, new_stmt);
3879 gimple_call_set_lhs (new_stmt, new_temp);
3881 else
3883 /* Generic support */
3884 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3885 if (op_type != binary_op)
3886 vec_oprnd1 = NULL;
3887 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3888 new_temp = make_ssa_name (vec_dest, new_stmt);
3889 gimple_assign_set_lhs (new_stmt, new_temp);
3891 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3893 return new_stmt;
3897 /* Get vectorized definitions for loop-based vectorization. For the first
3898 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3899 a scalar operand), and for the rest we get a copy with
3900 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3901 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3902 The vectors are collected into VEC_OPRNDS. */
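/* Illustrative note: when called with MULTI_STEP_CVT == 1 this collects
   four defs in VEC_OPRNDS (two per level of recursion) - the def of the
   scalar OPRND followed by three successive stmt copies - which is what
   a two-level narrowing sequence consumes per result vector.  */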
3904 static void
3905 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3906 vec<tree> *vec_oprnds, int multi_step_cvt)
3908 tree vec_oprnd;
3910 /* Get first vector operand. */
3911 /* All the vector operands except the very first one (that is the scalar
3912 oprnd) are stmt copies. */
3913 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3914 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3915 else
3916 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3918 vec_oprnds->quick_push (vec_oprnd);
3920 /* Get second vector operand. */
3921 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3922 vec_oprnds->quick_push (vec_oprnd);
3924 *oprnd = vec_oprnd;
3926 /* For conversion in multiple steps, continue to get operands
3927 recursively. */
3928 if (multi_step_cvt)
3929 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3933 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3934 For multi-step conversions store the resulting vectors and call the function
3935 recursively. */
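/* Illustrative example (assuming 128-bit vectors): narrowing int to char
   typically takes two levels - pairs of V4SI are first packed into V8HI,
   and the recursive call then packs pairs of V8HI into V16QI, normally
   via VEC_PACK_TRUNC_EXPR - so each level halves the number of vectors
   held in VEC_OPRNDS.  */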
3937 static void
3938 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3939 int multi_step_cvt, gimple *stmt,
3940 vec<tree> vec_dsts,
3941 gimple_stmt_iterator *gsi,
3942 slp_tree slp_node, enum tree_code code,
3943 stmt_vec_info *prev_stmt_info)
3945 unsigned int i;
3946 tree vop0, vop1, new_tmp, vec_dest;
3947 gimple *new_stmt;
3948 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3950 vec_dest = vec_dsts.pop ();
3952 for (i = 0; i < vec_oprnds->length (); i += 2)
3954 /* Create demotion operation. */
3955 vop0 = (*vec_oprnds)[i];
3956 vop1 = (*vec_oprnds)[i + 1];
3957 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3958 new_tmp = make_ssa_name (vec_dest, new_stmt);
3959 gimple_assign_set_lhs (new_stmt, new_tmp);
3960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3962 if (multi_step_cvt)
3963 /* Store the resulting vector for next recursive call. */
3964 (*vec_oprnds)[i/2] = new_tmp;
3965 else
3967 /* This is the last step of the conversion sequence. Store the
3968 vectors in SLP_NODE or in vector info of the scalar statement
3969 (or in STMT_VINFO_RELATED_STMT chain). */
3970 if (slp_node)
3971 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3972 else
3974 if (!*prev_stmt_info)
3975 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3976 else
3977 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3979 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3984 /* For multi-step demotion operations we first generate demotion operations
3985 from the source type to the intermediate types, and then combine the
3986 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3987 type. */
3988 if (multi_step_cvt)
3990 /* At each level of recursion we have half of the operands we had at the
3991 previous level. */
3992 vec_oprnds->truncate ((i+1)/2);
3993 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3994 stmt, vec_dsts, gsi, slp_node,
3995 VEC_PACK_TRUNC_EXPR,
3996 prev_stmt_info);
3999 vec_dsts.quick_push (vec_dest);
4003 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4004 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4005 the resulting vectors and call the function recursively. */
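/* Illustrative example: when widening V8HI operands to int, every vector
   in VEC_OPRNDS0 yields two V4SI results (a low and a high half), so the
   operand vector is replaced by one twice as long, ready for the next
   (possibly multi-step) level.  */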
4007 static void
4008 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4009 vec<tree> *vec_oprnds1,
4010 gimple *stmt, tree vec_dest,
4011 gimple_stmt_iterator *gsi,
4012 enum tree_code code1,
4013 enum tree_code code2, tree decl1,
4014 tree decl2, int op_type)
4016 int i;
4017 tree vop0, vop1, new_tmp1, new_tmp2;
4018 gimple *new_stmt1, *new_stmt2;
4019 vec<tree> vec_tmp = vNULL;
4021 vec_tmp.create (vec_oprnds0->length () * 2);
4022 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4024 if (op_type == binary_op)
4025 vop1 = (*vec_oprnds1)[i];
4026 else
4027 vop1 = NULL_TREE;
4029 /* Generate the two halves of promotion operation. */
4030 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4031 op_type, vec_dest, gsi, stmt);
4032 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4033 op_type, vec_dest, gsi, stmt);
4034 if (is_gimple_call (new_stmt1))
4036 new_tmp1 = gimple_call_lhs (new_stmt1);
4037 new_tmp2 = gimple_call_lhs (new_stmt2);
4039 else
4041 new_tmp1 = gimple_assign_lhs (new_stmt1);
4042 new_tmp2 = gimple_assign_lhs (new_stmt2);
4045 /* Store the results for the next step. */
4046 vec_tmp.quick_push (new_tmp1);
4047 vec_tmp.quick_push (new_tmp2);
4050 vec_oprnds0->release ();
4051 *vec_oprnds0 = vec_tmp;
4055 /* Check if STMT performs a conversion operation that can be vectorized.
4056 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4057 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4058 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4060 static bool
4061 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4062 gimple **vec_stmt, slp_tree slp_node)
4064 tree vec_dest;
4065 tree scalar_dest;
4066 tree op0, op1 = NULL_TREE;
4067 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4070 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4071 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4072 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4073 tree new_temp;
4074 gimple *def_stmt;
4075 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4076 int ndts = 2;
4077 gimple *new_stmt = NULL;
4078 stmt_vec_info prev_stmt_info;
4079 int nunits_in;
4080 int nunits_out;
4081 tree vectype_out, vectype_in;
4082 int ncopies, i, j;
4083 tree lhs_type, rhs_type;
4084 enum { NARROW, NONE, WIDEN } modifier;
4085 vec<tree> vec_oprnds0 = vNULL;
4086 vec<tree> vec_oprnds1 = vNULL;
4087 tree vop0;
4088 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4089 vec_info *vinfo = stmt_info->vinfo;
4090 int multi_step_cvt = 0;
4091 vec<tree> interm_types = vNULL;
4092 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4093 int op_type;
4094 unsigned short fltsz;
4096 /* Is STMT a vectorizable conversion? */
4098 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4099 return false;
4101 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4102 && ! vec_stmt)
4103 return false;
4105 if (!is_gimple_assign (stmt))
4106 return false;
4108 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4109 return false;
4111 code = gimple_assign_rhs_code (stmt);
4112 if (!CONVERT_EXPR_CODE_P (code)
4113 && code != FIX_TRUNC_EXPR
4114 && code != FLOAT_EXPR
4115 && code != WIDEN_MULT_EXPR
4116 && code != WIDEN_LSHIFT_EXPR)
4117 return false;
4119 op_type = TREE_CODE_LENGTH (code);
4121 /* Check types of lhs and rhs. */
4122 scalar_dest = gimple_assign_lhs (stmt);
4123 lhs_type = TREE_TYPE (scalar_dest);
4124 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4126 op0 = gimple_assign_rhs1 (stmt);
4127 rhs_type = TREE_TYPE (op0);
4129 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4130 && !((INTEGRAL_TYPE_P (lhs_type)
4131 && INTEGRAL_TYPE_P (rhs_type))
4132 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4133 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4134 return false;
4136 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4137 && ((INTEGRAL_TYPE_P (lhs_type)
4138 && !type_has_mode_precision_p (lhs_type))
4139 || (INTEGRAL_TYPE_P (rhs_type)
4140 && !type_has_mode_precision_p (rhs_type))))
4142 if (dump_enabled_p ())
4143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4144 "type conversion to/from bit-precision unsupported."
4145 "\n");
4146 return false;
4149 /* Check the operands of the operation. */
4150 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4154 "use not simple.\n");
4155 return false;
4157 if (op_type == binary_op)
4159 bool ok;
4161 op1 = gimple_assign_rhs2 (stmt);
4162 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4163 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4164 OP1. */
4165 if (CONSTANT_CLASS_P (op0))
4166 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4167 else
4168 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4170 if (!ok)
4172 if (dump_enabled_p ())
4173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4174 "use not simple.\n");
4175 return false;
4179 /* If op0 is an external or constant def, use a vector type of
4180 the same size as the output vector type. */
4181 if (!vectype_in)
4182 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4183 if (vec_stmt)
4184 gcc_assert (vectype_in);
4185 if (!vectype_in)
4187 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4190 "no vectype for scalar type ");
4191 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4192 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4195 return false;
4198 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4199 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4201 if (dump_enabled_p ())
4203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4204 "can't convert between boolean and non "
4205 "boolean vectors");
4206 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4207 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4210 return false;
4213 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4214 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4215 if (nunits_in < nunits_out)
4216 modifier = NARROW;
4217 else if (nunits_out == nunits_in)
4218 modifier = NONE;
4219 else
4220 modifier = WIDEN;
4222 /* Multiple types in SLP are handled by creating the appropriate number of
4223 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4224 case of SLP. */
4225 if (slp_node)
4226 ncopies = 1;
4227 else if (modifier == NARROW)
4228 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4229 else
4230 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4232 /* Sanity check: make sure that at least one copy of the vectorized stmt
4233 needs to be generated. */
4234 gcc_assert (ncopies >= 1);
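/* Example (hedged, assuming 128-bit vectors and VF == 8): for a
   short -> int widening, vectype_in is V8HI and vectype_out V4SI, so
   ncopies == VF / nunits_in == 1 and one V8HI input is widened into two
   V4SI results per vectorized iteration.  */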
4236 bool found_mode = false;
4237 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4238 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4239 opt_scalar_mode rhs_mode_iter;
4241 /* Supportable by target? */
4242 switch (modifier)
4244 case NONE:
4245 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4246 return false;
4247 if (supportable_convert_operation (code, vectype_out, vectype_in,
4248 &decl1, &code1))
4249 break;
4250 /* FALLTHRU */
4251 unsupported:
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4254 "conversion not supported by target.\n");
4255 return false;
4257 case WIDEN:
4258 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4259 &code1, &code2, &multi_step_cvt,
4260 &interm_types))
4262 /* Binary widening operation can only be supported directly by the
4263 architecture. */
4264 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4265 break;
4268 if (code != FLOAT_EXPR
4269 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4270 goto unsupported;
4272 fltsz = GET_MODE_SIZE (lhs_mode);
4273 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4275 rhs_mode = rhs_mode_iter.require ();
4276 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4277 break;
4279 cvt_type
4280 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4281 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4282 if (cvt_type == NULL_TREE)
4283 goto unsupported;
4285 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4287 if (!supportable_convert_operation (code, vectype_out,
4288 cvt_type, &decl1, &codecvt1))
4289 goto unsupported;
4291 else if (!supportable_widening_operation (code, stmt, vectype_out,
4292 cvt_type, &codecvt1,
4293 &codecvt2, &multi_step_cvt,
4294 &interm_types))
4295 continue;
4296 else
4297 gcc_assert (multi_step_cvt == 0);
4299 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4300 vectype_in, &code1, &code2,
4301 &multi_step_cvt, &interm_types))
4303 found_mode = true;
4304 break;
4308 if (!found_mode)
4309 goto unsupported;
4311 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4312 codecvt2 = ERROR_MARK;
4313 else
4315 multi_step_cvt++;
4316 interm_types.safe_push (cvt_type);
4317 cvt_type = NULL_TREE;
4319 break;
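/* Illustration of the WIDEN analysis above (hedged, target-dependent):
   for (double) int_var with 128-bit vectors the loop settles on a
   2 x 64-bit integer cvt_type, so CODE1/CODE2 unpack V4SI into two V2DI
   halves and CODECVT1 then FLOATs each V2DI to V2DF; the integer mode
   already matches the float size, so CODECVT2 stays ERROR_MARK.  */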
4321 case NARROW:
4322 gcc_assert (op_type == unary_op);
4323 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4324 &code1, &multi_step_cvt,
4325 &interm_types))
4326 break;
4328 if (code != FIX_TRUNC_EXPR
4329 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4330 goto unsupported;
4332 cvt_type
4333 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4334 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4335 if (cvt_type == NULL_TREE)
4336 goto unsupported;
4337 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4338 &decl1, &codecvt1))
4339 goto unsupported;
4340 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4341 &code1, &multi_step_cvt,
4342 &interm_types))
4343 break;
4344 goto unsupported;
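/* Illustration of the NARROW analysis (hedged, target-dependent): for
   (short) double_var with 128-bit vectors, cvt_type becomes a 2 x 64-bit
   integer vector, CODECVT1 FIX_TRUNCs V2DF to V2DI, and the NOP
   narrowing then packs the V2DI results down to V8HI via an intermediate
   V4SI step recorded in INTERM_TYPES.  */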
4346 default:
4347 gcc_unreachable ();
4350 if (!vec_stmt) /* transformation not required. */
4352 if (dump_enabled_p ())
4353 dump_printf_loc (MSG_NOTE, vect_location,
4354 "=== vectorizable_conversion ===\n");
4355 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4357 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4358 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4360 else if (modifier == NARROW)
4362 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4363 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4365 else
4367 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4368 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4370 interm_types.release ();
4371 return true;
4374 /* Transform. */
4375 if (dump_enabled_p ())
4376 dump_printf_loc (MSG_NOTE, vect_location,
4377 "transform conversion. ncopies = %d.\n", ncopies);
4379 if (op_type == binary_op)
4381 if (CONSTANT_CLASS_P (op0))
4382 op0 = fold_convert (TREE_TYPE (op1), op0);
4383 else if (CONSTANT_CLASS_P (op1))
4384 op1 = fold_convert (TREE_TYPE (op0), op1);
4387 /* In case of multi-step conversion, we first generate conversion operations
4388 to the intermediate types, and then from those types to the final one.
4389 We create vector destinations for the intermediate type (TYPES) received
4390 from supportable_*_operation, and store them in the correct order
4391 for future use in vect_create_vectorized_*_stmts (). */
4392 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4393 vec_dest = vect_create_destination_var (scalar_dest,
4394 (cvt_type && modifier == WIDEN)
4395 ? cvt_type : vectype_out);
4396 vec_dsts.quick_push (vec_dest);
4398 if (multi_step_cvt)
4400 for (i = interm_types.length () - 1;
4401 interm_types.iterate (i, &intermediate_type); i--)
4403 vec_dest = vect_create_destination_var (scalar_dest,
4404 intermediate_type);
4405 vec_dsts.quick_push (vec_dest);
4409 if (cvt_type)
4410 vec_dest = vect_create_destination_var (scalar_dest,
4411 modifier == WIDEN
4412 ? vectype_out : cvt_type);
4414 if (!slp_node)
4416 if (modifier == WIDEN)
4418 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4419 if (op_type == binary_op)
4420 vec_oprnds1.create (1);
4422 else if (modifier == NARROW)
4423 vec_oprnds0.create (
4424 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4426 else if (code == WIDEN_LSHIFT_EXPR)
4427 vec_oprnds1.create (slp_node->vec_stmts_size);
4429 last_oprnd = op0;
4430 prev_stmt_info = NULL;
4431 switch (modifier)
4433 case NONE:
4434 for (j = 0; j < ncopies; j++)
4436 if (j == 0)
4437 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4438 else
4439 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4441 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4443 /* Arguments are ready, create the new vector stmt. */
4444 if (code1 == CALL_EXPR)
4446 new_stmt = gimple_build_call (decl1, 1, vop0);
4447 new_temp = make_ssa_name (vec_dest, new_stmt);
4448 gimple_call_set_lhs (new_stmt, new_temp);
4450 else
4452 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4453 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4454 new_temp = make_ssa_name (vec_dest, new_stmt);
4455 gimple_assign_set_lhs (new_stmt, new_temp);
4458 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4459 if (slp_node)
4460 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4461 else
4463 if (!prev_stmt_info)
4464 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4465 else
4466 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4467 prev_stmt_info = vinfo_for_stmt (new_stmt);
4471 break;
4473 case WIDEN:
4474 /* In case the vectorization factor (VF) is bigger than the number
4475 of elements that we can fit in a vectype (nunits), we have to
4476 generate more than one vector stmt - i.e. - we need to "unroll"
4477 the vector stmt by a factor of VF/nunits. */
4478 for (j = 0; j < ncopies; j++)
4480 /* Handle uses. */
4481 if (j == 0)
4483 if (slp_node)
4485 if (code == WIDEN_LSHIFT_EXPR)
4487 unsigned int k;
4489 vec_oprnd1 = op1;
4490 /* Store vec_oprnd1 for every vector stmt to be created
4491 for SLP_NODE. We check during the analysis that all
4492 the shift arguments are the same. */
4493 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4494 vec_oprnds1.quick_push (vec_oprnd1);
4496 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4497 slp_node);
4499 else
4500 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4501 &vec_oprnds1, slp_node);
4503 else
4505 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4506 vec_oprnds0.quick_push (vec_oprnd0);
4507 if (op_type == binary_op)
4509 if (code == WIDEN_LSHIFT_EXPR)
4510 vec_oprnd1 = op1;
4511 else
4512 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4513 vec_oprnds1.quick_push (vec_oprnd1);
4517 else
4519 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4520 vec_oprnds0.truncate (0);
4521 vec_oprnds0.quick_push (vec_oprnd0);
4522 if (op_type == binary_op)
4524 if (code == WIDEN_LSHIFT_EXPR)
4525 vec_oprnd1 = op1;
4526 else
4527 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4528 vec_oprnd1);
4529 vec_oprnds1.truncate (0);
4530 vec_oprnds1.quick_push (vec_oprnd1);
4534 /* Arguments are ready. Create the new vector stmts. */
4535 for (i = multi_step_cvt; i >= 0; i--)
4537 tree this_dest = vec_dsts[i];
4538 enum tree_code c1 = code1, c2 = code2;
4539 if (i == 0 && codecvt2 != ERROR_MARK)
4541 c1 = codecvt1;
4542 c2 = codecvt2;
4544 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4545 &vec_oprnds1,
4546 stmt, this_dest, gsi,
4547 c1, c2, decl1, decl2,
4548 op_type);
4551 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4553 if (cvt_type)
4555 if (codecvt1 == CALL_EXPR)
4557 new_stmt = gimple_build_call (decl1, 1, vop0);
4558 new_temp = make_ssa_name (vec_dest, new_stmt);
4559 gimple_call_set_lhs (new_stmt, new_temp);
4561 else
4563 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4564 new_temp = make_ssa_name (vec_dest);
4565 new_stmt = gimple_build_assign (new_temp, codecvt1,
4566 vop0);
4569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4571 else
4572 new_stmt = SSA_NAME_DEF_STMT (vop0);
4574 if (slp_node)
4575 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4576 else
4578 if (!prev_stmt_info)
4579 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4580 else
4581 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4582 prev_stmt_info = vinfo_for_stmt (new_stmt);
4587 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4588 break;
4590 case NARROW:
4591 /* In case the vectorization factor (VF) is bigger than the number
4592 of elements that we can fit in a vectype (nunits), we have to
4593 generate more than one vector stmt - i.e. - we need to "unroll"
4594 the vector stmt by a factor of VF/nunits. */
4595 for (j = 0; j < ncopies; j++)
4597 /* Handle uses. */
4598 if (slp_node)
4599 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4600 slp_node);
4601 else
4603 vec_oprnds0.truncate (0);
4604 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4605 vect_pow2 (multi_step_cvt) - 1);
4608 /* Arguments are ready. Create the new vector stmts. */
4609 if (cvt_type)
4610 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4612 if (codecvt1 == CALL_EXPR)
4614 new_stmt = gimple_build_call (decl1, 1, vop0);
4615 new_temp = make_ssa_name (vec_dest, new_stmt);
4616 gimple_call_set_lhs (new_stmt, new_temp);
4618 else
4620 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4621 new_temp = make_ssa_name (vec_dest);
4622 new_stmt = gimple_build_assign (new_temp, codecvt1,
4623 vop0);
4626 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4627 vec_oprnds0[i] = new_temp;
4630 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4631 stmt, vec_dsts, gsi,
4632 slp_node, code1,
4633 &prev_stmt_info);
4636 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4637 break;
4640 vec_oprnds0.release ();
4641 vec_oprnds1.release ();
4642 interm_types.release ();
4644 return true;
4648 /* Function vectorizable_assignment.
4650 Check if STMT performs an assignment (copy) that can be vectorized.
4651 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4652 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4653 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4655 static bool
4656 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4657 gimple **vec_stmt, slp_tree slp_node)
4659 tree vec_dest;
4660 tree scalar_dest;
4661 tree op;
4662 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4663 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4664 tree new_temp;
4665 gimple *def_stmt;
4666 enum vect_def_type dt[1] = {vect_unknown_def_type};
4667 int ndts = 1;
4668 int ncopies;
4669 int i, j;
4670 vec<tree> vec_oprnds = vNULL;
4671 tree vop;
4672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4673 vec_info *vinfo = stmt_info->vinfo;
4674 gimple *new_stmt = NULL;
4675 stmt_vec_info prev_stmt_info = NULL;
4676 enum tree_code code;
4677 tree vectype_in;
4679 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4680 return false;
4682 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4683 && ! vec_stmt)
4684 return false;
4686 /* Is vectorizable assignment? */
4687 if (!is_gimple_assign (stmt))
4688 return false;
4690 scalar_dest = gimple_assign_lhs (stmt);
4691 if (TREE_CODE (scalar_dest) != SSA_NAME)
4692 return false;
4694 code = gimple_assign_rhs_code (stmt);
4695 if (gimple_assign_single_p (stmt)
4696 || code == PAREN_EXPR
4697 || CONVERT_EXPR_CODE_P (code))
4698 op = gimple_assign_rhs1 (stmt);
4699 else
4700 return false;
4702 if (code == VIEW_CONVERT_EXPR)
4703 op = TREE_OPERAND (op, 0);
4705 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4706 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4710 case of SLP. */
4711 if (slp_node)
4712 ncopies = 1;
4713 else
4714 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4716 gcc_assert (ncopies >= 1);
4718 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4722 "use not simple.\n");
4723 return false;
4726 /* We can handle NOP_EXPR conversions that do not change the number
4727 of elements or the vector size. */
4728 if ((CONVERT_EXPR_CODE_P (code)
4729 || code == VIEW_CONVERT_EXPR)
4730 && (!vectype_in
4731 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4732 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4733 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4734 return false;
4736 /* We do not handle bit-precision changes. */
4737 if ((CONVERT_EXPR_CODE_P (code)
4738 || code == VIEW_CONVERT_EXPR)
4739 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4740 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4741 || !type_has_mode_precision_p (TREE_TYPE (op)))
4742 /* But a conversion that does not change the bit-pattern is ok. */
4743 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4744 > TYPE_PRECISION (TREE_TYPE (op)))
4745 && TYPE_UNSIGNED (TREE_TYPE (op)))
4746 /* Conversion between boolean types of different sizes is
4747 a simple assignment in case their vectypes are the same
4748 boolean vectors. */
4749 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4750 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4754 "type conversion to/from bit-precision "
4755 "unsupported.\n");
4756 return false;
4759 if (!vec_stmt) /* transformation not required. */
4761 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE, vect_location,
4764 "=== vectorizable_assignment ===\n");
4765 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4766 return true;
4769 /* Transform. */
4770 if (dump_enabled_p ())
4771 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4773 /* Handle def. */
4774 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4776 /* Handle use. */
4777 for (j = 0; j < ncopies; j++)
4779 /* Handle uses. */
4780 if (j == 0)
4781 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4782 else
4783 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4785 /* Arguments are ready. create the new vector stmt. */
4786 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4788 if (CONVERT_EXPR_CODE_P (code)
4789 || code == VIEW_CONVERT_EXPR)
4790 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4791 new_stmt = gimple_build_assign (vec_dest, vop);
4792 new_temp = make_ssa_name (vec_dest, new_stmt);
4793 gimple_assign_set_lhs (new_stmt, new_temp);
4794 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4795 if (slp_node)
4796 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4799 if (slp_node)
4800 continue;
4802 if (j == 0)
4803 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4804 else
4805 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4807 prev_stmt_info = vinfo_for_stmt (new_stmt);
4810 vec_oprnds.release ();
4811 return true;
4815 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4816 either as a shift by a scalar or by a vector. */
4818 bool
4819 vect_supportable_shift (enum tree_code code, tree scalar_type)
4822 machine_mode vec_mode;
4823 optab optab;
4824 int icode;
4825 tree vectype;
4827 vectype = get_vectype_for_scalar_type (scalar_type);
4828 if (!vectype)
4829 return false;
4831 optab = optab_for_tree_code (code, vectype, optab_scalar);
4832 if (!optab
4833 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4835 optab = optab_for_tree_code (code, vectype, optab_vector);
4836 if (!optab
4837 || (optab_handler (optab, TYPE_MODE (vectype))
4838 == CODE_FOR_nothing))
4839 return false;
4842 vec_mode = TYPE_MODE (vectype);
4843 icode = (int) optab_handler (optab, vec_mode);
4844 if (icode == CODE_FOR_nothing)
4845 return false;
4847 return true;
4851 /* Function vectorizable_shift.
4853 Check if STMT performs a shift operation that can be vectorized.
4854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4855 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4858 static bool
4859 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4860 gimple **vec_stmt, slp_tree slp_node)
4862 tree vec_dest;
4863 tree scalar_dest;
4864 tree op0, op1 = NULL;
4865 tree vec_oprnd1 = NULL_TREE;
4866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4867 tree vectype;
4868 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4869 enum tree_code code;
4870 machine_mode vec_mode;
4871 tree new_temp;
4872 optab optab;
4873 int icode;
4874 machine_mode optab_op2_mode;
4875 gimple *def_stmt;
4876 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4877 int ndts = 2;
4878 gimple *new_stmt = NULL;
4879 stmt_vec_info prev_stmt_info;
4880 int nunits_in;
4881 int nunits_out;
4882 tree vectype_out;
4883 tree op1_vectype;
4884 int ncopies;
4885 int j, i;
4886 vec<tree> vec_oprnds0 = vNULL;
4887 vec<tree> vec_oprnds1 = vNULL;
4888 tree vop0, vop1;
4889 unsigned int k;
4890 bool scalar_shift_arg = true;
4891 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4892 vec_info *vinfo = stmt_info->vinfo;
4894 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4895 return false;
4897 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4898 && ! vec_stmt)
4899 return false;
4901 /* Is STMT a vectorizable binary/unary operation? */
4902 if (!is_gimple_assign (stmt))
4903 return false;
4905 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4906 return false;
4908 code = gimple_assign_rhs_code (stmt);
4910 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4911 || code == RROTATE_EXPR))
4912 return false;
4914 scalar_dest = gimple_assign_lhs (stmt);
4915 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4916 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 "bit-precision shifts not supported.\n");
4921 return false;
4924 op0 = gimple_assign_rhs1 (stmt);
4925 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4927 if (dump_enabled_p ())
4928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4929 "use not simple.\n");
4930 return false;
4932 /* If op0 is an external or constant def use a vector type with
4933 the same size as the output vector type. */
4934 if (!vectype)
4935 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4936 if (vec_stmt)
4937 gcc_assert (vectype);
4938 if (!vectype)
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4942 "no vectype for scalar type\n");
4943 return false;
4946 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4947 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4948 if (nunits_out != nunits_in)
4949 return false;
4951 op1 = gimple_assign_rhs2 (stmt);
4952 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4954 if (dump_enabled_p ())
4955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4956 "use not simple.\n");
4957 return false;
4960 /* Multiple types in SLP are handled by creating the appropriate number of
4961 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4962 case of SLP. */
4963 if (slp_node)
4964 ncopies = 1;
4965 else
4966 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4968 gcc_assert (ncopies >= 1);
4970 /* Determine whether the shift amount is a vector, or scalar. If the
4971 shift/rotate amount is a vector, use the vector/vector shift optabs. */
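/* Concrete example (hedged): a shift by a constant such as x << 3, or by
   a loop-invariant scalar, keeps SCALAR_SHIFT_ARG set and can use the
   vector/scalar optab checked further down, whereas x << y with y varying
   per iteration forces the vector/vector form.  */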
4973 if ((dt[1] == vect_internal_def
4974 || dt[1] == vect_induction_def)
4975 && !slp_node)
4976 scalar_shift_arg = false;
4977 else if (dt[1] == vect_constant_def
4978 || dt[1] == vect_external_def
4979 || dt[1] == vect_internal_def)
4981 /* In SLP, we need to check whether the shift count is the same;
4982 in loops, if it is a constant or invariant, it is always
4983 a scalar shift. */
4984 if (slp_node)
4986 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4987 gimple *slpstmt;
4989 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4990 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4991 scalar_shift_arg = false;
4994 /* If the shift amount is computed by a pattern stmt we cannot
4995 use the scalar amount directly, so give up and use a vector
4996 shift. */
4997 if (dt[1] == vect_internal_def)
4999 gimple *def = SSA_NAME_DEF_STMT (op1);
5000 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5001 scalar_shift_arg = false;
5004 else
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5008 "operand mode requires invariant argument.\n");
5009 return false;
5012 /* Vector shifted by vector. */
5013 if (!scalar_shift_arg)
5015 optab = optab_for_tree_code (code, vectype, optab_vector);
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE, vect_location,
5018 "vector/vector shift/rotate found.\n");
5020 if (!op1_vectype)
5021 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5022 if (op1_vectype == NULL_TREE
5023 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5027 "unusable type for last operand in"
5028 " vector/vector shift/rotate.\n");
5029 return false;
5032 /* See if the machine has a vector shifted by scalar insn and if not
5033 then see if it has a vector shifted by vector insn. */
5034 else
5036 optab = optab_for_tree_code (code, vectype, optab_scalar);
5037 if (optab
5038 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_NOTE, vect_location,
5042 "vector/scalar shift/rotate found.\n");
5044 else
5046 optab = optab_for_tree_code (code, vectype, optab_vector);
5047 if (optab
5048 && (optab_handler (optab, TYPE_MODE (vectype))
5049 != CODE_FOR_nothing))
5051 scalar_shift_arg = false;
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_NOTE, vect_location,
5055 "vector/vector shift/rotate found.\n");
5057 /* Unlike the other binary operators, shifts/rotates have
5058 an rhs of type int rather than of the same type as the lhs,
5059 so make sure the scalar is of the right type if we are
5060 dealing with vectors of long long/long/short/char. */
5061 if (dt[1] == vect_constant_def)
5062 op1 = fold_convert (TREE_TYPE (vectype), op1);
5063 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5064 TREE_TYPE (op1)))
5066 if (slp_node
5067 && TYPE_MODE (TREE_TYPE (vectype))
5068 != TYPE_MODE (TREE_TYPE (op1)))
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "unusable type for last operand in"
5073 " vector/vector shift/rotate.\n");
5074 return false;
5076 if (vec_stmt && !slp_node)
5078 op1 = fold_convert (TREE_TYPE (vectype), op1);
5079 op1 = vect_init_vector (stmt, op1,
5080 TREE_TYPE (vectype), NULL);
5087 /* Supportable by target? */
5088 if (!optab)
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5092 "no optab.\n");
5093 return false;
5095 vec_mode = TYPE_MODE (vectype);
5096 icode = (int) optab_handler (optab, vec_mode);
5097 if (icode == CODE_FOR_nothing)
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5101 "op not supported by target.\n");
5102 /* Check only during analysis. */
5103 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5104 || (!vec_stmt
5105 && !vect_worthwhile_without_simd_p (vinfo, code)))
5106 return false;
5107 if (dump_enabled_p ())
5108 dump_printf_loc (MSG_NOTE, vect_location,
5109 "proceeding using word mode.\n");
5112 /* Worthwhile without SIMD support? Check only during analysis. */
5113 if (!vec_stmt
5114 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5115 && !vect_worthwhile_without_simd_p (vinfo, code))
5117 if (dump_enabled_p ())
5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5119 "not worthwhile without SIMD support.\n");
5120 return false;
5123 if (!vec_stmt) /* transformation not required. */
5125 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5126 if (dump_enabled_p ())
5127 dump_printf_loc (MSG_NOTE, vect_location,
5128 "=== vectorizable_shift ===\n");
5129 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5130 return true;
5133 /* Transform. */
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_NOTE, vect_location,
5137 "transform binary/unary operation.\n");
5139 /* Handle def. */
5140 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5142 prev_stmt_info = NULL;
5143 for (j = 0; j < ncopies; j++)
5145 /* Handle uses. */
5146 if (j == 0)
5148 if (scalar_shift_arg)
5150 /* Vector shl and shr insn patterns can be defined with scalar
5151 operand 2 (the shift operand). In this case, use the constant or
5152 loop-invariant op1 directly, without extending it to vector mode
5153 first. */
5154 optab_op2_mode = insn_data[icode].operand[2].mode;
5155 if (!VECTOR_MODE_P (optab_op2_mode))
5157 if (dump_enabled_p ())
5158 dump_printf_loc (MSG_NOTE, vect_location,
5159 "operand 1 using scalar mode.\n");
5160 vec_oprnd1 = op1;
5161 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5162 vec_oprnds1.quick_push (vec_oprnd1);
5163 if (slp_node)
5165 /* Store vec_oprnd1 for every vector stmt to be created
5166 for SLP_NODE. We check during the analysis that all
5167 the shift arguments are the same.
5168 TODO: Allow different constants for different vector
5169 stmts generated for an SLP instance. */
5170 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5171 vec_oprnds1.quick_push (vec_oprnd1);
5176 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5177 (a special case for certain kinds of vector shifts);
5178 operand 1 should be of a vector type (the usual case). */
5179 if (vec_oprnd1)
5180 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5181 slp_node);
5182 else
5183 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5184 slp_node);
5186 else
5187 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5189 /* Arguments are ready. Create the new vector stmt. */
5190 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5192 vop1 = vec_oprnds1[i];
5193 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5194 new_temp = make_ssa_name (vec_dest, new_stmt);
5195 gimple_assign_set_lhs (new_stmt, new_temp);
5196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5197 if (slp_node)
5198 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5201 if (slp_node)
5202 continue;
5204 if (j == 0)
5205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5206 else
5207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5208 prev_stmt_info = vinfo_for_stmt (new_stmt);
5211 vec_oprnds0.release ();
5212 vec_oprnds1.release ();
5214 return true;
5218 /* Function vectorizable_operation.
5220 Check if STMT performs a binary, unary or ternary operation that can
5221 be vectorized.
5222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5223 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5226 static bool
5227 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5228 gimple **vec_stmt, slp_tree slp_node)
5230 tree vec_dest;
5231 tree scalar_dest;
5232 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5234 tree vectype;
5235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5236 enum tree_code code, orig_code;
5237 machine_mode vec_mode;
5238 tree new_temp;
5239 int op_type;
5240 optab optab;
5241 bool target_support_p;
5242 gimple *def_stmt;
5243 enum vect_def_type dt[3]
5244 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5245 int ndts = 3;
5246 gimple *new_stmt = NULL;
5247 stmt_vec_info prev_stmt_info;
5248 int nunits_in;
5249 int nunits_out;
5250 tree vectype_out;
5251 int ncopies;
5252 int j, i;
5253 vec<tree> vec_oprnds0 = vNULL;
5254 vec<tree> vec_oprnds1 = vNULL;
5255 vec<tree> vec_oprnds2 = vNULL;
5256 tree vop0, vop1, vop2;
5257 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5258 vec_info *vinfo = stmt_info->vinfo;
5260 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5261 return false;
5263 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5264 && ! vec_stmt)
5265 return false;
5267 /* Is STMT a vectorizable binary/unary operation? */
5268 if (!is_gimple_assign (stmt))
5269 return false;
5271 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5272 return false;
5274 orig_code = code = gimple_assign_rhs_code (stmt);
5276 /* For pointer addition and subtraction, we should use the normal
5277 plus and minus for the vector operation. */
5278 if (code == POINTER_PLUS_EXPR)
5279 code = PLUS_EXPR;
5280 if (code == POINTER_DIFF_EXPR)
5281 code = MINUS_EXPR;
5283 /* Support only unary, binary and ternary operations. */
5284 op_type = TREE_CODE_LENGTH (code);
5285 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5287 if (dump_enabled_p ())
5288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5289 "num. args = %d (not unary/binary/ternary op).\n",
5290 op_type);
5291 return false;
5294 scalar_dest = gimple_assign_lhs (stmt);
5295 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5297 /* Most operations cannot handle bit-precision types without extra
5298 truncations. */
5299 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5300 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5301 /* Exception are bitwise binary operations. */
5302 && code != BIT_IOR_EXPR
5303 && code != BIT_XOR_EXPR
5304 && code != BIT_AND_EXPR)
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5308 "bit-precision arithmetic not supported.\n");
5309 return false;
5312 op0 = gimple_assign_rhs1 (stmt);
5313 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5315 if (dump_enabled_p ())
5316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5317 "use not simple.\n");
5318 return false;
5320 /* If op0 is an external or constant def use a vector type with
5321 the same size as the output vector type. */
5322 if (!vectype)
5324 /* For a boolean type we cannot determine the vectype from an
5325 invariant value (we don't know whether it is a vector
5326 of booleans or a vector of integers). We use the output
5327 vectype because operations on booleans don't change the
5328 type. */
5329 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5331 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "not supported operation on bool value.\n");
5336 return false;
5338 vectype = vectype_out;
5340 else
5341 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5343 if (vec_stmt)
5344 gcc_assert (vectype);
5345 if (!vectype)
5347 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5350 "no vectype for scalar type ");
5351 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5352 TREE_TYPE (op0));
5353 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5356 return false;
5359 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5360 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5361 if (nunits_out != nunits_in)
5362 return false;
5364 if (op_type == binary_op || op_type == ternary_op)
5366 op1 = gimple_assign_rhs2 (stmt);
5367 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5369 if (dump_enabled_p ())
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5371 "use not simple.\n");
5372 return false;
5375 if (op_type == ternary_op)
5377 op2 = gimple_assign_rhs3 (stmt);
5378 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "use not simple.\n");
5383 return false;
5387 /* Multiple types in SLP are handled by creating the appropriate number of
5388 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5389 case of SLP. */
5390 if (slp_node)
5391 ncopies = 1;
5392 else
5393 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5395 gcc_assert (ncopies >= 1);
5397 /* Shifts are handled in vectorizable_shift (). */
5398 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5399 || code == RROTATE_EXPR)
5400 return false;
5402 /* Supportable by target? */
5404 vec_mode = TYPE_MODE (vectype);
5405 if (code == MULT_HIGHPART_EXPR)
5406 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5407 else
5409 optab = optab_for_tree_code (code, vectype, optab_default);
5410 if (!optab)
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5414 "no optab.\n");
5415 return false;
5417 target_support_p = (optab_handler (optab, vec_mode)
5418 != CODE_FOR_nothing);
5421 if (!target_support_p)
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 "op not supported by target.\n");
5426 /* Check only during analysis. */
5427 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5428 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5429 return false;
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_NOTE, vect_location,
5432 "proceeding using word mode.\n");
5435 /* Worthwhile without SIMD support? Check only during analysis. */
5436 if (!VECTOR_MODE_P (vec_mode)
5437 && !vec_stmt
5438 && !vect_worthwhile_without_simd_p (vinfo, code))
5440 if (dump_enabled_p ())
5441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5442 "not worthwhile without SIMD support.\n");
5443 return false;
5446 if (!vec_stmt) /* transformation not required. */
5448 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE, vect_location,
5451 "=== vectorizable_operation ===\n");
5452 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5453 return true;
5456 /* Transform. */
5458 if (dump_enabled_p ())
5459 dump_printf_loc (MSG_NOTE, vect_location,
5460 "transform binary/unary operation.\n");
5462 /* Handle def. */
5463 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5465 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5466 vectors with unsigned elements, but the result is signed. So, we
5467 need to compute the MINUS_EXPR into a vectype temporary and
5468 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5469 tree vec_cvt_dest = NULL_TREE;
5470 if (orig_code == POINTER_DIFF_EXPR)
5471 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
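/* Illustration: for a POINTER_DIFF_EXPR the generated sequence is roughly
     vect_tmp = vect_p0 - vect_p1;                   <unsigned VECTYPE>
     vect_res = VIEW_CONVERT_EXPR <VECTYPE_OUT> (vect_tmp);
   matching the vec_cvt_dest handling in the loop below.  */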
5473 /* In case the vectorization factor (VF) is bigger than the number
5474 of elements that we can fit in a vectype (nunits), we have to generate
5475 more than one vector stmt - i.e. - we need to "unroll" the
5476 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
5477 from one copy of the vector stmt to the next, in the field
5478 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5479 stages to find the correct vector defs to be used when vectorizing
5480 stmts that use the defs of the current stmt. The example below
5481 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5482 we need to create 4 vectorized stmts):
5484 before vectorization:
5485 RELATED_STMT VEC_STMT
5486 S1: x = memref - -
5487 S2: z = x + 1 - -
5489 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5490 there):
5491 RELATED_STMT VEC_STMT
5492 VS1_0: vx0 = memref0 VS1_1 -
5493 VS1_1: vx1 = memref1 VS1_2 -
5494 VS1_2: vx2 = memref2 VS1_3 -
5495 VS1_3: vx3 = memref3 - -
5496 S1: x = load - VS1_0
5497 S2: z = x + 1 - -
5499 step2: vectorize stmt S2 (done here):
5500 To vectorize stmt S2 we first need to find the relevant vector
5501 def for the first operand 'x'. This is, as usual, obtained from
5502 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5503 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5504 relevant vector def 'vx0'. Having found 'vx0' we can generate
5505 the vector stmt VS2_0, and as usual, record it in the
5506 STMT_VINFO_VEC_STMT of stmt S2.
5507 When creating the second copy (VS2_1), we obtain the relevant vector
5508 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5509 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5510 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5511 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5512 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5513 chain of stmts and pointers:
5514 RELATED_STMT VEC_STMT
5515 VS1_0: vx0 = memref0 VS1_1 -
5516 VS1_1: vx1 = memref1 VS1_2 -
5517 VS1_2: vx2 = memref2 VS1_3 -
5518 VS1_3: vx3 = memref3 - -
5519 S1: x = load - VS1_0
5520 VS2_0: vz0 = vx0 + v1 VS2_1 -
5521 VS2_1: vz1 = vx1 + v1 VS2_2 -
5522 VS2_2: vz2 = vx2 + v1 VS2_3 -
5523 VS2_3: vz3 = vx3 + v1 - -
5524 S2: z = x + 1 - VS2_0 */
5526 prev_stmt_info = NULL;
5527 for (j = 0; j < ncopies; j++)
5529 /* Handle uses. */
5530 if (j == 0)
5532 if (op_type == binary_op || op_type == ternary_op)
5533 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5534 slp_node);
5535 else
5536 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5537 slp_node);
5538 if (op_type == ternary_op)
5539 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5540 slp_node);
5542 else
5544 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5545 if (op_type == ternary_op)
5547 tree vec_oprnd = vec_oprnds2.pop ();
5548 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5549 vec_oprnd));
5553 /* Arguments are ready. Create the new vector stmt. */
5554 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5556 vop1 = ((op_type == binary_op || op_type == ternary_op)
5557 ? vec_oprnds1[i] : NULL_TREE);
5558 vop2 = ((op_type == ternary_op)
5559 ? vec_oprnds2[i] : NULL_TREE);
5560 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5561 new_temp = make_ssa_name (vec_dest, new_stmt);
5562 gimple_assign_set_lhs (new_stmt, new_temp);
5563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5564 if (vec_cvt_dest)
5566 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5567 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5568 new_temp);
5569 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5570 gimple_assign_set_lhs (new_stmt, new_temp);
5571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5573 if (slp_node)
5574 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5577 if (slp_node)
5578 continue;
5580 if (j == 0)
5581 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5582 else
5583 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5584 prev_stmt_info = vinfo_for_stmt (new_stmt);
5587 vec_oprnds0.release ();
5588 vec_oprnds1.release ();
5589 vec_oprnds2.release ();
5591 return true;
5594 /* A helper function to ensure data reference DR's base alignment. */
5596 static void
5597 ensure_base_align (struct data_reference *dr)
5599 if (!dr->aux)
5600 return;
5602 if (DR_VECT_AUX (dr)->base_misaligned)
5604 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5606 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5608 if (decl_in_symtab_p (base_decl))
5609 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5610 else
5612 SET_DECL_ALIGN (base_decl, align_base_to);
5613 DECL_USER_ALIGN (base_decl) = 1;
5615 DR_VECT_AUX (dr)->base_misaligned = false;
5620 /* Function get_group_alias_ptr_type.
5622 Return the alias type for the group starting at FIRST_STMT. */
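/* For illustration (editorial note, hypothetical example): for an
   interleaved group that stores the "int" and "float" fields of the same
   struct, the alias sets of the two DR_REFs differ, so the conservative
   ptr_type_node (which may alias anything) is returned and used for every
   access in the group.  */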
5624 static tree
5625 get_group_alias_ptr_type (gimple *first_stmt)
5627 struct data_reference *first_dr, *next_dr;
5628 gimple *next_stmt;
5630 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5631 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5632 while (next_stmt)
5634 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5635 if (get_alias_set (DR_REF (first_dr))
5636 != get_alias_set (DR_REF (next_dr)))
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_NOTE, vect_location,
5640 "conflicting alias set types.\n");
5641 return ptr_type_node;
5643 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5645 return reference_alias_ptr_type (DR_REF (first_dr));
5649 /* Function vectorizable_store.
5651 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5652 can be vectorized.
5653 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5654 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5655 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5657 static bool
5658 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5659 slp_tree slp_node)
5661 tree scalar_dest;
5662 tree data_ref;
5663 tree op;
5664 tree vec_oprnd = NULL_TREE;
5665 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5666 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5667 tree elem_type;
5668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5669 struct loop *loop = NULL;
5670 machine_mode vec_mode;
5671 tree dummy;
5672 enum dr_alignment_support alignment_support_scheme;
5673 gimple *def_stmt;
5674 enum vect_def_type dt;
5675 stmt_vec_info prev_stmt_info = NULL;
5676 tree dataref_ptr = NULL_TREE;
5677 tree dataref_offset = NULL_TREE;
5678 gimple *ptr_incr = NULL;
5679 int ncopies;
5680 int j;
5681 gimple *next_stmt, *first_stmt;
5682 bool grouped_store;
5683 unsigned int group_size, i;
5684 vec<tree> oprnds = vNULL;
5685 vec<tree> result_chain = vNULL;
5686 bool inv_p;
5687 tree offset = NULL_TREE;
5688 vec<tree> vec_oprnds = vNULL;
5689 bool slp = (slp_node != NULL);
5690 unsigned int vec_num;
5691 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5692 vec_info *vinfo = stmt_info->vinfo;
5693 tree aggr_type;
5694 gather_scatter_info gs_info;
5695 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5696 gimple *new_stmt;
5697 int vf;
5698 vec_load_store_type vls_type;
5699 tree ref_type;
5701 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5702 return false;
5704 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5705 && ! vec_stmt)
5706 return false;
5708 /* Is vectorizable store? */
5710 if (!is_gimple_assign (stmt))
5711 return false;
5713 scalar_dest = gimple_assign_lhs (stmt);
5714 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5715 && is_pattern_stmt_p (stmt_info))
5716 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5717 if (TREE_CODE (scalar_dest) != ARRAY_REF
5718 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5719 && TREE_CODE (scalar_dest) != INDIRECT_REF
5720 && TREE_CODE (scalar_dest) != COMPONENT_REF
5721 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5722 && TREE_CODE (scalar_dest) != REALPART_EXPR
5723 && TREE_CODE (scalar_dest) != MEM_REF)
5724 return false;
5726 /* Cannot have hybrid store SLP -- that would mean storing to the
5727 same location twice. */
5728 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5730 gcc_assert (gimple_assign_single_p (stmt));
5732 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5733 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5735 if (loop_vinfo)
5737 loop = LOOP_VINFO_LOOP (loop_vinfo);
5738 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5740 else
5741 vf = 1;
5743 /* Multiple types in SLP are handled by creating the appropriate number of
5744 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5745 case of SLP. */
5746 if (slp)
5747 ncopies = 1;
5748 else
5749 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5751 gcc_assert (ncopies >= 1);
5753 /* FORNOW. This restriction should be relaxed. */
5754 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5756 if (dump_enabled_p ())
5757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5758 "multiple types in nested loop.\n");
5759 return false;
5762 op = gimple_assign_rhs1 (stmt);
5764 /* In the case this is a store from a constant, make sure
5765 native_encode_expr can handle it. */
5766 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
5767 return false;
5769 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "use not simple.\n");
5774 return false;
5777 if (dt == vect_constant_def || dt == vect_external_def)
5778 vls_type = VLS_STORE_INVARIANT;
5779 else
5780 vls_type = VLS_STORE;
5782 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5783 return false;
5785 elem_type = TREE_TYPE (vectype);
5786 vec_mode = TYPE_MODE (vectype);
5788 /* FORNOW. In some cases we can vectorize even if the data type is not
5789 supported (e.g. array initialization with 0). */
5790 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5791 return false;
5793 if (!STMT_VINFO_DATA_REF (stmt_info))
5794 return false;
5796 vect_memory_access_type memory_access_type;
5797 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5798 &memory_access_type, &gs_info))
5799 return false;
5801 if (!vec_stmt) /* transformation not required. */
5803 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5804 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5805 /* The SLP costs are calculated during SLP analysis. */
5806 if (!PURE_SLP_STMT (stmt_info))
5807 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5808 NULL, NULL, NULL);
5809 return true;
5811 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5813 /* Transform. */
5815 ensure_base_align (dr);
5817 if (memory_access_type == VMAT_GATHER_SCATTER)
5819 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5820 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5821 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5822 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5823 edge pe = loop_preheader_edge (loop);
5824 gimple_seq seq;
5825 basic_block new_bb;
5826 enum { NARROW, NONE, WIDEN } modifier;
5827 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5829 if (nunits == (unsigned int) scatter_off_nunits)
5830 modifier = NONE;
5831 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5833 modifier = WIDEN;
5835 vec_perm_builder sel (scatter_off_nunits, scatter_off_nunits, 1);
5836 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5837 sel.quick_push (i | nunits);
5839 vec_perm_indices indices (sel, 1, scatter_off_nunits);
5840 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5841 indices);
5842 gcc_assert (perm_mask != NULL_TREE);
5844 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5846 modifier = NARROW;
5848 vec_perm_builder sel (nunits, nunits, 1);
5849 for (i = 0; i < (unsigned int) nunits; ++i)
5850 sel.quick_push (i | scatter_off_nunits);
5852 vec_perm_indices indices (sel, 2, nunits);
5853 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5854 gcc_assert (perm_mask != NULL_TREE);
5855 ncopies *= 2;
5857 else
5858 gcc_unreachable ();
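/* Worked example (editorial note; the element counts are illustrative,
   not taken from any particular target): with NUNITS == 4 data elements
   and SCATTER_OFF_NUNITS == 8 offset elements, the WIDEN selector built
   above is { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the odd-numbered copies use
   the high half of the offset vector moved into the low lanes.  With
   NUNITS == 8 and SCATTER_OFF_NUNITS == 4 the NARROW selector is likewise
   { 4, 5, 6, 7, 4, 5, 6, 7 } but is applied to the source vector, and
   NCOPIES is doubled so each half is scattered by a separate call.  */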
5860 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5861 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5862 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5863 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5864 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5865 scaletype = TREE_VALUE (arglist);
5867 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5868 && TREE_CODE (rettype) == VOID_TYPE);
5870 ptr = fold_convert (ptrtype, gs_info.base);
5871 if (!is_gimple_min_invariant (ptr))
5873 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5874 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5875 gcc_assert (!new_bb);
5878 /* Currently we support only unconditional scatter stores,
5879 so mask should be all ones. */
5880 mask = build_int_cst (masktype, -1);
5881 mask = vect_init_vector (stmt, mask, masktype, NULL);
5883 scale = build_int_cst (scaletype, gs_info.scale);
5885 prev_stmt_info = NULL;
5886 for (j = 0; j < ncopies; ++j)
5888 if (j == 0)
5890 src = vec_oprnd1
5891 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5892 op = vec_oprnd0
5893 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5895 else if (modifier != NONE && (j & 1))
5897 if (modifier == WIDEN)
5899 src = vec_oprnd1
5900 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5901 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5902 stmt, gsi);
5904 else if (modifier == NARROW)
5906 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5907 stmt, gsi);
5908 op = vec_oprnd0
5909 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5910 vec_oprnd0);
5912 else
5913 gcc_unreachable ();
5915 else
5917 src = vec_oprnd1
5918 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5919 op = vec_oprnd0
5920 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5921 vec_oprnd0);
5924 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5926 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5927 == TYPE_VECTOR_SUBPARTS (srctype));
5928 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5929 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5930 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5931 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5932 src = var;
5935 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5937 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5938 == TYPE_VECTOR_SUBPARTS (idxtype));
5939 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5940 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5941 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5942 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5943 op = var;
5946 new_stmt
5947 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5951 if (prev_stmt_info == NULL)
5952 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5953 else
5954 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5955 prev_stmt_info = vinfo_for_stmt (new_stmt);
5957 return true;
5960 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5961 if (grouped_store)
5963 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5964 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5965 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5967 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5969 /* FORNOW */
5970 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5972 /* We vectorize all the stmts of the interleaving group when we
5973 reach the last stmt in the group. */
5974 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5975 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5976 && !slp)
5978 *vec_stmt = NULL;
5979 return true;
5982 if (slp)
5984 grouped_store = false;
5985 /* VEC_NUM is the number of vect stmts to be created for this
5986 group. */
5987 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5988 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5989 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5990 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5991 op = gimple_assign_rhs1 (first_stmt);
5993 else
5994 /* VEC_NUM is the number of vect stmts to be created for this
5995 group. */
5996 vec_num = group_size;
5998 ref_type = get_group_alias_ptr_type (first_stmt);
6000 else
6002 first_stmt = stmt;
6003 first_dr = dr;
6004 group_size = vec_num = 1;
6005 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6008 if (dump_enabled_p ())
6009 dump_printf_loc (MSG_NOTE, vect_location,
6010 "transform store. ncopies = %d\n", ncopies);
6012 if (memory_access_type == VMAT_ELEMENTWISE
6013 || memory_access_type == VMAT_STRIDED_SLP)
6015 gimple_stmt_iterator incr_gsi;
6016 bool insert_after;
6017 gimple *incr;
6018 tree offvar;
6019 tree ivstep;
6020 tree running_off;
6021 gimple_seq stmts = NULL;
6022 tree stride_base, stride_step, alias_off;
6023 tree vec_oprnd;
6024 unsigned int g;
6026 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6028 stride_base
6029 = fold_build_pointer_plus
6030 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6031 size_binop (PLUS_EXPR,
6032 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6033 convert_to_ptrofftype (DR_INIT (first_dr))));
6034 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6036 /* For a store with loop-invariant (but other than power-of-2)
6037 stride (i.e. not a grouped access) like so:
6039 for (i = 0; i < n; i += stride)
6040 array[i] = ...;
6042 we generate a new induction variable and new stores from
6043 the components of the (vectorized) rhs:
6045 for (j = 0; ; j += VF*stride)
6046 vectemp = ...;
6047 tmp1 = vectemp[0];
6048 array[j] = tmp1;
6049 tmp2 = vectemp[1];
6050 array[j + stride] = tmp2;
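   (Editorial illustration, assuming VF == NUNITS == 4 and stride == 3:
   each vector of the rhs is decomposed into the four scalar stores
   array[j], array[j + 3], array[j + 6] and array[j + 9], and the new
   induction variable j advances by VF * stride == 12 per iteration.)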
6054 unsigned nstores = nunits;
6055 unsigned lnel = 1;
6056 tree ltype = elem_type;
6057 tree lvectype = vectype;
6058 if (slp)
6060 if (group_size < nunits
6061 && nunits % group_size == 0)
6063 nstores = nunits / group_size;
6064 lnel = group_size;
6065 ltype = build_vector_type (elem_type, group_size);
6066 lvectype = vectype;
6068 /* First check whether the vec_extract optab does not support extracting
6069 the vector elts directly; if so, use the fallbacks below. */
6070 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6071 machine_mode vmode;
6072 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6073 || !VECTOR_MODE_P (vmode)
6074 || (convert_optab_handler (vec_extract_optab,
6075 TYPE_MODE (vectype), vmode)
6076 == CODE_FOR_nothing))
6078 /* Try to avoid emitting an extract of vector elements
6079 by performing the extracts using an integer type of the
6080 same size, extracting from a vector of those and then
6081 re-interpreting it as the original vector type if
6082 supported. */
6083 unsigned lsize
6084 = group_size * GET_MODE_BITSIZE (elmode);
6085 elmode = int_mode_for_size (lsize, 0).require ();
6086 /* If we can't construct such a vector fall back to
6087 element extracts from the original vector type and
6088 element size stores. */
6089 if (mode_for_vector (elmode,
6090 nunits / group_size).exists (&vmode)
6091 && VECTOR_MODE_P (vmode)
6092 && (convert_optab_handler (vec_extract_optab,
6093 vmode, elmode)
6094 != CODE_FOR_nothing))
6096 nstores = nunits / group_size;
6097 lnel = group_size;
6098 ltype = build_nonstandard_integer_type (lsize, 1);
6099 lvectype = build_vector_type (ltype, nstores);
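/* Worked example (editorial note, assumed modes): for GROUP_SIZE == 2,
   32-bit elements and NUNITS == 4, LSIZE is 64; if the target provides a
   two-element 64-bit vector mode with vec_extract support to the 64-bit
   integer mode, the vector is punned to that mode and stored as two
   64-bit pieces instead of four 32-bit element extracts.  */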
6101 /* Else fall back to vector extraction anyway.
6102 Fewer stores are more important than avoiding spilling
6103 of the vector we extract from. Compared to the
6104 construction case in vectorizable_load no store-forwarding
6105 issue exists here for reasonable archs. */
6108 else if (group_size >= nunits
6109 && group_size % nunits == 0)
6111 nstores = 1;
6112 lnel = nunits;
6113 ltype = vectype;
6114 lvectype = vectype;
6116 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6120 ivstep = stride_step;
6121 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6122 build_int_cst (TREE_TYPE (ivstep), vf));
6124 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6126 create_iv (stride_base, ivstep, NULL,
6127 loop, &incr_gsi, insert_after,
6128 &offvar, NULL);
6129 incr = gsi_stmt (incr_gsi);
6130 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6132 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6133 if (stmts)
6134 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6136 prev_stmt_info = NULL;
6137 alias_off = build_int_cst (ref_type, 0);
6138 next_stmt = first_stmt;
6139 for (g = 0; g < group_size; g++)
6141 running_off = offvar;
6142 if (g)
6144 tree size = TYPE_SIZE_UNIT (ltype);
6145 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6146 size);
6147 tree newoff = copy_ssa_name (running_off, NULL);
6148 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6149 running_off, pos);
6150 vect_finish_stmt_generation (stmt, incr, gsi);
6151 running_off = newoff;
6153 unsigned int group_el = 0;
6154 unsigned HOST_WIDE_INT
6155 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6156 for (j = 0; j < ncopies; j++)
6158 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6159 and first_stmt == stmt. */
6160 if (j == 0)
6162 if (slp)
6164 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6165 slp_node);
6166 vec_oprnd = vec_oprnds[0];
6168 else
6170 gcc_assert (gimple_assign_single_p (next_stmt));
6171 op = gimple_assign_rhs1 (next_stmt);
6172 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6175 else
6177 if (slp)
6178 vec_oprnd = vec_oprnds[j];
6179 else
6181 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6182 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6185 /* Pun the vector to extract from if necessary. */
6186 if (lvectype != vectype)
6188 tree tem = make_ssa_name (lvectype);
6189 gimple *pun
6190 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6191 lvectype, vec_oprnd));
6192 vect_finish_stmt_generation (stmt, pun, gsi);
6193 vec_oprnd = tem;
6195 for (i = 0; i < nstores; i++)
6197 tree newref, newoff;
6198 gimple *incr, *assign;
6199 tree size = TYPE_SIZE (ltype);
6200 /* Extract the i'th component. */
6201 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6202 bitsize_int (i), size);
6203 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6204 size, pos);
6206 elem = force_gimple_operand_gsi (gsi, elem, true,
6207 NULL_TREE, true,
6208 GSI_SAME_STMT);
6210 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6211 group_el * elsz);
6212 newref = build2 (MEM_REF, ltype,
6213 running_off, this_off);
6215 /* And store it to *running_off. */
6216 assign = gimple_build_assign (newref, elem);
6217 vect_finish_stmt_generation (stmt, assign, gsi);
6219 group_el += lnel;
6220 if (! slp
6221 || group_el == group_size)
6223 newoff = copy_ssa_name (running_off, NULL);
6224 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6225 running_off, stride_step);
6226 vect_finish_stmt_generation (stmt, incr, gsi);
6228 running_off = newoff;
6229 group_el = 0;
6231 if (g == group_size - 1
6232 && !slp)
6234 if (j == 0 && i == 0)
6235 STMT_VINFO_VEC_STMT (stmt_info)
6236 = *vec_stmt = assign;
6237 else
6238 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6239 prev_stmt_info = vinfo_for_stmt (assign);
6243 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6244 if (slp)
6245 break;
6248 vec_oprnds.release ();
6249 return true;
6252 auto_vec<tree> dr_chain (group_size);
6253 oprnds.create (group_size);
6255 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6256 gcc_assert (alignment_support_scheme);
6257 /* Targets with store-lane instructions must not require explicit
6258 realignment. */
6259 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6260 || alignment_support_scheme == dr_aligned
6261 || alignment_support_scheme == dr_unaligned_supported);
6263 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6264 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6265 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6267 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6268 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6269 else
6270 aggr_type = vectype;
6272 /* In case the vectorization factor (VF) is bigger than the number
6273 of elements that we can fit in a vectype (nunits), we have to generate
6274 more than one vector stmt - i.e - we need to "unroll" the
6275 vector stmt by a factor VF/nunits. For more details see documentation in
6276 vect_get_vec_def_for_copy_stmt. */
6278 /* In case of interleaving (non-unit grouped access):
6280 S1: &base + 2 = x2
6281 S2: &base = x0
6282 S3: &base + 1 = x1
6283 S4: &base + 3 = x3
6285 We create vectorized stores starting from base address (the access of the
6286 first stmt in the chain (S2 in the above example)), when the last store stmt
6287 of the chain (S4) is reached:
6289 VS1: &base = vx2
6290 VS2: &base + vec_size*1 = vx0
6291 VS3: &base + vec_size*2 = vx1
6292 VS4: &base + vec_size*3 = vx3
6294 Then permutation statements are generated:
6296 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6297 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6300 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6301 (the order of the data-refs in the output of vect_permute_store_chain
6302 corresponds to the order of scalar stmts in the interleaving chain - see
6303 the documentation of vect_permute_store_chain()).
6305 In case of both multiple types and interleaving, above vector stores and
6306 permutation stmts are created for every copy. The result vector stmts are
6307 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6308 STMT_VINFO_RELATED_STMT for the next copies.
6311 prev_stmt_info = NULL;
6312 for (j = 0; j < ncopies; j++)
6315 if (j == 0)
6317 if (slp)
6319 /* Get vectorized arguments for SLP_NODE. */
6320 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6321 NULL, slp_node);
6323 vec_oprnd = vec_oprnds[0];
6325 else
6327 /* For interleaved stores we collect vectorized defs for all the
6328 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6329 used as an input to vect_permute_store_chain(), and OPRNDS as
6330 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6332 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6333 OPRNDS are of size 1. */
6334 next_stmt = first_stmt;
6335 for (i = 0; i < group_size; i++)
6337 /* Since gaps are not supported for interleaved stores,
6338 GROUP_SIZE is the exact number of stmts in the chain.
6339 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6340 there is no interleaving, GROUP_SIZE is 1, and only one
6341 iteration of the loop will be executed. */
6342 gcc_assert (next_stmt
6343 && gimple_assign_single_p (next_stmt));
6344 op = gimple_assign_rhs1 (next_stmt);
6346 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6347 dr_chain.quick_push (vec_oprnd);
6348 oprnds.quick_push (vec_oprnd);
6349 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6353 /* We should have caught mismatched types earlier. */
6354 gcc_assert (useless_type_conversion_p (vectype,
6355 TREE_TYPE (vec_oprnd)));
6356 bool simd_lane_access_p
6357 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6358 if (simd_lane_access_p
6359 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6360 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6361 && integer_zerop (DR_OFFSET (first_dr))
6362 && integer_zerop (DR_INIT (first_dr))
6363 && alias_sets_conflict_p (get_alias_set (aggr_type),
6364 get_alias_set (TREE_TYPE (ref_type))))
6366 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6367 dataref_offset = build_int_cst (ref_type, 0);
6368 inv_p = false;
6370 else
6371 dataref_ptr
6372 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6373 simd_lane_access_p ? loop : NULL,
6374 offset, &dummy, gsi, &ptr_incr,
6375 simd_lane_access_p, &inv_p);
6376 gcc_assert (bb_vinfo || !inv_p);
6378 else
6380 /* For interleaved stores we created vectorized defs for all the
6381 defs stored in OPRNDS in the previous iteration (previous copy).
6382 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6383 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6384 next copy.
6385 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6386 OPRNDS are of size 1. */
6387 for (i = 0; i < group_size; i++)
6389 op = oprnds[i];
6390 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6391 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6392 dr_chain[i] = vec_oprnd;
6393 oprnds[i] = vec_oprnd;
6395 if (dataref_offset)
6396 dataref_offset
6397 = int_const_binop (PLUS_EXPR, dataref_offset,
6398 TYPE_SIZE_UNIT (aggr_type));
6399 else
6400 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6401 TYPE_SIZE_UNIT (aggr_type));
6404 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6406 tree vec_array;
6408 /* Combine all the vectors into an array. */
6409 vec_array = create_vector_array (vectype, vec_num);
6410 for (i = 0; i < vec_num; i++)
6412 vec_oprnd = dr_chain[i];
6413 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6416 /* Emit:
6417 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
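/* (Editorial illustration: with VEC_NUM == 4 the array groups four
   vectors, and IFN_STORE_LANES is expected to expand to the target's
   interleaving store-multiple instruction where one exists, e.g. st4 on
   AArch64; this path is only taken when the target advertises
   store-lanes support.) */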
6418 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6419 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6420 vec_array);
6421 gimple_call_set_lhs (call, data_ref);
6422 gimple_call_set_nothrow (call, true);
6423 new_stmt = call;
6424 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6426 else
6428 new_stmt = NULL;
6429 if (grouped_store)
6431 if (j == 0)
6432 result_chain.create (group_size);
6433 /* Permute. */
6434 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6435 &result_chain);
6438 next_stmt = first_stmt;
6439 for (i = 0; i < vec_num; i++)
6441 unsigned align, misalign;
6443 if (i > 0)
6444 /* Bump the vector pointer. */
6445 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6446 stmt, NULL_TREE);
6448 if (slp)
6449 vec_oprnd = vec_oprnds[i];
6450 else if (grouped_store)
6451 /* For grouped stores vectorized defs are interleaved in
6452 vect_permute_store_chain(). */
6453 vec_oprnd = result_chain[i];
6455 data_ref = fold_build2 (MEM_REF, vectype,
6456 dataref_ptr,
6457 dataref_offset
6458 ? dataref_offset
6459 : build_int_cst (ref_type, 0));
6460 align = DR_TARGET_ALIGNMENT (first_dr);
6461 if (aligned_access_p (first_dr))
6462 misalign = 0;
6463 else if (DR_MISALIGNMENT (first_dr) == -1)
6465 align = dr_alignment (vect_dr_behavior (first_dr));
6466 misalign = 0;
6467 TREE_TYPE (data_ref)
6468 = build_aligned_type (TREE_TYPE (data_ref),
6469 align * BITS_PER_UNIT);
6471 else
6473 TREE_TYPE (data_ref)
6474 = build_aligned_type (TREE_TYPE (data_ref),
6475 TYPE_ALIGN (elem_type));
6476 misalign = DR_MISALIGNMENT (first_dr);
6478 if (dataref_offset == NULL_TREE
6479 && TREE_CODE (dataref_ptr) == SSA_NAME)
6480 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6481 misalign);
6483 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6485 tree perm_mask = perm_mask_for_reverse (vectype);
6486 tree perm_dest
6487 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6488 vectype);
6489 tree new_temp = make_ssa_name (perm_dest);
6491 /* Generate the permute statement. */
6492 gimple *perm_stmt
6493 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6494 vec_oprnd, perm_mask);
6495 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6497 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6498 vec_oprnd = new_temp;
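/* (Editorial illustration, assuming a 4-element vector: perm_mask_for_reverse
   yields the selector { 3, 2, 1, 0 }, so the VEC_PERM_EXPR above writes the
   elements in reversed order to match the negative-step access.) */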
6501 /* Arguments are ready. Create the new vector stmt. */
6502 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6503 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6505 if (slp)
6506 continue;
6508 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6509 if (!next_stmt)
6510 break;
6513 if (!slp)
6515 if (j == 0)
6516 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6517 else
6518 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6519 prev_stmt_info = vinfo_for_stmt (new_stmt);
6523 oprnds.release ();
6524 result_chain.release ();
6525 vec_oprnds.release ();
6527 return true;
6530 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6531 VECTOR_CST mask. No checks are made that the target platform supports the
6532 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6533 vect_gen_perm_mask_checked. */
6535 tree
6536 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6538 tree mask_type;
6540 unsigned int nunits = sel.length ();
6541 gcc_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6543 mask_type = build_vector_type (ssizetype, nunits);
6544 return vec_perm_indices_to_tree (mask_type, sel);
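/* For example (editorial note): for a 4-element VECTYPE and SEL containing
   { 3, 2, 1, 0 }, the result is a VECTOR_CST of a 4-element ssizetype
   vector with those values, suitable as the selector operand of a
   VEC_PERM_EXPR.  */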
6547 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6548 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6550 tree
6551 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6553 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6554 return vect_gen_perm_mask_any (vectype, sel);
6557 /* Given vector variables X and Y that were generated for the scalar
6558 STMT, generate instructions to permute the vector elements of X and Y
6559 using permutation mask MASK_VEC, insert them at *GSI and return the
6560 permuted vector variable. */
6562 static tree
6563 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6564 gimple_stmt_iterator *gsi)
6566 tree vectype = TREE_TYPE (x);
6567 tree perm_dest, data_ref;
6568 gimple *perm_stmt;
6570 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6571 data_ref = make_ssa_name (perm_dest);
6573 /* Generate the permute statement. */
6574 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6575 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6577 return data_ref;
6580 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6581 inserting them on the loop's preheader edge. Returns true if we
6582 were successful in doing so (and thus STMT can then be moved),
6583 otherwise returns false. */
6585 static bool
6586 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6588 ssa_op_iter i;
6589 tree op;
6590 bool any = false;
6592 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6594 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6595 if (!gimple_nop_p (def_stmt)
6596 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6598 /* Make sure we don't need to recurse. While we could do
6599 so in simple cases, for more complex use webs we don't have
6600 an easy way to preserve stmt order to fulfil
6601 dependencies within them. */
6602 tree op2;
6603 ssa_op_iter i2;
6604 if (gimple_code (def_stmt) == GIMPLE_PHI)
6605 return false;
6606 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6608 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6609 if (!gimple_nop_p (def_stmt2)
6610 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6611 return false;
6613 any = true;
6617 if (!any)
6618 return true;
6620 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6622 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6623 if (!gimple_nop_p (def_stmt)
6624 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6626 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6627 gsi_remove (&gsi, false);
6628 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6632 return true;
6635 /* vectorizable_load.
6637 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6638 can be vectorized.
6639 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6640 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6641 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6643 static bool
6644 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6645 slp_tree slp_node, slp_instance slp_node_instance)
6647 tree scalar_dest;
6648 tree vec_dest = NULL;
6649 tree data_ref = NULL;
6650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6651 stmt_vec_info prev_stmt_info;
6652 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6653 struct loop *loop = NULL;
6654 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6655 bool nested_in_vect_loop = false;
6656 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6657 tree elem_type;
6658 tree new_temp;
6659 machine_mode mode;
6660 gimple *new_stmt = NULL;
6661 tree dummy;
6662 enum dr_alignment_support alignment_support_scheme;
6663 tree dataref_ptr = NULL_TREE;
6664 tree dataref_offset = NULL_TREE;
6665 gimple *ptr_incr = NULL;
6666 int ncopies;
6667 int i, j, group_size, group_gap_adj;
6668 tree msq = NULL_TREE, lsq;
6669 tree offset = NULL_TREE;
6670 tree byte_offset = NULL_TREE;
6671 tree realignment_token = NULL_TREE;
6672 gphi *phi = NULL;
6673 vec<tree> dr_chain = vNULL;
6674 bool grouped_load = false;
6675 gimple *first_stmt;
6676 gimple *first_stmt_for_drptr = NULL;
6677 bool inv_p;
6678 bool compute_in_loop = false;
6679 struct loop *at_loop;
6680 int vec_num;
6681 bool slp = (slp_node != NULL);
6682 bool slp_perm = false;
6683 enum tree_code code;
6684 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6685 int vf;
6686 tree aggr_type;
6687 gather_scatter_info gs_info;
6688 vec_info *vinfo = stmt_info->vinfo;
6689 tree ref_type;
6691 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6692 return false;
6694 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6695 && ! vec_stmt)
6696 return false;
6698 /* Is vectorizable load? */
6699 if (!is_gimple_assign (stmt))
6700 return false;
6702 scalar_dest = gimple_assign_lhs (stmt);
6703 if (TREE_CODE (scalar_dest) != SSA_NAME)
6704 return false;
6706 code = gimple_assign_rhs_code (stmt);
6707 if (code != ARRAY_REF
6708 && code != BIT_FIELD_REF
6709 && code != INDIRECT_REF
6710 && code != COMPONENT_REF
6711 && code != IMAGPART_EXPR
6712 && code != REALPART_EXPR
6713 && code != MEM_REF
6714 && TREE_CODE_CLASS (code) != tcc_declaration)
6715 return false;
6717 if (!STMT_VINFO_DATA_REF (stmt_info))
6718 return false;
6720 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6721 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6723 if (loop_vinfo)
6725 loop = LOOP_VINFO_LOOP (loop_vinfo);
6726 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6727 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6729 else
6730 vf = 1;
6732 /* Multiple types in SLP are handled by creating the appropriate number of
6733 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6734 case of SLP. */
6735 if (slp)
6736 ncopies = 1;
6737 else
6738 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6740 gcc_assert (ncopies >= 1);
6742 /* FORNOW. This restriction should be relaxed. */
6743 if (nested_in_vect_loop && ncopies > 1)
6745 if (dump_enabled_p ())
6746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6747 "multiple types in nested loop.\n");
6748 return false;
6751 /* Invalidate assumptions made by dependence analysis when vectorization
6752 on the unrolled body effectively re-orders stmts. */
6753 if (ncopies > 1
6754 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6755 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6756 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6758 if (dump_enabled_p ())
6759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6760 "cannot perform implicit CSE when unrolling "
6761 "with negative dependence distance\n");
6762 return false;
6765 elem_type = TREE_TYPE (vectype);
6766 mode = TYPE_MODE (vectype);
6768 /* FORNOW. In some cases we can vectorize even if the data type is not
6769 supported (e.g. data copies). */
6770 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6772 if (dump_enabled_p ())
6773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6774 "Aligned load, but unsupported type.\n");
6775 return false;
6778 /* Check if the load is a part of an interleaving chain. */
6779 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6781 grouped_load = true;
6782 /* FORNOW */
6783 gcc_assert (!nested_in_vect_loop);
6784 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6786 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6787 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6789 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6790 slp_perm = true;
6792 /* Invalidate assumptions made by dependence analysis when vectorization
6793 on the unrolled body effectively re-orders stmts. */
6794 if (!PURE_SLP_STMT (stmt_info)
6795 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6796 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6797 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6799 if (dump_enabled_p ())
6800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6801 "cannot perform implicit CSE when performing "
6802 "group loads with negative dependence distance\n");
6803 return false;
6806 /* Similarly, when the stmt is a load that is both part of an SLP
6807 instance and a loop vectorized stmt via the same-dr mechanism,
6808 we have to give up. */
6809 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6810 && (STMT_SLP_TYPE (stmt_info)
6811 != STMT_SLP_TYPE (vinfo_for_stmt
6812 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6814 if (dump_enabled_p ())
6815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6816 "conflicting SLP types for CSEd load\n");
6817 return false;
6821 vect_memory_access_type memory_access_type;
6822 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6823 &memory_access_type, &gs_info))
6824 return false;
6826 if (!vec_stmt) /* transformation not required. */
6828 if (!slp)
6829 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6830 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6831 /* The SLP costs are calculated during SLP analysis. */
6832 if (!PURE_SLP_STMT (stmt_info))
6833 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6834 NULL, NULL, NULL);
6835 return true;
6838 if (!slp)
6839 gcc_assert (memory_access_type
6840 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6842 if (dump_enabled_p ())
6843 dump_printf_loc (MSG_NOTE, vect_location,
6844 "transform load. ncopies = %d\n", ncopies);
6846 /* Transform. */
6848 ensure_base_align (dr);
6850 if (memory_access_type == VMAT_GATHER_SCATTER)
6852 tree vec_oprnd0 = NULL_TREE, op;
6853 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6854 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6855 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6856 edge pe = loop_preheader_edge (loop);
6857 gimple_seq seq;
6858 basic_block new_bb;
6859 enum { NARROW, NONE, WIDEN } modifier;
6860 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6862 if (nunits == gather_off_nunits)
6863 modifier = NONE;
6864 else if (nunits == gather_off_nunits / 2)
6866 modifier = WIDEN;
6868 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
6869 for (i = 0; i < gather_off_nunits; ++i)
6870 sel.quick_push (i | nunits);
6872 vec_perm_indices indices (sel, 1, gather_off_nunits);
6873 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6874 indices);
6876 else if (nunits == gather_off_nunits * 2)
6878 modifier = NARROW;
6880 vec_perm_builder sel (nunits, nunits, 1);
6881 for (i = 0; i < nunits; ++i)
6882 sel.quick_push (i < gather_off_nunits
6883 ? i : i + nunits - gather_off_nunits);
6885 vec_perm_indices indices (sel, 2, nunits);
6886 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6887 ncopies *= 2;
6889 else
6890 gcc_unreachable ();
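/* Worked example (editorial note; the element counts are illustrative
   only): with NUNITS == 4 and GATHER_OFF_NUNITS == 8 the WIDEN selector
   above is { 4, 5, 6, 7, 4, 5, 6, 7 }, which moves the high half of the
   offset vector into the low lanes for the odd-numbered copies.  With
   NUNITS == 8 and GATHER_OFF_NUNITS == 4 the NARROW selector is
   { 0, 1, 2, 3, 8, 9, 10, 11 }, which concatenates the low halves of two
   consecutive narrow gather results into one full-width vector.  */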
6892 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6893 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6894 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6895 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6896 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6897 scaletype = TREE_VALUE (arglist);
6898 gcc_checking_assert (types_compatible_p (srctype, rettype));
6900 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6902 ptr = fold_convert (ptrtype, gs_info.base);
6903 if (!is_gimple_min_invariant (ptr))
6905 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6906 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6907 gcc_assert (!new_bb);
6910 /* Currently we support only unconditional gather loads,
6911 so mask should be all ones. */
6912 if (TREE_CODE (masktype) == INTEGER_TYPE)
6913 mask = build_int_cst (masktype, -1);
6914 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6916 mask = build_int_cst (TREE_TYPE (masktype), -1);
6917 mask = build_vector_from_val (masktype, mask);
6918 mask = vect_init_vector (stmt, mask, masktype, NULL);
6920 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6922 REAL_VALUE_TYPE r;
6923 long tmp[6];
6924 for (j = 0; j < 6; ++j)
6925 tmp[j] = -1;
6926 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6927 mask = build_real (TREE_TYPE (masktype), r);
6928 mask = build_vector_from_val (masktype, mask);
6929 mask = vect_init_vector (stmt, mask, masktype, NULL);
6931 else
6932 gcc_unreachable ();
6934 scale = build_int_cst (scaletype, gs_info.scale);
6936 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6937 merge = build_int_cst (TREE_TYPE (rettype), 0);
6938 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6940 REAL_VALUE_TYPE r;
6941 long tmp[6];
6942 for (j = 0; j < 6; ++j)
6943 tmp[j] = 0;
6944 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6945 merge = build_real (TREE_TYPE (rettype), r);
6947 else
6948 gcc_unreachable ();
6949 merge = build_vector_from_val (rettype, merge);
6950 merge = vect_init_vector (stmt, merge, rettype, NULL);
6952 prev_stmt_info = NULL;
6953 for (j = 0; j < ncopies; ++j)
6955 if (modifier == WIDEN && (j & 1))
6956 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6957 perm_mask, stmt, gsi);
6958 else if (j == 0)
6959 op = vec_oprnd0
6960 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6961 else
6962 op = vec_oprnd0
6963 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6965 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6967 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6968 == TYPE_VECTOR_SUBPARTS (idxtype));
6969 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6970 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6971 new_stmt
6972 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6973 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6974 op = var;
6977 new_stmt
6978 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6980 if (!useless_type_conversion_p (vectype, rettype))
6982 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6983 == TYPE_VECTOR_SUBPARTS (rettype));
6984 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6985 gimple_call_set_lhs (new_stmt, op);
6986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6987 var = make_ssa_name (vec_dest);
6988 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6989 new_stmt
6990 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6992 else
6994 var = make_ssa_name (vec_dest, new_stmt);
6995 gimple_call_set_lhs (new_stmt, var);
6998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7000 if (modifier == NARROW)
7002 if ((j & 1) == 0)
7004 prev_res = var;
7005 continue;
7007 var = permute_vec_elements (prev_res, var,
7008 perm_mask, stmt, gsi);
7009 new_stmt = SSA_NAME_DEF_STMT (var);
7012 if (prev_stmt_info == NULL)
7013 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7014 else
7015 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7016 prev_stmt_info = vinfo_for_stmt (new_stmt);
7018 return true;
7021 if (memory_access_type == VMAT_ELEMENTWISE
7022 || memory_access_type == VMAT_STRIDED_SLP)
7024 gimple_stmt_iterator incr_gsi;
7025 bool insert_after;
7026 gimple *incr;
7027 tree offvar;
7028 tree ivstep;
7029 tree running_off;
7030 vec<constructor_elt, va_gc> *v = NULL;
7031 gimple_seq stmts = NULL;
7032 tree stride_base, stride_step, alias_off;
7034 gcc_assert (!nested_in_vect_loop);
7036 if (slp && grouped_load)
7038 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7039 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7040 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7041 ref_type = get_group_alias_ptr_type (first_stmt);
7043 else
7045 first_stmt = stmt;
7046 first_dr = dr;
7047 group_size = 1;
7048 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7051 stride_base
7052 = fold_build_pointer_plus
7053 (DR_BASE_ADDRESS (first_dr),
7054 size_binop (PLUS_EXPR,
7055 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7056 convert_to_ptrofftype (DR_INIT (first_dr))));
7057 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7059 /* For a load with loop-invariant (but other than power-of-2)
7060 stride (i.e. not a grouped access) like so:
7062 for (i = 0; i < n; i += stride)
7063 ... = array[i];
7065 we generate a new induction variable and new accesses to
7066 form a new vector (or vectors, depending on ncopies):
7068 for (j = 0; ; j += VF*stride)
7069 tmp1 = array[j];
7070 tmp2 = array[j + stride];
7072 vectemp = {tmp1, tmp2, ...}
7075 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7076 build_int_cst (TREE_TYPE (stride_step), vf));
7078 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7080 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7081 loop, &incr_gsi, insert_after,
7082 &offvar, NULL);
7083 incr = gsi_stmt (incr_gsi);
7084 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7086 stride_step = force_gimple_operand (unshare_expr (stride_step),
7087 &stmts, true, NULL_TREE);
7088 if (stmts)
7089 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7091 prev_stmt_info = NULL;
7092 running_off = offvar;
7093 alias_off = build_int_cst (ref_type, 0);
7094 int nloads = nunits;
7095 int lnel = 1;
7096 tree ltype = TREE_TYPE (vectype);
7097 tree lvectype = vectype;
7098 auto_vec<tree> dr_chain;
7099 if (memory_access_type == VMAT_STRIDED_SLP)
7101 if (group_size < nunits)
7103 /* First check if vec_init optab supports construction from
7104 vector elts directly. */
7105 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7106 machine_mode vmode;
7107 if (mode_for_vector (elmode, group_size).exists (&vmode)
7108 && VECTOR_MODE_P (vmode)
7109 && (convert_optab_handler (vec_init_optab,
7110 TYPE_MODE (vectype), vmode)
7111 != CODE_FOR_nothing))
7113 nloads = nunits / group_size;
7114 lnel = group_size;
7115 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7117 else
7119 /* Otherwise avoid emitting a constructor of vector elements
7120 by performing the loads using an integer type of the same
7121 size, constructing a vector of those and then
7122 re-interpreting it as the original vector type.
7123 This avoids a huge runtime penalty due to the general
7124 inability to perform store forwarding from smaller stores
7125 to a larger load. */
7126 unsigned lsize
7127 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7128 elmode = int_mode_for_size (lsize, 0).require ();
7129 /* If we can't construct such a vector fall back to
7130 element loads of the original vector type. */
7131 if (mode_for_vector (elmode,
7132 nunits / group_size).exists (&vmode)
7133 && VECTOR_MODE_P (vmode)
7134 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7135 != CODE_FOR_nothing))
7137 nloads = nunits / group_size;
7138 lnel = group_size;
7139 ltype = build_nonstandard_integer_type (lsize, 1);
7140 lvectype = build_vector_type (ltype, nloads);
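/* Worked example (editorial note, assumed modes): for GROUP_SIZE == 2,
   32-bit elements and NUNITS == 4, LSIZE is 64, so two 64-bit integer
   loads are emitted, combined into a two-element 64-bit vector and then
   VIEW_CONVERTed to the original vector type, instead of four 32-bit
   element loads feeding a CONSTRUCTOR.  */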
7144 else
7146 nloads = 1;
7147 lnel = nunits;
7148 ltype = vectype;
7150 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7152 if (slp)
7154 /* For SLP permutation support we need to load the whole group,
7155 not only the number of vector stmts the permutation result
7156 fits in. */
7157 if (slp_perm)
7159 ncopies = (group_size * vf + nunits - 1) / nunits;
7160 dr_chain.create (ncopies);
7162 else
7163 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7165 int group_el = 0;
7166 unsigned HOST_WIDE_INT
7167 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7168 for (j = 0; j < ncopies; j++)
7170 if (nloads > 1)
7171 vec_alloc (v, nloads);
7172 for (i = 0; i < nloads; i++)
7174 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7175 group_el * elsz);
7176 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7177 build2 (MEM_REF, ltype,
7178 running_off, this_off));
7179 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7180 if (nloads > 1)
7181 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7182 gimple_assign_lhs (new_stmt));
7184 group_el += lnel;
7185 if (! slp
7186 || group_el == group_size)
7188 tree newoff = copy_ssa_name (running_off);
7189 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7190 running_off, stride_step);
7191 vect_finish_stmt_generation (stmt, incr, gsi);
7193 running_off = newoff;
7194 group_el = 0;
7197 if (nloads > 1)
7199 tree vec_inv = build_constructor (lvectype, v);
7200 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7201 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7202 if (lvectype != vectype)
7204 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7205 VIEW_CONVERT_EXPR,
7206 build1 (VIEW_CONVERT_EXPR,
7207 vectype, new_temp));
7208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7212 if (slp)
7214 if (slp_perm)
7215 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7216 else
7217 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7219 else
7221 if (j == 0)
7222 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7223 else
7224 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7225 prev_stmt_info = vinfo_for_stmt (new_stmt);
7228 if (slp_perm)
7230 unsigned n_perms;
7231 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7232 slp_node_instance, false, &n_perms);
7234 return true;
7237 if (grouped_load)
7239 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7240 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7241 /* For SLP vectorization we directly vectorize a subchain
7242 without permutation. */
7243 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7244 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7245 /* For BB vectorization always use the first stmt to base
7246 the data ref pointer on. */
7247 if (bb_vinfo)
7248 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7250 /* Check if the chain of loads is already vectorized. */
7251 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7252 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7253 ??? But we can only do so if there is exactly one
7254 as we have no way to get at the rest. Leave the CSE
7255 opportunity alone.
7256 ??? With the group load eventually participating
7257 in multiple different permutations (having multiple
7258 slp nodes which refer to the same group) the CSE
7259 even results in wrong code. See PR56270. */
7260 && !slp)
7262 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7263 return true;
7265 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7266 group_gap_adj = 0;
7268 /* VEC_NUM is the number of vect stmts to be created for this group. */
7269 if (slp)
7271 grouped_load = false;
7272 /* For SLP permutation support we need to load the whole group,
7273 not only the number of vector stmts the permutation result
7274 fits in. */
7275 if (slp_perm)
7277 vec_num = (group_size * vf + nunits - 1) / nunits;
7278 group_gap_adj = vf * group_size - nunits * vec_num;
7280 else
7282 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7283 group_gap_adj
7284 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7287 else
7288 vec_num = group_size;
7290 ref_type = get_group_alias_ptr_type (first_stmt);
7292 else
7294 first_stmt = stmt;
7295 first_dr = dr;
7296 group_size = vec_num = 1;
7297 group_gap_adj = 0;
7298 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7301 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7302 gcc_assert (alignment_support_scheme);
7303 /* Targets with load-lane instructions must not require explicit
7304 realignment. */
7305 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7306 || alignment_support_scheme == dr_aligned
7307 || alignment_support_scheme == dr_unaligned_supported);
7309 /* In case the vectorization factor (VF) is bigger than the number
7310 of elements that we can fit in a vectype (nunits), we have to generate
7311 more than one vector stmt - i.e - we need to "unroll" the
7312 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7313 from one copy of the vector stmt to the next, in the field
7314 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7315 stages to find the correct vector defs to be used when vectorizing
7316 stmts that use the defs of the current stmt. The example below
7317 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7318 need to create 4 vectorized stmts):
7320 before vectorization:
7321 RELATED_STMT VEC_STMT
7322 S1: x = memref - -
7323 S2: z = x + 1 - -
7325 step 1: vectorize stmt S1:
7326 We first create the vector stmt VS1_0, and, as usual, record a
7327 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7328 Next, we create the vector stmt VS1_1, and record a pointer to
7329 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7330 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7331 stmts and pointers:
7332 RELATED_STMT VEC_STMT
7333 VS1_0: vx0 = memref0 VS1_1 -
7334 VS1_1: vx1 = memref1 VS1_2 -
7335 VS1_2: vx2 = memref2 VS1_3 -
7336 VS1_3: vx3 = memref3 - -
7337 S1: x = load - VS1_0
7338 S2: z = x + 1 - -
7340 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7341 information we recorded in RELATED_STMT field is used to vectorize
7342 stmt S2. */
7344 /* In case of interleaving (non-unit grouped access):
7346 S1: x2 = &base + 2
7347 S2: x0 = &base
7348 S3: x1 = &base + 1
7349 S4: x3 = &base + 3
7351 Vectorized loads are created in the order of memory accesses
7352 starting from the access of the first stmt of the chain:
7354 VS1: vx0 = &base
7355 VS2: vx1 = &base + vec_size*1
7356 VS3: vx3 = &base + vec_size*2
7357 VS4: vx4 = &base + vec_size*3
7359 Then permutation statements are generated:
7361 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7362 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7365 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7366 (the order of the data-refs in the output of vect_permute_load_chain
7367 corresponds to the order of scalar stmts in the interleaving chain - see
7368 the documentation of vect_permute_load_chain()).
7369 The generation of permutation stmts and recording them in
7370 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7372 In case of both multiple types and interleaving, the vector loads and
7373 permutation stmts above are created for every copy. The result vector
7374 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7375 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7377 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7378 on a target that supports unaligned accesses (dr_unaligned_supported)
7379 we generate the following code:
7380 p = initial_addr;
7381 indx = 0;
7382 loop {
7383 p = p + indx * vectype_size;
7384 vec_dest = *(p);
7385 indx = indx + 1;
7388 Otherwise, the data reference is potentially unaligned on a target that
7389 does not support unaligned accesses (dr_explicit_realign_optimized) -
7390 then generate the following code, in which the data in each iteration is
7391 obtained by two vector loads, one from the previous iteration, and one
7392 from the current iteration:
7393 p1 = initial_addr;
7394 msq_init = *(floor(p1))
7395 p2 = initial_addr + VS - 1;
7396 realignment_token = call target_builtin;
7397 indx = 0;
7398 loop {
7399 p2 = p2 + indx * vectype_size
7400 lsq = *(floor(p2))
7401 vec_dest = realign_load (msq, lsq, realignment_token)
7402 indx = indx + 1;
7403 msq = lsq;
7404 } */
7406 /* If the misalignment remains the same throughout the execution of the
7407 loop, we can create the init_addr and permutation mask at the loop
7408 preheader. Otherwise, it needs to be created inside the loop.
7409 This can only occur when vectorizing memory accesses in the inner-loop
7410 nested within an outer-loop that is being vectorized. */
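 /* For instance, a step that is only known to be 4-byte aligned while the
 vector mode is 16 bytes wide fails the check below, so the realignment
 data has to be recomputed inside the loop instead of being set up once
 in the preheader.  */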
7412 if (nested_in_vect_loop
7413 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7415 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7416 compute_in_loop = true;
7419 if ((alignment_support_scheme == dr_explicit_realign_optimized
7420 || alignment_support_scheme == dr_explicit_realign)
7421 && !compute_in_loop)
7423 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7424 alignment_support_scheme, NULL_TREE,
7425 &at_loop);
7426 if (alignment_support_scheme == dr_explicit_realign_optimized)
7428 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7429 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7430 size_one_node);
7433 else
7434 at_loop = loop;
7436 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7437 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7439 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7440 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7441 else
7442 aggr_type = vectype;
7444 prev_stmt_info = NULL;
7445 int group_elt = 0;
7446 for (j = 0; j < ncopies; j++)
7448 /* 1. Create the vector or array pointer update chain. */
7449 if (j == 0)
7451 bool simd_lane_access_p
7452 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7453 if (simd_lane_access_p
7454 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7455 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7456 && integer_zerop (DR_OFFSET (first_dr))
7457 && integer_zerop (DR_INIT (first_dr))
7458 && alias_sets_conflict_p (get_alias_set (aggr_type),
7459 get_alias_set (TREE_TYPE (ref_type)))
7460 && (alignment_support_scheme == dr_aligned
7461 || alignment_support_scheme == dr_unaligned_supported))
7463 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7464 dataref_offset = build_int_cst (ref_type, 0);
7465 inv_p = false;
7467 else if (first_stmt_for_drptr
7468 && first_stmt != first_stmt_for_drptr)
7470 dataref_ptr
7471 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7472 at_loop, offset, &dummy, gsi,
7473 &ptr_incr, simd_lane_access_p,
7474 &inv_p, byte_offset);
7475 /* Adjust the pointer by the difference to first_stmt. */
7476 data_reference_p ptrdr
7477 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7478 tree diff = fold_convert (sizetype,
7479 size_binop (MINUS_EXPR,
7480 DR_INIT (first_dr),
7481 DR_INIT (ptrdr)));
7482 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7483 stmt, diff);
7485 else
7486 dataref_ptr
7487 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7488 offset, &dummy, gsi, &ptr_incr,
7489 simd_lane_access_p, &inv_p,
7490 byte_offset);
7492 else if (dataref_offset)
7493 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7494 TYPE_SIZE_UNIT (aggr_type));
7495 else
7496 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7497 TYPE_SIZE_UNIT (aggr_type));
7499 if (grouped_load || slp_perm)
7500 dr_chain.create (vec_num);
7502 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7504 tree vec_array;
7506 vec_array = create_vector_array (vectype, vec_num);
7508 /* Emit:
7509 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7510 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7511 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7512 data_ref);
7513 gimple_call_set_lhs (call, vec_array);
7514 gimple_call_set_nothrow (call, true);
7515 new_stmt = call;
7516 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7518 /* Extract each vector into an SSA_NAME. */
7519 for (i = 0; i < vec_num; i++)
7521 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7522 vec_array, i);
7523 dr_chain.quick_push (new_temp);
7526 /* Record the mapping between SSA_NAMEs and statements. */
7527 vect_record_grouped_load_vectors (stmt, dr_chain);
7529 else
7531 for (i = 0; i < vec_num; i++)
7533 if (i > 0)
7534 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7535 stmt, NULL_TREE);
7537 /* 2. Create the vector-load in the loop. */
7538 switch (alignment_support_scheme)
7540 case dr_aligned:
7541 case dr_unaligned_supported:
7543 unsigned int align, misalign;
7545 data_ref
7546 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7547 dataref_offset
7548 ? dataref_offset
7549 : build_int_cst (ref_type, 0));
7550 align = DR_TARGET_ALIGNMENT (dr);
7551 if (alignment_support_scheme == dr_aligned)
7553 gcc_assert (aligned_access_p (first_dr));
7554 misalign = 0;
7556 else if (DR_MISALIGNMENT (first_dr) == -1)
7558 align = dr_alignment (vect_dr_behavior (first_dr));
7559 misalign = 0;
7560 TREE_TYPE (data_ref)
7561 = build_aligned_type (TREE_TYPE (data_ref),
7562 align * BITS_PER_UNIT);
7564 else
7566 TREE_TYPE (data_ref)
7567 = build_aligned_type (TREE_TYPE (data_ref),
7568 TYPE_ALIGN (elem_type));
7569 misalign = DR_MISALIGNMENT (first_dr);
7571 if (dataref_offset == NULL_TREE
7572 && TREE_CODE (dataref_ptr) == SSA_NAME)
7573 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7574 align, misalign);
7575 break;
7577 case dr_explicit_realign:
7579 tree ptr, bump;
7581 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7583 if (compute_in_loop)
7584 msq = vect_setup_realignment (first_stmt, gsi,
7585 &realignment_token,
7586 dr_explicit_realign,
7587 dataref_ptr, NULL);
7589 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7590 ptr = copy_ssa_name (dataref_ptr);
7591 else
7592 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7593 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7594 new_stmt = gimple_build_assign
7595 (ptr, BIT_AND_EXPR, dataref_ptr,
7596 build_int_cst
7597 (TREE_TYPE (dataref_ptr),
7598 -(HOST_WIDE_INT) align));
7599 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7600 data_ref
7601 = build2 (MEM_REF, vectype, ptr,
7602 build_int_cst (ref_type, 0));
7603 vec_dest = vect_create_destination_var (scalar_dest,
7604 vectype);
7605 new_stmt = gimple_build_assign (vec_dest, data_ref);
7606 new_temp = make_ssa_name (vec_dest, new_stmt);
7607 gimple_assign_set_lhs (new_stmt, new_temp);
7608 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7609 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7611 msq = new_temp;
7613 bump = size_binop (MULT_EXPR, vs,
7614 TYPE_SIZE_UNIT (elem_type));
7615 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7616 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7617 new_stmt = gimple_build_assign
7618 (NULL_TREE, BIT_AND_EXPR, ptr,
7619 build_int_cst
7620 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7621 ptr = copy_ssa_name (ptr, new_stmt);
7622 gimple_assign_set_lhs (new_stmt, ptr);
7623 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7624 data_ref
7625 = build2 (MEM_REF, vectype, ptr,
7626 build_int_cst (ref_type, 0));
7627 break;
7629 case dr_explicit_realign_optimized:
7631 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7632 new_temp = copy_ssa_name (dataref_ptr);
7633 else
7634 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7635 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7636 new_stmt = gimple_build_assign
7637 (new_temp, BIT_AND_EXPR, dataref_ptr,
7638 build_int_cst (TREE_TYPE (dataref_ptr),
7639 -(HOST_WIDE_INT) align));
7640 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7641 data_ref
7642 = build2 (MEM_REF, vectype, new_temp,
7643 build_int_cst (ref_type, 0));
7644 break;
7646 default:
7647 gcc_unreachable ();
7649 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7650 new_stmt = gimple_build_assign (vec_dest, data_ref);
7651 new_temp = make_ssa_name (vec_dest, new_stmt);
7652 gimple_assign_set_lhs (new_stmt, new_temp);
7653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7655 /* 3. Handle explicit realignment if necessary/supported.
7656 Create in loop:
7657 vec_dest = realign_load (msq, lsq, realignment_token) */
7658 if (alignment_support_scheme == dr_explicit_realign_optimized
7659 || alignment_support_scheme == dr_explicit_realign)
7661 lsq = gimple_assign_lhs (new_stmt);
7662 if (!realignment_token)
7663 realignment_token = dataref_ptr;
7664 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7665 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7666 msq, lsq, realignment_token);
7667 new_temp = make_ssa_name (vec_dest, new_stmt);
7668 gimple_assign_set_lhs (new_stmt, new_temp);
7669 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7671 if (alignment_support_scheme == dr_explicit_realign_optimized)
7673 gcc_assert (phi);
7674 if (i == vec_num - 1 && j == ncopies - 1)
7675 add_phi_arg (phi, lsq,
7676 loop_latch_edge (containing_loop),
7677 UNKNOWN_LOCATION);
7678 msq = lsq;
7682 /* 4. Handle invariant-load. */
7683 if (inv_p && !bb_vinfo)
7685 gcc_assert (!grouped_load);
7686 /* If we have versioned for aliasing or the loop doesn't
7687 have any data dependencies that would preclude this,
7688 then we are sure this is a loop invariant load and
7689 thus we can insert it on the preheader edge. */
7690 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7691 && !nested_in_vect_loop
7692 && hoist_defs_of_uses (stmt, loop))
7694 if (dump_enabled_p ())
7696 dump_printf_loc (MSG_NOTE, vect_location,
7697 "hoisting out of the vectorized "
7698 "loop: ");
7699 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7701 tree tem = copy_ssa_name (scalar_dest);
7702 gsi_insert_on_edge_immediate
7703 (loop_preheader_edge (loop),
7704 gimple_build_assign (tem,
7705 unshare_expr
7706 (gimple_assign_rhs1 (stmt))));
7707 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7708 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7709 set_vinfo_for_stmt (new_stmt,
7710 new_stmt_vec_info (new_stmt, vinfo));
7712 else
7714 gimple_stmt_iterator gsi2 = *gsi;
7715 gsi_next (&gsi2);
7716 new_temp = vect_init_vector (stmt, scalar_dest,
7717 vectype, &gsi2);
7718 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7722 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7724 tree perm_mask = perm_mask_for_reverse (vectype);
7725 new_temp = permute_vec_elements (new_temp, new_temp,
7726 perm_mask, stmt, gsi);
7727 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7730 /* Collect vector loads and later create their permutation in
7731 vect_transform_grouped_load (). */
7732 if (grouped_load || slp_perm)
7733 dr_chain.quick_push (new_temp);
7735 /* Store vector loads in the corresponding SLP_NODE. */
7736 if (slp && !slp_perm)
7737 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7739 /* With an SLP permutation we load the gaps as well; without one
7740 we need to skip the gaps after we manage to fully load
7741 all elements. group_gap_adj is GROUP_SIZE here. */
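 /* For instance, if only the first 6 elements of an 8-element group are
 used by the SLP instance and nunits is 2, group_gap_adj is 2: after
 three vector loads group_elt reaches 6 == group_size - group_gap_adj
 and the pointer is bumped by 2 * sizeof (elem) past the unused tail
 of the group.  */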
7742 group_elt += nunits;
7743 if (group_gap_adj != 0 && ! slp_perm
7744 && group_elt == group_size - group_gap_adj)
7746 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7747 * group_gap_adj);
7748 tree bump = wide_int_to_tree (sizetype, bump_val);
7749 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7750 stmt, bump);
7751 group_elt = 0;
7754 /* Bump the vector pointer to account for a gap or for excess
7755 elements loaded for a permuted SLP load. */
7756 if (group_gap_adj != 0 && slp_perm)
7758 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7759 * group_gap_adj);
7760 tree bump = wide_int_to_tree (sizetype, bump_val);
7761 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7762 stmt, bump);
7766 if (slp && !slp_perm)
7767 continue;
7769 if (slp_perm)
7771 unsigned n_perms;
7772 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7773 slp_node_instance, false,
7774 &n_perms))
7776 dr_chain.release ();
7777 return false;
7780 else
7782 if (grouped_load)
7784 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7785 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7786 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7788 else
7790 if (j == 0)
7791 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7792 else
7793 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7794 prev_stmt_info = vinfo_for_stmt (new_stmt);
7797 dr_chain.release ();
7800 return true;
7803 /* Function vect_is_simple_cond.
7805 Input:
7806 VINFO - the vect info of the loop or basic block being vectorized.
7807 COND - Condition that is checked for simple use.
7809 Output:
7810 *COMP_VECTYPE - the vector type for the comparison.
7811 *DTS - The def types for the arguments of the comparison
7813 Returns whether a COND can be vectorized. Checks whether the
7814 condition operands are supportable using vect_is_simple_use. */
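 /* For example, COND may be a bare boolean SSA_NAME such as _5 (the mask
 case) or an embedded comparison such as a_3 < b_7. In the latter case
 *COMP_VECTYPE comes from the vector types of the operands; for a fully
 invariant comparison it is derived from the scalar type of the operands,
 widened to the element size of VECTYPE where possible.  */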
7816 static bool
7817 vect_is_simple_cond (tree cond, vec_info *vinfo,
7818 tree *comp_vectype, enum vect_def_type *dts,
7819 tree vectype)
7821 tree lhs, rhs;
7822 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7824 /* Mask case. */
7825 if (TREE_CODE (cond) == SSA_NAME
7826 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7828 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7829 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7830 &dts[0], comp_vectype)
7831 || !*comp_vectype
7832 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7833 return false;
7834 return true;
7837 if (!COMPARISON_CLASS_P (cond))
7838 return false;
7840 lhs = TREE_OPERAND (cond, 0);
7841 rhs = TREE_OPERAND (cond, 1);
7843 if (TREE_CODE (lhs) == SSA_NAME)
7845 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7846 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7847 return false;
7849 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7850 || TREE_CODE (lhs) == FIXED_CST)
7851 dts[0] = vect_constant_def;
7852 else
7853 return false;
7855 if (TREE_CODE (rhs) == SSA_NAME)
7857 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7858 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7859 return false;
7861 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7862 || TREE_CODE (rhs) == FIXED_CST)
7863 dts[1] = vect_constant_def;
7864 else
7865 return false;
7867 if (vectype1 && vectype2
7868 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7869 return false;
7871 *comp_vectype = vectype1 ? vectype1 : vectype2;
7872 /* Invariant comparison. */
7873 if (! *comp_vectype)
7875 tree scalar_type = TREE_TYPE (lhs);
7876 /* If we can widen the comparison to match vectype do so. */
7877 if (INTEGRAL_TYPE_P (scalar_type)
7878 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7879 TYPE_SIZE (TREE_TYPE (vectype))))
7880 scalar_type = build_nonstandard_integer_type
7881 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7882 TYPE_UNSIGNED (scalar_type));
7883 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7886 return true;
7889 /* vectorizable_condition.
7891 Check if STMT is a conditional modify expression that can be vectorized.
7892 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7893 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7894 at GSI.
7896 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7897 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7898 the else clause if it is 2).
7900 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
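 /* As a rough illustration, a scalar statement

 x_1 = a_2 < b_3 ? c_4 : d_5;

 is replaced, for a 4-lane vector type, by something like

 vect_x.6 = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

 with purely illustrative SSA names.  */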
7902 bool
7903 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7904 gimple **vec_stmt, tree reduc_def, int reduc_index,
7905 slp_tree slp_node)
7907 tree scalar_dest = NULL_TREE;
7908 tree vec_dest = NULL_TREE;
7909 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7910 tree then_clause, else_clause;
7911 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7912 tree comp_vectype = NULL_TREE;
7913 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7914 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7915 tree vec_compare;
7916 tree new_temp;
7917 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7918 enum vect_def_type dts[4]
7919 = {vect_unknown_def_type, vect_unknown_def_type,
7920 vect_unknown_def_type, vect_unknown_def_type};
7921 int ndts = 4;
7922 int ncopies;
7923 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7924 stmt_vec_info prev_stmt_info = NULL;
7925 int i, j;
7926 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7927 vec<tree> vec_oprnds0 = vNULL;
7928 vec<tree> vec_oprnds1 = vNULL;
7929 vec<tree> vec_oprnds2 = vNULL;
7930 vec<tree> vec_oprnds3 = vNULL;
7931 tree vec_cmp_type;
7932 bool masked = false;
7934 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7935 return false;
7937 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7939 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7940 return false;
7942 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7943 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7944 && reduc_def))
7945 return false;
7947 /* FORNOW: not yet supported. */
7948 if (STMT_VINFO_LIVE_P (stmt_info))
7950 if (dump_enabled_p ())
7951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7952 "value used after loop.\n");
7953 return false;
7957 /* Is vectorizable conditional operation? */
7958 if (!is_gimple_assign (stmt))
7959 return false;
7961 code = gimple_assign_rhs_code (stmt);
7963 if (code != COND_EXPR)
7964 return false;
7966 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7967 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7969 if (slp_node)
7970 ncopies = 1;
7971 else
7972 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7974 gcc_assert (ncopies >= 1);
7975 if (reduc_index && ncopies > 1)
7976 return false; /* FORNOW */
7978 cond_expr = gimple_assign_rhs1 (stmt);
7979 then_clause = gimple_assign_rhs2 (stmt);
7980 else_clause = gimple_assign_rhs3 (stmt);
7982 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7983 &comp_vectype, &dts[0], vectype)
7984 || !comp_vectype)
7985 return false;
7987 gimple *def_stmt;
7988 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7989 &vectype1))
7990 return false;
7991 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7992 &vectype2))
7993 return false;
7995 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7996 return false;
7998 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7999 return false;
8001 masked = !COMPARISON_CLASS_P (cond_expr);
8002 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8004 if (vec_cmp_type == NULL_TREE)
8005 return false;
8007 cond_code = TREE_CODE (cond_expr);
8008 if (!masked)
8010 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8011 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8014 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8016 /* Boolean values may have another representation in vectors
8017 and therefore we prefer bit operations over comparison for
8018 them (which also works for scalar masks). We store opcodes
8019 to use in bitop1 and bitop2. The statement is vectorized as
8020 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2),
8021 depending on the arity of bitop1 and bitop2. */
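 /* For instance, with boolean operands a GT_EXPR "a > b" is emitted as
 "a & ~b" (bitop1 == BIT_NOT_EXPR on the second operand, bitop2 ==
 BIT_AND_EXPR), and an EQ_EXPR as "a ^ b" with the then/else clauses
 swapped, which is equivalent to selecting on ~(a ^ b).  */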
8022 switch (cond_code)
8024 case GT_EXPR:
8025 bitop1 = BIT_NOT_EXPR;
8026 bitop2 = BIT_AND_EXPR;
8027 break;
8028 case GE_EXPR:
8029 bitop1 = BIT_NOT_EXPR;
8030 bitop2 = BIT_IOR_EXPR;
8031 break;
8032 case LT_EXPR:
8033 bitop1 = BIT_NOT_EXPR;
8034 bitop2 = BIT_AND_EXPR;
8035 std::swap (cond_expr0, cond_expr1);
8036 break;
8037 case LE_EXPR:
8038 bitop1 = BIT_NOT_EXPR;
8039 bitop2 = BIT_IOR_EXPR;
8040 std::swap (cond_expr0, cond_expr1);
8041 break;
8042 case NE_EXPR:
8043 bitop1 = BIT_XOR_EXPR;
8044 break;
8045 case EQ_EXPR:
8046 bitop1 = BIT_XOR_EXPR;
8047 bitop2 = BIT_NOT_EXPR;
8048 break;
8049 default:
8050 return false;
8052 cond_code = SSA_NAME;
8055 if (!vec_stmt)
8057 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8058 if (bitop1 != NOP_EXPR)
8060 machine_mode mode = TYPE_MODE (comp_vectype);
8061 optab optab;
8063 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8064 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8065 return false;
8067 if (bitop2 != NOP_EXPR)
8069 optab = optab_for_tree_code (bitop2, comp_vectype,
8070 optab_default);
8071 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8072 return false;
8075 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8076 cond_code))
8078 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8079 return true;
8081 return false;
8084 /* Transform. */
8086 if (!slp_node)
8088 vec_oprnds0.create (1);
8089 vec_oprnds1.create (1);
8090 vec_oprnds2.create (1);
8091 vec_oprnds3.create (1);
8094 /* Handle def. */
8095 scalar_dest = gimple_assign_lhs (stmt);
8096 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8098 /* Handle cond expr. */
8099 for (j = 0; j < ncopies; j++)
8101 gassign *new_stmt = NULL;
8102 if (j == 0)
8104 if (slp_node)
8106 auto_vec<tree, 4> ops;
8107 auto_vec<vec<tree>, 4> vec_defs;
8109 if (masked)
8110 ops.safe_push (cond_expr);
8111 else
8113 ops.safe_push (cond_expr0);
8114 ops.safe_push (cond_expr1);
8116 ops.safe_push (then_clause);
8117 ops.safe_push (else_clause);
8118 vect_get_slp_defs (ops, slp_node, &vec_defs);
8119 vec_oprnds3 = vec_defs.pop ();
8120 vec_oprnds2 = vec_defs.pop ();
8121 if (!masked)
8122 vec_oprnds1 = vec_defs.pop ();
8123 vec_oprnds0 = vec_defs.pop ();
8125 else
8127 gimple *gtemp;
8128 if (masked)
8130 vec_cond_lhs
8131 = vect_get_vec_def_for_operand (cond_expr, stmt,
8132 comp_vectype);
8133 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8134 &gtemp, &dts[0]);
8136 else
8138 vec_cond_lhs
8139 = vect_get_vec_def_for_operand (cond_expr0,
8140 stmt, comp_vectype);
8141 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8143 vec_cond_rhs
8144 = vect_get_vec_def_for_operand (cond_expr1,
8145 stmt, comp_vectype);
8146 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8148 if (reduc_index == 1)
8149 vec_then_clause = reduc_def;
8150 else
8152 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8153 stmt);
8154 vect_is_simple_use (then_clause, loop_vinfo,
8155 &gtemp, &dts[2]);
8157 if (reduc_index == 2)
8158 vec_else_clause = reduc_def;
8159 else
8161 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8162 stmt);
8163 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8167 else
8169 vec_cond_lhs
8170 = vect_get_vec_def_for_stmt_copy (dts[0],
8171 vec_oprnds0.pop ());
8172 if (!masked)
8173 vec_cond_rhs
8174 = vect_get_vec_def_for_stmt_copy (dts[1],
8175 vec_oprnds1.pop ());
8177 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8178 vec_oprnds2.pop ());
8179 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8180 vec_oprnds3.pop ());
8183 if (!slp_node)
8185 vec_oprnds0.quick_push (vec_cond_lhs);
8186 if (!masked)
8187 vec_oprnds1.quick_push (vec_cond_rhs);
8188 vec_oprnds2.quick_push (vec_then_clause);
8189 vec_oprnds3.quick_push (vec_else_clause);
8192 /* Arguments are ready. Create the new vector stmt. */
8193 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8195 vec_then_clause = vec_oprnds2[i];
8196 vec_else_clause = vec_oprnds3[i];
8198 if (masked)
8199 vec_compare = vec_cond_lhs;
8200 else
8202 vec_cond_rhs = vec_oprnds1[i];
8203 if (bitop1 == NOP_EXPR)
8204 vec_compare = build2 (cond_code, vec_cmp_type,
8205 vec_cond_lhs, vec_cond_rhs);
8206 else
8208 new_temp = make_ssa_name (vec_cmp_type);
8209 if (bitop1 == BIT_NOT_EXPR)
8210 new_stmt = gimple_build_assign (new_temp, bitop1,
8211 vec_cond_rhs);
8212 else
8213 new_stmt
8214 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8215 vec_cond_rhs);
8216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8217 if (bitop2 == NOP_EXPR)
8218 vec_compare = new_temp;
8219 else if (bitop2 == BIT_NOT_EXPR)
8221 /* Instead of doing ~x ? y : z do x ? z : y. */
8222 vec_compare = new_temp;
8223 std::swap (vec_then_clause, vec_else_clause);
8225 else
8227 vec_compare = make_ssa_name (vec_cmp_type);
8228 new_stmt
8229 = gimple_build_assign (vec_compare, bitop2,
8230 vec_cond_lhs, new_temp);
8231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8235 new_temp = make_ssa_name (vec_dest);
8236 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8237 vec_compare, vec_then_clause,
8238 vec_else_clause);
8239 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8240 if (slp_node)
8241 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8244 if (slp_node)
8245 continue;
8247 if (j == 0)
8248 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8249 else
8250 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8252 prev_stmt_info = vinfo_for_stmt (new_stmt);
8255 vec_oprnds0.release ();
8256 vec_oprnds1.release ();
8257 vec_oprnds2.release ();
8258 vec_oprnds3.release ();
8260 return true;
8263 /* vectorizable_comparison.
8265 Check if STMT is a comparison expression that can be vectorized.
8266 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8267 comparison, put it in VEC_STMT, and insert it at GSI.
8269 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
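 /* As a sketch, a scalar statement such as "mask_1 = a_2 < b_3" is
 replaced by a vector comparison that produces a boolean vector
 (the mask type), e.g. "vect_mask.4 = vect_a < vect_b", or by the
 bit operations described below when the operands are themselves
 boolean vectors.  */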
8271 static bool
8272 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8273 gimple **vec_stmt, tree reduc_def,
8274 slp_tree slp_node)
8276 tree lhs, rhs1, rhs2;
8277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8278 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8279 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8280 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8281 tree new_temp;
8282 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8283 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8284 int ndts = 2;
8285 unsigned nunits;
8286 int ncopies;
8287 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8288 stmt_vec_info prev_stmt_info = NULL;
8289 int i, j;
8290 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8291 vec<tree> vec_oprnds0 = vNULL;
8292 vec<tree> vec_oprnds1 = vNULL;
8293 gimple *def_stmt;
8294 tree mask_type;
8295 tree mask;
8297 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8298 return false;
8300 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8301 return false;
8303 mask_type = vectype;
8304 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8306 if (slp_node)
8307 ncopies = 1;
8308 else
8309 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8311 gcc_assert (ncopies >= 1);
8312 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8313 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8314 && reduc_def))
8315 return false;
8317 if (STMT_VINFO_LIVE_P (stmt_info))
8319 if (dump_enabled_p ())
8320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8321 "value used after loop.\n");
8322 return false;
8325 if (!is_gimple_assign (stmt))
8326 return false;
8328 code = gimple_assign_rhs_code (stmt);
8330 if (TREE_CODE_CLASS (code) != tcc_comparison)
8331 return false;
8333 rhs1 = gimple_assign_rhs1 (stmt);
8334 rhs2 = gimple_assign_rhs2 (stmt);
8336 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8337 &dts[0], &vectype1))
8338 return false;
8340 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8341 &dts[1], &vectype2))
8342 return false;
8344 if (vectype1 && vectype2
8345 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8346 return false;
8348 vectype = vectype1 ? vectype1 : vectype2;
8350 /* Invariant comparison. */
8351 if (!vectype)
8353 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8354 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8355 return false;
8357 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8358 return false;
8360 /* Can't compare mask and non-mask types. */
8361 if (vectype1 && vectype2
8362 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8363 return false;
8365 /* Boolean values may have another representation in vectors
8366 and therefore we prefer bit operations over comparison for
8367 them (which also works for scalar masks). We store opcodes
8368 to use in bitop1 and bitop2. The statement is vectorized as
8369 BITOP2 (rhs1 BITOP1 rhs2) or
8370 rhs1 BITOP2 (BITOP1 rhs2),
8371 depending on the arity of bitop1 and bitop2. */
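 /* For instance, for mask operands "a >= b" is emitted as "a | ~b"
 (bitop1 == BIT_NOT_EXPR on the second operand, bitop2 == BIT_IOR_EXPR),
 while "a != b" needs only the single operation "a ^ b".  */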
8372 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8374 if (code == GT_EXPR)
8376 bitop1 = BIT_NOT_EXPR;
8377 bitop2 = BIT_AND_EXPR;
8379 else if (code == GE_EXPR)
8381 bitop1 = BIT_NOT_EXPR;
8382 bitop2 = BIT_IOR_EXPR;
8384 else if (code == LT_EXPR)
8386 bitop1 = BIT_NOT_EXPR;
8387 bitop2 = BIT_AND_EXPR;
8388 std::swap (rhs1, rhs2);
8389 std::swap (dts[0], dts[1]);
8391 else if (code == LE_EXPR)
8393 bitop1 = BIT_NOT_EXPR;
8394 bitop2 = BIT_IOR_EXPR;
8395 std::swap (rhs1, rhs2);
8396 std::swap (dts[0], dts[1]);
8398 else
8400 bitop1 = BIT_XOR_EXPR;
8401 if (code == EQ_EXPR)
8402 bitop2 = BIT_NOT_EXPR;
8406 if (!vec_stmt)
8408 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8409 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8410 dts, ndts, NULL, NULL);
8411 if (bitop1 == NOP_EXPR)
8412 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8413 else
8415 machine_mode mode = TYPE_MODE (vectype);
8416 optab optab;
8418 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8419 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8420 return false;
8422 if (bitop2 != NOP_EXPR)
8424 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8425 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8426 return false;
8428 return true;
8432 /* Transform. */
8433 if (!slp_node)
8435 vec_oprnds0.create (1);
8436 vec_oprnds1.create (1);
8439 /* Handle def. */
8440 lhs = gimple_assign_lhs (stmt);
8441 mask = vect_create_destination_var (lhs, mask_type);
8443 /* Handle cmp expr. */
8444 for (j = 0; j < ncopies; j++)
8446 gassign *new_stmt = NULL;
8447 if (j == 0)
8449 if (slp_node)
8451 auto_vec<tree, 2> ops;
8452 auto_vec<vec<tree>, 2> vec_defs;
8454 ops.safe_push (rhs1);
8455 ops.safe_push (rhs2);
8456 vect_get_slp_defs (ops, slp_node, &vec_defs);
8457 vec_oprnds1 = vec_defs.pop ();
8458 vec_oprnds0 = vec_defs.pop ();
8460 else
8462 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8463 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8466 else
8468 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8469 vec_oprnds0.pop ());
8470 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8471 vec_oprnds1.pop ());
8474 if (!slp_node)
8476 vec_oprnds0.quick_push (vec_rhs1);
8477 vec_oprnds1.quick_push (vec_rhs2);
8480 /* Arguments are ready. Create the new vector stmt. */
8481 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8483 vec_rhs2 = vec_oprnds1[i];
8485 new_temp = make_ssa_name (mask);
8486 if (bitop1 == NOP_EXPR)
8488 new_stmt = gimple_build_assign (new_temp, code,
8489 vec_rhs1, vec_rhs2);
8490 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8492 else
8494 if (bitop1 == BIT_NOT_EXPR)
8495 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8496 else
8497 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8498 vec_rhs2);
8499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8500 if (bitop2 != NOP_EXPR)
8502 tree res = make_ssa_name (mask);
8503 if (bitop2 == BIT_NOT_EXPR)
8504 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8505 else
8506 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8507 new_temp);
8508 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8511 if (slp_node)
8512 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8515 if (slp_node)
8516 continue;
8518 if (j == 0)
8519 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8520 else
8521 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8523 prev_stmt_info = vinfo_for_stmt (new_stmt);
8526 vec_oprnds0.release ();
8527 vec_oprnds1.release ();
8529 return true;
8532 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8533 can handle all live statements in the node. Otherwise return true
8534 if STMT is not live or if vectorizable_live_operation can handle it.
8535 GSI and VEC_STMT are as for vectorizable_live_operation. */
8537 static bool
8538 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8539 slp_tree slp_node, gimple **vec_stmt)
8541 if (slp_node)
8543 gimple *slp_stmt;
8544 unsigned int i;
8545 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8547 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8548 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8549 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8550 vec_stmt))
8551 return false;
8554 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8555 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8556 return false;
8558 return true;
8561 /* Make sure the statement is vectorizable. */
8563 bool
8564 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8565 slp_instance node_instance)
8567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8568 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8569 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8570 bool ok;
8571 gimple *pattern_stmt;
8572 gimple_seq pattern_def_seq;
8574 if (dump_enabled_p ())
8576 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8577 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8580 if (gimple_has_volatile_ops (stmt))
8582 if (dump_enabled_p ())
8583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8584 "not vectorized: stmt has volatile operands\n");
8586 return false;
8589 /* Skip stmts that do not need to be vectorized. In loops this is expected
8590 to include:
8591 - the COND_EXPR which is the loop exit condition
8592 - any LABEL_EXPRs in the loop
8593 - computations that are used only for array indexing or loop control.
8594 In basic blocks we only analyze statements that are a part of some SLP
8595 instance, therefore, all the statements are relevant.
8597 Pattern statement needs to be analyzed instead of the original statement
8598 if the original statement is not relevant. Otherwise, we analyze both
8599 statements. In basic blocks we are called from some SLP instance
8600 traversal; there we don't analyze pattern stmts instead, because the
8601 pattern stmts will already be part of the SLP instance. */
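 /* For example, an induction variable increment such as "i_10 = i_9 + 1"
 that only feeds the loop exit condition is neither relevant nor live,
 so it is skipped here.  */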
8603 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8604 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8605 && !STMT_VINFO_LIVE_P (stmt_info))
8607 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8608 && pattern_stmt
8609 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8610 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8612 /* Analyze PATTERN_STMT instead of the original stmt. */
8613 stmt = pattern_stmt;
8614 stmt_info = vinfo_for_stmt (pattern_stmt);
8615 if (dump_enabled_p ())
8617 dump_printf_loc (MSG_NOTE, vect_location,
8618 "==> examining pattern statement: ");
8619 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8622 else
8624 if (dump_enabled_p ())
8625 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8627 return true;
8630 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8631 && node == NULL
8632 && pattern_stmt
8633 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8634 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8636 /* Analyze PATTERN_STMT too. */
8637 if (dump_enabled_p ())
8639 dump_printf_loc (MSG_NOTE, vect_location,
8640 "==> examining pattern statement: ");
8641 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8645 node_instance))
8646 return false;
8649 if (is_pattern_stmt_p (stmt_info)
8650 && node == NULL
8651 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8653 gimple_stmt_iterator si;
8655 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8657 gimple *pattern_def_stmt = gsi_stmt (si);
8658 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8659 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8661 /* Analyze def stmt of STMT if it's a pattern stmt. */
8662 if (dump_enabled_p ())
8664 dump_printf_loc (MSG_NOTE, vect_location,
8665 "==> examining pattern def statement: ");
8666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8669 if (!vect_analyze_stmt (pattern_def_stmt,
8670 need_to_vectorize, node, node_instance))
8671 return false;
8676 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8678 case vect_internal_def:
8679 break;
8681 case vect_reduction_def:
8682 case vect_nested_cycle:
8683 gcc_assert (!bb_vinfo
8684 && (relevance == vect_used_in_outer
8685 || relevance == vect_used_in_outer_by_reduction
8686 || relevance == vect_used_by_reduction
8687 || relevance == vect_unused_in_scope
8688 || relevance == vect_used_only_live));
8689 break;
8691 case vect_induction_def:
8692 gcc_assert (!bb_vinfo);
8693 break;
8695 case vect_constant_def:
8696 case vect_external_def:
8697 case vect_unknown_def_type:
8698 default:
8699 gcc_unreachable ();
8702 if (STMT_VINFO_RELEVANT_P (stmt_info))
8704 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8705 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8706 || (is_gimple_call (stmt)
8707 && gimple_call_lhs (stmt) == NULL_TREE));
8708 *need_to_vectorize = true;
8711 if (PURE_SLP_STMT (stmt_info) && !node)
8713 dump_printf_loc (MSG_NOTE, vect_location,
8714 "handled only by SLP analysis\n");
8715 return true;
8718 ok = true;
8719 if (!bb_vinfo
8720 && (STMT_VINFO_RELEVANT_P (stmt_info)
8721 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8722 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8723 || vectorizable_conversion (stmt, NULL, NULL, node)
8724 || vectorizable_shift (stmt, NULL, NULL, node)
8725 || vectorizable_operation (stmt, NULL, NULL, node)
8726 || vectorizable_assignment (stmt, NULL, NULL, node)
8727 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8728 || vectorizable_call (stmt, NULL, NULL, node)
8729 || vectorizable_store (stmt, NULL, NULL, node)
8730 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8731 || vectorizable_induction (stmt, NULL, NULL, node)
8732 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8733 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8734 else
8736 if (bb_vinfo)
8737 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8738 || vectorizable_conversion (stmt, NULL, NULL, node)
8739 || vectorizable_shift (stmt, NULL, NULL, node)
8740 || vectorizable_operation (stmt, NULL, NULL, node)
8741 || vectorizable_assignment (stmt, NULL, NULL, node)
8742 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8743 || vectorizable_call (stmt, NULL, NULL, node)
8744 || vectorizable_store (stmt, NULL, NULL, node)
8745 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8746 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8749 if (!ok)
8751 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8754 "not vectorized: relevant stmt not ");
8755 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8756 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8759 return false;
8762 if (bb_vinfo)
8763 return true;
8765 /* Stmts that are (also) "live" (i.e., used outside of the loop)
8766 need extra handling, except for vectorizable reductions. */
8767 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8768 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8770 if (dump_enabled_p ())
8772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8773 "not vectorized: live stmt not supported: ");
8774 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8777 return false;
8780 return true;
8784 /* Function vect_transform_stmt.
8786 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8788 bool
8789 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8790 bool *grouped_store, slp_tree slp_node,
8791 slp_instance slp_node_instance)
8793 bool is_store = false;
8794 gimple *vec_stmt = NULL;
8795 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8796 bool done;
8798 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8799 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8801 switch (STMT_VINFO_TYPE (stmt_info))
8803 case type_demotion_vec_info_type:
8804 case type_promotion_vec_info_type:
8805 case type_conversion_vec_info_type:
8806 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8807 gcc_assert (done);
8808 break;
8810 case induc_vec_info_type:
8811 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8812 gcc_assert (done);
8813 break;
8815 case shift_vec_info_type:
8816 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8817 gcc_assert (done);
8818 break;
8820 case op_vec_info_type:
8821 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8822 gcc_assert (done);
8823 break;
8825 case assignment_vec_info_type:
8826 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8827 gcc_assert (done);
8828 break;
8830 case load_vec_info_type:
8831 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8832 slp_node_instance);
8833 gcc_assert (done);
8834 break;
8836 case store_vec_info_type:
8837 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8838 gcc_assert (done);
8839 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8841 /* In case of interleaving, the whole chain is vectorized when the
8842 last store in the chain is reached. Store stmts before the last
8843 one are skipped, and their vec_stmt_info shouldn't be freed
8844 meanwhile. */
8845 *grouped_store = true;
8846 if (STMT_VINFO_VEC_STMT (stmt_info))
8847 is_store = true;
8849 else
8850 is_store = true;
8851 break;
8853 case condition_vec_info_type:
8854 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8855 gcc_assert (done);
8856 break;
8858 case comparison_vec_info_type:
8859 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8860 gcc_assert (done);
8861 break;
8863 case call_vec_info_type:
8864 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8865 stmt = gsi_stmt (*gsi);
8866 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8867 is_store = true;
8868 break;
8870 case call_simd_clone_vec_info_type:
8871 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8872 stmt = gsi_stmt (*gsi);
8873 break;
8875 case reduc_vec_info_type:
8876 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8877 slp_node_instance);
8878 gcc_assert (done);
8879 break;
8881 default:
8882 if (!STMT_VINFO_LIVE_P (stmt_info))
8884 if (dump_enabled_p ())
8885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8886 "stmt not supported.\n");
8887 gcc_unreachable ();
8891 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8892 This would break hybrid SLP vectorization. */
8893 if (slp_node)
8894 gcc_assert (!vec_stmt
8895 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8897 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8898 is being vectorized, but outside the immediately enclosing loop. */
8899 if (vec_stmt
8900 && STMT_VINFO_LOOP_VINFO (stmt_info)
8901 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8902 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8903 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8904 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8905 || STMT_VINFO_RELEVANT (stmt_info) ==
8906 vect_used_in_outer_by_reduction))
8908 struct loop *innerloop = LOOP_VINFO_LOOP (
8909 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8910 imm_use_iterator imm_iter;
8911 use_operand_p use_p;
8912 tree scalar_dest;
8913 gimple *exit_phi;
8915 if (dump_enabled_p ())
8916 dump_printf_loc (MSG_NOTE, vect_location,
8917 "Record the vdef for outer-loop vectorization.\n");
8919 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8920 (to be used when vectorizing outer-loop stmts that use the DEF of
8921 STMT). */
8922 if (gimple_code (stmt) == GIMPLE_PHI)
8923 scalar_dest = PHI_RESULT (stmt);
8924 else
8925 scalar_dest = gimple_assign_lhs (stmt);
8927 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8929 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8931 exit_phi = USE_STMT (use_p);
8932 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8937 /* Handle stmts whose DEF is used outside the loop-nest that is
8938 being vectorized. */
8939 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8941 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8942 gcc_assert (done);
8945 if (vec_stmt)
8946 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8948 return is_store;
8952 /* Remove a group of stores (for SLP or interleaving), free their
8953 stmt_vec_info. */
8955 void
8956 vect_remove_stores (gimple *first_stmt)
8958 gimple *next = first_stmt;
8959 gimple *tmp;
8960 gimple_stmt_iterator next_si;
8962 while (next)
8964 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8966 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8967 if (is_pattern_stmt_p (stmt_info))
8968 next = STMT_VINFO_RELATED_STMT (stmt_info);
8969 /* Free the attached stmt_vec_info and remove the stmt. */
8970 next_si = gsi_for_stmt (next);
8971 unlink_stmt_vdef (next);
8972 gsi_remove (&next_si, true);
8973 release_defs (next);
8974 free_stmt_vec_info (next);
8975 next = tmp;
8980 /* Function new_stmt_vec_info.
8982 Create and initialize a new stmt_vec_info struct for STMT. */
8984 stmt_vec_info
8985 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8987 stmt_vec_info res;
8988 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8990 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8991 STMT_VINFO_STMT (res) = stmt;
8992 res->vinfo = vinfo;
8993 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8994 STMT_VINFO_LIVE_P (res) = false;
8995 STMT_VINFO_VECTYPE (res) = NULL;
8996 STMT_VINFO_VEC_STMT (res) = NULL;
8997 STMT_VINFO_VECTORIZABLE (res) = true;
8998 STMT_VINFO_IN_PATTERN_P (res) = false;
8999 STMT_VINFO_RELATED_STMT (res) = NULL;
9000 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9001 STMT_VINFO_DATA_REF (res) = NULL;
9002 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9003 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9005 if (gimple_code (stmt) == GIMPLE_PHI
9006 && is_loop_header_bb_p (gimple_bb (stmt)))
9007 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9008 else
9009 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9011 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9012 STMT_SLP_TYPE (res) = loop_vect;
9013 STMT_VINFO_NUM_SLP_USES (res) = 0;
9015 GROUP_FIRST_ELEMENT (res) = NULL;
9016 GROUP_NEXT_ELEMENT (res) = NULL;
9017 GROUP_SIZE (res) = 0;
9018 GROUP_STORE_COUNT (res) = 0;
9019 GROUP_GAP (res) = 0;
9020 GROUP_SAME_DR_STMT (res) = NULL;
9022 return res;
9026 /* Create a hash table for stmt_vec_info. */
9028 void
9029 init_stmt_vec_info_vec (void)
9031 gcc_assert (!stmt_vec_info_vec.exists ());
9032 stmt_vec_info_vec.create (50);
9036 /* Free hash table for stmt_vec_info. */
9038 void
9039 free_stmt_vec_info_vec (void)
9041 unsigned int i;
9042 stmt_vec_info info;
9043 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9044 if (info != NULL)
9045 free_stmt_vec_info (STMT_VINFO_STMT (info));
9046 gcc_assert (stmt_vec_info_vec.exists ());
9047 stmt_vec_info_vec.release ();
9051 /* Free stmt vectorization related info. */
9053 void
9054 free_stmt_vec_info (gimple *stmt)
9056 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9058 if (!stmt_info)
9059 return;
9061 /* Check if this statement has a related "pattern stmt"
9062 (introduced by the vectorizer during the pattern recognition
9063 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9064 too. */
9065 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9067 stmt_vec_info patt_info
9068 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9069 if (patt_info)
9071 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9072 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9073 gimple_set_bb (patt_stmt, NULL);
9074 tree lhs = gimple_get_lhs (patt_stmt);
9075 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9076 release_ssa_name (lhs);
9077 if (seq)
9079 gimple_stmt_iterator si;
9080 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9082 gimple *seq_stmt = gsi_stmt (si);
9083 gimple_set_bb (seq_stmt, NULL);
9084 lhs = gimple_get_lhs (seq_stmt);
9085 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9086 release_ssa_name (lhs);
9087 free_stmt_vec_info (seq_stmt);
9090 free_stmt_vec_info (patt_stmt);
9094 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9095 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9096 set_vinfo_for_stmt (stmt, NULL);
9097 free (stmt_info);
9101 /* Function get_vectype_for_scalar_type_and_size.
9103 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9104 by the target. */
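 /* For example, on a target with 128-bit vectors, SCALAR_TYPE == int
 (32 bits) and SIZE == 16 would yield a 4-element integer vector type,
 while SIZE == 0 lets the target pick its preferred SIMD mode; the exact
 result depends on the target hooks consulted below.  */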
9106 static tree
9107 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9109 tree orig_scalar_type = scalar_type;
9110 scalar_mode inner_mode;
9111 machine_mode simd_mode;
9112 int nunits;
9113 tree vectype;
9115 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9116 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9117 return NULL_TREE;
9119 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9121 /* For vector types of elements whose mode precision doesn't
9122 match their type's precision we use an element type of mode
9123 precision. The vectorization routines will have to make sure
9124 they support the proper result truncation/extension.
9125 We also make sure to build vector types with INTEGER_TYPE
9126 component type only. */
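 /* For instance, a BOOLEAN_TYPE with QImode is replaced here by an 8-bit
 unsigned INTEGER_TYPE before the vector type is built, both because its
 precision may not match the mode and because only INTEGER_TYPE
 components are used.  */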
9127 if (INTEGRAL_TYPE_P (scalar_type)
9128 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9129 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9130 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9131 TYPE_UNSIGNED (scalar_type));
9133 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9134 When the component mode passes the above test simply use a type
9135 corresponding to that mode. The theory is that any use that
9136 would cause problems with this will disable vectorization anyway. */
9137 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9138 && !INTEGRAL_TYPE_P (scalar_type))
9139 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9141 /* We can't build a vector type of elements with alignment bigger than
9142 their size. */
9143 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9144 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9145 TYPE_UNSIGNED (scalar_type));
9147 /* If we fell back to using the mode, fail if there was
9148 no scalar type for it. */
9149 if (scalar_type == NULL_TREE)
9150 return NULL_TREE;
9152 /* If no size was supplied use the mode the target prefers. Otherwise
9153 look up a vector mode of the specified size. */
9154 if (size == 0)
9155 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9156 else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
9157 return NULL_TREE;
9158 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9159 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9160 if (nunits < 1)
9161 return NULL_TREE;
9163 vectype = build_vector_type (scalar_type, nunits);
9165 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9166 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9167 return NULL_TREE;
9169 /* Re-attach the address-space qualifier if we canonicalized the scalar
9170 type. */
9171 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9172 return build_qualified_type
9173 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9175 return vectype;
9178 unsigned int current_vector_size;
9180 /* Function get_vectype_for_scalar_type.
9182 Returns the vector type corresponding to SCALAR_TYPE as supported
9183 by the target. */
9185 tree
9186 get_vectype_for_scalar_type (tree scalar_type)
9188 tree vectype;
9189 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9190 current_vector_size);
9191 if (vectype
9192 && current_vector_size == 0)
9193 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9194 return vectype;
9197 /* Function get_mask_type_for_scalar_type.
9199 Returns the mask type corresponding to the result of a comparison
9200 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9202 tree
9203 get_mask_type_for_scalar_type (tree scalar_type)
9205 tree vectype = get_vectype_for_scalar_type (scalar_type);
9207 if (!vectype)
9208 return NULL;
9210 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9211 current_vector_size);
9214 /* Function get_same_sized_vectype
9216 Returns a vector type corresponding to SCALAR_TYPE of size
9217 VECTOR_TYPE if supported by the target. */
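 /* For example, given a 128-bit vector of 4 ints and SCALAR_TYPE == short,
 this would return a 128-bit vector of 8 shorts, assuming the target
 supports such a mode.  */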
9219 tree
9220 get_same_sized_vectype (tree scalar_type, tree vector_type)
9222 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9223 return build_same_sized_truth_vector_type (vector_type);
9225 return get_vectype_for_scalar_type_and_size
9226 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9229 /* Function vect_is_simple_use.
9231 Input:
9232 VINFO - the vect info of the loop or basic block that is being vectorized.
9233 OPERAND - operand in the loop or bb.
9234 Output:
9235 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9236 DT - the type of definition
9238 Returns whether a stmt with OPERAND can be vectorized.
9239 For loops, supportable operands are constants, loop invariants, and operands
9240 that are defined by the current iteration of the loop. Unsupportable
9241 operands are those that are defined by a previous iteration of the loop (as
9242 is the case in reduction/induction computations).
9243 For basic blocks, supportable operands are constants and bb invariants.
9244 For now, operands defined outside the basic block are not supported. */
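 /* For example, in a loop "a[i] = b[i] + c;" a constant operand is
 classified as vect_constant_def, the use of "c" defined before the
 loop as vect_external_def, and the value loaded from b[i] as
 vect_internal_def.  */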
9246 bool
9247 vect_is_simple_use (tree operand, vec_info *vinfo,
9248 gimple **def_stmt, enum vect_def_type *dt)
9250 *def_stmt = NULL;
9251 *dt = vect_unknown_def_type;
9253 if (dump_enabled_p ())
9255 dump_printf_loc (MSG_NOTE, vect_location,
9256 "vect_is_simple_use: operand ");
9257 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9258 dump_printf (MSG_NOTE, "\n");
9261 if (CONSTANT_CLASS_P (operand))
9263 *dt = vect_constant_def;
9264 return true;
9267 if (is_gimple_min_invariant (operand))
9269 *dt = vect_external_def;
9270 return true;
9273 if (TREE_CODE (operand) != SSA_NAME)
9275 if (dump_enabled_p ())
9276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9277 "not ssa-name.\n");
9278 return false;
9281 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9283 *dt = vect_external_def;
9284 return true;
9287 *def_stmt = SSA_NAME_DEF_STMT (operand);
9288 if (dump_enabled_p ())
9290 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9291 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9294 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9295 *dt = vect_external_def;
9296 else
9298 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9299 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9302 if (dump_enabled_p ())
9304 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9305 switch (*dt)
9307 case vect_uninitialized_def:
9308 dump_printf (MSG_NOTE, "uninitialized\n");
9309 break;
9310 case vect_constant_def:
9311 dump_printf (MSG_NOTE, "constant\n");
9312 break;
9313 case vect_external_def:
9314 dump_printf (MSG_NOTE, "external\n");
9315 break;
9316 case vect_internal_def:
9317 dump_printf (MSG_NOTE, "internal\n");
9318 break;
9319 case vect_induction_def:
9320 dump_printf (MSG_NOTE, "induction\n");
9321 break;
9322 case vect_reduction_def:
9323 dump_printf (MSG_NOTE, "reduction\n");
9324 break;
9325 case vect_double_reduction_def:
9326 dump_printf (MSG_NOTE, "double reduction\n");
9327 break;
9328 case vect_nested_cycle:
9329 dump_printf (MSG_NOTE, "nested cycle\n");
9330 break;
9331 case vect_unknown_def_type:
9332 dump_printf (MSG_NOTE, "unknown\n");
9333 break;
9337 if (*dt == vect_unknown_def_type)
9339 if (dump_enabled_p ())
9340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9341 "Unsupported pattern.\n");
9342 return false;
9345 switch (gimple_code (*def_stmt))
9347 case GIMPLE_PHI:
9348 case GIMPLE_ASSIGN:
9349 case GIMPLE_CALL:
9350 break;
9351 default:
9352 if (dump_enabled_p ())
9353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9354 "unsupported defining stmt:\n");
9355 return false;
9358 return true;
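/* Illustrative sketch (hypothetical helper, not from the GCC sources; real
   callers inspect specific rhs operands rather than looping blindly): the
   typical pattern is to classify every data operand of a candidate
   statement with vect_is_simple_use and give up on vectorizing the
   statement if any operand is not a simple use.  */

static bool
example_operands_are_simple (gimple *stmt, vec_info *vinfo)
{
  /* Operand 0 is the lhs for assignments, so start at 1.  */
  for (unsigned i = 1; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (op && !vect_is_simple_use (op, vinfo, &def_stmt, &dt))
        return false;
    }
  return true;
}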
9361 /* Function vect_is_simple_use.
9363 Same as vect_is_simple_use but also determines the vector operand
9364 type of OPERAND and stores it to *VECTYPE. If the definition of
9365 OPERAND is vect_uninitialized_def, vect_constant_def or
9366 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9367 is responsible for computing the best-suited vector type for the
9368 scalar operand. */
9370 bool
9371 vect_is_simple_use (tree operand, vec_info *vinfo,
9372 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9374 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9375 return false;
9377 /* Now get a vector type if the def is internal, otherwise supply
9378 NULL_TREE and leave it up to the caller to figure out a proper
9379 type for the use stmt. */
9380 if (*dt == vect_internal_def
9381 || *dt == vect_induction_def
9382 || *dt == vect_reduction_def
9383 || *dt == vect_double_reduction_def
9384 || *dt == vect_nested_cycle)
9386 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9388 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9389 && !STMT_VINFO_RELEVANT (stmt_info)
9390 && !STMT_VINFO_LIVE_P (stmt_info))
9391 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9393 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9394 gcc_assert (*vectype != NULL_TREE);
9396 else if (*dt == vect_uninitialized_def
9397 || *dt == vect_constant_def
9398 || *dt == vect_external_def)
9399 *vectype = NULL_TREE;
9400 else
9401 gcc_unreachable ();
9403 return true;
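/* Illustrative sketch (hypothetical helper, not from the GCC sources):
   using the overload that also returns the vector type.  For constant and
   external operands *VECTYPE comes back NULL_TREE, and this caller falls
   back to the natural vector type of the operand's scalar type.  */

static tree
example_vectype_of_operand (tree op, vec_info *vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree vectype;

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &vectype))
    return NULL_TREE;

  /* Internal/induction/reduction defs already carry a vector type;
     otherwise pick one from the scalar type of the operand.  */
  if (!vectype)
    vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  return vectype;
}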
9407 /* Function supportable_widening_operation
9409 Check whether an operation represented by the code CODE is a
9410 widening operation that is supported by the target platform in
9411 vector form (i.e., when operating on arguments of type VECTYPE_IN
9412 and producing a result of type VECTYPE_OUT).
9414 Widening operations we currently support are NOP (CONVERT), FLOAT
9415 and WIDEN_MULT. This function checks if these operations are supported
9416 by the target platform either directly (via vector tree-codes), or via
9417 target builtins.
9419 Output:
9420 - CODE1 and CODE2 are codes of vector operations to be used when
9421 vectorizing the operation, if available.
9422 - MULTI_STEP_CVT determines the number of required intermediate steps in
9423 case of multi-step conversion (like char->short->int - in that case
9424 MULTI_STEP_CVT will be 1).
9425 - INTERM_TYPES contains the intermediate type required to perform the
9426 widening operation (short in the above example). */
9428 bool
9429 supportable_widening_operation (enum tree_code code, gimple *stmt,
9430 tree vectype_out, tree vectype_in,
9431 enum tree_code *code1, enum tree_code *code2,
9432 int *multi_step_cvt,
9433 vec<tree> *interm_types)
9435 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9436 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9437 struct loop *vect_loop = NULL;
9438 machine_mode vec_mode;
9439 enum insn_code icode1, icode2;
9440 optab optab1, optab2;
9441 tree vectype = vectype_in;
9442 tree wide_vectype = vectype_out;
9443 enum tree_code c1, c2;
9444 int i;
9445 tree prev_type, intermediate_type;
9446 machine_mode intermediate_mode, prev_mode;
9447 optab optab3, optab4;
9449 *multi_step_cvt = 0;
9450 if (loop_info)
9451 vect_loop = LOOP_VINFO_LOOP (loop_info);
9453 switch (code)
9455 case WIDEN_MULT_EXPR:
9456 /* The result of a vectorized widening operation usually requires
9457 two vectors (because the widened results do not fit into one vector).
9458 The generated vector results would normally be expected to be
9459 generated in the same order as in the original scalar computation,
9460 i.e. if 8 results are generated in each vector iteration, they are
9461 to be organized as follows:
9462 vect1: [res1,res2,res3,res4],
9463 vect2: [res5,res6,res7,res8].
9465 However, in the special case that the result of the widening
9466 operation is used in a reduction computation only, the order doesn't
9467 matter (because when vectorizing a reduction we change the order of
9468 the computation). Some targets can take advantage of this and
9469 generate more efficient code. For example, targets like Altivec,
9470 that support widen_mult using a sequence of {mult_even,mult_odd}
9471 generate the following vectors:
9472 vect1: [res1,res3,res5,res7],
9473 vect2: [res2,res4,res6,res8].
9475 When vectorizing outer-loops, we execute the inner-loop sequentially
9476 (each vectorized inner-loop iteration contributes to VF outer-loop
9477 iterations in parallel). We therefore don't allow changing the
9478 order of the computation in the inner-loop during outer-loop
9479 vectorization. */
9480 /* TODO: Another case in which order doesn't *really* matter is when we
9481 widen and then contract again, e.g. (short)((int)x * y >> 8).
9482 Normally, pack_trunc performs an even/odd permute, whereas the
9483 repack from an even/odd expansion would be an interleave, which
9484 would be significantly simpler for e.g. AVX2. */
9485 /* In any case, in order to avoid duplicating the code below, recurse
9486 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9487 are properly set up for the caller. If we fail, we'll continue with
9488 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9489 if (vect_loop
9490 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9491 && !nested_in_vect_loop_p (vect_loop, stmt)
9492 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9493 stmt, vectype_out, vectype_in,
9494 code1, code2, multi_step_cvt,
9495 interm_types))
9497 /* Elements in a vector with the vect_used_by_reduction property cannot
9498 be reordered if the use chain with this property does not have the
9499 same operation. One such example is s += a * b, where the elements
9500 of a and b cannot be reordered. Here we check whether the vector defined
9501 by STMT is used directly in the reduction statement only. */
9502 tree lhs = gimple_assign_lhs (stmt);
9503 use_operand_p dummy;
9504 gimple *use_stmt;
9505 stmt_vec_info use_stmt_info = NULL;
9506 if (single_imm_use (lhs, &dummy, &use_stmt)
9507 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9508 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9509 return true;
9511 c1 = VEC_WIDEN_MULT_LO_EXPR;
9512 c2 = VEC_WIDEN_MULT_HI_EXPR;
9513 break;
9515 case DOT_PROD_EXPR:
9516 c1 = DOT_PROD_EXPR;
9517 c2 = DOT_PROD_EXPR;
9518 break;
9520 case SAD_EXPR:
9521 c1 = SAD_EXPR;
9522 c2 = SAD_EXPR;
9523 break;
9525 case VEC_WIDEN_MULT_EVEN_EXPR:
9526 /* Support the recursion induced just above. */
9527 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9528 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9529 break;
9531 case WIDEN_LSHIFT_EXPR:
9532 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9533 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9534 break;
9536 CASE_CONVERT:
9537 c1 = VEC_UNPACK_LO_EXPR;
9538 c2 = VEC_UNPACK_HI_EXPR;
9539 break;
9541 case FLOAT_EXPR:
9542 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9543 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9544 break;
9546 case FIX_TRUNC_EXPR:
9547 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9548 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9549 computing the operation. */
9550 return false;
9552 default:
9553 gcc_unreachable ();
9556 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9557 std::swap (c1, c2);
9559 if (code == FIX_TRUNC_EXPR)
9561 /* The signedness is determined from the output operand. */
9562 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9563 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9565 else
9567 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9568 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9571 if (!optab1 || !optab2)
9572 return false;
9574 vec_mode = TYPE_MODE (vectype);
9575 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9576 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9577 return false;
9579 *code1 = c1;
9580 *code2 = c2;
9582 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9583 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9584 /* For scalar masks we may have different boolean
9585 vector types that share the same QImode. Thus we
9586 add an additional check on the number of elements. */
9587 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9588 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9589 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9591 /* Check if it's a multi-step conversion that can be done using intermediate
9592 types. */
9594 prev_type = vectype;
9595 prev_mode = vec_mode;
9597 if (!CONVERT_EXPR_CODE_P (code))
9598 return false;
9600 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9601 intermediate steps in the promotion sequence. We try up to
9602 MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
9603 not. */
9604 interm_types->create (MAX_INTERM_CVT_STEPS);
9605 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9607 intermediate_mode = insn_data[icode1].operand[0].mode;
9608 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9610 intermediate_type
9611 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9612 current_vector_size);
9613 if (intermediate_mode != TYPE_MODE (intermediate_type))
9614 return false;
9616 else
9617 intermediate_type
9618 = lang_hooks.types.type_for_mode (intermediate_mode,
9619 TYPE_UNSIGNED (prev_type));
9621 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9622 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9624 if (!optab3 || !optab4
9625 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9626 || insn_data[icode1].operand[0].mode != intermediate_mode
9627 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9628 || insn_data[icode2].operand[0].mode != intermediate_mode
9629 || ((icode1 = optab_handler (optab3, intermediate_mode))
9630 == CODE_FOR_nothing)
9631 || ((icode2 = optab_handler (optab4, intermediate_mode))
9632 == CODE_FOR_nothing))
9633 break;
9635 interm_types->quick_push (intermediate_type);
9636 (*multi_step_cvt)++;
9638 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9639 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9640 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9641 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9642 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9644 prev_type = intermediate_type;
9645 prev_mode = intermediate_mode;
9648 interm_types->release ();
9649 return false;
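/* Illustrative sketch (hypothetical helper and parameters, not from the
   GCC sources): querying support for a widening multiply from a vector of
   shorts to a vector of ints.  On success CODE1/CODE2 hold the two tree
   codes to emit (a LO/HI pair, or the EVEN/ODD pair when the result feeds
   only a reduction), and any intermediate types of a multi-step conversion
   are pushed onto INTERM_TYPES.  */

static bool
example_widen_mult_supported (gimple *stmt, tree short_vectype,
                              tree int_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (WIDEN_MULT_EXPR, stmt,
                                            int_vectype, short_vectype,
                                            &code1, &code2, &multi_step_cvt,
                                            &interm_types);
  interm_types.release ();
  return ok;
}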
9653 /* Function supportable_narrowing_operation
9655 Check whether an operation represented by the code CODE is a
9656 narrowing operation that is supported by the target platform in
9657 vector form (i.e., when operating on arguments of type VECTYPE_IN
9658 and producing a result of type VECTYPE_OUT).
9660 Narrowing operations we currently support are NOP (CONVERT) and
9661 FIX_TRUNC. This function checks if these operations are supported by
9662 the target platform directly via vector tree-codes.
9664 Output:
9665 - CODE1 is the code of a vector operation to be used when
9666 vectorizing the operation, if available.
9667 - MULTI_STEP_CVT determines the number of required intermediate steps in
9668 case of multi-step conversion (like int->short->char - in that case
9669 MULTI_STEP_CVT will be 1).
9670 - INTERM_TYPES contains the intermediate type required to perform the
9671 narrowing operation (short in the above example). */
9673 bool
9674 supportable_narrowing_operation (enum tree_code code,
9675 tree vectype_out, tree vectype_in,
9676 enum tree_code *code1, int *multi_step_cvt,
9677 vec<tree> *interm_types)
9679 machine_mode vec_mode;
9680 enum insn_code icode1;
9681 optab optab1, interm_optab;
9682 tree vectype = vectype_in;
9683 tree narrow_vectype = vectype_out;
9684 enum tree_code c1;
9685 tree intermediate_type, prev_type;
9686 machine_mode intermediate_mode, prev_mode;
9687 int i;
9688 bool uns;
9690 *multi_step_cvt = 0;
9691 switch (code)
9693 CASE_CONVERT:
9694 c1 = VEC_PACK_TRUNC_EXPR;
9695 break;
9697 case FIX_TRUNC_EXPR:
9698 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9699 break;
9701 case FLOAT_EXPR:
9702 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9703 tree code and optabs used for computing the operation. */
9704 return false;
9706 default:
9707 gcc_unreachable ();
9710 if (code == FIX_TRUNC_EXPR)
9711 /* The signedness is determined from the output operand. */
9712 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9713 else
9714 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9716 if (!optab1)
9717 return false;
9719 vec_mode = TYPE_MODE (vectype);
9720 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9721 return false;
9723 *code1 = c1;
9725 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9726 /* For scalar masks we may have different boolean
9727 vector types that share the same QImode. Thus we
9728 add an additional check on the number of elements. */
9729 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9730 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9731 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9733 /* Check if it's a multi-step conversion that can be done using intermediate
9734 types. */
9735 prev_mode = vec_mode;
9736 prev_type = vectype;
9737 if (code == FIX_TRUNC_EXPR)
9738 uns = TYPE_UNSIGNED (vectype_out);
9739 else
9740 uns = TYPE_UNSIGNED (vectype);
9742 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9743 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9744 costly than signed. */
9745 if (code == FIX_TRUNC_EXPR && uns)
9747 enum insn_code icode2;
9749 intermediate_type
9750 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9751 interm_optab
9752 = optab_for_tree_code (c1, intermediate_type, optab_default);
9753 if (interm_optab != unknown_optab
9754 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9755 && insn_data[icode1].operand[0].mode
9756 == insn_data[icode2].operand[0].mode)
9758 uns = false;
9759 optab1 = interm_optab;
9760 icode1 = icode2;
9764 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9765 intermediate steps in the narrowing sequence. We try up to
9766 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
9767 interm_types->create (MAX_INTERM_CVT_STEPS);
9768 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9770 intermediate_mode = insn_data[icode1].operand[0].mode;
9771 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9773 intermediate_type
9774 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9775 current_vector_size);
9776 if (intermediate_mode != TYPE_MODE (intermediate_type))
9777 return false;
9779 else
9780 intermediate_type
9781 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9782 interm_optab
9783 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9784 optab_default);
9785 if (!interm_optab
9786 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9787 || insn_data[icode1].operand[0].mode != intermediate_mode
9788 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9789 == CODE_FOR_nothing))
9790 break;
9792 interm_types->quick_push (intermediate_type);
9793 (*multi_step_cvt)++;
9795 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9796 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9797 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9798 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9800 prev_mode = intermediate_mode;
9801 prev_type = intermediate_type;
9802 optab1 = interm_optab;
9805 interm_types->release ();
9806 return false;
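/* Illustrative sketch (hypothetical helper and parameters, not from the
   GCC sources): checking whether an int -> char conversion is vectorizable.
   If the target can only pack one step at a time, the call still succeeds,
   with MULTI_STEP_CVT set to 1 and a short vector type recorded in
   INTERM_TYPES (int -> short -> char).  */

static bool
example_narrow_int_to_char_supported (tree char_vectype, tree int_vectype)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_narrowing_operation (NOP_EXPR, char_vectype,
                                             int_vectype, &code1,
                                             &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}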