Use explicit encodings for simple permutes
[official-gcc.git] / gcc / tree-vect-stmts.c
blob 0f77567c9d918cd13b22d4616831a7e8a6809cac
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
57 /* Says whether a statement is a load, a store of a vectorized statement
58 result, or a store of an invariant value. */
59 enum vec_load_store_type {
60 VLS_LOAD,
61 VLS_STORE,
62 VLS_STORE_INVARIANT
65 /* Return the vectorized type for the given statement. */
67 tree
68 stmt_vectype (struct _stmt_vec_info *stmt_info)
70 return STMT_VINFO_VECTYPE (stmt_info);
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75 bool
76 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
81 struct loop* loop;
83 if (!loop_vinfo)
84 return false;
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
88 return (bb->loop_father == loop->inner);
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
95 unsigned
96 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
97 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
98 int misalign, enum vect_cost_model_location where)
100 if ((kind == vector_load || kind == unaligned_load)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_gather_load;
103 if ((kind == vector_store || kind == unaligned_store)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
105 kind = vector_scatter_store;
106 if (body_cost_vec)
108 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
109 stmt_info_for_cost si = { count, kind,
110 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
111 misalign };
112 body_cost_vec->safe_push (si);
113 return (unsigned)
114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 else
117 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
118 count, kind, stmt_info, misalign, where);
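/* Usage sketch (illustrative only): a caller costing NCOPIES copies of a
   vector statement in the loop body would do something like

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   which is the pattern used by vect_model_simple_cost below.  */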
121 /* Return a variable of type ELEM_TYPE[NELEMS]. */
123 static tree
124 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
126 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
127 "vect_array");
130 /* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
135 static tree
136 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
137 tree array, unsigned HOST_WIDE_INT n)
139 tree vect_type, vect, vect_name, array_ref;
140 gimple *new_stmt;
142 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
143 vect_type = TREE_TYPE (TREE_TYPE (array));
144 vect = vect_create_destination_var (scalar_dest, vect_type);
145 array_ref = build4 (ARRAY_REF, vect_type, array,
146 build_int_cst (size_type_node, n),
147 NULL_TREE, NULL_TREE);
149 new_stmt = gimple_build_assign (vect, array_ref);
150 vect_name = make_ssa_name (vect, new_stmt);
151 gimple_assign_set_lhs (new_stmt, vect_name);
152 vect_finish_stmt_generation (stmt, new_stmt, gsi);
154 return vect_name;
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
161 static void
162 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
163 tree array, unsigned HOST_WIDE_INT n)
165 tree array_ref;
166 gimple *new_stmt;
168 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
172 new_stmt = gimple_build_assign (array_ref, vect);
173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
176 /* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
178 (and its group). */
180 static tree
181 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
183 tree mem_ref;
185 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
186 /* Arrays have the same alignment as their type. */
187 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
188 return mem_ref;
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
199 enum vect_relevant relevant, bool live_p)
201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
202 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
203 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 gimple *pattern_stmt;
206 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: ", relevant, live_p);
210 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215 may have their own uses that are not in any pattern; in such cases the
216 stmt itself should be marked. */
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
224 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE, vect_location,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info = vinfo_for_stmt (pattern_stmt);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 stmt = pattern_stmt;
237 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
238 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
239 STMT_VINFO_RELEVANT (stmt_info) = relevant;
241 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "already marked relevant/live.\n");
247 return;
250 worklist->safe_push (stmt);
254 /* Function is_simple_and_all_uses_invariant
256 Return true if STMT is simple and all uses of it are invariant. */
258 bool
259 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
261 tree op;
262 gimple *def_stmt;
263 ssa_op_iter iter;
265 if (!is_gimple_assign (stmt))
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
315 *relevant = vect_used_in_scope;
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE, vect_location,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant = vect_used_in_scope;
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
348 *live_p = true;
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
362 return (*live_p || *relevant);
366 /* Function exist_non_indexing_operands_for_use_p
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
371 static bool
372 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
383 /* STMT has a data_ref. FORNOW this means that it is one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
391 for array indexing.
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
396 if (!gimple_assign_copy_p (stmt))
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
400 switch (gimple_call_internal_fn (stmt))
402 case IFN_MASK_STORE:
403 operand = gimple_call_arg (stmt, 3);
404 if (operand == use)
405 return true;
406 /* FALLTHRU */
407 case IFN_MASK_LOAD:
408 operand = gimple_call_arg (stmt, 2);
409 if (operand == use)
410 return true;
411 break;
412 default:
413 break;
415 return false;
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
420 operand = gimple_assign_rhs1 (stmt);
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
424 if (operand == use)
425 return true;
427 return false;
432 /* Function process_use.
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT because it has already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
456 Return true if everything is as expected. Return false otherwise. */
458 static bool
459 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
460 enum vect_relevant relevant, vec<gimple *> *worklist,
461 bool force)
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
467 gimple *def_stmt;
468 enum vect_def_type dt;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
548 case vect_used_in_scope:
549 break;
551 default:
552 gcc_unreachable ();
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
569 switch (relevant)
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
577 case vect_used_by_reduction:
578 case vect_used_only_live:
579 relevant = vect_used_in_outer_by_reduction;
580 break;
582 case vect_used_in_scope:
583 relevant = vect_used_in_outer;
584 break;
586 default:
587 gcc_unreachable ();
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
607 vect_mark_relevant (worklist, def_stmt, relevant, false);
608 return true;
612 /* Function vect_mark_stmts_to_be_vectorized.
614 Not all stmts in the loop need to be vectorized. For example:
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
621 3. j = j + 1
623 Stmts 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
626 This pass detects such stmts. */
628 bool
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
635 gimple *stmt;
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
639 gimple *phi;
640 bool live_p;
641 enum vect_relevant relevant;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec<gimple *, 64> worklist;
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
655 phi = gsi_stmt (si);
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, phi, relevant, live_p);
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
667 stmt = gsi_stmt (si);
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
674 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
675 vect_mark_relevant (&worklist, stmt, relevant, live_p);
679 /* 2. Process_worklist */
680 while (worklist.length () > 0)
682 use_operand_p use_p;
683 ssa_op_iter iter;
685 stmt = worklist.pop ();
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 of STMT. */
695 stmt_vinfo = vinfo_for_stmt (stmt);
696 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
701 One exception is when STMT has been identified as defining a reduction
702 variable; in this case we set the relevance to vect_used_by_reduction.
703 This is because we distinguish between two kinds of relevant stmts -
704 those that are used by a reduction computation, and those that are
705 (also) used by a regular computation. This allows us later on to
706 identify stmts that are used solely by a reduction, and therefore the
707 order of the results that they produce does not have to be kept. */
709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
711 case vect_reduction_def:
712 gcc_assert (relevant != vect_unused_in_scope);
713 if (relevant != vect_unused_in_scope
714 && relevant != vect_used_in_scope
715 && relevant != vect_used_by_reduction
716 && relevant != vect_used_only_live)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of reduction.\n");
721 return false;
723 break;
725 case vect_nested_cycle:
726 if (relevant != vect_unused_in_scope
727 && relevant != vect_used_in_outer_by_reduction
728 && relevant != vect_used_in_outer)
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732 "unsupported use of nested cycle.\n");
734 return false;
736 break;
738 case vect_double_reduction_def:
739 if (relevant != vect_unused_in_scope
740 && relevant != vect_used_by_reduction
741 && relevant != vect_used_only_live)
743 if (dump_enabled_p ())
744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
745 "unsupported use of double reduction.\n");
747 return false;
749 break;
751 default:
752 break;
755 if (is_pattern_stmt_p (stmt_vinfo))
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt))
762 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
763 tree op = gimple_assign_rhs1 (stmt);
765 i = 1;
766 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
768 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
769 relevant, &worklist, false)
770 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
771 relevant, &worklist, false))
772 return false;
773 i = 2;
775 for (; i < gimple_num_ops (stmt); i++)
777 op = gimple_op (stmt, i);
778 if (TREE_CODE (op) == SSA_NAME
779 && !process_use (stmt, op, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
784 else if (is_gimple_call (stmt))
786 for (i = 0; i < gimple_call_num_args (stmt); i++)
788 tree arg = gimple_call_arg (stmt, i);
789 if (!process_use (stmt, arg, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
795 else
796 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
798 tree op = USE_FROM_PTR (use_p);
799 if (!process_use (stmt, op, loop_vinfo, relevant,
800 &worklist, false))
801 return false;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
810 &worklist, true))
811 return false;
813 } /* while worklist */
815 return true;
819 /* Function vect_model_simple_cost.
821 Models cost for simple operations, i.e. those that only emit ncopies of a
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
825 void
826 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
827 enum vect_def_type *dt,
828 int ndts,
829 stmt_vector_for_cost *prologue_cost_vec,
830 stmt_vector_for_cost *body_cost_vec)
832 int i;
833 int inside_cost = 0, prologue_cost = 0;
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info))
837 return;
839 /* Cost the "broadcast" of a scalar operand into a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
841 cost model. */
842 for (i = 0; i < ndts; i++)
843 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
844 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
845 stmt_info, 0, vect_prologue);
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
848 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
849 stmt_info, 0, vect_body);
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_NOTE, vect_location,
853 "vect_model_simple_cost: inside_cost = %d, "
854 "prologue_cost = %d .\n", inside_cost, prologue_cost);
858 /* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
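/* Illustrative numbers for the loop in the function below: a two-step
   promotion (PWR == 1) is costed as vect_pow2 (1) + vect_pow2 (2) = 2 + 4
   vec_promote_demote operations, while a two-step demotion is costed as
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2.  */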
863 static void
864 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
865 enum vect_def_type *dt, int pwr)
867 int i, tmp;
868 int inside_cost = 0, prologue_cost = 0;
869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
870 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
871 void *target_cost_data;
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info))
875 return;
877 if (loop_vinfo)
878 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
879 else
880 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
882 for (i = 0; i < pwr + 1; i++)
884 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
885 (i + 1) : i;
886 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
887 vec_promote_demote, stmt_info, 0,
888 vect_body);
891 /* FORNOW: Assuming maximum 2 args per stmt. */
892 for (i = 0; i < 2; i++)
893 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
894 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
895 stmt_info, 0, vect_prologue);
897 if (dump_enabled_p ())
898 dump_printf_loc (MSG_NOTE, vect_location,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
900 "prologue_cost = %d .\n", inside_cost, prologue_cost);
903 /* Function vect_model_store_cost
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
908 void
909 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
910 vect_memory_access_type memory_access_type,
911 enum vect_def_type dt, slp_tree slp_node,
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
917 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
918 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920 if (dt == vect_constant_def || dt == vect_external_def)
921 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
922 stmt_info, 0, vect_prologue);
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node && grouped_access_p)
928 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
929 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
937 /* We assume that the cost of a single store-lanes instruction is
938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
939 access is instead being provided by a permute-and-store operation,
940 include the cost of the permutes. */
941 if (first_stmt_p
942 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
944 /* Uses high and low interleave or shuffle operations for each
945 needed permute. */
946 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
947 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
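      /* Illustrative example: a group of 4 interleaved scalar stores with
	 ncopies == 1 is costed as ceil_log2 (4) * 4 == 8 vec_perm
	 statements.  */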
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE, vect_location,
953 "vect_model_store_cost: strided group_size = %d .\n",
954 group_size);
957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
958 /* Costs of the stores. */
959 if (memory_access_type == VMAT_ELEMENTWISE
960 || memory_access_type == VMAT_GATHER_SCATTER)
961 /* N scalar stores plus extracting the elements. */
962 inside_cost += record_stmt_cost (body_cost_vec,
963 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
964 scalar_store, stmt_info, 0, vect_body);
965 else
966 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
968 if (memory_access_type == VMAT_ELEMENTWISE
969 || memory_access_type == VMAT_STRIDED_SLP)
970 inside_cost += record_stmt_cost (body_cost_vec,
971 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
972 vec_to_scalar, stmt_info, 0, vect_body);
974 if (dump_enabled_p ())
975 dump_printf_loc (MSG_NOTE, vect_location,
976 "vect_model_store_cost: inside_cost = %d, "
977 "prologue_cost = %d .\n", inside_cost, prologue_cost);
981 /* Calculate cost of DR's memory access. */
982 void
983 vect_get_store_cost (struct data_reference *dr, int ncopies,
984 unsigned int *inside_cost,
985 stmt_vector_for_cost *body_cost_vec)
987 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
988 gimple *stmt = DR_STMT (dr);
989 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
991 switch (alignment_support_scheme)
993 case dr_aligned:
995 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
996 vector_store, stmt_info, 0,
997 vect_body);
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE, vect_location,
1001 "vect_model_store_cost: aligned.\n");
1002 break;
1005 case dr_unaligned_supported:
1007 /* Here, we assign an additional cost for the unaligned store. */
1008 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1009 unaligned_store, stmt_info,
1010 DR_MISALIGNMENT (dr), vect_body);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: unaligned supported by "
1014 "hardware.\n");
1015 break;
1018 case dr_unaligned_unsupported:
1020 *inside_cost = VECT_MAX_COST;
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1024 "vect_model_store_cost: unsupported access.\n");
1025 break;
1028 default:
1029 gcc_unreachable ();
1034 /* Function vect_model_load_cost
1036 Models cost for loads. In the case of grouped accesses, one access has
1037 the overhead of the grouped access attributed to it. Since unaligned
1038 accesses are supported for loads, we also account for the costs of the
1039 access scheme chosen. */
1041 void
1042 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1043 vect_memory_access_type memory_access_type,
1044 slp_tree slp_node,
1045 stmt_vector_for_cost *prologue_cost_vec,
1046 stmt_vector_for_cost *body_cost_vec)
1048 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1049 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1050 unsigned int inside_cost = 0, prologue_cost = 0;
1051 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1053 /* Grouped loads read all elements in the group at once,
1054 so we want the DR for the first statement. */
1055 if (!slp_node && grouped_access_p)
1057 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1058 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1061 /* True if we should include any once-per-group costs as well as
1062 the cost of the statement itself. For SLP we only get called
1063 once per group anyhow. */
1064 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1066 /* We assume that the cost of a single load-lanes instruction is
1067 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1068 access is instead being provided by a load-and-permute operation,
1069 include the cost of the permutes. */
1070 if (first_stmt_p
1071 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1073 /* Uses even and odd extract operations or shuffle operations
1074 for each needed permute. */
1075 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1076 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1077 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1078 stmt_info, 0, vect_body);
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_NOTE, vect_location,
1082 "vect_model_load_cost: strided group_size = %d .\n",
1083 group_size);
1086 /* The loads themselves. */
1087 if (memory_access_type == VMAT_ELEMENTWISE
1088 || memory_access_type == VMAT_GATHER_SCATTER)
1090 /* N scalar loads plus gathering them into a vector. */
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1092 inside_cost += record_stmt_cost (body_cost_vec,
1093 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1094 scalar_load, stmt_info, 0, vect_body);
1096 else
1097 vect_get_load_cost (dr, ncopies, first_stmt_p,
1098 &inside_cost, &prologue_cost,
1099 prologue_cost_vec, body_cost_vec, true);
1100 if (memory_access_type == VMAT_ELEMENTWISE
1101 || memory_access_type == VMAT_STRIDED_SLP)
1102 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1103 stmt_info, 0, vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1112 /* Calculate cost of DR's memory access. */
1113 void
1114 vect_get_load_cost (struct data_reference *dr, int ncopies,
1115 bool add_realign_cost, unsigned int *inside_cost,
1116 unsigned int *prologue_cost,
1117 stmt_vector_for_cost *prologue_cost_vec,
1118 stmt_vector_for_cost *body_cost_vec,
1119 bool record_prologue_costs)
1121 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1122 gimple *stmt = DR_STMT (dr);
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1125 switch (alignment_support_scheme)
1127 case dr_aligned:
1129 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1130 stmt_info, 0, vect_body);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE, vect_location,
1134 "vect_model_load_cost: aligned.\n");
1136 break;
1138 case dr_unaligned_supported:
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1142 unaligned_load, stmt_info,
1143 DR_MISALIGNMENT (dr), vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned supported by "
1148 "hardware.\n");
1150 break;
1152 case dr_explicit_realign:
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1155 vector_load, stmt_info, 0, vect_body);
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1157 vec_perm, stmt_info, 0, vect_body);
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1161 prologue costs. */
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: explicit realign\n");
1170 break;
1172 case dr_explicit_realign_optimized:
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: unaligned software "
1177 "pipelined.\n");
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1186 if (add_realign_cost && record_prologue_costs)
1188 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1189 vector_stmt, stmt_info,
1190 0, vect_prologue);
1191 if (targetm.vectorize.builtin_mask_for_load)
1192 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1193 vector_stmt, stmt_info,
1194 0, vect_prologue);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1198 stmt_info, 0, vect_body);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: explicit realign optimized"
1205 "\n");
1207 break;
1210 case dr_unaligned_unsupported:
1212 *inside_cost = VECT_MAX_COST;
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1216 "vect_model_load_cost: unsupported access.\n");
1217 break;
1220 default:
1221 gcc_unreachable ();
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
1228 static void
1229 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1231 if (gsi)
1232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1233 else
1235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1238 if (loop_vinfo)
1240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1241 basic_block new_bb;
1242 edge pe;
1244 if (nested_in_vect_loop_p (loop, stmt))
1245 loop = loop->inner;
1247 pe = loop_preheader_edge (loop);
1248 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1249 gcc_assert (!new_bb);
1251 else
1253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1254 basic_block bb;
1255 gimple_stmt_iterator gsi_bb_start;
1257 gcc_assert (bb_vinfo);
1258 bb = BB_VINFO_BB (bb_vinfo);
1259 gsi_bb_start = gsi_after_labels (bb);
1260 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1264 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1272 /* Function vect_init_vector.
1274 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1275 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1276 vector type, a vector with all elements equal to VAL is created first.
1277 Place the initialization at GSI if it is not NULL. Otherwise, place the
1278 initialization at the loop preheader.
1279 Return the DEF of INIT_STMT.
1280 It will be used in the vectorization of STMT. */
1282 tree
1283 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1285 gimple *init_stmt;
1286 tree new_temp;
1288 /* We abuse this function to push something to an SSA name with initial 'val'. */
1289 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1291 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1292 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1294 /* A scalar boolean value should be transformed into an
1295 all-zeros or all-ones value before building a vector. */
1296 if (VECTOR_BOOLEAN_TYPE_P (type))
1298 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1299 tree false_val = build_zero_cst (TREE_TYPE (type));
1301 if (CONSTANT_CLASS_P (val))
1302 val = integer_zerop (val) ? false_val : true_val;
1303 else
1305 new_temp = make_ssa_name (TREE_TYPE (type));
1306 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1307 val, true_val, false_val);
1308 vect_init_vector_1 (stmt, init_stmt, gsi);
1309 val = new_temp;
1312 else if (CONSTANT_CLASS_P (val))
1313 val = fold_convert (TREE_TYPE (type), val);
1314 else
1316 new_temp = make_ssa_name (TREE_TYPE (type));
1317 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1318 init_stmt = gimple_build_assign (new_temp,
1319 fold_build1 (VIEW_CONVERT_EXPR,
1320 TREE_TYPE (type),
1321 val));
1322 else
1323 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1324 vect_init_vector_1 (stmt, init_stmt, gsi);
1325 val = new_temp;
1328 val = build_vector_from_val (type, val);
1331 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1332 init_stmt = gimple_build_assign (new_temp, val);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 return new_temp;
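/* Minimal illustration (hedged): splatting the scalar constant 5 into a
   4-element integer vector invariant goes through build_vector_from_val
   above and emits, in the preheader or at GSI, a statement of roughly the
   form

     cst_N = { 5, 5, 5, 5 };

   whose SSA name is what vect_init_vector returns.  */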
1337 /* Function vect_get_vec_def_for_operand_1.
1339 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1340 DT that will be used in the vectorized stmt. */
1342 tree
1343 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1345 tree vec_oprnd;
1346 gimple *vec_stmt;
1347 stmt_vec_info def_stmt_info = NULL;
1349 switch (dt)
1351 /* operand is a constant or a loop invariant. */
1352 case vect_constant_def:
1353 case vect_external_def:
1354 /* Code should use vect_get_vec_def_for_operand. */
1355 gcc_unreachable ();
1357 /* operand is defined inside the loop. */
1358 case vect_internal_def:
1360 /* Get the def from the vectorized stmt. */
1361 def_stmt_info = vinfo_for_stmt (def_stmt);
1363 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1364 /* Get vectorized pattern statement. */
1365 if (!vec_stmt
1366 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1367 && !STMT_VINFO_RELEVANT (def_stmt_info))
1368 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1369 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1370 gcc_assert (vec_stmt);
1371 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt);
1373 else if (is_gimple_call (vec_stmt))
1374 vec_oprnd = gimple_call_lhs (vec_stmt);
1375 else
1376 vec_oprnd = gimple_assign_lhs (vec_stmt);
1377 return vec_oprnd;
1380 /* operand is defined by a loop header phi. */
1381 case vect_reduction_def:
1382 case vect_double_reduction_def:
1383 case vect_nested_cycle:
1384 case vect_induction_def:
1386 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1388 /* Get the def from the vectorized stmt. */
1389 def_stmt_info = vinfo_for_stmt (def_stmt);
1390 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1391 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1392 vec_oprnd = PHI_RESULT (vec_stmt);
1393 else
1394 vec_oprnd = gimple_get_lhs (vec_stmt);
1395 return vec_oprnd;
1398 default:
1399 gcc_unreachable ();
1404 /* Function vect_get_vec_def_for_operand.
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 the vector invariant. */
1416 tree
1417 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1419 gimple *def_stmt;
1420 enum vect_def_type dt;
1421 bool is_simple_use;
1422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1425 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1430 dump_printf (MSG_NOTE, "\n");
1433 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1434 gcc_assert (is_simple_use);
1435 if (def_stmt && dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1441 if (dt == vect_constant_def || dt == vect_external_def)
1443 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1444 tree vector_type;
1446 if (vectype)
1447 vector_type = vectype;
1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1450 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1451 else
1452 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1454 gcc_assert (vector_type);
1455 return vect_init_vector (stmt, op, vector_type, NULL);
1457 else
1458 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1549 tree vec_oprnd = vec_oprnds0->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1563 /* Get vectorized definitions for OP0 and OP1. */
1565 void
1566 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
1569 slp_tree slp_node)
1571 if (slp_node)
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
1577 ops.quick_push (op0);
1578 if (op1)
1579 ops.quick_push (op1);
1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
1583 *vec_oprnds0 = vec_defs[0];
1584 if (op1)
1585 *vec_oprnds1 = vec_defs[1];
1587 else
1589 tree vec_oprnd;
1591 vec_oprnds0->create (1);
1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1593 vec_oprnds0->quick_push (vec_oprnd);
1595 if (op1)
1597 vec_oprnds1->create (1);
1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1599 vec_oprnds1->quick_push (vec_oprnd);
1605 /* Function vect_finish_stmt_generation.
1607 Insert a new stmt. */
1609 void
1610 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1611 gimple_stmt_iterator *gsi)
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1614 vec_info *vinfo = stmt_info->vinfo;
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1621 gimple *at_stmt = gsi_stmt (*gsi);
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1648 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1654 gimple_set_location (vec_stmt, gimple_location (stmt));
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1664 /* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
1669 static internal_fn
1670 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
1687 return ifn;
1690 return IFN_LAST;
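/* For instance (illustrative): a sqrt call in the loop maps to CFN_SQRT;
   when direct_internal_fn_supported_p reports that the target can perform
   IFN_SQRT on the vector types involved, IFN_SQRT is returned here,
   otherwise IFN_LAST.  */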
1694 static tree permute_vec_elements (tree, tree, tree, gimple *,
1695 gimple_stmt_iterator *);
1697 /* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1701 static int
1702 compare_step_with_zero (gimple *stmt)
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1713 static tree
1714 perm_mask_for_reverse (tree vectype)
1716 int i, nunits;
1718 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1720 /* The encoding has a single stepped pattern. */
1721 vec_perm_builder sel (nunits, 1, 3);
1722 for (i = 0; i < 3; ++i)
1723 sel.quick_push (nunits - 1 - i);
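  /* Illustrative example: for nunits == 8 the three encoded elements are
     7, 6, 5, and the single stepped pattern extends the series to
     { 7, 6, 5, 4, 3, 2, 1, 0 }, i.e. a full reversal of the vector.  */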
1725 vec_perm_indices indices (sel, 1, nunits);
1726 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1727 return NULL_TREE;
1728 return vect_gen_perm_mask_checked (vectype, indices);
1731 /* A subroutine of get_load_store_type, with a subset of the same
1732 arguments. Handle the case where STMT is part of a grouped load
1733 or store.
1735 For stores, the statements in the group are all consecutive
1736 and there is no gap at the end. For loads, the statements in the
1737 group might not be consecutive; there can be gaps between statements
1738 as well as at the end. */
1740 static bool
1741 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1742 vec_load_store_type vls_type,
1743 vect_memory_access_type *memory_access_type)
1745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1746 vec_info *vinfo = stmt_info->vinfo;
1747 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1748 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1749 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1750 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1751 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1752 bool single_element_p = (stmt == first_stmt
1753 && !GROUP_NEXT_ELEMENT (stmt_info));
1754 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1755 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1757 /* True if the vectorized statements would access beyond the last
1758 statement in the group. */
1759 bool overrun_p = false;
1761 /* True if we can cope with such overrun by peeling for gaps, so that
1762 there is at least one final scalar iteration after the vector loop. */
1763 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1765 /* There can only be a gap at the end of the group if the stride is
1766 known at compile time. */
1767 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1769 /* Stores can't yet have gaps. */
1770 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1772 if (slp)
1774 if (STMT_VINFO_STRIDED_P (stmt_info))
1776 /* Try to use consecutive accesses of GROUP_SIZE elements,
1777 separated by the stride, until we have a complete vector.
1778 Fall back to scalar accesses if that isn't possible. */
1779 if (nunits % group_size == 0)
1780 *memory_access_type = VMAT_STRIDED_SLP;
1781 else
1782 *memory_access_type = VMAT_ELEMENTWISE;
1784 else
1786 overrun_p = loop_vinfo && gap != 0;
1787 if (overrun_p && vls_type != VLS_LOAD)
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1790 "Grouped store with gaps requires"
1791 " non-consecutive accesses\n");
1792 return false;
1794 /* An overrun is fine if the trailing elements are smaller
1795 than the alignment boundary B. Every vector access will
1796 be a multiple of B and so we are guaranteed to access a
1797 non-gap element in the same B-sized block. */
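/* For example, with a known alignment of 16 bytes and 4-byte scalar
   elements, a gap of at most 3 elements (gap < 16 / 4) is harmless: an
   overrunning vector access still touches a non-gap element within the
   same 16-byte block.  */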
1798 if (overrun_p
1799 && gap < (vect_known_alignment_in_bytes (first_dr)
1800 / vect_get_scalar_dr_size (first_dr)))
1801 overrun_p = false;
1802 if (overrun_p && !can_overrun_p)
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "Peeling for outer loop is not supported\n");
1807 return false;
1809 *memory_access_type = VMAT_CONTIGUOUS;
1812 else
1814 /* We can always handle this case using elementwise accesses,
1815 but see if something more efficient is available. */
1816 *memory_access_type = VMAT_ELEMENTWISE;
1818 /* If there is a gap at the end of the group then these optimizations
1819 would access excess elements in the last iteration. */
1820 bool would_overrun_p = (gap != 0);
1821 /* An overrun is fine if the trailing elements are smaller than the
1822 alignment boundary B. Every vector access will be a multiple of B
1823 and so we are guaranteed to access a non-gap element in the
1824 same B-sized block. */
1825 if (would_overrun_p
1826 && gap < (vect_known_alignment_in_bytes (first_dr)
1827 / vect_get_scalar_dr_size (first_dr)))
1828 would_overrun_p = false;
1830 if (!STMT_VINFO_STRIDED_P (stmt_info)
1831 && (can_overrun_p || !would_overrun_p)
1832 && compare_step_with_zero (stmt) > 0)
1834 /* First try using LOAD/STORE_LANES. */
1835 if (vls_type == VLS_LOAD
1836 ? vect_load_lanes_supported (vectype, group_size)
1837 : vect_store_lanes_supported (vectype, group_size))
1839 *memory_access_type = VMAT_LOAD_STORE_LANES;
1840 overrun_p = would_overrun_p;
1843 /* If that fails, try using permuting loads. */
1844 if (*memory_access_type == VMAT_ELEMENTWISE
1845 && (vls_type == VLS_LOAD
1846 ? vect_grouped_load_supported (vectype, single_element_p,
1847 group_size)
1848 : vect_grouped_store_supported (vectype, group_size)))
1850 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1851 overrun_p = would_overrun_p;
1856 if (vls_type != VLS_LOAD && first_stmt == stmt)
1858 /* STMT is the leader of the group. Check the operands of all the
1859 stmts of the group. */
1860 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1861 while (next_stmt)
1863 gcc_assert (gimple_assign_single_p (next_stmt));
1864 tree op = gimple_assign_rhs1 (next_stmt);
1865 gimple *def_stmt;
1866 enum vect_def_type dt;
1867 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1871 "use not simple.\n");
1872 return false;
1874 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1878 if (overrun_p)
1880 gcc_assert (can_overrun_p);
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1883 "Data access with gaps requires scalar "
1884 "epilogue loop\n");
1885 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1888 return true;
1891 /* A subroutine of get_load_store_type, with a subset of the same
1892 arguments. Handle the case where STMT is a load or store that
1893 accesses consecutive elements with a negative step. */
1895 static vect_memory_access_type
1896 get_negative_load_store_type (gimple *stmt, tree vectype,
1897 vec_load_store_type vls_type,
1898 unsigned int ncopies)
1900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1901 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1902 dr_alignment_support alignment_support_scheme;
1904 if (ncopies > 1)
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "multiple types with negative step.\n");
1909 return VMAT_ELEMENTWISE;
1912 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1913 if (alignment_support_scheme != dr_aligned
1914 && alignment_support_scheme != dr_unaligned_supported)
1916 if (dump_enabled_p ())
1917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1918 "negative step but alignment required.\n");
1919 return VMAT_ELEMENTWISE;
1922 if (vls_type == VLS_STORE_INVARIANT)
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_NOTE, vect_location,
1926 "negative step with invariant source;"
1927 " no permute needed.\n");
1928 return VMAT_CONTIGUOUS_DOWN;
1931 if (!perm_mask_for_reverse (vectype))
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "negative step and reversing not supported.\n");
1936 return VMAT_ELEMENTWISE;
1939 return VMAT_CONTIGUOUS_REVERSE;
1942 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1943 if there is a memory access type that the vectorized form can use,
1944 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1945 or scatters, fill in GS_INFO accordingly.
1947 SLP says whether we're performing SLP rather than loop vectorization.
1948 VECTYPE is the vector type that the vectorized statements will use.
1949 NCOPIES is the number of vector statements that will be needed. */
1951 static bool
1952 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1953 vec_load_store_type vls_type, unsigned int ncopies,
1954 vect_memory_access_type *memory_access_type,
1955 gather_scatter_info *gs_info)
1957 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1958 vec_info *vinfo = stmt_info->vinfo;
1959 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1960 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1962 *memory_access_type = VMAT_GATHER_SCATTER;
1963 gimple *def_stmt;
1964 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1965 gcc_unreachable ();
1966 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1967 &gs_info->offset_dt,
1968 &gs_info->offset_vectype))
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "%s index use not simple.\n",
1973 vls_type == VLS_LOAD ? "gather" : "scatter");
1974 return false;
1977 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1979 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1980 memory_access_type))
1981 return false;
1983 else if (STMT_VINFO_STRIDED_P (stmt_info))
1985 gcc_assert (!slp);
1986 *memory_access_type = VMAT_ELEMENTWISE;
1988 else
1990 int cmp = compare_step_with_zero (stmt);
1991 if (cmp < 0)
1992 *memory_access_type = get_negative_load_store_type
1993 (stmt, vectype, vls_type, ncopies);
1994 else if (cmp == 0)
1996 gcc_assert (vls_type == VLS_LOAD);
1997 *memory_access_type = VMAT_INVARIANT;
1999 else
2000 *memory_access_type = VMAT_CONTIGUOUS;
2003 /* FIXME: At the moment the cost model seems to underestimate the
2004 cost of using elementwise accesses. This check preserves the
2005 traditional behavior until that can be fixed. */
2006 if (*memory_access_type == VMAT_ELEMENTWISE
2007 && !STMT_VINFO_STRIDED_P (stmt_info))
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2011 "not falling back to elementwise accesses\n");
2012 return false;
2014 return true;
2017 /* Function vectorizable_mask_load_store.
2019 Check if STMT performs a conditional load or store that can be vectorized.
2020 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2021 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2022 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2024 static bool
2025 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2026 gimple **vec_stmt, slp_tree slp_node)
2028 tree vec_dest = NULL;
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 stmt_vec_info prev_stmt_info;
2031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2032 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2033 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2034 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2036 tree rhs_vectype = NULL_TREE;
2037 tree mask_vectype;
2038 tree elem_type;
2039 gimple *new_stmt;
2040 tree dummy;
2041 tree dataref_ptr = NULL_TREE;
2042 gimple *ptr_incr;
2043 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2044 int ncopies;
2045 int i, j;
2046 bool inv_p;
2047 gather_scatter_info gs_info;
2048 vec_load_store_type vls_type;
2049 tree mask;
2050 gimple *def_stmt;
2051 enum vect_def_type dt;
2053 if (slp_node != NULL)
2054 return false;
2056 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2057 gcc_assert (ncopies >= 1);
2059 mask = gimple_call_arg (stmt, 2);
2061 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2062 return false;
2064 /* FORNOW. This restriction should be relaxed. */
2065 if (nested_in_vect_loop && ncopies > 1)
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2069 "multiple types in nested loop.");
2070 return false;
2073 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2074 return false;
2076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2077 && ! vec_stmt)
2078 return false;
2080 if (!STMT_VINFO_DATA_REF (stmt_info))
2081 return false;
2083 elem_type = TREE_TYPE (vectype);
2085 if (TREE_CODE (mask) != SSA_NAME)
2086 return false;
2088 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2089 return false;
2091 if (!mask_vectype)
2092 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2094 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2095 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2096 return false;
2098 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2100 tree rhs = gimple_call_arg (stmt, 3);
2101 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2102 return false;
2103 if (dt == vect_constant_def || dt == vect_external_def)
2104 vls_type = VLS_STORE_INVARIANT;
2105 else
2106 vls_type = VLS_STORE;
2108 else
2109 vls_type = VLS_LOAD;
2111 vect_memory_access_type memory_access_type;
2112 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2113 &memory_access_type, &gs_info))
2114 return false;
2116 if (memory_access_type == VMAT_GATHER_SCATTER)
2118 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2119 tree masktype
2120 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2121 if (TREE_CODE (masktype) == INTEGER_TYPE)
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2125 "masked gather with integer mask not supported.");
2126 return false;
2129 else if (memory_access_type != VMAT_CONTIGUOUS)
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "unsupported access type for masked %s.\n",
2134 vls_type == VLS_LOAD ? "load" : "store");
2135 return false;
2137 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2138 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2139 TYPE_MODE (mask_vectype),
2140 vls_type == VLS_LOAD)
2141 || (rhs_vectype
2142 && !useless_type_conversion_p (vectype, rhs_vectype)))
2143 return false;
2145 if (!vec_stmt) /* transformation not required. */
2147 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2148 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2149 if (vls_type == VLS_LOAD)
2150 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2151 NULL, NULL, NULL);
2152 else
2153 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2154 dt, NULL, NULL, NULL);
2155 return true;
2157 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2159 /* Transform. */
2161 if (memory_access_type == VMAT_GATHER_SCATTER)
2163 tree vec_oprnd0 = NULL_TREE, op;
2164 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2165 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2166 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2167 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2168 tree mask_perm_mask = NULL_TREE;
2169 edge pe = loop_preheader_edge (loop);
2170 gimple_seq seq;
2171 basic_block new_bb;
2172 enum { NARROW, NONE, WIDEN } modifier;
2173 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2175 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2176 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2177 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2178 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2179 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2180 scaletype = TREE_VALUE (arglist);
2181 gcc_checking_assert (types_compatible_p (srctype, rettype)
2182 && types_compatible_p (srctype, masktype));
2184 if (nunits == gather_off_nunits)
2185 modifier = NONE;
2186 else if (nunits == gather_off_nunits / 2)
2188 modifier = WIDEN;
2190 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
2191 for (i = 0; i < gather_off_nunits; ++i)
2192 sel.quick_push (i | nunits);
2194 vec_perm_indices indices (sel, 1, gather_off_nunits);
2195 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2196 indices);
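/* For example, with nunits == 4 and gather_off_nunits == 8 the selector
   is { 4, 5, 6, 7, 4, 5, 6, 7 }, which moves the second half of the offset
   vector into the low element positions for the odd-numbered copies.  */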
2198 else if (nunits == gather_off_nunits * 2)
2200 modifier = NARROW;
2202 vec_perm_builder sel (nunits, nunits, 1);
2203 sel.quick_grow (nunits);
2204 for (i = 0; i < nunits; ++i)
2205 sel[i] = i < gather_off_nunits
2206 ? i : i + nunits - gather_off_nunits;
2207 vec_perm_indices indices (sel, 2, nunits);
2208 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
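/* For example, with nunits == 8 and gather_off_nunits == 4 this selector
   is { 0, 1, 2, 3, 8, 9, 10, 11 }: the first four elements of each of two
   consecutive gather results are combined into one full vector.  */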
2210 ncopies *= 2;
2212 for (i = 0; i < nunits; ++i)
2213 sel[i] = i | gather_off_nunits;
2214 indices.new_vector (sel, 2, gather_off_nunits);
2215 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2217 else
2218 gcc_unreachable ();
2220 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2222 ptr = fold_convert (ptrtype, gs_info.base);
2223 if (!is_gimple_min_invariant (ptr))
2225 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2226 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2227 gcc_assert (!new_bb);
2230 scale = build_int_cst (scaletype, gs_info.scale);
2232 prev_stmt_info = NULL;
2233 for (j = 0; j < ncopies; ++j)
2235 if (modifier == WIDEN && (j & 1))
2236 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2237 perm_mask, stmt, gsi);
2238 else if (j == 0)
2239 op = vec_oprnd0
2240 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2241 else
2242 op = vec_oprnd0
2243 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2245 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2248 == TYPE_VECTOR_SUBPARTS (idxtype));
2249 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2250 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2251 new_stmt
2252 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2253 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2254 op = var;
2257 if (mask_perm_mask && (j & 1))
2258 mask_op = permute_vec_elements (mask_op, mask_op,
2259 mask_perm_mask, stmt, gsi);
2260 else
2262 if (j == 0)
2263 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2264 else
2266 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2267 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2270 mask_op = vec_mask;
2271 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2273 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2274 == TYPE_VECTOR_SUBPARTS (masktype));
2275 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2276 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2277 new_stmt
2278 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2280 mask_op = var;
2284 new_stmt
2285 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2286 scale);
2288 if (!useless_type_conversion_p (vectype, rettype))
2290 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2291 == TYPE_VECTOR_SUBPARTS (rettype));
2292 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2293 gimple_call_set_lhs (new_stmt, op);
2294 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2295 var = make_ssa_name (vec_dest);
2296 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2297 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2299 else
2301 var = make_ssa_name (vec_dest, new_stmt);
2302 gimple_call_set_lhs (new_stmt, var);
2305 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2307 if (modifier == NARROW)
2309 if ((j & 1) == 0)
2311 prev_res = var;
2312 continue;
2314 var = permute_vec_elements (prev_res, var,
2315 perm_mask, stmt, gsi);
2316 new_stmt = SSA_NAME_DEF_STMT (var);
2319 if (prev_stmt_info == NULL)
2320 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2321 else
2322 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2323 prev_stmt_info = vinfo_for_stmt (new_stmt);
2326 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2327 from the IL. */
2328 if (STMT_VINFO_RELATED_STMT (stmt_info))
2330 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2331 stmt_info = vinfo_for_stmt (stmt);
2333 tree lhs = gimple_call_lhs (stmt);
2334 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2335 set_vinfo_for_stmt (new_stmt, stmt_info);
2336 set_vinfo_for_stmt (stmt, NULL);
2337 STMT_VINFO_STMT (stmt_info) = new_stmt;
2338 gsi_replace (gsi, new_stmt, true);
2339 return true;
2341 else if (vls_type != VLS_LOAD)
2343 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2344 prev_stmt_info = NULL;
2345 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2346 for (i = 0; i < ncopies; i++)
2348 unsigned align, misalign;
2350 if (i == 0)
2352 tree rhs = gimple_call_arg (stmt, 3);
2353 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2354 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2355 mask_vectype);
2356 /* We should have caught mismatched types earlier. */
2356 /* We should have caught mismatched types earlier. */
2357 gcc_assert (useless_type_conversion_p (vectype,
2358 TREE_TYPE (vec_rhs)));
2359 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2360 NULL_TREE, &dummy, gsi,
2361 &ptr_incr, false, &inv_p);
2362 gcc_assert (!inv_p);
2364 else
2366 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2367 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2368 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2369 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2370 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2371 TYPE_SIZE_UNIT (vectype));
2374 align = DR_TARGET_ALIGNMENT (dr);
2375 if (aligned_access_p (dr))
2376 misalign = 0;
2377 else if (DR_MISALIGNMENT (dr) == -1)
2379 align = TYPE_ALIGN_UNIT (elem_type);
2380 misalign = 0;
2382 else
2383 misalign = DR_MISALIGNMENT (dr);
2384 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2385 misalign);
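/* The constant built below is passed as the second argument of the
   IFN_MASK_STORE call and encodes the alignment in bytes that can be
   assumed for DATAREF_PTR: the full computed alignment when the
   misalignment is zero, otherwise the largest power of two that divides
   the misalignment (e.g. a misalignment of 12 yields 4).  */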
2386 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2387 misalign ? least_bit_hwi (misalign) : align);
2388 gcall *call
2389 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2390 ptr, vec_mask, vec_rhs);
2391 gimple_call_set_nothrow (call, true);
2392 new_stmt = call;
2393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2394 if (i == 0)
2395 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2396 else
2397 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2398 prev_stmt_info = vinfo_for_stmt (new_stmt);
2401 else
2403 tree vec_mask = NULL_TREE;
2404 prev_stmt_info = NULL;
2405 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2406 for (i = 0; i < ncopies; i++)
2408 unsigned align, misalign;
2410 if (i == 0)
2412 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2413 mask_vectype);
2414 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2415 NULL_TREE, &dummy, gsi,
2416 &ptr_incr, false, &inv_p);
2417 gcc_assert (!inv_p);
2419 else
2421 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2422 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2423 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2424 TYPE_SIZE_UNIT (vectype));
2427 align = DR_TARGET_ALIGNMENT (dr);
2428 if (aligned_access_p (dr))
2429 misalign = 0;
2430 else if (DR_MISALIGNMENT (dr) == -1)
2432 align = TYPE_ALIGN_UNIT (elem_type);
2433 misalign = 0;
2435 else
2436 misalign = DR_MISALIGNMENT (dr);
2437 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2438 misalign);
2439 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2440 misalign ? least_bit_hwi (misalign) : align);
2441 gcall *call
2442 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2443 ptr, vec_mask);
2444 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2445 gimple_call_set_nothrow (call, true);
2446 vect_finish_stmt_generation (stmt, call, gsi);
2447 if (i == 0)
2448 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2449 else
2450 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2451 prev_stmt_info = vinfo_for_stmt (call);
2455 if (vls_type == VLS_LOAD)
2457 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2458 from the IL. */
2459 if (STMT_VINFO_RELATED_STMT (stmt_info))
2461 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2462 stmt_info = vinfo_for_stmt (stmt);
2464 tree lhs = gimple_call_lhs (stmt);
2465 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2466 set_vinfo_for_stmt (new_stmt, stmt_info);
2467 set_vinfo_for_stmt (stmt, NULL);
2468 STMT_VINFO_STMT (stmt_info) = new_stmt;
2469 gsi_replace (gsi, new_stmt, true);
2472 return true;
2475 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2477 static bool
2478 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2479 gimple **vec_stmt, slp_tree slp_node,
2480 tree vectype_in, enum vect_def_type *dt)
2482 tree op, vectype;
2483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2484 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2485 unsigned ncopies, nunits;
2487 op = gimple_call_arg (stmt, 0);
2488 vectype = STMT_VINFO_VECTYPE (stmt_info);
2489 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2491 /* Multiple types in SLP are handled by creating the appropriate number of
2492 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2493 case of SLP. */
2494 if (slp_node)
2495 ncopies = 1;
2496 else
2497 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2499 gcc_assert (ncopies >= 1);
2501 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2502 if (! char_vectype)
2503 return false;
2505 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2506 unsigned word_bytes = num_bytes / nunits;
2508 /* The encoding uses one stepped pattern for each byte in the word. */
2509 vec_perm_builder elts (num_bytes, word_bytes, 3);
2510 for (unsigned i = 0; i < 3; ++i)
2511 for (unsigned j = 0; j < word_bytes; ++j)
2512 elts.quick_push ((i + 1) * word_bytes - j - 1);
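/* For example, for a 32-bit bswap on 16-byte vectors (word_bytes == 4,
   num_bytes == 16) the pushed elements are 3, 2, 1, 0, 7, 6, 5, 4,
   11, 10, 9, 8; the four stepped patterns extend this to
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, which reverses
   the bytes within each 4-byte word.  */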
2514 vec_perm_indices indices (elts, 1, num_bytes);
2515 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2516 return false;
2518 if (! vec_stmt)
2520 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2523 "\n");
2524 if (! PURE_SLP_STMT (stmt_info))
2526 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2527 1, vector_stmt, stmt_info, 0, vect_prologue);
2528 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2529 ncopies, vec_perm, stmt_info, 0, vect_body);
2531 return true;
2534 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2536 /* Transform. */
2537 vec<tree> vec_oprnds = vNULL;
2538 gimple *new_stmt = NULL;
2539 stmt_vec_info prev_stmt_info = NULL;
2540 for (unsigned j = 0; j < ncopies; j++)
2542 /* Handle uses. */
2543 if (j == 0)
2544 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2545 else
2546 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2548 /* Arguments are ready. Create the new vector stmt. */
2549 unsigned i;
2550 tree vop;
2551 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2553 tree tem = make_ssa_name (char_vectype);
2554 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2555 char_vectype, vop));
2556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2557 tree tem2 = make_ssa_name (char_vectype);
2558 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2559 tem, tem, bswap_vconst);
2560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2561 tem = make_ssa_name (vectype);
2562 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2563 vectype, tem2));
2564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2565 if (slp_node)
2566 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2569 if (slp_node)
2570 continue;
2572 if (j == 0)
2573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2574 else
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2577 prev_stmt_info = vinfo_for_stmt (new_stmt);
2580 vec_oprnds.release ();
2581 return true;
2584 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2585 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2586 in a single step. On success, store the binary pack code in
2587 *CONVERT_CODE. */
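/* For example, narrowing a vector of 32-bit integers to a vector of 16-bit
   integers is usually a single pack operation and is accepted, whereas a
   32-bit to 8-bit narrowing needs more than one step and is rejected.  */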
2589 static bool
2590 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2591 tree_code *convert_code)
2593 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2594 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2595 return false;
2597 tree_code code;
2598 int multi_step_cvt = 0;
2599 auto_vec <tree, 8> interm_types;
2600 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2601 &code, &multi_step_cvt,
2602 &interm_types)
2603 || multi_step_cvt)
2604 return false;
2606 *convert_code = code;
2607 return true;
2610 /* Function vectorizable_call.
2612 Check if GS performs a function call that can be vectorized.
2613 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2614 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2615 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2617 static bool
2618 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2619 slp_tree slp_node)
2621 gcall *stmt;
2622 tree vec_dest;
2623 tree scalar_dest;
2624 tree op, type;
2625 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2626 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2627 tree vectype_out, vectype_in;
2628 int nunits_in;
2629 int nunits_out;
2630 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2632 vec_info *vinfo = stmt_info->vinfo;
2633 tree fndecl, new_temp, rhs_type;
2634 gimple *def_stmt;
2635 enum vect_def_type dt[3]
2636 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2637 int ndts = 3;
2638 gimple *new_stmt = NULL;
2639 int ncopies, j;
2640 vec<tree> vargs = vNULL;
2641 enum { NARROW, NONE, WIDEN } modifier;
2642 size_t i, nargs;
2643 tree lhs;
2645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2646 return false;
2648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2649 && ! vec_stmt)
2650 return false;
2652 /* Is GS a vectorizable call? */
2653 stmt = dyn_cast <gcall *> (gs);
2654 if (!stmt)
2655 return false;
2657 if (gimple_call_internal_p (stmt)
2658 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2659 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2660 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2661 slp_node);
2663 if (gimple_call_lhs (stmt) == NULL_TREE
2664 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2665 return false;
2667 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2669 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2671 /* Process function arguments. */
2672 rhs_type = NULL_TREE;
2673 vectype_in = NULL_TREE;
2674 nargs = gimple_call_num_args (stmt);
2676 /* Bail out if the function has more than three arguments; we do not have
2677 interesting builtin functions to vectorize with more than two arguments
2678 except for fma. Having no arguments is also not supported. */
2679 if (nargs == 0 || nargs > 3)
2680 return false;
2682 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2683 if (gimple_call_internal_p (stmt)
2684 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2686 nargs = 0;
2687 rhs_type = unsigned_type_node;
2690 for (i = 0; i < nargs; i++)
2692 tree opvectype;
2694 op = gimple_call_arg (stmt, i);
2696 /* We can only handle calls with arguments of the same type. */
2697 if (rhs_type
2698 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2700 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2702 "argument types differ.\n");
2703 return false;
2705 if (!rhs_type)
2706 rhs_type = TREE_TYPE (op);
2708 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2712 "use not simple.\n");
2713 return false;
2716 if (!vectype_in)
2717 vectype_in = opvectype;
2718 else if (opvectype
2719 && opvectype != vectype_in)
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2723 "argument vector types differ.\n");
2724 return false;
2727 /* If all arguments are external or constant defs use a vector type with
2728 the same size as the output vector type. */
2729 if (!vectype_in)
2730 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2731 if (vec_stmt)
2732 gcc_assert (vectype_in);
2733 if (!vectype_in)
2735 if (dump_enabled_p ())
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2738 "no vectype for scalar type ");
2739 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2740 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2743 return false;
2746 /* FORNOW */
2747 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2748 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2749 if (nunits_in == nunits_out / 2)
2750 modifier = NARROW;
2751 else if (nunits_out == nunits_in)
2752 modifier = NONE;
2753 else if (nunits_out == nunits_in / 2)
2754 modifier = WIDEN;
2755 else
2756 return false;
2758 /* We only handle functions that do not read or clobber memory. */
2759 if (gimple_vuse (stmt))
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763 "function reads from or writes to memory.\n");
2764 return false;
2767 /* For now, we only vectorize functions if a target specific builtin
2768 is available. TODO -- in some cases, it might be profitable to
2769 insert the calls for pieces of the vector, in order to be able
2770 to vectorize other operations in the loop. */
2771 fndecl = NULL_TREE;
2772 internal_fn ifn = IFN_LAST;
2773 combined_fn cfn = gimple_call_combined_fn (stmt);
2774 tree callee = gimple_call_fndecl (stmt);
2776 /* First try using an internal function. */
2777 tree_code convert_code = ERROR_MARK;
2778 if (cfn != CFN_LAST
2779 && (modifier == NONE
2780 || (modifier == NARROW
2781 && simple_integer_narrowing (vectype_out, vectype_in,
2782 &convert_code))))
2783 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2784 vectype_in);
2786 /* If that fails, try asking for a target-specific built-in function. */
2787 if (ifn == IFN_LAST)
2789 if (cfn != CFN_LAST)
2790 fndecl = targetm.vectorize.builtin_vectorized_function
2791 (cfn, vectype_out, vectype_in);
2792 else
2793 fndecl = targetm.vectorize.builtin_md_vectorized_function
2794 (callee, vectype_out, vectype_in);
2797 if (ifn == IFN_LAST && !fndecl)
2799 if (cfn == CFN_GOMP_SIMD_LANE
2800 && !slp_node
2801 && loop_vinfo
2802 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2803 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2804 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2805 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2807 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2808 { 0, 1, 2, ... vf - 1 } vector. */
2809 gcc_assert (nargs == 0);
2811 else if (modifier == NONE
2812 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2813 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2814 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2815 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2816 vectype_in, dt);
2817 else
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2821 "function is not vectorizable.\n");
2822 return false;
2826 if (slp_node)
2827 ncopies = 1;
2828 else if (modifier == NARROW && ifn == IFN_LAST)
2829 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2830 else
2831 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2833 /* Sanity check: make sure that at least one copy of the vectorized stmt
2834 needs to be generated. */
2835 gcc_assert (ncopies >= 1);
2837 if (!vec_stmt) /* transformation not required. */
2839 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2840 if (dump_enabled_p ())
2841 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2842 "\n");
2843 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2844 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2845 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2846 vec_promote_demote, stmt_info, 0, vect_body);
2848 return true;
2851 /* Transform. */
2853 if (dump_enabled_p ())
2854 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2856 /* Handle def. */
2857 scalar_dest = gimple_call_lhs (stmt);
2858 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2860 prev_stmt_info = NULL;
2861 if (modifier == NONE || ifn != IFN_LAST)
2863 tree prev_res = NULL_TREE;
2864 for (j = 0; j < ncopies; ++j)
2866 /* Build argument list for the vectorized call. */
2867 if (j == 0)
2868 vargs.create (nargs);
2869 else
2870 vargs.truncate (0);
2872 if (slp_node)
2874 auto_vec<vec<tree> > vec_defs (nargs);
2875 vec<tree> vec_oprnds0;
2877 for (i = 0; i < nargs; i++)
2878 vargs.quick_push (gimple_call_arg (stmt, i));
2879 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2880 vec_oprnds0 = vec_defs[0];
2882 /* Arguments are ready. Create the new vector stmt. */
2883 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2885 size_t k;
2886 for (k = 0; k < nargs; k++)
2888 vec<tree> vec_oprndsk = vec_defs[k];
2889 vargs[k] = vec_oprndsk[i];
2891 if (modifier == NARROW)
2893 tree half_res = make_ssa_name (vectype_in);
2894 gcall *call
2895 = gimple_build_call_internal_vec (ifn, vargs);
2896 gimple_call_set_lhs (call, half_res);
2897 gimple_call_set_nothrow (call, true);
2898 new_stmt = call;
2899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2900 if ((i & 1) == 0)
2902 prev_res = half_res;
2903 continue;
2905 new_temp = make_ssa_name (vec_dest);
2906 new_stmt = gimple_build_assign (new_temp, convert_code,
2907 prev_res, half_res);
2909 else
2911 gcall *call;
2912 if (ifn != IFN_LAST)
2913 call = gimple_build_call_internal_vec (ifn, vargs);
2914 else
2915 call = gimple_build_call_vec (fndecl, vargs);
2916 new_temp = make_ssa_name (vec_dest, call);
2917 gimple_call_set_lhs (call, new_temp);
2918 gimple_call_set_nothrow (call, true);
2919 new_stmt = call;
2921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2922 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2925 for (i = 0; i < nargs; i++)
2927 vec<tree> vec_oprndsi = vec_defs[i];
2928 vec_oprndsi.release ();
2930 continue;
2933 for (i = 0; i < nargs; i++)
2935 op = gimple_call_arg (stmt, i);
2936 if (j == 0)
2937 vec_oprnd0
2938 = vect_get_vec_def_for_operand (op, stmt);
2939 else
2941 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2942 vec_oprnd0
2943 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2946 vargs.quick_push (vec_oprnd0);
2949 if (gimple_call_internal_p (stmt)
2950 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2952 tree_vector_builder v (vectype_out, 1, 3);
2953 for (int k = 0; k < 3; ++k)
2954 v.quick_push (build_int_cst (unsigned_type_node,
2955 j * nunits_out + k));
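/* The three constants pushed above, J * NUNITS_OUT + { 0, 1, 2 }, form a
   single stepped pattern, so the vector built below is
   { J * NUNITS_OUT, ..., (J + 1) * NUNITS_OUT - 1 }, i.e. the lane numbers
   covered by this copy.  */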
2956 tree cst = v.build ();
2957 tree new_var
2958 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2959 gimple *init_stmt = gimple_build_assign (new_var, cst);
2960 vect_init_vector_1 (stmt, init_stmt, NULL);
2961 new_temp = make_ssa_name (vec_dest);
2962 new_stmt = gimple_build_assign (new_temp, new_var);
2964 else if (modifier == NARROW)
2966 tree half_res = make_ssa_name (vectype_in);
2967 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2968 gimple_call_set_lhs (call, half_res);
2969 gimple_call_set_nothrow (call, true);
2970 new_stmt = call;
2971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2972 if ((j & 1) == 0)
2974 prev_res = half_res;
2975 continue;
2977 new_temp = make_ssa_name (vec_dest);
2978 new_stmt = gimple_build_assign (new_temp, convert_code,
2979 prev_res, half_res);
2981 else
2983 gcall *call;
2984 if (ifn != IFN_LAST)
2985 call = gimple_build_call_internal_vec (ifn, vargs);
2986 else
2987 call = gimple_build_call_vec (fndecl, vargs);
2988 new_temp = make_ssa_name (vec_dest, new_stmt);
2989 gimple_call_set_lhs (call, new_temp);
2990 gimple_call_set_nothrow (call, true);
2991 new_stmt = call;
2993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2995 if (j == (modifier == NARROW ? 1 : 0))
2996 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2997 else
2998 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3000 prev_stmt_info = vinfo_for_stmt (new_stmt);
3003 else if (modifier == NARROW)
3005 for (j = 0; j < ncopies; ++j)
3007 /* Build argument list for the vectorized call. */
3008 if (j == 0)
3009 vargs.create (nargs * 2);
3010 else
3011 vargs.truncate (0);
3013 if (slp_node)
3015 auto_vec<vec<tree> > vec_defs (nargs);
3016 vec<tree> vec_oprnds0;
3018 for (i = 0; i < nargs; i++)
3019 vargs.quick_push (gimple_call_arg (stmt, i));
3020 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3021 vec_oprnds0 = vec_defs[0];
3023 /* Arguments are ready. Create the new vector stmt. */
3024 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3026 size_t k;
3027 vargs.truncate (0);
3028 for (k = 0; k < nargs; k++)
3030 vec<tree> vec_oprndsk = vec_defs[k];
3031 vargs.quick_push (vec_oprndsk[i]);
3032 vargs.quick_push (vec_oprndsk[i + 1]);
3034 gcall *call;
3035 if (ifn != IFN_LAST)
3036 call = gimple_build_call_internal_vec (ifn, vargs);
3037 else
3038 call = gimple_build_call_vec (fndecl, vargs);
3039 new_temp = make_ssa_name (vec_dest, call);
3040 gimple_call_set_lhs (call, new_temp);
3041 gimple_call_set_nothrow (call, true);
3042 new_stmt = call;
3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3044 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3047 for (i = 0; i < nargs; i++)
3049 vec<tree> vec_oprndsi = vec_defs[i];
3050 vec_oprndsi.release ();
3052 continue;
3055 for (i = 0; i < nargs; i++)
3057 op = gimple_call_arg (stmt, i);
3058 if (j == 0)
3060 vec_oprnd0
3061 = vect_get_vec_def_for_operand (op, stmt);
3062 vec_oprnd1
3063 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3065 else
3067 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3068 vec_oprnd0
3069 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3070 vec_oprnd1
3071 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3074 vargs.quick_push (vec_oprnd0);
3075 vargs.quick_push (vec_oprnd1);
3078 new_stmt = gimple_build_call_vec (fndecl, vargs);
3079 new_temp = make_ssa_name (vec_dest, new_stmt);
3080 gimple_call_set_lhs (new_stmt, new_temp);
3081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3083 if (j == 0)
3084 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3085 else
3086 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3088 prev_stmt_info = vinfo_for_stmt (new_stmt);
3091 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3093 else
3094 /* No current target implements this case. */
3095 return false;
3097 vargs.release ();
3099 /* The call in STMT might prevent it from being removed in dce.
3100 However, we cannot remove it here, due to the way the ssa name
3101 it defines is mapped to the new definition. So just replace the
3102 rhs of the statement with something harmless. */
3104 if (slp_node)
3105 return true;
3107 type = TREE_TYPE (scalar_dest);
3108 if (is_pattern_stmt_p (stmt_info))
3109 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3110 else
3111 lhs = gimple_call_lhs (stmt);
3113 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3114 set_vinfo_for_stmt (new_stmt, stmt_info);
3115 set_vinfo_for_stmt (stmt, NULL);
3116 STMT_VINFO_STMT (stmt_info) = new_stmt;
3117 gsi_replace (gsi, new_stmt, false);
3119 return true;
3123 struct simd_call_arg_info
3125 tree vectype;
3126 tree op;
3127 HOST_WIDE_INT linear_step;
3128 enum vect_def_type dt;
3129 unsigned int align;
3130 bool simd_lane_linear;
3133 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3134 is linear within simd lane (but not within whole loop), note it in
3135 *ARGINFO. */
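/* Roughly, the walk below recognizes definitions of the form
   OP = BASE p+ X, where X reduces through PLUS_EXPR, MULT_EXPR and
   conversions to LANE * LINEAR_STEP with LANE the result of an
   IFN_GOMP_SIMD_LANE call for this loop's simduid; BASE and LINEAR_STEP
   are then recorded in *ARGINFO.  */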
3137 static void
3138 vect_simd_lane_linear (tree op, struct loop *loop,
3139 struct simd_call_arg_info *arginfo)
3141 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3143 if (!is_gimple_assign (def_stmt)
3144 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3145 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3146 return;
3148 tree base = gimple_assign_rhs1 (def_stmt);
3149 HOST_WIDE_INT linear_step = 0;
3150 tree v = gimple_assign_rhs2 (def_stmt);
3151 while (TREE_CODE (v) == SSA_NAME)
3153 tree t;
3154 def_stmt = SSA_NAME_DEF_STMT (v);
3155 if (is_gimple_assign (def_stmt))
3156 switch (gimple_assign_rhs_code (def_stmt))
3158 case PLUS_EXPR:
3159 t = gimple_assign_rhs2 (def_stmt);
3160 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3161 return;
3162 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3163 v = gimple_assign_rhs1 (def_stmt);
3164 continue;
3165 case MULT_EXPR:
3166 t = gimple_assign_rhs2 (def_stmt);
3167 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3168 return;
3169 linear_step = tree_to_shwi (t);
3170 v = gimple_assign_rhs1 (def_stmt);
3171 continue;
3172 CASE_CONVERT:
3173 t = gimple_assign_rhs1 (def_stmt);
3174 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3175 || (TYPE_PRECISION (TREE_TYPE (v))
3176 < TYPE_PRECISION (TREE_TYPE (t))))
3177 return;
3178 if (!linear_step)
3179 linear_step = 1;
3180 v = t;
3181 continue;
3182 default:
3183 return;
3185 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3186 && loop->simduid
3187 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3188 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3189 == loop->simduid))
3191 if (!linear_step)
3192 linear_step = 1;
3193 arginfo->linear_step = linear_step;
3194 arginfo->op = base;
3195 arginfo->simd_lane_linear = true;
3196 return;
3201 /* Function vectorizable_simd_clone_call.
3203 Check if STMT performs a function call that can be vectorized
3204 by calling a simd clone of the function.
3205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3206 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3209 static bool
3210 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3211 gimple **vec_stmt, slp_tree slp_node)
3213 tree vec_dest;
3214 tree scalar_dest;
3215 tree op, type;
3216 tree vec_oprnd0 = NULL_TREE;
3217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3218 tree vectype;
3219 unsigned int nunits;
3220 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3221 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3222 vec_info *vinfo = stmt_info->vinfo;
3223 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3224 tree fndecl, new_temp;
3225 gimple *def_stmt;
3226 gimple *new_stmt = NULL;
3227 int ncopies, j;
3228 auto_vec<simd_call_arg_info> arginfo;
3229 vec<tree> vargs = vNULL;
3230 size_t i, nargs;
3231 tree lhs, rtype, ratype;
3232 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3234 /* Is STMT a vectorizable call? */
3235 if (!is_gimple_call (stmt))
3236 return false;
3238 fndecl = gimple_call_fndecl (stmt);
3239 if (fndecl == NULL_TREE)
3240 return false;
3242 struct cgraph_node *node = cgraph_node::get (fndecl);
3243 if (node == NULL || node->simd_clones == NULL)
3244 return false;
3246 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3247 return false;
3249 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3250 && ! vec_stmt)
3251 return false;
3253 if (gimple_call_lhs (stmt)
3254 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3255 return false;
3257 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3259 vectype = STMT_VINFO_VECTYPE (stmt_info);
3261 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3262 return false;
3264 /* FORNOW */
3265 if (slp_node)
3266 return false;
3268 /* Process function arguments. */
3269 nargs = gimple_call_num_args (stmt);
3271 /* Bail out if the function has zero arguments. */
3272 if (nargs == 0)
3273 return false;
3275 arginfo.reserve (nargs, true);
3277 for (i = 0; i < nargs; i++)
3279 simd_call_arg_info thisarginfo;
3280 affine_iv iv;
3282 thisarginfo.linear_step = 0;
3283 thisarginfo.align = 0;
3284 thisarginfo.op = NULL_TREE;
3285 thisarginfo.simd_lane_linear = false;
3287 op = gimple_call_arg (stmt, i);
3288 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3289 &thisarginfo.vectype)
3290 || thisarginfo.dt == vect_uninitialized_def)
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "use not simple.\n");
3295 return false;
3298 if (thisarginfo.dt == vect_constant_def
3299 || thisarginfo.dt == vect_external_def)
3300 gcc_assert (thisarginfo.vectype == NULL_TREE);
3301 else
3302 gcc_assert (thisarginfo.vectype != NULL_TREE);
3304 /* For linear arguments, the analyze phase should have saved
3305 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3306 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3307 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3309 gcc_assert (vec_stmt);
3310 thisarginfo.linear_step
3311 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3312 thisarginfo.op
3313 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3314 thisarginfo.simd_lane_linear
3315 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3316 == boolean_true_node);
3317 /* If loop has been peeled for alignment, we need to adjust it. */
3318 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3319 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3320 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3322 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3323 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3324 tree opt = TREE_TYPE (thisarginfo.op);
3325 bias = fold_convert (TREE_TYPE (step), bias);
3326 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3327 thisarginfo.op
3328 = fold_build2 (POINTER_TYPE_P (opt)
3329 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3330 thisarginfo.op, bias);
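/* I.e. peeling removed N1 - N2 scalar iterations, so the recorded start
   value of the linear argument advances by (N1 - N2) * STEP.  */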
3333 else if (!vec_stmt
3334 && thisarginfo.dt != vect_constant_def
3335 && thisarginfo.dt != vect_external_def
3336 && loop_vinfo
3337 && TREE_CODE (op) == SSA_NAME
3338 && simple_iv (loop, loop_containing_stmt (stmt), op,
3339 &iv, false)
3340 && tree_fits_shwi_p (iv.step))
3342 thisarginfo.linear_step = tree_to_shwi (iv.step);
3343 thisarginfo.op = iv.base;
3345 else if ((thisarginfo.dt == vect_constant_def
3346 || thisarginfo.dt == vect_external_def)
3347 && POINTER_TYPE_P (TREE_TYPE (op)))
3348 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3349 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3350 linear too. */
3351 if (POINTER_TYPE_P (TREE_TYPE (op))
3352 && !thisarginfo.linear_step
3353 && !vec_stmt
3354 && thisarginfo.dt != vect_constant_def
3355 && thisarginfo.dt != vect_external_def
3356 && loop_vinfo
3357 && !slp_node
3358 && TREE_CODE (op) == SSA_NAME)
3359 vect_simd_lane_linear (op, loop, &thisarginfo);
3361 arginfo.quick_push (thisarginfo);
3364 unsigned int badness = 0;
3365 struct cgraph_node *bestn = NULL;
3366 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3367 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3368 else
3369 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3370 n = n->simdclone->next_clone)
3372 unsigned int this_badness = 0;
3373 if (n->simdclone->simdlen
3374 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3375 || n->simdclone->nargs != nargs)
3376 continue;
3377 if (n->simdclone->simdlen
3378 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3379 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3380 - exact_log2 (n->simdclone->simdlen)) * 1024;
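/* For example, with a vectorization factor of 8 a clone with simdlen == 4
   is penalized by (log2 (8) - log2 (4)) * 1024 == 1024, since two calls
   per vector iteration would be needed.  */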
3381 if (n->simdclone->inbranch)
3382 this_badness += 2048;
3383 int target_badness = targetm.simd_clone.usable (n);
3384 if (target_badness < 0)
3385 continue;
3386 this_badness += target_badness * 512;
3387 /* FORNOW: Have to add code to add the mask argument. */
3388 if (n->simdclone->inbranch)
3389 continue;
3390 for (i = 0; i < nargs; i++)
3392 switch (n->simdclone->args[i].arg_type)
3394 case SIMD_CLONE_ARG_TYPE_VECTOR:
3395 if (!useless_type_conversion_p
3396 (n->simdclone->args[i].orig_type,
3397 TREE_TYPE (gimple_call_arg (stmt, i))))
3398 i = -1;
3399 else if (arginfo[i].dt == vect_constant_def
3400 || arginfo[i].dt == vect_external_def
3401 || arginfo[i].linear_step)
3402 this_badness += 64;
3403 break;
3404 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3405 if (arginfo[i].dt != vect_constant_def
3406 && arginfo[i].dt != vect_external_def)
3407 i = -1;
3408 break;
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3411 if (arginfo[i].dt == vect_constant_def
3412 || arginfo[i].dt == vect_external_def
3413 || (arginfo[i].linear_step
3414 != n->simdclone->args[i].linear_step))
3415 i = -1;
3416 break;
3417 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3418 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3419 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3420 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3421 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3422 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3423 /* FORNOW */
3424 i = -1;
3425 break;
3426 case SIMD_CLONE_ARG_TYPE_MASK:
3427 gcc_unreachable ();
3429 if (i == (size_t) -1)
3430 break;
3431 if (n->simdclone->args[i].alignment > arginfo[i].align)
3433 i = -1;
3434 break;
3436 if (arginfo[i].align)
3437 this_badness += (exact_log2 (arginfo[i].align)
3438 - exact_log2 (n->simdclone->args[i].alignment));
3440 if (i == (size_t) -1)
3441 continue;
3442 if (bestn == NULL || this_badness < badness)
3444 bestn = n;
3445 badness = this_badness;
3449 if (bestn == NULL)
3450 return false;
3452 for (i = 0; i < nargs; i++)
3453 if ((arginfo[i].dt == vect_constant_def
3454 || arginfo[i].dt == vect_external_def)
3455 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3457 arginfo[i].vectype
3458 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3459 i)));
3460 if (arginfo[i].vectype == NULL
3461 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3462 > bestn->simdclone->simdlen))
3463 return false;
3466 fndecl = bestn->decl;
3467 nunits = bestn->simdclone->simdlen;
3468 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3470 /* If the function isn't const, only allow it in simd loops where the
3471 user has asserted that at least nunits consecutive iterations can be
3472 performed using SIMD instructions. */
3473 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3474 && gimple_vuse (stmt))
3475 return false;
3477 /* Sanity check: make sure that at least one copy of the vectorized stmt
3478 needs to be generated. */
3479 gcc_assert (ncopies >= 1);
3481 if (!vec_stmt) /* transformation not required. */
3483 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3484 for (i = 0; i < nargs; i++)
3485 if ((bestn->simdclone->args[i].arg_type
3486 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3487 || (bestn->simdclone->args[i].arg_type
3488 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3490 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3491 + 1);
3492 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3493 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3494 ? size_type_node : TREE_TYPE (arginfo[i].op);
3495 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3496 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3497 tree sll = arginfo[i].simd_lane_linear
3498 ? boolean_true_node : boolean_false_node;
3499 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3501 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3502 if (dump_enabled_p ())
3503 dump_printf_loc (MSG_NOTE, vect_location,
3504 "=== vectorizable_simd_clone_call ===\n");
3505 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3506 return true;
3509 /* Transform. */
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3514 /* Handle def. */
3515 scalar_dest = gimple_call_lhs (stmt);
3516 vec_dest = NULL_TREE;
3517 rtype = NULL_TREE;
3518 ratype = NULL_TREE;
3519 if (scalar_dest)
3521 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3522 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3523 if (TREE_CODE (rtype) == ARRAY_TYPE)
3525 ratype = rtype;
3526 rtype = TREE_TYPE (ratype);
3530 prev_stmt_info = NULL;
3531 for (j = 0; j < ncopies; ++j)
3533 /* Build argument list for the vectorized call. */
3534 if (j == 0)
3535 vargs.create (nargs);
3536 else
3537 vargs.truncate (0);
3539 for (i = 0; i < nargs; i++)
3541 unsigned int k, l, m, o;
3542 tree atype;
3543 op = gimple_call_arg (stmt, i);
3544 switch (bestn->simdclone->args[i].arg_type)
3546 case SIMD_CLONE_ARG_TYPE_VECTOR:
3547 atype = bestn->simdclone->args[i].vector_type;
3548 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3549 for (m = j * o; m < (j + 1) * o; m++)
3551 if (TYPE_VECTOR_SUBPARTS (atype)
3552 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3554 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3555 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3556 / TYPE_VECTOR_SUBPARTS (atype));
3557 gcc_assert ((k & (k - 1)) == 0);
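/* The clone expects narrower vector arguments than the vectorized operand
   provides; e.g. a 4-element clone argument fed from an 8-element operand
   gives K == 2, and each call argument is a BIT_FIELD_REF extracting one
   of the K sub-vectors of PREC bits.  */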
3558 if (m == 0)
3559 vec_oprnd0
3560 = vect_get_vec_def_for_operand (op, stmt);
3561 else
3563 vec_oprnd0 = arginfo[i].op;
3564 if ((m & (k - 1)) == 0)
3565 vec_oprnd0
3566 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3567 vec_oprnd0);
3569 arginfo[i].op = vec_oprnd0;
3570 vec_oprnd0
3571 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3572 bitsize_int (prec),
3573 bitsize_int ((m & (k - 1)) * prec));
3574 new_stmt
3575 = gimple_build_assign (make_ssa_name (atype),
3576 vec_oprnd0);
3577 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3578 vargs.safe_push (gimple_assign_lhs (new_stmt));
3580 else
3582 k = (TYPE_VECTOR_SUBPARTS (atype)
3583 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3584 gcc_assert ((k & (k - 1)) == 0);
3585 vec<constructor_elt, va_gc> *ctor_elts;
3586 if (k != 1)
3587 vec_alloc (ctor_elts, k);
3588 else
3589 ctor_elts = NULL;
3590 for (l = 0; l < k; l++)
3592 if (m == 0 && l == 0)
3593 vec_oprnd0
3594 = vect_get_vec_def_for_operand (op, stmt);
3595 else
3596 vec_oprnd0
3597 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3598 arginfo[i].op);
3599 arginfo[i].op = vec_oprnd0;
3600 if (k == 1)
3601 break;
3602 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3603 vec_oprnd0);
3605 if (k == 1)
3606 vargs.safe_push (vec_oprnd0);
3607 else
3609 vec_oprnd0 = build_constructor (atype, ctor_elts);
3610 new_stmt
3611 = gimple_build_assign (make_ssa_name (atype),
3612 vec_oprnd0);
3613 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3614 vargs.safe_push (gimple_assign_lhs (new_stmt));
3618 break;
3619 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3620 vargs.safe_push (op);
3621 break;
3622 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3623 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3624 if (j == 0)
3626 gimple_seq stmts;
3627 arginfo[i].op
3628 = force_gimple_operand (arginfo[i].op, &stmts, true,
3629 NULL_TREE);
3630 if (stmts != NULL)
3632 basic_block new_bb;
3633 edge pe = loop_preheader_edge (loop);
3634 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3635 gcc_assert (!new_bb);
3637 if (arginfo[i].simd_lane_linear)
3639 vargs.safe_push (arginfo[i].op);
3640 break;
3642 tree phi_res = copy_ssa_name (op);
3643 gphi *new_phi = create_phi_node (phi_res, loop->header);
3644 set_vinfo_for_stmt (new_phi,
3645 new_stmt_vec_info (new_phi, loop_vinfo));
3646 add_phi_arg (new_phi, arginfo[i].op,
3647 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3648 enum tree_code code
3649 = POINTER_TYPE_P (TREE_TYPE (op))
3650 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3651 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3652 ? sizetype : TREE_TYPE (op);
3653 widest_int cst
3654 = wi::mul (bestn->simdclone->args[i].linear_step,
3655 ncopies * nunits);
3656 tree tcst = wide_int_to_tree (type, cst);
3657 tree phi_arg = copy_ssa_name (op);
3658 new_stmt
3659 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3660 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3661 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3662 set_vinfo_for_stmt (new_stmt,
3663 new_stmt_vec_info (new_stmt, loop_vinfo));
3664 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3665 UNKNOWN_LOCATION);
3666 arginfo[i].op = phi_res;
3667 vargs.safe_push (phi_res);
3669 else
3671 enum tree_code code
3672 = POINTER_TYPE_P (TREE_TYPE (op))
3673 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3674 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3675 ? sizetype : TREE_TYPE (op);
3676 widest_int cst
3677 = wi::mul (bestn->simdclone->args[i].linear_step,
3678 j * nunits);
3679 tree tcst = wide_int_to_tree (type, cst);
3680 new_temp = make_ssa_name (TREE_TYPE (op));
3681 new_stmt = gimple_build_assign (new_temp, code,
3682 arginfo[i].op, tcst);
3683 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3684 vargs.safe_push (new_temp);
3686 break;
3687 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3688 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3689 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3690 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3691 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3692 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3693 default:
3694 gcc_unreachable ();
3698 new_stmt = gimple_build_call_vec (fndecl, vargs);
3699 if (vec_dest)
3701 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3702 if (ratype)
3703 new_temp = create_tmp_var (ratype);
3704 else if (TYPE_VECTOR_SUBPARTS (vectype)
3705 == TYPE_VECTOR_SUBPARTS (rtype))
3706 new_temp = make_ssa_name (vec_dest, new_stmt);
3707 else
3708 new_temp = make_ssa_name (rtype, new_stmt);
3709 gimple_call_set_lhs (new_stmt, new_temp);
3711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3713 if (vec_dest)
3715 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3717 unsigned int k, l;
3718 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3719 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3720 gcc_assert ((k & (k - 1)) == 0);
3721 for (l = 0; l < k; l++)
3723 tree t;
3724 if (ratype)
3726 t = build_fold_addr_expr (new_temp);
3727 t = build2 (MEM_REF, vectype, t,
3728 build_int_cst (TREE_TYPE (t),
3729 l * prec / BITS_PER_UNIT));
3731 else
3732 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3733 bitsize_int (prec), bitsize_int (l * prec));
3734 new_stmt
3735 = gimple_build_assign (make_ssa_name (vectype), t);
3736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3737 if (j == 0 && l == 0)
3738 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3739 else
3740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3742 prev_stmt_info = vinfo_for_stmt (new_stmt);
3745 if (ratype)
3747 tree clobber = build_constructor (ratype, NULL);
3748 TREE_THIS_VOLATILE (clobber) = 1;
3749 new_stmt = gimple_build_assign (new_temp, clobber);
3750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3752 continue;
3754 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3756 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3757 / TYPE_VECTOR_SUBPARTS (rtype));
3758 gcc_assert ((k & (k - 1)) == 0);
3759 if ((j & (k - 1)) == 0)
3760 vec_alloc (ret_ctor_elts, k);
3761 if (ratype)
3763 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3764 for (m = 0; m < o; m++)
3766 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3767 size_int (m), NULL_TREE, NULL_TREE);
3768 new_stmt
3769 = gimple_build_assign (make_ssa_name (rtype), tem);
3770 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3771 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3772 gimple_assign_lhs (new_stmt));
3774 tree clobber = build_constructor (ratype, NULL);
3775 TREE_THIS_VOLATILE (clobber) = 1;
3776 new_stmt = gimple_build_assign (new_temp, clobber);
3777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3779 else
3780 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3781 if ((j & (k - 1)) != k - 1)
3782 continue;
3783 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3784 new_stmt
3785 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3786 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3788 if ((unsigned) j == k - 1)
3789 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3790 else
3791 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3793 prev_stmt_info = vinfo_for_stmt (new_stmt);
3794 continue;
3796 else if (ratype)
3798 tree t = build_fold_addr_expr (new_temp);
3799 t = build2 (MEM_REF, vectype, t,
3800 build_int_cst (TREE_TYPE (t), 0));
3801 new_stmt
3802 = gimple_build_assign (make_ssa_name (vec_dest), t);
3803 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3804 tree clobber = build_constructor (ratype, NULL);
3805 TREE_THIS_VOLATILE (clobber) = 1;
3806 vect_finish_stmt_generation (stmt,
3807 gimple_build_assign (new_temp,
3808 clobber), gsi);
3812 if (j == 0)
3813 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3814 else
3815 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3817 prev_stmt_info = vinfo_for_stmt (new_stmt);
3820 vargs.release ();
3822 /* The call in STMT might prevent it from being removed by DCE.
3823 However, we cannot remove it here, because of the way the SSA name
3824 it defines is mapped to the new definition. So just replace the
3825 rhs of the statement with something harmless. */
3827 if (slp_node)
3828 return true;
3830 if (scalar_dest)
3832 type = TREE_TYPE (scalar_dest);
3833 if (is_pattern_stmt_p (stmt_info))
3834 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3835 else
3836 lhs = gimple_call_lhs (stmt);
3837 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3839 else
3840 new_stmt = gimple_build_nop ();
3841 set_vinfo_for_stmt (new_stmt, stmt_info);
3842 set_vinfo_for_stmt (stmt, NULL);
3843 STMT_VINFO_STMT (stmt_info) = new_stmt;
3844 gsi_replace (gsi, new_stmt, true);
3845 unlink_stmt_vdef (stmt);
3847 return true;
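/* Editorial sketch, not part of the original source: the code above
   vectorizes calls to SIMD clones, i.e. the vector variants GCC creates for
   functions declared with "#pragma omp declare simd" or
   __attribute__ ((simd)).  A minimal, assumed source-level example of the
   kind of call site handled here:

     #pragma omp declare simd
     extern int f (int x);

     void
     use (int *restrict out, const int *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = f (in[i]);
     }

   The scalar call to f is replaced by a call to one of its vector clones;
   the BIT_FIELD_REF and CONSTRUCTOR handling above repacks the vectorized
   arguments and return value when the clone's vector types do not match
   the loop's chosen vector types.  */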
3851 /* Function vect_gen_widened_results_half
3853 Create a vector stmt whose code, operand count, and result variable are
3854 CODE, OP_TYPE, and VEC_DEST, and whose operands are VEC_OPRND0 and
3855 VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3856 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3857 needs to be created (DECL is a function-decl of a target-builtin).
3858 STMT is the original scalar stmt that we are vectorizing. */
3860 static gimple *
3861 vect_gen_widened_results_half (enum tree_code code,
3862 tree decl,
3863 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3864 tree vec_dest, gimple_stmt_iterator *gsi,
3865 gimple *stmt)
3867 gimple *new_stmt;
3868 tree new_temp;
3870 /* Generate half of the widened result: */
3871 if (code == CALL_EXPR)
3873 /* Target specific support */
3874 if (op_type == binary_op)
3875 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3876 else
3877 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3878 new_temp = make_ssa_name (vec_dest, new_stmt);
3879 gimple_call_set_lhs (new_stmt, new_temp);
3881 else
3883 /* Generic support */
3884 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3885 if (op_type != binary_op)
3886 vec_oprnd1 = NULL;
3887 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3888 new_temp = make_ssa_name (vec_dest, new_stmt);
3889 gimple_assign_set_lhs (new_stmt, new_temp);
3891 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3893 return new_stmt;
3897 /* Get vectorized definitions for loop-based vectorization. For the first
3898 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3899 the scalar operand), and for the rest we get a copy with
3900 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3901 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3902 The vectors are collected into VEC_OPRNDS. */
3904 static void
3905 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3906 vec<tree> *vec_oprnds, int multi_step_cvt)
3908 tree vec_oprnd;
3910 /* Get first vector operand. */
3911 /* All the vector operands except the very first one (which is the scalar
3912 operand OPRND) are stmt copies. */
3913 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3914 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3915 else
3916 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3918 vec_oprnds->quick_push (vec_oprnd);
3920 /* Get second vector operand. */
3921 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3922 vec_oprnds->quick_push (vec_oprnd);
3924 *oprnd = vec_oprnd;
3926 /* For conversion in multiple steps, continue to get operands
3927 recursively. */
3928 if (multi_step_cvt)
3929 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
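/* Editorial note, illustrative and not part of the original source: each
   invocation above pushes two vector defs and recurses MULTI_STEP_CVT more
   times, so a two-step demotion (one intermediate type) calls this with
   MULTI_STEP_CVT == 1 and collects four defs in VEC_OPRNDS -- matching the
   2 * vect_pow2 (multi_step_cvt) slots reserved by the NARROW case of
   vectorizable_conversion below.  */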
3933 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3934 For multi-step conversions store the resulting vectors and call the function
3935 recursively. */
3937 static void
3938 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3939 int multi_step_cvt, gimple *stmt,
3940 vec<tree> vec_dsts,
3941 gimple_stmt_iterator *gsi,
3942 slp_tree slp_node, enum tree_code code,
3943 stmt_vec_info *prev_stmt_info)
3945 unsigned int i;
3946 tree vop0, vop1, new_tmp, vec_dest;
3947 gimple *new_stmt;
3948 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3950 vec_dest = vec_dsts.pop ();
3952 for (i = 0; i < vec_oprnds->length (); i += 2)
3954 /* Create demotion operation. */
3955 vop0 = (*vec_oprnds)[i];
3956 vop1 = (*vec_oprnds)[i + 1];
3957 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3958 new_tmp = make_ssa_name (vec_dest, new_stmt);
3959 gimple_assign_set_lhs (new_stmt, new_tmp);
3960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3962 if (multi_step_cvt)
3963 /* Store the resulting vector for next recursive call. */
3964 (*vec_oprnds)[i/2] = new_tmp;
3965 else
3967 /* This is the last step of the conversion sequence. Store the
3968 vectors in SLP_NODE or in the vector info of the scalar statement
3969 (or in the STMT_VINFO_RELATED_STMT chain). */
3970 if (slp_node)
3971 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3972 else
3974 if (!*prev_stmt_info)
3975 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3976 else
3977 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3979 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3984 /* For multi-step demotion operations we first generate demotion operations
3985 from the source type to the intermediate types, and then combine the
3986 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3987 type. */
3988 if (multi_step_cvt)
3990 /* At each level of recursion we have half of the operands we had at the
3991 previous level. */
3992 vec_oprnds->truncate ((i+1)/2);
3993 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3994 stmt, vec_dsts, gsi, slp_node,
3995 VEC_PACK_TRUNC_EXPR,
3996 prev_stmt_info);
3999 vec_dsts.quick_push (vec_dest);
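/* Editorial sketch, an assumed example that is not part of the original
   source: a scalar loop that typically needs the multi-step demotion
   generated above.  With 128-bit vectors, narrowing int to char goes
   through short as an intermediate type:

     void
     narrow (signed char *restrict dst, const int *restrict src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = (signed char) src[i];
     }

   Four V4SI input vectors are packed pairwise into two V8HI vectors, which
   are then packed again into a single V16QI result, with
   VEC_PACK_TRUNC_EXPR used for the recursive step as shown above.  */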
4003 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4004 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4005 the resulting vectors and call the function recursively. */
4007 static void
4008 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4009 vec<tree> *vec_oprnds1,
4010 gimple *stmt, tree vec_dest,
4011 gimple_stmt_iterator *gsi,
4012 enum tree_code code1,
4013 enum tree_code code2, tree decl1,
4014 tree decl2, int op_type)
4016 int i;
4017 tree vop0, vop1, new_tmp1, new_tmp2;
4018 gimple *new_stmt1, *new_stmt2;
4019 vec<tree> vec_tmp = vNULL;
4021 vec_tmp.create (vec_oprnds0->length () * 2);
4022 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4024 if (op_type == binary_op)
4025 vop1 = (*vec_oprnds1)[i];
4026 else
4027 vop1 = NULL_TREE;
4029 /* Generate the two halves of promotion operation. */
4030 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4031 op_type, vec_dest, gsi, stmt);
4032 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4033 op_type, vec_dest, gsi, stmt);
4034 if (is_gimple_call (new_stmt1))
4036 new_tmp1 = gimple_call_lhs (new_stmt1);
4037 new_tmp2 = gimple_call_lhs (new_stmt2);
4039 else
4041 new_tmp1 = gimple_assign_lhs (new_stmt1);
4042 new_tmp2 = gimple_assign_lhs (new_stmt2);
4045 /* Store the results for the next step. */
4046 vec_tmp.quick_push (new_tmp1);
4047 vec_tmp.quick_push (new_tmp2);
4050 vec_oprnds0->release ();
4051 *vec_oprnds0 = vec_tmp;
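/* Editorial note, illustrative and not part of the original source: each
   input vector handed to vect_create_vectorized_promotion_stmts yields two
   result vectors, one from CODE1 and one from CODE2 (for example the lo/hi
   halves of a widening unpack), which is why VEC_TMP is created with twice
   the length of VEC_OPRNDS0 and replaces it on return.  */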
4055 /* Check if STMT performs a conversion operation that can be vectorized.
4056 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4057 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4058 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4060 static bool
4061 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4062 gimple **vec_stmt, slp_tree slp_node)
4064 tree vec_dest;
4065 tree scalar_dest;
4066 tree op0, op1 = NULL_TREE;
4067 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4070 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4071 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4072 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4073 tree new_temp;
4074 gimple *def_stmt;
4075 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4076 int ndts = 2;
4077 gimple *new_stmt = NULL;
4078 stmt_vec_info prev_stmt_info;
4079 int nunits_in;
4080 int nunits_out;
4081 tree vectype_out, vectype_in;
4082 int ncopies, i, j;
4083 tree lhs_type, rhs_type;
4084 enum { NARROW, NONE, WIDEN } modifier;
4085 vec<tree> vec_oprnds0 = vNULL;
4086 vec<tree> vec_oprnds1 = vNULL;
4087 tree vop0;
4088 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4089 vec_info *vinfo = stmt_info->vinfo;
4090 int multi_step_cvt = 0;
4091 vec<tree> interm_types = vNULL;
4092 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4093 int op_type;
4094 unsigned short fltsz;
4096 /* Is STMT a vectorizable conversion? */
4098 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4099 return false;
4101 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4102 && ! vec_stmt)
4103 return false;
4105 if (!is_gimple_assign (stmt))
4106 return false;
4108 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4109 return false;
4111 code = gimple_assign_rhs_code (stmt);
4112 if (!CONVERT_EXPR_CODE_P (code)
4113 && code != FIX_TRUNC_EXPR
4114 && code != FLOAT_EXPR
4115 && code != WIDEN_MULT_EXPR
4116 && code != WIDEN_LSHIFT_EXPR)
4117 return false;
4119 op_type = TREE_CODE_LENGTH (code);
4121 /* Check types of lhs and rhs. */
4122 scalar_dest = gimple_assign_lhs (stmt);
4123 lhs_type = TREE_TYPE (scalar_dest);
4124 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4126 op0 = gimple_assign_rhs1 (stmt);
4127 rhs_type = TREE_TYPE (op0);
4129 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4130 && !((INTEGRAL_TYPE_P (lhs_type)
4131 && INTEGRAL_TYPE_P (rhs_type))
4132 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4133 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4134 return false;
4136 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4137 && ((INTEGRAL_TYPE_P (lhs_type)
4138 && !type_has_mode_precision_p (lhs_type))
4139 || (INTEGRAL_TYPE_P (rhs_type)
4140 && !type_has_mode_precision_p (rhs_type))))
4142 if (dump_enabled_p ())
4143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4144 "type conversion to/from bit-precision unsupported."
4145 "\n");
4146 return false;
4149 /* Check the operands of the operation. */
4150 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4154 "use not simple.\n");
4155 return false;
4157 if (op_type == binary_op)
4159 bool ok;
4161 op1 = gimple_assign_rhs2 (stmt);
4162 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4163 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4164 OP1. */
4165 if (CONSTANT_CLASS_P (op0))
4166 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4167 else
4168 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4170 if (!ok)
4172 if (dump_enabled_p ())
4173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4174 "use not simple.\n");
4175 return false;
4179 /* If op0 is an external or constant def, use a vector type of
4180 the same size as the output vector type. */
4181 if (!vectype_in)
4182 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4183 if (vec_stmt)
4184 gcc_assert (vectype_in);
4185 if (!vectype_in)
4187 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4190 "no vectype for scalar type ");
4191 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4192 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4195 return false;
4198 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4199 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4201 if (dump_enabled_p ())
4203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4204 "can't convert between boolean and non-"
4205 "boolean vectors");
4206 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4207 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4210 return false;
4213 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4214 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4215 if (nunits_in < nunits_out)
4216 modifier = NARROW;
4217 else if (nunits_out == nunits_in)
4218 modifier = NONE;
4219 else
4220 modifier = WIDEN;
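/* Editorial note, an assumed example (128-bit vectors) that is not part of
   the original source: converting short to int gives nunits_in == 8 (V8HI)
   and nunits_out == 4 (V4SI), so the modifier is WIDEN; the reverse
   int-to-short conversion gives nunits_in == 4 and nunits_out == 8 and is
   classified as NARROW.  */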
4222 /* Multiple types in SLP are handled by creating the appropriate number of
4223 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4224 case of SLP. */
4225 if (slp_node)
4226 ncopies = 1;
4227 else if (modifier == NARROW)
4228 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4229 else
4230 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4232 /* Sanity check: make sure that at least one copy of the vectorized stmt
4233 needs to be generated. */
4234 gcc_assert (ncopies >= 1);
4236 bool found_mode = false;
4237 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4238 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4239 opt_scalar_mode rhs_mode_iter;
4241 /* Supportable by target? */
4242 switch (modifier)
4244 case NONE:
4245 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4246 return false;
4247 if (supportable_convert_operation (code, vectype_out, vectype_in,
4248 &decl1, &code1))
4249 break;
4250 /* FALLTHRU */
4251 unsupported:
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4254 "conversion not supported by target.\n");
4255 return false;
4257 case WIDEN:
4258 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4259 &code1, &code2, &multi_step_cvt,
4260 &interm_types))
4262 /* Binary widening operation can only be supported directly by the
4263 architecture. */
4264 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4265 break;
4268 if (code != FLOAT_EXPR
4269 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4270 goto unsupported;
4272 fltsz = GET_MODE_SIZE (lhs_mode);
4273 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4275 rhs_mode = rhs_mode_iter.require ();
4276 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4277 break;
4279 cvt_type
4280 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4281 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4282 if (cvt_type == NULL_TREE)
4283 goto unsupported;
4285 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4287 if (!supportable_convert_operation (code, vectype_out,
4288 cvt_type, &decl1, &codecvt1))
4289 goto unsupported;
4291 else if (!supportable_widening_operation (code, stmt, vectype_out,
4292 cvt_type, &codecvt1,
4293 &codecvt2, &multi_step_cvt,
4294 &interm_types))
4295 continue;
4296 else
4297 gcc_assert (multi_step_cvt == 0);
4299 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4300 vectype_in, &code1, &code2,
4301 &multi_step_cvt, &interm_types))
4303 found_mode = true;
4304 break;
4308 if (!found_mode)
4309 goto unsupported;
4311 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4312 codecvt2 = ERROR_MARK;
4313 else
4315 multi_step_cvt++;
4316 interm_types.safe_push (cvt_type);
4317 cvt_type = NULL_TREE;
4319 break;
4321 case NARROW:
4322 gcc_assert (op_type == unary_op);
4323 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4324 &code1, &multi_step_cvt,
4325 &interm_types))
4326 break;
4328 if (code != FIX_TRUNC_EXPR
4329 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4330 goto unsupported;
4332 cvt_type
4333 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4334 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4335 if (cvt_type == NULL_TREE)
4336 goto unsupported;
4337 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4338 &decl1, &codecvt1))
4339 goto unsupported;
4340 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4341 &code1, &multi_step_cvt,
4342 &interm_types))
4343 break;
4344 goto unsupported;
4346 default:
4347 gcc_unreachable ();
4350 if (!vec_stmt) /* transformation not required. */
4352 if (dump_enabled_p ())
4353 dump_printf_loc (MSG_NOTE, vect_location,
4354 "=== vectorizable_conversion ===\n");
4355 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4357 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4358 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4360 else if (modifier == NARROW)
4362 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4363 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4365 else
4367 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4368 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4370 interm_types.release ();
4371 return true;
4374 /* Transform. */
4375 if (dump_enabled_p ())
4376 dump_printf_loc (MSG_NOTE, vect_location,
4377 "transform conversion. ncopies = %d.\n", ncopies);
4379 if (op_type == binary_op)
4381 if (CONSTANT_CLASS_P (op0))
4382 op0 = fold_convert (TREE_TYPE (op1), op0);
4383 else if (CONSTANT_CLASS_P (op1))
4384 op1 = fold_convert (TREE_TYPE (op0), op1);
4387 /* In case of multi-step conversion, we first generate conversion operations
4388 to the intermediate types, and then from those types to the final one.
4389 We create vector destinations for the intermediate types (TYPES) received
4390 from supportable_*_operation, and store them in the correct order
4391 for future use in vect_create_vectorized_*_stmts (). */
4392 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4393 vec_dest = vect_create_destination_var (scalar_dest,
4394 (cvt_type && modifier == WIDEN)
4395 ? cvt_type : vectype_out);
4396 vec_dsts.quick_push (vec_dest);
4398 if (multi_step_cvt)
4400 for (i = interm_types.length () - 1;
4401 interm_types.iterate (i, &intermediate_type); i--)
4403 vec_dest = vect_create_destination_var (scalar_dest,
4404 intermediate_type);
4405 vec_dsts.quick_push (vec_dest);
4409 if (cvt_type)
4410 vec_dest = vect_create_destination_var (scalar_dest,
4411 modifier == WIDEN
4412 ? vectype_out : cvt_type);
4414 if (!slp_node)
4416 if (modifier == WIDEN)
4418 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4419 if (op_type == binary_op)
4420 vec_oprnds1.create (1);
4422 else if (modifier == NARROW)
4423 vec_oprnds0.create (
4424 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4426 else if (code == WIDEN_LSHIFT_EXPR)
4427 vec_oprnds1.create (slp_node->vec_stmts_size);
4429 last_oprnd = op0;
4430 prev_stmt_info = NULL;
4431 switch (modifier)
4433 case NONE:
4434 for (j = 0; j < ncopies; j++)
4436 if (j == 0)
4437 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4438 else
4439 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4441 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4443 /* Arguments are ready. Create the new vector stmt. */
4444 if (code1 == CALL_EXPR)
4446 new_stmt = gimple_build_call (decl1, 1, vop0);
4447 new_temp = make_ssa_name (vec_dest, new_stmt);
4448 gimple_call_set_lhs (new_stmt, new_temp);
4450 else
4452 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4453 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4454 new_temp = make_ssa_name (vec_dest, new_stmt);
4455 gimple_assign_set_lhs (new_stmt, new_temp);
4458 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4459 if (slp_node)
4460 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4461 else
4463 if (!prev_stmt_info)
4464 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4465 else
4466 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4467 prev_stmt_info = vinfo_for_stmt (new_stmt);
4471 break;
4473 case WIDEN:
4474 /* In case the vectorization factor (VF) is bigger than the number
4475 of elements that we can fit in a vectype (nunits), we have to
4476 generate more than one vector stmt - i.e., we need to "unroll"
4477 the vector stmt by a factor VF/nunits. */
4478 for (j = 0; j < ncopies; j++)
4480 /* Handle uses. */
4481 if (j == 0)
4483 if (slp_node)
4485 if (code == WIDEN_LSHIFT_EXPR)
4487 unsigned int k;
4489 vec_oprnd1 = op1;
4490 /* Store vec_oprnd1 for every vector stmt to be created
4491 for SLP_NODE. We check during the analysis that all
4492 the shift arguments are the same. */
4493 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4494 vec_oprnds1.quick_push (vec_oprnd1);
4496 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4497 slp_node);
4499 else
4500 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4501 &vec_oprnds1, slp_node);
4503 else
4505 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4506 vec_oprnds0.quick_push (vec_oprnd0);
4507 if (op_type == binary_op)
4509 if (code == WIDEN_LSHIFT_EXPR)
4510 vec_oprnd1 = op1;
4511 else
4512 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4513 vec_oprnds1.quick_push (vec_oprnd1);
4517 else
4519 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4520 vec_oprnds0.truncate (0);
4521 vec_oprnds0.quick_push (vec_oprnd0);
4522 if (op_type == binary_op)
4524 if (code == WIDEN_LSHIFT_EXPR)
4525 vec_oprnd1 = op1;
4526 else
4527 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4528 vec_oprnd1);
4529 vec_oprnds1.truncate (0);
4530 vec_oprnds1.quick_push (vec_oprnd1);
4534 /* Arguments are ready. Create the new vector stmts. */
4535 for (i = multi_step_cvt; i >= 0; i--)
4537 tree this_dest = vec_dsts[i];
4538 enum tree_code c1 = code1, c2 = code2;
4539 if (i == 0 && codecvt2 != ERROR_MARK)
4541 c1 = codecvt1;
4542 c2 = codecvt2;
4544 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4545 &vec_oprnds1,
4546 stmt, this_dest, gsi,
4547 c1, c2, decl1, decl2,
4548 op_type);
4551 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4553 if (cvt_type)
4555 if (codecvt1 == CALL_EXPR)
4557 new_stmt = gimple_build_call (decl1, 1, vop0);
4558 new_temp = make_ssa_name (vec_dest, new_stmt);
4559 gimple_call_set_lhs (new_stmt, new_temp);
4561 else
4563 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4564 new_temp = make_ssa_name (vec_dest);
4565 new_stmt = gimple_build_assign (new_temp, codecvt1,
4566 vop0);
4569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4571 else
4572 new_stmt = SSA_NAME_DEF_STMT (vop0);
4574 if (slp_node)
4575 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4576 else
4578 if (!prev_stmt_info)
4579 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4580 else
4581 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4582 prev_stmt_info = vinfo_for_stmt (new_stmt);
4587 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4588 break;
4590 case NARROW:
4591 /* In case the vectorization factor (VF) is bigger than the number
4592 of elements that we can fit in a vectype (nunits), we have to
4593 generate more than one vector stmt - i.e., we need to "unroll"
4594 the vector stmt by a factor VF/nunits. */
4595 for (j = 0; j < ncopies; j++)
4597 /* Handle uses. */
4598 if (slp_node)
4599 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4600 slp_node);
4601 else
4603 vec_oprnds0.truncate (0);
4604 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4605 vect_pow2 (multi_step_cvt) - 1);
4608 /* Arguments are ready. Create the new vector stmts. */
4609 if (cvt_type)
4610 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4612 if (codecvt1 == CALL_EXPR)
4614 new_stmt = gimple_build_call (decl1, 1, vop0);
4615 new_temp = make_ssa_name (vec_dest, new_stmt);
4616 gimple_call_set_lhs (new_stmt, new_temp);
4618 else
4620 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4621 new_temp = make_ssa_name (vec_dest);
4622 new_stmt = gimple_build_assign (new_temp, codecvt1,
4623 vop0);
4626 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4627 vec_oprnds0[i] = new_temp;
4630 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4631 stmt, vec_dsts, gsi,
4632 slp_node, code1,
4633 &prev_stmt_info);
4636 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4637 break;
4640 vec_oprnds0.release ();
4641 vec_oprnds1.release ();
4642 interm_types.release ();
4644 return true;
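/* Editorial sketch, an assumed example that is not part of the original
   source: two conversions handled by vectorizable_conversion above.

     void
     widen (int *restrict a, const short *restrict b,
            double *restrict d, const signed char *restrict c, int n)
     {
       for (int i = 0; i < n; i++)
         {
           a[i] = b[i];
           d[i] = c[i];
         }
     }

   The first assignment is a plain short-to-int widening (modifier WIDEN,
   usually a single step).  The second is a char-to-double FLOAT_EXPR whose
   scalar modes differ in size; when the target has no direct widening
   conversion, the FOR_EACH_2XWIDER_MODE search in the WIDEN case above
   looks for an intermediate integer mode to widen through before the final
   float conversion.  */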
4648 /* Function vectorizable_assignment.
4650 Check if STMT performs an assignment (copy) that can be vectorized.
4651 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4652 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4653 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4655 static bool
4656 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4657 gimple **vec_stmt, slp_tree slp_node)
4659 tree vec_dest;
4660 tree scalar_dest;
4661 tree op;
4662 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4663 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4664 tree new_temp;
4665 gimple *def_stmt;
4666 enum vect_def_type dt[1] = {vect_unknown_def_type};
4667 int ndts = 1;
4668 int ncopies;
4669 int i, j;
4670 vec<tree> vec_oprnds = vNULL;
4671 tree vop;
4672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4673 vec_info *vinfo = stmt_info->vinfo;
4674 gimple *new_stmt = NULL;
4675 stmt_vec_info prev_stmt_info = NULL;
4676 enum tree_code code;
4677 tree vectype_in;
4679 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4680 return false;
4682 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4683 && ! vec_stmt)
4684 return false;
4686 /* Is vectorizable assignment? */
4687 if (!is_gimple_assign (stmt))
4688 return false;
4690 scalar_dest = gimple_assign_lhs (stmt);
4691 if (TREE_CODE (scalar_dest) != SSA_NAME)
4692 return false;
4694 code = gimple_assign_rhs_code (stmt);
4695 if (gimple_assign_single_p (stmt)
4696 || code == PAREN_EXPR
4697 || CONVERT_EXPR_CODE_P (code))
4698 op = gimple_assign_rhs1 (stmt);
4699 else
4700 return false;
4702 if (code == VIEW_CONVERT_EXPR)
4703 op = TREE_OPERAND (op, 0);
4705 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4706 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4710 case of SLP. */
4711 if (slp_node)
4712 ncopies = 1;
4713 else
4714 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4716 gcc_assert (ncopies >= 1);
4718 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4722 "use not simple.\n");
4723 return false;
4726 /* We can handle NOP_EXPR conversions that do not change the number
4727 of elements or the vector size. */
4728 if ((CONVERT_EXPR_CODE_P (code)
4729 || code == VIEW_CONVERT_EXPR)
4730 && (!vectype_in
4731 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4732 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4733 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4734 return false;
4736 /* We do not handle bit-precision changes. */
4737 if ((CONVERT_EXPR_CODE_P (code)
4738 || code == VIEW_CONVERT_EXPR)
4739 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4740 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4741 || !type_has_mode_precision_p (TREE_TYPE (op)))
4742 /* But a conversion that does not change the bit-pattern is ok. */
4743 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4744 > TYPE_PRECISION (TREE_TYPE (op)))
4745 && TYPE_UNSIGNED (TREE_TYPE (op)))
4746 /* Conversion between boolean types of different sizes is
4747 a simple assignment in case their vectypes are the same
4748 boolean vectors. */
4749 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4750 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4754 "type conversion to/from bit-precision "
4755 "unsupported.\n");
4756 return false;
4759 if (!vec_stmt) /* transformation not required. */
4761 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE, vect_location,
4764 "=== vectorizable_assignment ===\n");
4765 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4766 return true;
4769 /* Transform. */
4770 if (dump_enabled_p ())
4771 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4773 /* Handle def. */
4774 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4776 /* Handle use. */
4777 for (j = 0; j < ncopies; j++)
4779 /* Handle uses. */
4780 if (j == 0)
4781 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4782 else
4783 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4785 /* Arguments are ready. Create the new vector stmt. */
4786 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4788 if (CONVERT_EXPR_CODE_P (code)
4789 || code == VIEW_CONVERT_EXPR)
4790 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4791 new_stmt = gimple_build_assign (vec_dest, vop);
4792 new_temp = make_ssa_name (vec_dest, new_stmt);
4793 gimple_assign_set_lhs (new_stmt, new_temp);
4794 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4795 if (slp_node)
4796 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4799 if (slp_node)
4800 continue;
4802 if (j == 0)
4803 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4804 else
4805 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4807 prev_stmt_info = vinfo_for_stmt (new_stmt);
4810 vec_oprnds.release ();
4811 return true;
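/* Editorial sketch, an assumed example that is not part of the original
   source: a copy-like statement accepted by vectorizable_assignment.  The
   cast changes neither the number of elements nor the vector size, so the
   whole vector is simply reinterpreted with a VIEW_CONVERT_EXPR:

     void
     copy (unsigned int *restrict dst, const int *restrict src, int n)
     {
       for (int i = 0; i < n; i++)
         dst[i] = (unsigned int) src[i];
     }
   */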
4815 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4816 either as shift by a scalar or by a vector. */
4818 bool
4819 vect_supportable_shift (enum tree_code code, tree scalar_type)
4822 machine_mode vec_mode;
4823 optab optab;
4824 int icode;
4825 tree vectype;
4827 vectype = get_vectype_for_scalar_type (scalar_type);
4828 if (!vectype)
4829 return false;
4831 optab = optab_for_tree_code (code, vectype, optab_scalar);
4832 if (!optab
4833 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4835 optab = optab_for_tree_code (code, vectype, optab_vector);
4836 if (!optab
4837 || (optab_handler (optab, TYPE_MODE (vectype))
4838 == CODE_FOR_nothing))
4839 return false;
4842 vec_mode = TYPE_MODE (vectype);
4843 icode = (int) optab_handler (optab, vec_mode);
4844 if (icode == CODE_FOR_nothing)
4845 return false;
4847 return true;
4851 /* Function vectorizable_shift.
4853 Check if STMT performs a shift operation that can be vectorized.
4854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4855 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4858 static bool
4859 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4860 gimple **vec_stmt, slp_tree slp_node)
4862 tree vec_dest;
4863 tree scalar_dest;
4864 tree op0, op1 = NULL;
4865 tree vec_oprnd1 = NULL_TREE;
4866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4867 tree vectype;
4868 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4869 enum tree_code code;
4870 machine_mode vec_mode;
4871 tree new_temp;
4872 optab optab;
4873 int icode;
4874 machine_mode optab_op2_mode;
4875 gimple *def_stmt;
4876 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4877 int ndts = 2;
4878 gimple *new_stmt = NULL;
4879 stmt_vec_info prev_stmt_info;
4880 int nunits_in;
4881 int nunits_out;
4882 tree vectype_out;
4883 tree op1_vectype;
4884 int ncopies;
4885 int j, i;
4886 vec<tree> vec_oprnds0 = vNULL;
4887 vec<tree> vec_oprnds1 = vNULL;
4888 tree vop0, vop1;
4889 unsigned int k;
4890 bool scalar_shift_arg = true;
4891 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4892 vec_info *vinfo = stmt_info->vinfo;
4894 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4895 return false;
4897 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4898 && ! vec_stmt)
4899 return false;
4901 /* Is STMT a vectorizable binary/unary operation? */
4902 if (!is_gimple_assign (stmt))
4903 return false;
4905 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4906 return false;
4908 code = gimple_assign_rhs_code (stmt);
4910 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4911 || code == RROTATE_EXPR))
4912 return false;
4914 scalar_dest = gimple_assign_lhs (stmt);
4915 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4916 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 "bit-precision shifts not supported.\n");
4921 return false;
4924 op0 = gimple_assign_rhs1 (stmt);
4925 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4927 if (dump_enabled_p ())
4928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4929 "use not simple.\n");
4930 return false;
4932 /* If op0 is an external or constant def, use a vector type with
4933 the same size as the output vector type. */
4934 if (!vectype)
4935 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4936 if (vec_stmt)
4937 gcc_assert (vectype);
4938 if (!vectype)
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4942 "no vectype for scalar type\n");
4943 return false;
4946 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4947 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4948 if (nunits_out != nunits_in)
4949 return false;
4951 op1 = gimple_assign_rhs2 (stmt);
4952 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4954 if (dump_enabled_p ())
4955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4956 "use not simple.\n");
4957 return false;
4960 /* Multiple types in SLP are handled by creating the appropriate number of
4961 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4962 case of SLP. */
4963 if (slp_node)
4964 ncopies = 1;
4965 else
4966 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4968 gcc_assert (ncopies >= 1);
4970 /* Determine whether the shift amount is a vector, or scalar. If the
4971 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4973 if ((dt[1] == vect_internal_def
4974 || dt[1] == vect_induction_def)
4975 && !slp_node)
4976 scalar_shift_arg = false;
4977 else if (dt[1] == vect_constant_def
4978 || dt[1] == vect_external_def
4979 || dt[1] == vect_internal_def)
4981 /* In SLP, we need to check whether the shift count is the same.
4982 In loops, if it is a constant or invariant, it is always
4983 a scalar shift. */
4984 if (slp_node)
4986 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4987 gimple *slpstmt;
4989 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4990 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4991 scalar_shift_arg = false;
4994 /* If the shift amount is computed by a pattern stmt we cannot
4995 use the scalar amount directly, so give up and use a vector
4996 shift. */
4997 if (dt[1] == vect_internal_def)
4999 gimple *def = SSA_NAME_DEF_STMT (op1);
5000 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5001 scalar_shift_arg = false;
5004 else
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5008 "operand mode requires invariant argument.\n");
5009 return false;
5012 /* Vector shifted by vector. */
5013 if (!scalar_shift_arg)
5015 optab = optab_for_tree_code (code, vectype, optab_vector);
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE, vect_location,
5018 "vector/vector shift/rotate found.\n");
5020 if (!op1_vectype)
5021 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5022 if (op1_vectype == NULL_TREE
5023 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5027 "unusable type for last operand in"
5028 " vector/vector shift/rotate.\n");
5029 return false;
5032 /* See if the machine has a vector shifted by scalar insn and if not
5033 then see if it has a vector shifted by vector insn. */
5034 else
5036 optab = optab_for_tree_code (code, vectype, optab_scalar);
5037 if (optab
5038 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_NOTE, vect_location,
5042 "vector/scalar shift/rotate found.\n");
5044 else
5046 optab = optab_for_tree_code (code, vectype, optab_vector);
5047 if (optab
5048 && (optab_handler (optab, TYPE_MODE (vectype))
5049 != CODE_FOR_nothing))
5051 scalar_shift_arg = false;
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_NOTE, vect_location,
5055 "vector/vector shift/rotate found.\n");
5057 /* Unlike the other binary operators, shifts/rotates have
5058 an int rhs rather than one of the same type as the lhs,
5059 so make sure the scalar is the right type if we are
5060 dealing with vectors of long long/long/short/char. */
5061 if (dt[1] == vect_constant_def)
5062 op1 = fold_convert (TREE_TYPE (vectype), op1);
5063 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5064 TREE_TYPE (op1)))
5066 if (slp_node
5067 && TYPE_MODE (TREE_TYPE (vectype))
5068 != TYPE_MODE (TREE_TYPE (op1)))
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "unusable type for last operand in"
5073 " vector/vector shift/rotate.\n");
5074 return false;
5076 if (vec_stmt && !slp_node)
5078 op1 = fold_convert (TREE_TYPE (vectype), op1);
5079 op1 = vect_init_vector (stmt, op1,
5080 TREE_TYPE (vectype), NULL);
5087 /* Supportable by target? */
5088 if (!optab)
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5092 "no optab.\n");
5093 return false;
5095 vec_mode = TYPE_MODE (vectype);
5096 icode = (int) optab_handler (optab, vec_mode);
5097 if (icode == CODE_FOR_nothing)
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5101 "op not supported by target.\n");
5102 /* Check only during analysis. */
5103 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5104 || (!vec_stmt
5105 && !vect_worthwhile_without_simd_p (vinfo, code)))
5106 return false;
5107 if (dump_enabled_p ())
5108 dump_printf_loc (MSG_NOTE, vect_location,
5109 "proceeding using word mode.\n");
5112 /* Worthwhile without SIMD support? Check only during analysis. */
5113 if (!vec_stmt
5114 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5115 && !vect_worthwhile_without_simd_p (vinfo, code))
5117 if (dump_enabled_p ())
5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5119 "not worthwhile without SIMD support.\n");
5120 return false;
5123 if (!vec_stmt) /* transformation not required. */
5125 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5126 if (dump_enabled_p ())
5127 dump_printf_loc (MSG_NOTE, vect_location,
5128 "=== vectorizable_shift ===\n");
5129 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5130 return true;
5133 /* Transform. */
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_NOTE, vect_location,
5137 "transform binary/unary operation.\n");
5139 /* Handle def. */
5140 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5142 prev_stmt_info = NULL;
5143 for (j = 0; j < ncopies; j++)
5145 /* Handle uses. */
5146 if (j == 0)
5148 if (scalar_shift_arg)
5150 /* Vector shl and shr insn patterns can be defined with scalar
5151 operand 2 (shift operand). In this case, use constant or loop
5152 invariant op1 directly, without extending it to vector mode
5153 first. */
5154 optab_op2_mode = insn_data[icode].operand[2].mode;
5155 if (!VECTOR_MODE_P (optab_op2_mode))
5157 if (dump_enabled_p ())
5158 dump_printf_loc (MSG_NOTE, vect_location,
5159 "operand 1 using scalar mode.\n");
5160 vec_oprnd1 = op1;
5161 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5162 vec_oprnds1.quick_push (vec_oprnd1);
5163 if (slp_node)
5165 /* Store vec_oprnd1 for every vector stmt to be created
5166 for SLP_NODE. We check during the analysis that all
5167 the shift arguments are the same.
5168 TODO: Allow different constants for different vector
5169 stmts generated for an SLP instance. */
5170 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5171 vec_oprnds1.quick_push (vec_oprnd1);
5176 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5177 (a special case for certain kinds of vector shifts); otherwise,
5178 operand 1 should be of a vector type (the usual case). */
5179 if (vec_oprnd1)
5180 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5181 slp_node);
5182 else
5183 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5184 slp_node);
5186 else
5187 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5189 /* Arguments are ready. Create the new vector stmt. */
5190 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5192 vop1 = vec_oprnds1[i];
5193 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5194 new_temp = make_ssa_name (vec_dest, new_stmt);
5195 gimple_assign_set_lhs (new_stmt, new_temp);
5196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5197 if (slp_node)
5198 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5201 if (slp_node)
5202 continue;
5204 if (j == 0)
5205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5206 else
5207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5208 prev_stmt_info = vinfo_for_stmt (new_stmt);
5211 vec_oprnds0.release ();
5212 vec_oprnds1.release ();
5214 return true;
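/* Editorial sketch, an assumed example that is not part of the original
   source: the two shift forms distinguished above.  The first loop shifts
   by a loop-invariant amount and can use the vector-by-scalar optab; the
   second shifts each element by a per-element amount and needs the
   vector-by-vector optab:

     void
     shifts (unsigned int *restrict a, const unsigned int *restrict b,
             const unsigned int *restrict amt, int n, int k)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] << k;
       for (int i = 0; i < n; i++)
         a[i] = b[i] >> amt[i];
     }
   */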
5218 /* Function vectorizable_operation.
5220 Check if STMT performs a binary, unary or ternary operation that can
5221 be vectorized.
5222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5223 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5226 static bool
5227 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5228 gimple **vec_stmt, slp_tree slp_node)
5230 tree vec_dest;
5231 tree scalar_dest;
5232 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5234 tree vectype;
5235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5236 enum tree_code code, orig_code;
5237 machine_mode vec_mode;
5238 tree new_temp;
5239 int op_type;
5240 optab optab;
5241 bool target_support_p;
5242 gimple *def_stmt;
5243 enum vect_def_type dt[3]
5244 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5245 int ndts = 3;
5246 gimple *new_stmt = NULL;
5247 stmt_vec_info prev_stmt_info;
5248 int nunits_in;
5249 int nunits_out;
5250 tree vectype_out;
5251 int ncopies;
5252 int j, i;
5253 vec<tree> vec_oprnds0 = vNULL;
5254 vec<tree> vec_oprnds1 = vNULL;
5255 vec<tree> vec_oprnds2 = vNULL;
5256 tree vop0, vop1, vop2;
5257 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5258 vec_info *vinfo = stmt_info->vinfo;
5260 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5261 return false;
5263 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5264 && ! vec_stmt)
5265 return false;
5267 /* Is STMT a vectorizable binary/unary operation? */
5268 if (!is_gimple_assign (stmt))
5269 return false;
5271 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5272 return false;
5274 orig_code = code = gimple_assign_rhs_code (stmt);
5276 /* For pointer addition and subtraction, we should use the normal
5277 plus and minus for the vector operation. */
5278 if (code == POINTER_PLUS_EXPR)
5279 code = PLUS_EXPR;
5280 if (code == POINTER_DIFF_EXPR)
5281 code = MINUS_EXPR;
5283 /* Support only unary or binary operations. */
5284 op_type = TREE_CODE_LENGTH (code);
5285 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5287 if (dump_enabled_p ())
5288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5289 "num. args = %d (not unary/binary/ternary op).\n",
5290 op_type);
5291 return false;
5294 scalar_dest = gimple_assign_lhs (stmt);
5295 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5297 /* Most operations cannot handle bit-precision types without extra
5298 truncations. */
5299 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5300 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5301 /* Exceptions are bitwise binary operations. */
5302 && code != BIT_IOR_EXPR
5303 && code != BIT_XOR_EXPR
5304 && code != BIT_AND_EXPR)
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5308 "bit-precision arithmetic not supported.\n");
5309 return false;
5312 op0 = gimple_assign_rhs1 (stmt);
5313 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5315 if (dump_enabled_p ())
5316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5317 "use not simple.\n");
5318 return false;
5320 /* If op0 is an external or constant def, use a vector type with
5321 the same size as the output vector type. */
5322 if (!vectype)
5324 /* For a boolean type we cannot determine the vectype from an
5325 invariant value (we don't know whether it is a vector
5326 of booleans or a vector of integers). We use the output
5327 vectype because operations on booleans don't change
5328 the type. */
5329 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5331 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "not supported operation on bool value.\n");
5336 return false;
5338 vectype = vectype_out;
5340 else
5341 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5343 if (vec_stmt)
5344 gcc_assert (vectype);
5345 if (!vectype)
5347 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5350 "no vectype for scalar type ");
5351 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5352 TREE_TYPE (op0));
5353 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5356 return false;
5359 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5360 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5361 if (nunits_out != nunits_in)
5362 return false;
5364 if (op_type == binary_op || op_type == ternary_op)
5366 op1 = gimple_assign_rhs2 (stmt);
5367 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5369 if (dump_enabled_p ())
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5371 "use not simple.\n");
5372 return false;
5375 if (op_type == ternary_op)
5377 op2 = gimple_assign_rhs3 (stmt);
5378 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "use not simple.\n");
5383 return false;
5387 /* Multiple types in SLP are handled by creating the appropriate number of
5388 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5389 case of SLP. */
5390 if (slp_node)
5391 ncopies = 1;
5392 else
5393 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5395 gcc_assert (ncopies >= 1);
5397 /* Shifts are handled in vectorizable_shift (). */
5398 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5399 || code == RROTATE_EXPR)
5400 return false;
5402 /* Supportable by target? */
5404 vec_mode = TYPE_MODE (vectype);
5405 if (code == MULT_HIGHPART_EXPR)
5406 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5407 else
5409 optab = optab_for_tree_code (code, vectype, optab_default);
5410 if (!optab)
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5414 "no optab.\n");
5415 return false;
5417 target_support_p = (optab_handler (optab, vec_mode)
5418 != CODE_FOR_nothing);
5421 if (!target_support_p)
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 "op not supported by target.\n");
5426 /* Check only during analysis. */
5427 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5428 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5429 return false;
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_NOTE, vect_location,
5432 "proceeding using word mode.\n");
5435 /* Worthwhile without SIMD support? Check only during analysis. */
5436 if (!VECTOR_MODE_P (vec_mode)
5437 && !vec_stmt
5438 && !vect_worthwhile_without_simd_p (vinfo, code))
5440 if (dump_enabled_p ())
5441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5442 "not worthwhile without SIMD support.\n");
5443 return false;
5446 if (!vec_stmt) /* transformation not required. */
5448 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE, vect_location,
5451 "=== vectorizable_operation ===\n");
5452 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5453 return true;
5456 /* Transform. */
5458 if (dump_enabled_p ())
5459 dump_printf_loc (MSG_NOTE, vect_location,
5460 "transform binary/unary operation.\n");
5462 /* Handle def. */
5463 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5465 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5466 vectors with unsigned elements, but the result is signed. So, we
5467 need to compute the MINUS_EXPR into a vectype temporary and
5468 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5469 tree vec_cvt_dest = NULL_TREE;
5470 if (orig_code == POINTER_DIFF_EXPR)
5471 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5473 /* In case the vectorization factor (VF) is bigger than the number
5474 of elements that we can fit in a vectype (nunits), we have to generate
5475 more than one vector stmt - i.e., we need to "unroll" the
5476 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5477 from one copy of the vector stmt to the next, in the field
5478 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5479 stages to find the correct vector defs to be used when vectorizing
5480 stmts that use the defs of the current stmt. The example below
5481 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5482 we need to create 4 vectorized stmts):
5484 before vectorization:
5485 RELATED_STMT VEC_STMT
5486 S1: x = memref - -
5487 S2: z = x + 1 - -
5489 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5490 there):
5491 RELATED_STMT VEC_STMT
5492 VS1_0: vx0 = memref0 VS1_1 -
5493 VS1_1: vx1 = memref1 VS1_2 -
5494 VS1_2: vx2 = memref2 VS1_3 -
5495 VS1_3: vx3 = memref3 - -
5496 S1: x = load - VS1_0
5497 S2: z = x + 1 - -
5499 step2: vectorize stmt S2 (done here):
5500 To vectorize stmt S2 we first need to find the relevant vector
5501 def for the first operand 'x'. This is, as usual, obtained from
5502 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5503 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5504 relevant vector def 'vx0'. Having found 'vx0' we can generate
5505 the vector stmt VS2_0, and as usual, record it in the
5506 STMT_VINFO_VEC_STMT of stmt S2.
5507 When creating the second copy (VS2_1), we obtain the relevant vector
5508 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5509 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5510 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5511 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5512 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5513 chain of stmts and pointers:
5514 RELATED_STMT VEC_STMT
5515 VS1_0: vx0 = memref0 VS1_1 -
5516 VS1_1: vx1 = memref1 VS1_2 -
5517 VS1_2: vx2 = memref2 VS1_3 -
5518 VS1_3: vx3 = memref3 - -
5519 S1: x = load - VS1_0
5520 VS2_0: vz0 = vx0 + v1 VS2_1 -
5521 VS2_1: vz1 = vx1 + v1 VS2_2 -
5522 VS2_2: vz2 = vx2 + v1 VS2_3 -
5523 VS2_3: vz3 = vx3 + v1 - -
5524 S2: z = x + 1 - VS2_0 */
5526 prev_stmt_info = NULL;
5527 for (j = 0; j < ncopies; j++)
5529 /* Handle uses. */
5530 if (j == 0)
5532 if (op_type == binary_op || op_type == ternary_op)
5533 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5534 slp_node);
5535 else
5536 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5537 slp_node);
5538 if (op_type == ternary_op)
5539 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5540 slp_node);
5542 else
5544 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5545 if (op_type == ternary_op)
5547 tree vec_oprnd = vec_oprnds2.pop ();
5548 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5549 vec_oprnd));
5553 /* Arguments are ready. Create the new vector stmt. */
5554 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5556 vop1 = ((op_type == binary_op || op_type == ternary_op)
5557 ? vec_oprnds1[i] : NULL_TREE);
5558 vop2 = ((op_type == ternary_op)
5559 ? vec_oprnds2[i] : NULL_TREE);
5560 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5561 new_temp = make_ssa_name (vec_dest, new_stmt);
5562 gimple_assign_set_lhs (new_stmt, new_temp);
5563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5564 if (vec_cvt_dest)
5566 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5567 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5568 new_temp);
5569 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5570 gimple_assign_set_lhs (new_stmt, new_temp);
5571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5573 if (slp_node)
5574 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5577 if (slp_node)
5578 continue;
5580 if (j == 0)
5581 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5582 else
5583 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5584 prev_stmt_info = vinfo_for_stmt (new_stmt);
5587 vec_oprnds0.release ();
5588 vec_oprnds1.release ();
5589 vec_oprnds2.release ();
5591 return true;
5594 /* A helper function to ensure data reference DR's base alignment. */
5596 static void
5597 ensure_base_align (struct data_reference *dr)
5599 if (!dr->aux)
5600 return;
5602 if (DR_VECT_AUX (dr)->base_misaligned)
5604 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5606 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5608 if (decl_in_symtab_p (base_decl))
5609 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5610 else
5612 SET_DECL_ALIGN (base_decl, align_base_to);
5613 DECL_USER_ALIGN (base_decl) = 1;
5615 DR_VECT_AUX (dr)->base_misaligned = false;
5620 /* Function get_group_alias_ptr_type.
5622 Return the alias type for the group starting at FIRST_STMT. */
5624 static tree
5625 get_group_alias_ptr_type (gimple *first_stmt)
5627 struct data_reference *first_dr, *next_dr;
5628 gimple *next_stmt;
5630 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5631 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5632 while (next_stmt)
5634 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5635 if (get_alias_set (DR_REF (first_dr))
5636 != get_alias_set (DR_REF (next_dr)))
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_NOTE, vect_location,
5640 "conflicting alias set types.\n");
5641 return ptr_type_node;
5643 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5645 return reference_alias_ptr_type (DR_REF (first_dr));
5649 /* Function vectorizable_store.
5651 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5652 can be vectorized.
5653 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5654 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5655 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5657 static bool
5658 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5659 slp_tree slp_node)
5661 tree scalar_dest;
5662 tree data_ref;
5663 tree op;
5664 tree vec_oprnd = NULL_TREE;
5665 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5666 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5667 tree elem_type;
5668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5669 struct loop *loop = NULL;
5670 machine_mode vec_mode;
5671 tree dummy;
5672 enum dr_alignment_support alignment_support_scheme;
5673 gimple *def_stmt;
5674 enum vect_def_type dt;
5675 stmt_vec_info prev_stmt_info = NULL;
5676 tree dataref_ptr = NULL_TREE;
5677 tree dataref_offset = NULL_TREE;
5678 gimple *ptr_incr = NULL;
5679 int ncopies;
5680 int j;
5681 gimple *next_stmt, *first_stmt;
5682 bool grouped_store;
5683 unsigned int group_size, i;
5684 vec<tree> oprnds = vNULL;
5685 vec<tree> result_chain = vNULL;
5686 bool inv_p;
5687 tree offset = NULL_TREE;
5688 vec<tree> vec_oprnds = vNULL;
5689 bool slp = (slp_node != NULL);
5690 unsigned int vec_num;
5691 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5692 vec_info *vinfo = stmt_info->vinfo;
5693 tree aggr_type;
5694 gather_scatter_info gs_info;
5695 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5696 gimple *new_stmt;
5697 int vf;
5698 vec_load_store_type vls_type;
5699 tree ref_type;
5701 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5702 return false;
5704 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5705 && ! vec_stmt)
5706 return false;
5708 /* Is vectorizable store? */
5710 if (!is_gimple_assign (stmt))
5711 return false;
5713 scalar_dest = gimple_assign_lhs (stmt);
5714 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5715 && is_pattern_stmt_p (stmt_info))
5716 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5717 if (TREE_CODE (scalar_dest) != ARRAY_REF
5718 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5719 && TREE_CODE (scalar_dest) != INDIRECT_REF
5720 && TREE_CODE (scalar_dest) != COMPONENT_REF
5721 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5722 && TREE_CODE (scalar_dest) != REALPART_EXPR
5723 && TREE_CODE (scalar_dest) != MEM_REF)
5724 return false;
5726 /* Cannot have hybrid store SLP -- that would mean storing to the
5727 same location twice. */
5728 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5730 gcc_assert (gimple_assign_single_p (stmt));
5732 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5733 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5735 if (loop_vinfo)
5737 loop = LOOP_VINFO_LOOP (loop_vinfo);
5738 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5740 else
5741 vf = 1;
5743 /* Multiple types in SLP are handled by creating the appropriate number of
5744 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5745 case of SLP. */
5746 if (slp)
5747 ncopies = 1;
5748 else
5749 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5751 gcc_assert (ncopies >= 1);
5753 /* FORNOW. This restriction should be relaxed. */
5754 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5756 if (dump_enabled_p ())
5757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5758 "multiple types in nested loop.\n");
5759 return false;
5762 op = gimple_assign_rhs1 (stmt);
5764 /* In case this is a store from a constant, make sure
5765 native_encode_expr can handle it. */
5766 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
5767 return false;
5769 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "use not simple.\n");
5774 return false;
5777 if (dt == vect_constant_def || dt == vect_external_def)
5778 vls_type = VLS_STORE_INVARIANT;
5779 else
5780 vls_type = VLS_STORE;
5782 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5783 return false;
5785 elem_type = TREE_TYPE (vectype);
5786 vec_mode = TYPE_MODE (vectype);
5788 /* FORNOW. In some cases can vectorize even if data-type not supported
5789 (e.g. - array initialization with 0). */
5790 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5791 return false;
5793 if (!STMT_VINFO_DATA_REF (stmt_info))
5794 return false;
5796 vect_memory_access_type memory_access_type;
5797 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5798 &memory_access_type, &gs_info))
5799 return false;
5801 if (!vec_stmt) /* transformation not required. */
5803 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5804 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5805 /* The SLP costs are calculated during SLP analysis. */
5806 if (!PURE_SLP_STMT (stmt_info))
5807 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5808 NULL, NULL, NULL);
5809 return true;
5811 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5813 /* Transform. */
5815 ensure_base_align (dr);
5817 if (memory_access_type == VMAT_GATHER_SCATTER)
5819 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5820 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5821 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5822 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5823 edge pe = loop_preheader_edge (loop);
5824 gimple_seq seq;
5825 basic_block new_bb;
5826 enum { NARROW, NONE, WIDEN } modifier;
5827 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5829 if (nunits == (unsigned int) scatter_off_nunits)
5830 modifier = NONE;
5831 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5833 modifier = WIDEN;
5835 vec_perm_builder sel (scatter_off_nunits, scatter_off_nunits, 1);
5836 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5837 sel.quick_push (i | nunits);
5839 vec_perm_indices indices (sel, 1, scatter_off_nunits);
5840 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5841 indices);
5842 gcc_assert (perm_mask != NULL_TREE);
5844 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5846 modifier = NARROW;
5848 vec_perm_builder sel (nunits, nunits, 1);
5849 for (i = 0; i < (unsigned int) nunits; ++i)
5850 sel.quick_push (i | scatter_off_nunits);
5852 vec_perm_indices indices (sel, 2, nunits);
5853 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5854 gcc_assert (perm_mask != NULL_TREE);
5855 ncopies *= 2;
5857 else
5858 gcc_unreachable ();
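      /* Worked example (numbers chosen only for illustration): with
	 nunits == 4 data elements and scatter_off_nunits == 8 offset
	 elements the WIDEN mask built above is
	   { 4, 5, 6, 7, 4, 5, 6, 7 }
	 which brings the high half of the offset vector into the low lanes
	 for the odd-numbered copies; with nunits == 8 and
	 scatter_off_nunits == 4 the NARROW mask has the same values but is
	 applied to the source vector instead, selecting its high half.  */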
5860 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5861 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5862 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5863 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5864 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5865 scaletype = TREE_VALUE (arglist);
5867 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5868 && TREE_CODE (rettype) == VOID_TYPE);
5870 ptr = fold_convert (ptrtype, gs_info.base);
5871 if (!is_gimple_min_invariant (ptr))
5873 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5874 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5875 gcc_assert (!new_bb);
5878 /* Currently we support only unconditional scatter stores,
5879 so mask should be all ones. */
5880 mask = build_int_cst (masktype, -1);
5881 mask = vect_init_vector (stmt, mask, masktype, NULL);
5883 scale = build_int_cst (scaletype, gs_info.scale);
5885 prev_stmt_info = NULL;
5886 for (j = 0; j < ncopies; ++j)
5888 if (j == 0)
5890 src = vec_oprnd1
5891 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5892 op = vec_oprnd0
5893 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5895 else if (modifier != NONE && (j & 1))
5897 if (modifier == WIDEN)
5899 src = vec_oprnd1
5900 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5901 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5902 stmt, gsi);
5904 else if (modifier == NARROW)
5906 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5907 stmt, gsi);
5908 op = vec_oprnd0
5909 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5910 vec_oprnd0);
5912 else
5913 gcc_unreachable ();
5915 else
5917 src = vec_oprnd1
5918 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5919 op = vec_oprnd0
5920 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5921 vec_oprnd0);
5924 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5926 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5927 == TYPE_VECTOR_SUBPARTS (srctype));
5928 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5929 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5930 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5931 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5932 src = var;
5935 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5937 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5938 == TYPE_VECTOR_SUBPARTS (idxtype));
5939 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5940 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5941 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5942 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5943 op = var;
5946 new_stmt
5947 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5951 if (prev_stmt_info == NULL)
5952 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5953 else
5954 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5955 prev_stmt_info = vinfo_for_stmt (new_stmt);
5957 return true;
5960 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5961 if (grouped_store)
5963 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5964 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5965 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5967 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5969 /* FORNOW */
5970 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5972 /* We vectorize all the stmts of the interleaving group when we
5973 reach the last stmt in the group. */
5974 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5975 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5976 && !slp)
5978 *vec_stmt = NULL;
5979 return true;
5982 if (slp)
5984 grouped_store = false;
5985 /* VEC_NUM is the number of vect stmts to be created for this
5986 group. */
5987 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5988 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5989 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5990 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5991 op = gimple_assign_rhs1 (first_stmt);
5993 else
5994 /* VEC_NUM is the number of vect stmts to be created for this
5995 group. */
5996 vec_num = group_size;
5998 ref_type = get_group_alias_ptr_type (first_stmt);
6000 else
6002 first_stmt = stmt;
6003 first_dr = dr;
6004 group_size = vec_num = 1;
6005 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6008 if (dump_enabled_p ())
6009 dump_printf_loc (MSG_NOTE, vect_location,
6010 "transform store. ncopies = %d\n", ncopies);
6012 if (memory_access_type == VMAT_ELEMENTWISE
6013 || memory_access_type == VMAT_STRIDED_SLP)
6015 gimple_stmt_iterator incr_gsi;
6016 bool insert_after;
6017 gimple *incr;
6018 tree offvar;
6019 tree ivstep;
6020 tree running_off;
6021 gimple_seq stmts = NULL;
6022 tree stride_base, stride_step, alias_off;
6023 tree vec_oprnd;
6024 unsigned int g;
6026 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6028 stride_base
6029 = fold_build_pointer_plus
6030 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6031 size_binop (PLUS_EXPR,
6032 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6033 convert_to_ptrofftype (DR_INIT (first_dr))));
6034 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6036 /* For a store with loop-invariant (but other than power-of-2)
6037 stride (i.e. not a grouped access) like so:
6039 for (i = 0; i < n; i += stride)
6040 array[i] = ...;
6042 we generate a new induction variable and new stores from
6043 the components of the (vectorized) rhs:
6045 for (j = 0; ; j += VF*stride)
6046 vectemp = ...;
6047 tmp1 = vectemp[0];
6048 array[j] = tmp1;
6049 tmp2 = vectemp[1];
6050 array[j + stride] = tmp2;
6054 unsigned nstores = nunits;
6055 unsigned lnel = 1;
6056 tree ltype = elem_type;
6057 tree lvectype = vectype;
6058 if (slp)
6060 if (group_size < nunits
6061 && nunits % group_size == 0)
6063 nstores = nunits / group_size;
6064 lnel = group_size;
6065 ltype = build_vector_type (elem_type, group_size);
6066 lvectype = vectype;
6068 /* First check if vec_extract optab doesn't support extraction
6069 of vector elts directly. */
6070 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6071 machine_mode vmode;
6072 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6073 || !VECTOR_MODE_P (vmode)
6074 || (convert_optab_handler (vec_extract_optab,
6075 TYPE_MODE (vectype), vmode)
6076 == CODE_FOR_nothing))
6078 /* Try to avoid emitting an extract of vector elements
6079 by performing the extracts using an integer type of the
6080 same size, extracting from a vector of those and then
6081 re-interpreting it as the original vector type if
6082 supported. */
6083 unsigned lsize
6084 = group_size * GET_MODE_BITSIZE (elmode);
6085 elmode = int_mode_for_size (lsize, 0).require ();
6086 /* If we can't construct such a vector fall back to
6087 element extracts from the original vector type and
6088 element size stores. */
6089 if (mode_for_vector (elmode,
6090 nunits / group_size).exists (&vmode)
6091 && VECTOR_MODE_P (vmode)
6092 && (convert_optab_handler (vec_extract_optab,
6093 vmode, elmode)
6094 != CODE_FOR_nothing))
6096 nstores = nunits / group_size;
6097 lnel = group_size;
6098 ltype = build_nonstandard_integer_type (lsize, 1);
6099 lvectype = build_vector_type (ltype, nstores);
6101 /* Else fall back to vector extraction anyway.
6102 Fewer stores are more important than avoiding spilling
6103 of the vector we extract from. Compared to the
6104 construction case in vectorizable_load no store-forwarding
6105 issue exists here for reasonable archs. */
6108 else if (group_size >= nunits
6109 && group_size % nunits == 0)
6111 nstores = 1;
6112 lnel = nunits;
6113 ltype = vectype;
6114 lvectype = vectype;
6116 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
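	  /* For instance (example numbers only): storing groups of two SI
	     elements out of a V4SI vector, if the target cannot vec_extract
	     a V2SI directly but can extract DImode pieces from a V2DI, the
	     code above ends up with nstores == 2, lnel == 2, ltype == DI
	     and lvectype == V2DI, so each vector is punned to V2DI and
	     stored as two 64-bit pieces.  */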
6120 ivstep = stride_step;
6121 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6122 build_int_cst (TREE_TYPE (ivstep), vf));
6124 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6126 create_iv (stride_base, ivstep, NULL,
6127 loop, &incr_gsi, insert_after,
6128 &offvar, NULL);
6129 incr = gsi_stmt (incr_gsi);
6130 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6132 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6133 if (stmts)
6134 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6136 prev_stmt_info = NULL;
6137 alias_off = build_int_cst (ref_type, 0);
6138 next_stmt = first_stmt;
6139 for (g = 0; g < group_size; g++)
6141 running_off = offvar;
6142 if (g)
6144 tree size = TYPE_SIZE_UNIT (ltype);
6145 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6146 size);
6147 tree newoff = copy_ssa_name (running_off, NULL);
6148 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6149 running_off, pos);
6150 vect_finish_stmt_generation (stmt, incr, gsi);
6151 running_off = newoff;
6153 unsigned int group_el = 0;
6154 unsigned HOST_WIDE_INT
6155 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6156 for (j = 0; j < ncopies; j++)
6158 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6159 and first_stmt == stmt. */
6160 if (j == 0)
6162 if (slp)
6164 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6165 slp_node);
6166 vec_oprnd = vec_oprnds[0];
6168 else
6170 gcc_assert (gimple_assign_single_p (next_stmt));
6171 op = gimple_assign_rhs1 (next_stmt);
6172 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6175 else
6177 if (slp)
6178 vec_oprnd = vec_oprnds[j];
6179 else
6181 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6182 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6185 /* Pun the vector to extract from if necessary. */
6186 if (lvectype != vectype)
6188 tree tem = make_ssa_name (lvectype);
6189 gimple *pun
6190 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6191 lvectype, vec_oprnd));
6192 vect_finish_stmt_generation (stmt, pun, gsi);
6193 vec_oprnd = tem;
6195 for (i = 0; i < nstores; i++)
6197 tree newref, newoff;
6198 gimple *incr, *assign;
6199 tree size = TYPE_SIZE (ltype);
6200 /* Extract the i'th component. */
6201 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6202 bitsize_int (i), size);
6203 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6204 size, pos);
6206 elem = force_gimple_operand_gsi (gsi, elem, true,
6207 NULL_TREE, true,
6208 GSI_SAME_STMT);
6210 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6211 group_el * elsz);
6212 newref = build2 (MEM_REF, ltype,
6213 running_off, this_off);
6215 /* And store it to *running_off. */
6216 assign = gimple_build_assign (newref, elem);
6217 vect_finish_stmt_generation (stmt, assign, gsi);
6219 group_el += lnel;
6220 if (! slp
6221 || group_el == group_size)
6223 newoff = copy_ssa_name (running_off, NULL);
6224 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6225 running_off, stride_step);
6226 vect_finish_stmt_generation (stmt, incr, gsi);
6228 running_off = newoff;
6229 group_el = 0;
6231 if (g == group_size - 1
6232 && !slp)
6234 if (j == 0 && i == 0)
6235 STMT_VINFO_VEC_STMT (stmt_info)
6236 = *vec_stmt = assign;
6237 else
6238 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6239 prev_stmt_info = vinfo_for_stmt (assign);
6243 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6244 if (slp)
6245 break;
6248 vec_oprnds.release ();
6249 return true;
6252 auto_vec<tree> dr_chain (group_size);
6253 oprnds.create (group_size);
6255 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6256 gcc_assert (alignment_support_scheme);
6257 /* Targets with store-lane instructions must not require explicit
6258 realignment. */
6259 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6260 || alignment_support_scheme == dr_aligned
6261 || alignment_support_scheme == dr_unaligned_supported);
6263 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6264 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6265 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6267 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6268 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6269 else
6270 aggr_type = vectype;
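  /* With store-lanes the whole group is handed to a single IFN_STORE_LANES
     call below, so AGGR_TYPE is an array of vec_num * nunits scalar
     elements (e.g. an 8-element int array for two V4SI vectors);
     otherwise each copy stores one VECTYPE vector.  */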
6272 /* In case the vectorization factor (VF) is bigger than the number
6273 of elements that we can fit in a vectype (nunits), we have to generate
6274 more than one vector stmt - i.e. - we need to "unroll" the
6275 vector stmt by a factor VF/nunits. For more details see documentation in
6276 vect_get_vec_def_for_copy_stmt. */
6278 /* In case of interleaving (non-unit grouped access):
6280 S1: &base + 2 = x2
6281 S2: &base = x0
6282 S3: &base + 1 = x1
6283 S4: &base + 3 = x3
6285 We create vectorized stores starting from base address (the access of the
6286 first stmt in the chain (S2 in the above example), when the last store stmt
6287 of the chain (S4) is reached:
6289 VS1: &base = vx2
6290 VS2: &base + vec_size*1 = vx0
6291 VS3: &base + vec_size*2 = vx1
6292 VS4: &base + vec_size*3 = vx3
6294 Then permutation statements are generated:
6296 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6297 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6300 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6301 (the order of the data-refs in the output of vect_permute_store_chain
6302 corresponds to the order of scalar stmts in the interleaving chain - see
6303 the documentation of vect_permute_store_chain()).
6305 In case of both multiple types and interleaving, above vector stores and
6306 permutation stmts are created for every copy. The result vector stmts are
6307 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6308 STMT_VINFO_RELATED_STMT for the next copies.
6311 prev_stmt_info = NULL;
6312 for (j = 0; j < ncopies; j++)
6315 if (j == 0)
6317 if (slp)
6319 /* Get vectorized arguments for SLP_NODE. */
6320 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6321 NULL, slp_node);
6323 vec_oprnd = vec_oprnds[0];
6325 else
6327 /* For interleaved stores we collect vectorized defs for all the
6328 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6329 used as an input to vect_permute_store_chain(), and OPRNDS as
6330 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6332 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6333 OPRNDS are of size 1. */
6334 next_stmt = first_stmt;
6335 for (i = 0; i < group_size; i++)
6337 /* Since gaps are not supported for interleaved stores,
6338 GROUP_SIZE is the exact number of stmts in the chain.
6339 Therefore, NEXT_STMT can't be NULL_TREE. In case
6340 there is no interleaving, GROUP_SIZE is 1, and only one
6341 iteration of the loop will be executed. */
6342 gcc_assert (next_stmt
6343 && gimple_assign_single_p (next_stmt));
6344 op = gimple_assign_rhs1 (next_stmt);
6346 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6347 dr_chain.quick_push (vec_oprnd);
6348 oprnds.quick_push (vec_oprnd);
6349 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6353 /* We should have caught mismatched types earlier. */
6354 gcc_assert (useless_type_conversion_p (vectype,
6355 TREE_TYPE (vec_oprnd)));
6356 bool simd_lane_access_p
6357 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6358 if (simd_lane_access_p
6359 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6360 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6361 && integer_zerop (DR_OFFSET (first_dr))
6362 && integer_zerop (DR_INIT (first_dr))
6363 && alias_sets_conflict_p (get_alias_set (aggr_type),
6364 get_alias_set (TREE_TYPE (ref_type))))
6366 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6367 dataref_offset = build_int_cst (ref_type, 0);
6368 inv_p = false;
6370 else
6371 dataref_ptr
6372 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6373 simd_lane_access_p ? loop : NULL,
6374 offset, &dummy, gsi, &ptr_incr,
6375 simd_lane_access_p, &inv_p);
6376 gcc_assert (bb_vinfo || !inv_p);
6378 else
6380 /* For interleaved stores we created vectorized defs for all the
6381 defs stored in OPRNDS in the previous iteration (previous copy).
6382 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6383 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6384 next copy.
6385 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6386 OPRNDS are of size 1. */
6387 for (i = 0; i < group_size; i++)
6389 op = oprnds[i];
6390 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6391 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6392 dr_chain[i] = vec_oprnd;
6393 oprnds[i] = vec_oprnd;
6395 if (dataref_offset)
6396 dataref_offset
6397 = int_const_binop (PLUS_EXPR, dataref_offset,
6398 TYPE_SIZE_UNIT (aggr_type));
6399 else
6400 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6401 TYPE_SIZE_UNIT (aggr_type));
6404 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6406 tree vec_array;
6408 /* Combine all the vectors into an array. */
6409 vec_array = create_vector_array (vectype, vec_num);
6410 for (i = 0; i < vec_num; i++)
6412 vec_oprnd = dr_chain[i];
6413 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6416 /* Emit:
6417 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6418 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6419 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6420 vec_array);
6421 gimple_call_set_lhs (call, data_ref);
6422 gimple_call_set_nothrow (call, true);
6423 new_stmt = call;
6424 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6426 else
6428 new_stmt = NULL;
6429 if (grouped_store)
6431 if (j == 0)
6432 result_chain.create (group_size);
6433 /* Permute. */
6434 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6435 &result_chain);
6438 next_stmt = first_stmt;
6439 for (i = 0; i < vec_num; i++)
6441 unsigned align, misalign;
6443 if (i > 0)
6444 /* Bump the vector pointer. */
6445 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6446 stmt, NULL_TREE);
6448 if (slp)
6449 vec_oprnd = vec_oprnds[i];
6450 else if (grouped_store)
6451 /* For grouped stores vectorized defs are interleaved in
6452 vect_permute_store_chain(). */
6453 vec_oprnd = result_chain[i];
6455 data_ref = fold_build2 (MEM_REF, vectype,
6456 dataref_ptr,
6457 dataref_offset
6458 ? dataref_offset
6459 : build_int_cst (ref_type, 0));
6460 align = DR_TARGET_ALIGNMENT (first_dr);
6461 if (aligned_access_p (first_dr))
6462 misalign = 0;
6463 else if (DR_MISALIGNMENT (first_dr) == -1)
6465 align = dr_alignment (vect_dr_behavior (first_dr));
6466 misalign = 0;
6467 TREE_TYPE (data_ref)
6468 = build_aligned_type (TREE_TYPE (data_ref),
6469 align * BITS_PER_UNIT);
6471 else
6473 TREE_TYPE (data_ref)
6474 = build_aligned_type (TREE_TYPE (data_ref),
6475 TYPE_ALIGN (elem_type));
6476 misalign = DR_MISALIGNMENT (first_dr);
6478 if (dataref_offset == NULL_TREE
6479 && TREE_CODE (dataref_ptr) == SSA_NAME)
6480 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6481 misalign);
6483 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6485 tree perm_mask = perm_mask_for_reverse (vectype);
6486 tree perm_dest
6487 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6488 vectype);
6489 tree new_temp = make_ssa_name (perm_dest);
6491 /* Generate the permute statement. */
6492 gimple *perm_stmt
6493 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6494 vec_oprnd, perm_mask);
6495 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6497 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6498 vec_oprnd = new_temp;
6501 /* Arguments are ready. Create the new vector stmt. */
6502 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6503 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6505 if (slp)
6506 continue;
6508 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6509 if (!next_stmt)
6510 break;
6513 if (!slp)
6515 if (j == 0)
6516 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6517 else
6518 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6519 prev_stmt_info = vinfo_for_stmt (new_stmt);
6523 oprnds.release ();
6524 result_chain.release ();
6525 vec_oprnds.release ();
6527 return true;
6530 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6531 VECTOR_CST mask. No checks are made that the target platform supports the
6532 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6533 vect_gen_perm_mask_checked. */
6535 tree
6536 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6538 tree mask_elt_type, mask_type;
6540 mask_elt_type = lang_hooks.types.type_for_mode
6541 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
6542 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6543 return vec_perm_indices_to_tree (mask_type, sel);
6546 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6547 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6549 tree
6550 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6552 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6553 return vect_gen_perm_mask_any (vectype, sel);
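/* A minimal usage sketch for the two helpers above (element values are just
   an example): to build a mask that reverses a 4-element vector one would do

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int k = 0; k < 4; ++k)
       sel.quick_push (3 - k);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   and use MASK as the third operand of a VEC_PERM_EXPR, as done by
   permute_vec_elements below.  */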
6556 /* Given vector variables X and Y that were generated for the scalar
6557 STMT, generate instructions to permute the vector elements of X and Y
6558 using permutation mask MASK_VEC, insert them at *GSI and return the
6559 permuted vector variable. */
6561 static tree
6562 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6563 gimple_stmt_iterator *gsi)
6565 tree vectype = TREE_TYPE (x);
6566 tree perm_dest, data_ref;
6567 gimple *perm_stmt;
6569 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6570 data_ref = make_ssa_name (perm_dest);
6572 /* Generate the permute statement. */
6573 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6574 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6576 return data_ref;
6579 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6580 inserting them on the loop's preheader edge. Returns true if we
6581 were successful in doing so (and thus STMT itself can then be moved),
6582 otherwise returns false. */
6584 static bool
6585 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6587 ssa_op_iter i;
6588 tree op;
6589 bool any = false;
6591 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6593 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6594 if (!gimple_nop_p (def_stmt)
6595 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6597 /* Make sure we don't need to recurse. While we could do
6598 so in simple cases, for more complex use webs we don't
6599 have an easy way to preserve stmt order to fulfil
6600 dependencies within them. */
6601 tree op2;
6602 ssa_op_iter i2;
6603 if (gimple_code (def_stmt) == GIMPLE_PHI)
6604 return false;
6605 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6607 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6608 if (!gimple_nop_p (def_stmt2)
6609 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6610 return false;
6612 any = true;
6616 if (!any)
6617 return true;
6619 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6621 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6622 if (!gimple_nop_p (def_stmt)
6623 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6625 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6626 gsi_remove (&gsi, false);
6627 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6631 return true;
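/* For example (a made-up scenario): if STMT is a loop-invariant load *p_1
   and p_1 = &a + 16 is the only in-loop def feeding it, with all of its own
   operands defined outside the loop, that single def is moved to the
   preheader so the caller can subsequently hoist the load itself; a PHI def
   or a def with further in-loop dependences makes us give up instead.  */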
6634 /* vectorizable_load.
6636 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6637 can be vectorized.
6638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6639 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6642 static bool
6643 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6644 slp_tree slp_node, slp_instance slp_node_instance)
6646 tree scalar_dest;
6647 tree vec_dest = NULL;
6648 tree data_ref = NULL;
6649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6650 stmt_vec_info prev_stmt_info;
6651 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6652 struct loop *loop = NULL;
6653 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6654 bool nested_in_vect_loop = false;
6655 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6656 tree elem_type;
6657 tree new_temp;
6658 machine_mode mode;
6659 gimple *new_stmt = NULL;
6660 tree dummy;
6661 enum dr_alignment_support alignment_support_scheme;
6662 tree dataref_ptr = NULL_TREE;
6663 tree dataref_offset = NULL_TREE;
6664 gimple *ptr_incr = NULL;
6665 int ncopies;
6666 int i, j, group_size, group_gap_adj;
6667 tree msq = NULL_TREE, lsq;
6668 tree offset = NULL_TREE;
6669 tree byte_offset = NULL_TREE;
6670 tree realignment_token = NULL_TREE;
6671 gphi *phi = NULL;
6672 vec<tree> dr_chain = vNULL;
6673 bool grouped_load = false;
6674 gimple *first_stmt;
6675 gimple *first_stmt_for_drptr = NULL;
6676 bool inv_p;
6677 bool compute_in_loop = false;
6678 struct loop *at_loop;
6679 int vec_num;
6680 bool slp = (slp_node != NULL);
6681 bool slp_perm = false;
6682 enum tree_code code;
6683 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6684 int vf;
6685 tree aggr_type;
6686 gather_scatter_info gs_info;
6687 vec_info *vinfo = stmt_info->vinfo;
6688 tree ref_type;
6690 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6691 return false;
6693 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6694 && ! vec_stmt)
6695 return false;
6697 /* Is vectorizable load? */
6698 if (!is_gimple_assign (stmt))
6699 return false;
6701 scalar_dest = gimple_assign_lhs (stmt);
6702 if (TREE_CODE (scalar_dest) != SSA_NAME)
6703 return false;
6705 code = gimple_assign_rhs_code (stmt);
6706 if (code != ARRAY_REF
6707 && code != BIT_FIELD_REF
6708 && code != INDIRECT_REF
6709 && code != COMPONENT_REF
6710 && code != IMAGPART_EXPR
6711 && code != REALPART_EXPR
6712 && code != MEM_REF
6713 && TREE_CODE_CLASS (code) != tcc_declaration)
6714 return false;
6716 if (!STMT_VINFO_DATA_REF (stmt_info))
6717 return false;
6719 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6720 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6722 if (loop_vinfo)
6724 loop = LOOP_VINFO_LOOP (loop_vinfo);
6725 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6726 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6728 else
6729 vf = 1;
6731 /* Multiple types in SLP are handled by creating the appropriate number of
6732 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6733 case of SLP. */
6734 if (slp)
6735 ncopies = 1;
6736 else
6737 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6739 gcc_assert (ncopies >= 1);
6741 /* FORNOW. This restriction should be relaxed. */
6742 if (nested_in_vect_loop && ncopies > 1)
6744 if (dump_enabled_p ())
6745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6746 "multiple types in nested loop.\n");
6747 return false;
6750 /* Invalidate assumptions made by dependence analysis when vectorization
6751 on the unrolled body effectively re-orders stmts. */
6752 if (ncopies > 1
6753 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6754 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6755 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6759 "cannot perform implicit CSE when unrolling "
6760 "with negative dependence distance\n");
6761 return false;
6764 elem_type = TREE_TYPE (vectype);
6765 mode = TYPE_MODE (vectype);
6767 /* FORNOW. In some cases can vectorize even if data-type not supported
6768 (e.g. - data copies). */
6769 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6771 if (dump_enabled_p ())
6772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6773 "Aligned load, but unsupported type.\n");
6774 return false;
6777 /* Check if the load is a part of an interleaving chain. */
6778 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6780 grouped_load = true;
6781 /* FORNOW */
6782 gcc_assert (!nested_in_vect_loop);
6783 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6785 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6786 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6788 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6789 slp_perm = true;
6791 /* Invalidate assumptions made by dependence analysis when vectorization
6792 on the unrolled body effectively re-orders stmts. */
6793 if (!PURE_SLP_STMT (stmt_info)
6794 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6795 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6796 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6798 if (dump_enabled_p ())
6799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6800 "cannot perform implicit CSE when performing "
6801 "group loads with negative dependence distance\n");
6802 return false;
6805 /* Similarly, when the stmt is a load that is both part of an SLP
6806 instance and a loop-vectorized stmt via the same-dr mechanism,
6807 we have to give up. */
6808 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6809 && (STMT_SLP_TYPE (stmt_info)
6810 != STMT_SLP_TYPE (vinfo_for_stmt
6811 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6813 if (dump_enabled_p ())
6814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6815 "conflicting SLP types for CSEd load\n");
6816 return false;
6820 vect_memory_access_type memory_access_type;
6821 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6822 &memory_access_type, &gs_info))
6823 return false;
6825 if (!vec_stmt) /* transformation not required. */
6827 if (!slp)
6828 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6829 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6830 /* The SLP costs are calculated during SLP analysis. */
6831 if (!PURE_SLP_STMT (stmt_info))
6832 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6833 NULL, NULL, NULL);
6834 return true;
6837 if (!slp)
6838 gcc_assert (memory_access_type
6839 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6841 if (dump_enabled_p ())
6842 dump_printf_loc (MSG_NOTE, vect_location,
6843 "transform load. ncopies = %d\n", ncopies);
6845 /* Transform. */
6847 ensure_base_align (dr);
6849 if (memory_access_type == VMAT_GATHER_SCATTER)
6851 tree vec_oprnd0 = NULL_TREE, op;
6852 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6853 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6854 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6855 edge pe = loop_preheader_edge (loop);
6856 gimple_seq seq;
6857 basic_block new_bb;
6858 enum { NARROW, NONE, WIDEN } modifier;
6859 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6861 if (nunits == gather_off_nunits)
6862 modifier = NONE;
6863 else if (nunits == gather_off_nunits / 2)
6865 modifier = WIDEN;
6867 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
6868 for (i = 0; i < gather_off_nunits; ++i)
6869 sel.quick_push (i | nunits);
6871 vec_perm_indices indices (sel, 1, gather_off_nunits);
6872 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6873 indices);
6875 else if (nunits == gather_off_nunits * 2)
6877 modifier = NARROW;
6879 vec_perm_builder sel (nunits, nunits, 1);
6880 for (i = 0; i < nunits; ++i)
6881 sel.quick_push (i < gather_off_nunits
6882 ? i : i + nunits - gather_off_nunits);
6884 vec_perm_indices indices (sel, 2, nunits);
6885 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6886 ncopies *= 2;
6888 else
6889 gcc_unreachable ();
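      /* By way of example only: with nunits == 8 and gather_off_nunits == 4
	 the NARROW mask built above is { 0, 1, 2, 3, 8, 9, 10, 11 }, which
	 in effect concatenates the low halves of two consecutive gather
	 results into one full VECTYPE vector; the WIDEN mask mirrors the
	 scatter case and picks the high half of the offset vector for the
	 odd copies.  */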
6891 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6892 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6893 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6894 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6895 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6896 scaletype = TREE_VALUE (arglist);
6897 gcc_checking_assert (types_compatible_p (srctype, rettype));
6899 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6901 ptr = fold_convert (ptrtype, gs_info.base);
6902 if (!is_gimple_min_invariant (ptr))
6904 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6905 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6906 gcc_assert (!new_bb);
6909 /* Currently we support only unconditional gather loads,
6910 so mask should be all ones. */
6911 if (TREE_CODE (masktype) == INTEGER_TYPE)
6912 mask = build_int_cst (masktype, -1);
6913 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6915 mask = build_int_cst (TREE_TYPE (masktype), -1);
6916 mask = build_vector_from_val (masktype, mask);
6917 mask = vect_init_vector (stmt, mask, masktype, NULL);
6919 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6921 REAL_VALUE_TYPE r;
6922 long tmp[6];
6923 for (j = 0; j < 6; ++j)
6924 tmp[j] = -1;
6925 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6926 mask = build_real (TREE_TYPE (masktype), r);
6927 mask = build_vector_from_val (masktype, mask);
6928 mask = vect_init_vector (stmt, mask, masktype, NULL);
6930 else
6931 gcc_unreachable ();
6933 scale = build_int_cst (scaletype, gs_info.scale);
6935 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6936 merge = build_int_cst (TREE_TYPE (rettype), 0);
6937 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6939 REAL_VALUE_TYPE r;
6940 long tmp[6];
6941 for (j = 0; j < 6; ++j)
6942 tmp[j] = 0;
6943 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6944 merge = build_real (TREE_TYPE (rettype), r);
6946 else
6947 gcc_unreachable ();
6948 merge = build_vector_from_val (rettype, merge);
6949 merge = vect_init_vector (stmt, merge, rettype, NULL);
6951 prev_stmt_info = NULL;
6952 for (j = 0; j < ncopies; ++j)
6954 if (modifier == WIDEN && (j & 1))
6955 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6956 perm_mask, stmt, gsi);
6957 else if (j == 0)
6958 op = vec_oprnd0
6959 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6960 else
6961 op = vec_oprnd0
6962 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6964 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6967 == TYPE_VECTOR_SUBPARTS (idxtype));
6968 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6969 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6970 new_stmt
6971 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6973 op = var;
6976 new_stmt
6977 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6979 if (!useless_type_conversion_p (vectype, rettype))
6981 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6982 == TYPE_VECTOR_SUBPARTS (rettype));
6983 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6984 gimple_call_set_lhs (new_stmt, op);
6985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6986 var = make_ssa_name (vec_dest);
6987 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6988 new_stmt
6989 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6991 else
6993 var = make_ssa_name (vec_dest, new_stmt);
6994 gimple_call_set_lhs (new_stmt, var);
6997 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6999 if (modifier == NARROW)
7001 if ((j & 1) == 0)
7003 prev_res = var;
7004 continue;
7006 var = permute_vec_elements (prev_res, var,
7007 perm_mask, stmt, gsi);
7008 new_stmt = SSA_NAME_DEF_STMT (var);
7011 if (prev_stmt_info == NULL)
7012 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7013 else
7014 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7015 prev_stmt_info = vinfo_for_stmt (new_stmt);
7017 return true;
7020 if (memory_access_type == VMAT_ELEMENTWISE
7021 || memory_access_type == VMAT_STRIDED_SLP)
7023 gimple_stmt_iterator incr_gsi;
7024 bool insert_after;
7025 gimple *incr;
7026 tree offvar;
7027 tree ivstep;
7028 tree running_off;
7029 vec<constructor_elt, va_gc> *v = NULL;
7030 gimple_seq stmts = NULL;
7031 tree stride_base, stride_step, alias_off;
7033 gcc_assert (!nested_in_vect_loop);
7035 if (slp && grouped_load)
7037 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7038 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7039 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7040 ref_type = get_group_alias_ptr_type (first_stmt);
7042 else
7044 first_stmt = stmt;
7045 first_dr = dr;
7046 group_size = 1;
7047 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7050 stride_base
7051 = fold_build_pointer_plus
7052 (DR_BASE_ADDRESS (first_dr),
7053 size_binop (PLUS_EXPR,
7054 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7055 convert_to_ptrofftype (DR_INIT (first_dr))));
7056 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7058 /* For a load with loop-invariant (but other than power-of-2)
7059 stride (i.e. not a grouped access) like so:
7061 for (i = 0; i < n; i += stride)
7062 ... = array[i];
7064 we generate a new induction variable and new accesses to
7065 form a new vector (or vectors, depending on ncopies):
7067 for (j = 0; ; j += VF*stride)
7068 tmp1 = array[j];
7069 tmp2 = array[j + stride];
7071 vectemp = {tmp1, tmp2, ...}
7074 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7075 build_int_cst (TREE_TYPE (stride_step), vf));
7077 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7079 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7080 loop, &incr_gsi, insert_after,
7081 &offvar, NULL);
7082 incr = gsi_stmt (incr_gsi);
7083 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7085 stride_step = force_gimple_operand (unshare_expr (stride_step),
7086 &stmts, true, NULL_TREE);
7087 if (stmts)
7088 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7090 prev_stmt_info = NULL;
7091 running_off = offvar;
7092 alias_off = build_int_cst (ref_type, 0);
7093 int nloads = nunits;
7094 int lnel = 1;
7095 tree ltype = TREE_TYPE (vectype);
7096 tree lvectype = vectype;
7097 auto_vec<tree> dr_chain;
7098 if (memory_access_type == VMAT_STRIDED_SLP)
7100 if (group_size < nunits)
7102 /* First check if vec_init optab supports construction from
7103 vector elts directly. */
7104 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7105 machine_mode vmode;
7106 if (mode_for_vector (elmode, group_size).exists (&vmode)
7107 && VECTOR_MODE_P (vmode)
7108 && (convert_optab_handler (vec_init_optab,
7109 TYPE_MODE (vectype), vmode)
7110 != CODE_FOR_nothing))
7112 nloads = nunits / group_size;
7113 lnel = group_size;
7114 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7116 else
7118 /* Otherwise avoid emitting a constructor of vector elements
7119 by performing the loads using an integer type of the same
7120 size, constructing a vector of those and then
7121 re-interpreting it as the original vector type.
7122 This avoids a huge runtime penalty due to the general
7123 inability to perform store forwarding from smaller stores
7124 to a larger load. */
7125 unsigned lsize
7126 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7127 elmode = int_mode_for_size (lsize, 0).require ();
7128 /* If we can't construct such a vector fall back to
7129 element loads of the original vector type. */
7130 if (mode_for_vector (elmode,
7131 nunits / group_size).exists (&vmode)
7132 && VECTOR_MODE_P (vmode)
7133 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7134 != CODE_FOR_nothing))
7136 nloads = nunits / group_size;
7137 lnel = group_size;
7138 ltype = build_nonstandard_integer_type (lsize, 1);
7139 lvectype = build_vector_type (ltype, nloads);
7143 else
7145 nloads = 1;
7146 lnel = nunits;
7147 ltype = vectype;
7149 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
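      /* A concrete (made-up) example mirroring the store side: loading
	 groups of two SI elements into a V4SI vector, if V2SI construction
	 via vec_init is unsupported but DImode pieces can be assembled into
	 a V2DI, the code above chooses nloads == 2, lnel == 2, ltype == DI
	 and lvectype == V2DI; the constructor result is view-converted back
	 to V4SI further down.  */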
7151 if (slp)
7153 /* For SLP permutation support we need to load the whole group,
7154 not only the number of vector stmts the permutation result
7155 fits in. */
7156 if (slp_perm)
7158 ncopies = (group_size * vf + nunits - 1) / nunits;
7159 dr_chain.create (ncopies);
7161 else
7162 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7164 int group_el = 0;
7165 unsigned HOST_WIDE_INT
7166 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7167 for (j = 0; j < ncopies; j++)
7169 if (nloads > 1)
7170 vec_alloc (v, nloads);
7171 for (i = 0; i < nloads; i++)
7173 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7174 group_el * elsz);
7175 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7176 build2 (MEM_REF, ltype,
7177 running_off, this_off));
7178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7179 if (nloads > 1)
7180 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7181 gimple_assign_lhs (new_stmt));
7183 group_el += lnel;
7184 if (! slp
7185 || group_el == group_size)
7187 tree newoff = copy_ssa_name (running_off);
7188 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7189 running_off, stride_step);
7190 vect_finish_stmt_generation (stmt, incr, gsi);
7192 running_off = newoff;
7193 group_el = 0;
7196 if (nloads > 1)
7198 tree vec_inv = build_constructor (lvectype, v);
7199 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7200 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7201 if (lvectype != vectype)
7203 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7204 VIEW_CONVERT_EXPR,
7205 build1 (VIEW_CONVERT_EXPR,
7206 vectype, new_temp));
7207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7211 if (slp)
7213 if (slp_perm)
7214 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7215 else
7216 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7218 else
7220 if (j == 0)
7221 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7222 else
7223 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7224 prev_stmt_info = vinfo_for_stmt (new_stmt);
7227 if (slp_perm)
7229 unsigned n_perms;
7230 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7231 slp_node_instance, false, &n_perms);
7233 return true;
7236 if (grouped_load)
7238 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7239 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7240 /* For SLP vectorization we directly vectorize a subchain
7241 without permutation. */
7242 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7243 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7244 /* For BB vectorization always use the first stmt to base
7245 the data ref pointer on. */
7246 if (bb_vinfo)
7247 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7249 /* Check if the chain of loads is already vectorized. */
7250 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7251 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7252 ??? But we can only do so if there is exactly one
7253 as we have no way to get at the rest. Leave the CSE
7254 opportunity alone.
7255 ??? With the group load eventually participating
7256 in multiple different permutations (having multiple
7257 slp nodes which refer to the same group) the CSE
7258 is even wrong code. See PR56270. */
7259 && !slp)
7261 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7262 return true;
7264 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7265 group_gap_adj = 0;
7267 /* VEC_NUM is the number of vect stmts to be created for this group. */
7268 if (slp)
7270 grouped_load = false;
7271 /* For SLP permutation support we need to load the whole group,
7272 not only the number of vector stmts the permutation result
7273 fits in. */
7274 if (slp_perm)
7276 vec_num = (group_size * vf + nunits - 1) / nunits;
7277 group_gap_adj = vf * group_size - nunits * vec_num;
7279 else
7281 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7282 group_gap_adj
7283 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7286 else
7287 vec_num = group_size;
7289 ref_type = get_group_alias_ptr_type (first_stmt);
7291 else
7293 first_stmt = stmt;
7294 first_dr = dr;
7295 group_size = vec_num = 1;
7296 group_gap_adj = 0;
7297 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7300 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7301 gcc_assert (alignment_support_scheme);
7302 /* Targets with load-lane instructions must not require explicit
7303 realignment. */
7304 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7305 || alignment_support_scheme == dr_aligned
7306 || alignment_support_scheme == dr_unaligned_supported);
7308 /* In case the vectorization factor (VF) is bigger than the number
7309 of elements that we can fit in a vectype (nunits), we have to generate
7310 more than one vector stmt - i.e. - we need to "unroll" the
7311 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7312 from one copy of the vector stmt to the next, in the field
7313 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7314 stages to find the correct vector defs to be used when vectorizing
7315 stmts that use the defs of the current stmt. The example below
7316 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7317 need to create 4 vectorized stmts):
7319 before vectorization:
7320 RELATED_STMT VEC_STMT
7321 S1: x = memref - -
7322 S2: z = x + 1 - -
7324 step 1: vectorize stmt S1:
7325 We first create the vector stmt VS1_0, and, as usual, record a
7326 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7327 Next, we create the vector stmt VS1_1, and record a pointer to
7328 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7329 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7330 stmts and pointers:
7331 RELATED_STMT VEC_STMT
7332 VS1_0: vx0 = memref0 VS1_1 -
7333 VS1_1: vx1 = memref1 VS1_2 -
7334 VS1_2: vx2 = memref2 VS1_3 -
7335 VS1_3: vx3 = memref3 - -
7336 S1: x = load - VS1_0
7337 S2: z = x + 1 - -
7339 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7340 information we recorded in RELATED_STMT field is used to vectorize
7341 stmt S2. */
7343 /* In case of interleaving (non-unit grouped access):
7345 S1: x2 = &base + 2
7346 S2: x0 = &base
7347 S3: x1 = &base + 1
7348 S4: x3 = &base + 3
7350 Vectorized loads are created in the order of memory accesses
7351 starting from the access of the first stmt of the chain:
7353 VS1: vx0 = &base
7354 VS2: vx1 = &base + vec_size*1
7355 VS3: vx3 = &base + vec_size*2
7356 VS4: vx4 = &base + vec_size*3
7358 Then permutation statements are generated:
7360 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7361 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7364 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7365 (the order of the data-refs in the output of vect_permute_load_chain
7366 corresponds to the order of scalar stmts in the interleaving chain - see
7367 the documentation of vect_permute_load_chain()).
7368 The generation of permutation stmts and recording them in
7369 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7371 In case of both multiple types and interleaving, the vector loads and
7372 permutation stmts above are created for every copy. The result vector
7373 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7374 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
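/* Editorial illustration (not generated code): with group_size == 2,
   nunits == 4 and the interleaved elements a0,b0,a1,b1,a2,b2,a3,b3 in
   memory, the loads and permutes above amount to:

     vx0 = { a0, b0, a1, b1 }      vx1 = { a2, b2, a3, b3 }
     vx5 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  =  { a0, a1, a2, a3 }
     vx6 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  =  { b0, b1, b2, b3 }

   so each scalar of the interleaving chain gets its own de-interleaved
   vector.  */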
7376 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7377 on a target that supports unaligned accesses (dr_unaligned_supported)
7378 we generate the following code:
7379 p = initial_addr;
7380 indx = 0;
7381 loop {
7382 p = p + indx * vectype_size;
7383 vec_dest = *(p);
7384 indx = indx + 1;
7387 Otherwise, the data reference is potentially unaligned on a target that
7388 does not support unaligned accesses (dr_explicit_realign_optimized) -
7389 then generate the following code, in which the data in each iteration is
7390 obtained by two vector loads, one from the previous iteration, and one
7391 from the current iteration:
7392 p1 = initial_addr;
7393 msq_init = *(floor(p1))
7394 p2 = initial_addr + VS - 1;
7395 realignment_token = call target_builtin;
7396 indx = 0;
7397 loop {
7398 p2 = p2 + indx * vectype_size
7399 lsq = *(floor(p2))
7400 vec_dest = realign_load (msq, lsq, realignment_token)
7401 indx = indx + 1;
7402 msq = lsq;
7403 } */
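/* Editorial sketch of what realign_load computes (the names OFS and
   VS_ELEMS are illustrative, not part of the sources): with OFS being the
   element offset of initial_addr from the preceding aligned address, the
   result is the tail of MSQ followed by the head of LSQ:

     for (e = 0; e < VS_ELEMS; e++)
       vec_dest[e] = (e < VS_ELEMS - OFS
		      ? msq[OFS + e]
		      : lsq[e - (VS_ELEMS - OFS)]);

   the realignment_token lets the target encode OFS (e.g. as a shuffle
   mask) once, outside the loop when possible.  */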
7405 /* If the misalignment remains the same throughout the execution of the
7406 loop, we can create the init_addr and permutation mask at the loop
7407 preheader. Otherwise, it needs to be created inside the loop.
7408 This can only occur when vectorizing memory accesses in the inner-loop
7409 nested within an outer-loop that is being vectorized. */
7411 if (nested_in_vect_loop
7412 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7414 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7415 compute_in_loop = true;
7418 if ((alignment_support_scheme == dr_explicit_realign_optimized
7419 || alignment_support_scheme == dr_explicit_realign)
7420 && !compute_in_loop)
7422 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7423 alignment_support_scheme, NULL_TREE,
7424 &at_loop);
7425 if (alignment_support_scheme == dr_explicit_realign_optimized)
7427 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7428 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7429 size_one_node);
7432 else
7433 at_loop = loop;
7435 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7436 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7438 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7439 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7440 else
7441 aggr_type = vectype;
7443 prev_stmt_info = NULL;
7444 int group_elt = 0;
7445 for (j = 0; j < ncopies; j++)
7447 /* 1. Create the vector or array pointer update chain. */
7448 if (j == 0)
7450 bool simd_lane_access_p
7451 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7452 if (simd_lane_access_p
7453 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7454 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7455 && integer_zerop (DR_OFFSET (first_dr))
7456 && integer_zerop (DR_INIT (first_dr))
7457 && alias_sets_conflict_p (get_alias_set (aggr_type),
7458 get_alias_set (TREE_TYPE (ref_type)))
7459 && (alignment_support_scheme == dr_aligned
7460 || alignment_support_scheme == dr_unaligned_supported))
7462 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7463 dataref_offset = build_int_cst (ref_type, 0);
7464 inv_p = false;
7466 else if (first_stmt_for_drptr
7467 && first_stmt != first_stmt_for_drptr)
7469 dataref_ptr
7470 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7471 at_loop, offset, &dummy, gsi,
7472 &ptr_incr, simd_lane_access_p,
7473 &inv_p, byte_offset);
7474 /* Adjust the pointer by the difference to first_stmt. */
7475 data_reference_p ptrdr
7476 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7477 tree diff = fold_convert (sizetype,
7478 size_binop (MINUS_EXPR,
7479 DR_INIT (first_dr),
7480 DR_INIT (ptrdr)));
7481 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7482 stmt, diff);
7484 else
7485 dataref_ptr
7486 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7487 offset, &dummy, gsi, &ptr_incr,
7488 simd_lane_access_p, &inv_p,
7489 byte_offset);
7491 else if (dataref_offset)
7492 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7493 TYPE_SIZE_UNIT (aggr_type));
7494 else
7495 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7496 TYPE_SIZE_UNIT (aggr_type));
7498 if (grouped_load || slp_perm)
7499 dr_chain.create (vec_num);
7501 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7503 tree vec_array;
7505 vec_array = create_vector_array (vectype, vec_num);
7507 /* Emit:
7508 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7509 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7510 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7511 data_ref);
7512 gimple_call_set_lhs (call, vec_array);
7513 gimple_call_set_nothrow (call, true);
7514 new_stmt = call;
7515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7517 /* Extract each vector into an SSA_NAME. */
7518 for (i = 0; i < vec_num; i++)
7520 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7521 vec_array, i);
7522 dr_chain.quick_push (new_temp);
7525 /* Record the mapping between SSA_NAMEs and statements. */
7526 vect_record_grouped_load_vectors (stmt, dr_chain);
7528 else
7530 for (i = 0; i < vec_num; i++)
7532 if (i > 0)
7533 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7534 stmt, NULL_TREE);
7536 /* 2. Create the vector-load in the loop. */
7537 switch (alignment_support_scheme)
7539 case dr_aligned:
7540 case dr_unaligned_supported:
7542 unsigned int align, misalign;
7544 data_ref
7545 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7546 dataref_offset
7547 ? dataref_offset
7548 : build_int_cst (ref_type, 0));
7549 align = DR_TARGET_ALIGNMENT (dr);
7550 if (alignment_support_scheme == dr_aligned)
7552 gcc_assert (aligned_access_p (first_dr));
7553 misalign = 0;
7555 else if (DR_MISALIGNMENT (first_dr) == -1)
7557 align = dr_alignment (vect_dr_behavior (first_dr));
7558 misalign = 0;
7559 TREE_TYPE (data_ref)
7560 = build_aligned_type (TREE_TYPE (data_ref),
7561 align * BITS_PER_UNIT);
7563 else
7565 TREE_TYPE (data_ref)
7566 = build_aligned_type (TREE_TYPE (data_ref),
7567 TYPE_ALIGN (elem_type));
7568 misalign = DR_MISALIGNMENT (first_dr);
7570 if (dataref_offset == NULL_TREE
7571 && TREE_CODE (dataref_ptr) == SSA_NAME)
7572 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7573 align, misalign);
7574 break;
7576 case dr_explicit_realign:
7578 tree ptr, bump;
7580 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7582 if (compute_in_loop)
7583 msq = vect_setup_realignment (first_stmt, gsi,
7584 &realignment_token,
7585 dr_explicit_realign,
7586 dataref_ptr, NULL);
7588 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7589 ptr = copy_ssa_name (dataref_ptr);
7590 else
7591 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7592 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7593 new_stmt = gimple_build_assign
7594 (ptr, BIT_AND_EXPR, dataref_ptr,
7595 build_int_cst
7596 (TREE_TYPE (dataref_ptr),
7597 -(HOST_WIDE_INT) align));
7598 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7599 data_ref
7600 = build2 (MEM_REF, vectype, ptr,
7601 build_int_cst (ref_type, 0));
7602 vec_dest = vect_create_destination_var (scalar_dest,
7603 vectype);
7604 new_stmt = gimple_build_assign (vec_dest, data_ref);
7605 new_temp = make_ssa_name (vec_dest, new_stmt);
7606 gimple_assign_set_lhs (new_stmt, new_temp);
7607 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7608 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7609 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7610 msq = new_temp;
7612 bump = size_binop (MULT_EXPR, vs,
7613 TYPE_SIZE_UNIT (elem_type));
7614 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7615 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7616 new_stmt = gimple_build_assign
7617 (NULL_TREE, BIT_AND_EXPR, ptr,
7618 build_int_cst
7619 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7620 ptr = copy_ssa_name (ptr, new_stmt);
7621 gimple_assign_set_lhs (new_stmt, ptr);
7622 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7623 data_ref
7624 = build2 (MEM_REF, vectype, ptr,
7625 build_int_cst (ref_type, 0));
7626 break;
7628 case dr_explicit_realign_optimized:
7630 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7631 new_temp = copy_ssa_name (dataref_ptr);
7632 else
7633 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7634 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7635 new_stmt = gimple_build_assign
7636 (new_temp, BIT_AND_EXPR, dataref_ptr,
7637 build_int_cst (TREE_TYPE (dataref_ptr),
7638 -(HOST_WIDE_INT) align));
7639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7640 data_ref
7641 = build2 (MEM_REF, vectype, new_temp,
7642 build_int_cst (ref_type, 0));
7643 break;
7645 default:
7646 gcc_unreachable ();
7648 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7649 new_stmt = gimple_build_assign (vec_dest, data_ref);
7650 new_temp = make_ssa_name (vec_dest, new_stmt);
7651 gimple_assign_set_lhs (new_stmt, new_temp);
7652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7654 /* 3. Handle explicit realignment if necessary/supported.
7655 Create in loop:
7656 vec_dest = realign_load (msq, lsq, realignment_token) */
7657 if (alignment_support_scheme == dr_explicit_realign_optimized
7658 || alignment_support_scheme == dr_explicit_realign)
7660 lsq = gimple_assign_lhs (new_stmt);
7661 if (!realignment_token)
7662 realignment_token = dataref_ptr;
7663 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7664 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7665 msq, lsq, realignment_token);
7666 new_temp = make_ssa_name (vec_dest, new_stmt);
7667 gimple_assign_set_lhs (new_stmt, new_temp);
7668 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7670 if (alignment_support_scheme == dr_explicit_realign_optimized)
7672 gcc_assert (phi);
7673 if (i == vec_num - 1 && j == ncopies - 1)
7674 add_phi_arg (phi, lsq,
7675 loop_latch_edge (containing_loop),
7676 UNKNOWN_LOCATION);
7677 msq = lsq;
7681 /* 4. Handle invariant-load. */
7682 if (inv_p && !bb_vinfo)
7684 gcc_assert (!grouped_load);
7685 /* If we have versioned for aliasing or the loop doesn't
7686 have any data dependencies that would preclude this,
7687 then we are sure this is a loop invariant load and
7688 thus we can insert it on the preheader edge. */
7689 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7690 && !nested_in_vect_loop
7691 && hoist_defs_of_uses (stmt, loop))
7693 if (dump_enabled_p ())
7695 dump_printf_loc (MSG_NOTE, vect_location,
7696 "hoisting out of the vectorized "
7697 "loop: ");
7698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7700 tree tem = copy_ssa_name (scalar_dest);
7701 gsi_insert_on_edge_immediate
7702 (loop_preheader_edge (loop),
7703 gimple_build_assign (tem,
7704 unshare_expr
7705 (gimple_assign_rhs1 (stmt))));
7706 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7707 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7708 set_vinfo_for_stmt (new_stmt,
7709 new_stmt_vec_info (new_stmt, vinfo));
7711 else
7713 gimple_stmt_iterator gsi2 = *gsi;
7714 gsi_next (&gsi2);
7715 new_temp = vect_init_vector (stmt, scalar_dest,
7716 vectype, &gsi2);
7717 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7721 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7723 tree perm_mask = perm_mask_for_reverse (vectype);
7724 new_temp = permute_vec_elements (new_temp, new_temp,
7725 perm_mask, stmt, gsi);
7726 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7729 /* Collect vector loads and later create their permutation in
7730 vect_transform_grouped_load (). */
7731 if (grouped_load || slp_perm)
7732 dr_chain.quick_push (new_temp);
7734 /* Store vector loads in the corresponding SLP_NODE. */
7735 if (slp && !slp_perm)
7736 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7738 /* With an SLP permutation we load the gaps as well; without one
7739 we need to skip the gaps once we have fully loaded all the
7740 elements. group_gap_adj is GROUP_SIZE here. */
7741 group_elt += nunits;
7742 if (group_gap_adj != 0 && ! slp_perm
7743 && group_elt == group_size - group_gap_adj)
7745 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7746 * group_gap_adj);
7747 tree bump = wide_int_to_tree (sizetype, bump_val);
7748 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7749 stmt, bump);
7750 group_elt = 0;
7753 /* Bump the vector pointer to account for a gap or for excess
7754 elements loaded for a permuted SLP load. */
7755 if (group_gap_adj != 0 && slp_perm)
7757 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7758 * group_gap_adj);
7759 tree bump = wide_int_to_tree (sizetype, bump_val);
7760 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7761 stmt, bump);
7765 if (slp && !slp_perm)
7766 continue;
7768 if (slp_perm)
7770 unsigned n_perms;
7771 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7772 slp_node_instance, false,
7773 &n_perms))
7775 dr_chain.release ();
7776 return false;
7779 else
7781 if (grouped_load)
7783 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7784 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7785 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7787 else
7789 if (j == 0)
7790 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7791 else
7792 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7793 prev_stmt_info = vinfo_for_stmt (new_stmt);
7796 dr_chain.release ();
7799 return true;
7802 /* Function vect_is_simple_cond.
7804 Input:
7805 LOOP - the loop that is being vectorized.
7806 COND - Condition that is checked for simple use.
7808 Output:
7809 *COMP_VECTYPE - the vector type for the comparison.
7810 *DTS - The def types for the arguments of the comparison
7812 Returns whether a COND can be vectorized. Checks whether the
7813 condition operands are supportable using vect_is_simple_use. */
7815 static bool
7816 vect_is_simple_cond (tree cond, vec_info *vinfo,
7817 tree *comp_vectype, enum vect_def_type *dts,
7818 tree vectype)
7820 tree lhs, rhs;
7821 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7823 /* Mask case. */
7824 if (TREE_CODE (cond) == SSA_NAME
7825 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7827 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7828 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7829 &dts[0], comp_vectype)
7830 || !*comp_vectype
7831 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7832 return false;
7833 return true;
7836 if (!COMPARISON_CLASS_P (cond))
7837 return false;
7839 lhs = TREE_OPERAND (cond, 0);
7840 rhs = TREE_OPERAND (cond, 1);
7842 if (TREE_CODE (lhs) == SSA_NAME)
7844 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7845 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7846 return false;
7848 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7849 || TREE_CODE (lhs) == FIXED_CST)
7850 dts[0] = vect_constant_def;
7851 else
7852 return false;
7854 if (TREE_CODE (rhs) == SSA_NAME)
7856 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7857 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7858 return false;
7860 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7861 || TREE_CODE (rhs) == FIXED_CST)
7862 dts[1] = vect_constant_def;
7863 else
7864 return false;
7866 if (vectype1 && vectype2
7867 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7868 return false;
7870 *comp_vectype = vectype1 ? vectype1 : vectype2;
7871 /* Invariant comparison. */
7872 if (! *comp_vectype)
7874 tree scalar_type = TREE_TYPE (lhs);
7875 /* If we can widen the comparison to match vectype do so. */
7876 if (INTEGRAL_TYPE_P (scalar_type)
7877 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7878 TYPE_SIZE (TREE_TYPE (vectype))))
7879 scalar_type = build_nonstandard_integer_type
7880 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7881 TYPE_UNSIGNED (scalar_type));
7882 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7885 return true;
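/* An editorial illustration (not taken from the sources) of the invariant
   widening above: for

     int x = (s1 < s2) ? a[i] : b[i];

   with short s1, s2 both defined outside the loop, the comparison type
   (short) is narrower than the element type of VECTYPE (int); widening
   the scalar type before calling get_vectype_for_scalar_type keeps the
   number of subparts of *COMP_VECTYPE equal to that of VECTYPE.  */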
7888 /* vectorizable_condition.
7890 Check if STMT is a conditional modify expression that can be vectorized.
7891 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7892 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7893 at GSI.
7895 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector
7896 variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX
7897 is 1, and in the else clause if it is 2).
7899 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7901 bool
7902 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7903 gimple **vec_stmt, tree reduc_def, int reduc_index,
7904 slp_tree slp_node)
7906 tree scalar_dest = NULL_TREE;
7907 tree vec_dest = NULL_TREE;
7908 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7909 tree then_clause, else_clause;
7910 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7911 tree comp_vectype = NULL_TREE;
7912 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7913 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7914 tree vec_compare;
7915 tree new_temp;
7916 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7917 enum vect_def_type dts[4]
7918 = {vect_unknown_def_type, vect_unknown_def_type,
7919 vect_unknown_def_type, vect_unknown_def_type};
7920 int ndts = 4;
7921 int ncopies;
7922 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7923 stmt_vec_info prev_stmt_info = NULL;
7924 int i, j;
7925 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7926 vec<tree> vec_oprnds0 = vNULL;
7927 vec<tree> vec_oprnds1 = vNULL;
7928 vec<tree> vec_oprnds2 = vNULL;
7929 vec<tree> vec_oprnds3 = vNULL;
7930 tree vec_cmp_type;
7931 bool masked = false;
7933 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7934 return false;
7936 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7938 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7939 return false;
7941 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7942 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7943 && reduc_def))
7944 return false;
7946 /* FORNOW: not yet supported. */
7947 if (STMT_VINFO_LIVE_P (stmt_info))
7949 if (dump_enabled_p ())
7950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7951 "value used after loop.\n");
7952 return false;
7956 /* Is this a vectorizable conditional operation? */
7957 if (!is_gimple_assign (stmt))
7958 return false;
7960 code = gimple_assign_rhs_code (stmt);
7962 if (code != COND_EXPR)
7963 return false;
7965 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7966 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7968 if (slp_node)
7969 ncopies = 1;
7970 else
7971 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7973 gcc_assert (ncopies >= 1);
7974 if (reduc_index && ncopies > 1)
7975 return false; /* FORNOW */
7977 cond_expr = gimple_assign_rhs1 (stmt);
7978 then_clause = gimple_assign_rhs2 (stmt);
7979 else_clause = gimple_assign_rhs3 (stmt);
7981 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7982 &comp_vectype, &dts[0], vectype)
7983 || !comp_vectype)
7984 return false;
7986 gimple *def_stmt;
7987 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7988 &vectype1))
7989 return false;
7990 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7991 &vectype2))
7992 return false;
7994 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7995 return false;
7997 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7998 return false;
8000 masked = !COMPARISON_CLASS_P (cond_expr);
8001 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8003 if (vec_cmp_type == NULL_TREE)
8004 return false;
8006 cond_code = TREE_CODE (cond_expr);
8007 if (!masked)
8009 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8010 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8013 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8015 /* Boolean values may have another representation in vectors
8016 and therefore we prefer bit operations over comparison for
8017 them (which also works for scalar masks). We store opcodes
8018 to use in bitop1 and bitop2. Statement is vectorized as
8019 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8020 depending on bitop1 and bitop2 arity. */
8021 switch (cond_code)
8023 case GT_EXPR:
8024 bitop1 = BIT_NOT_EXPR;
8025 bitop2 = BIT_AND_EXPR;
8026 break;
8027 case GE_EXPR:
8028 bitop1 = BIT_NOT_EXPR;
8029 bitop2 = BIT_IOR_EXPR;
8030 break;
8031 case LT_EXPR:
8032 bitop1 = BIT_NOT_EXPR;
8033 bitop2 = BIT_AND_EXPR;
8034 std::swap (cond_expr0, cond_expr1);
8035 break;
8036 case LE_EXPR:
8037 bitop1 = BIT_NOT_EXPR;
8038 bitop2 = BIT_IOR_EXPR;
8039 std::swap (cond_expr0, cond_expr1);
8040 break;
8041 case NE_EXPR:
8042 bitop1 = BIT_XOR_EXPR;
8043 break;
8044 case EQ_EXPR:
8045 bitop1 = BIT_XOR_EXPR;
8046 bitop2 = BIT_NOT_EXPR;
8047 break;
8048 default:
8049 return false;
8051 cond_code = SSA_NAME;
8054 if (!vec_stmt)
8056 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8057 if (bitop1 != NOP_EXPR)
8059 machine_mode mode = TYPE_MODE (comp_vectype);
8060 optab optab;
8062 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8063 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8064 return false;
8066 if (bitop2 != NOP_EXPR)
8068 optab = optab_for_tree_code (bitop2, comp_vectype,
8069 optab_default);
8070 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8071 return false;
8074 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8075 cond_code))
8077 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8078 return true;
8080 return false;
8083 /* Transform. */
8085 if (!slp_node)
8087 vec_oprnds0.create (1);
8088 vec_oprnds1.create (1);
8089 vec_oprnds2.create (1);
8090 vec_oprnds3.create (1);
8093 /* Handle def. */
8094 scalar_dest = gimple_assign_lhs (stmt);
8095 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8097 /* Handle cond expr. */
8098 for (j = 0; j < ncopies; j++)
8100 gassign *new_stmt = NULL;
8101 if (j == 0)
8103 if (slp_node)
8105 auto_vec<tree, 4> ops;
8106 auto_vec<vec<tree>, 4> vec_defs;
8108 if (masked)
8109 ops.safe_push (cond_expr);
8110 else
8112 ops.safe_push (cond_expr0);
8113 ops.safe_push (cond_expr1);
8115 ops.safe_push (then_clause);
8116 ops.safe_push (else_clause);
8117 vect_get_slp_defs (ops, slp_node, &vec_defs);
8118 vec_oprnds3 = vec_defs.pop ();
8119 vec_oprnds2 = vec_defs.pop ();
8120 if (!masked)
8121 vec_oprnds1 = vec_defs.pop ();
8122 vec_oprnds0 = vec_defs.pop ();
8124 else
8126 gimple *gtemp;
8127 if (masked)
8129 vec_cond_lhs
8130 = vect_get_vec_def_for_operand (cond_expr, stmt,
8131 comp_vectype);
8132 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8133 &gtemp, &dts[0]);
8135 else
8137 vec_cond_lhs
8138 = vect_get_vec_def_for_operand (cond_expr0,
8139 stmt, comp_vectype);
8140 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8142 vec_cond_rhs
8143 = vect_get_vec_def_for_operand (cond_expr1,
8144 stmt, comp_vectype);
8145 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8147 if (reduc_index == 1)
8148 vec_then_clause = reduc_def;
8149 else
8151 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8152 stmt);
8153 vect_is_simple_use (then_clause, loop_vinfo,
8154 &gtemp, &dts[2]);
8156 if (reduc_index == 2)
8157 vec_else_clause = reduc_def;
8158 else
8160 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8161 stmt);
8162 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8166 else
8168 vec_cond_lhs
8169 = vect_get_vec_def_for_stmt_copy (dts[0],
8170 vec_oprnds0.pop ());
8171 if (!masked)
8172 vec_cond_rhs
8173 = vect_get_vec_def_for_stmt_copy (dts[1],
8174 vec_oprnds1.pop ());
8176 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8177 vec_oprnds2.pop ());
8178 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8179 vec_oprnds3.pop ());
8182 if (!slp_node)
8184 vec_oprnds0.quick_push (vec_cond_lhs);
8185 if (!masked)
8186 vec_oprnds1.quick_push (vec_cond_rhs);
8187 vec_oprnds2.quick_push (vec_then_clause);
8188 vec_oprnds3.quick_push (vec_else_clause);
8191 /* Arguments are ready. Create the new vector stmt. */
8192 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8194 vec_then_clause = vec_oprnds2[i];
8195 vec_else_clause = vec_oprnds3[i];
8197 if (masked)
8198 vec_compare = vec_cond_lhs;
8199 else
8201 vec_cond_rhs = vec_oprnds1[i];
8202 if (bitop1 == NOP_EXPR)
8203 vec_compare = build2 (cond_code, vec_cmp_type,
8204 vec_cond_lhs, vec_cond_rhs);
8205 else
8207 new_temp = make_ssa_name (vec_cmp_type);
8208 if (bitop1 == BIT_NOT_EXPR)
8209 new_stmt = gimple_build_assign (new_temp, bitop1,
8210 vec_cond_rhs);
8211 else
8212 new_stmt
8213 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8214 vec_cond_rhs);
8215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8216 if (bitop2 == NOP_EXPR)
8217 vec_compare = new_temp;
8218 else if (bitop2 == BIT_NOT_EXPR)
8220 /* Instead of doing ~x ? y : z do x ? z : y. */
8221 vec_compare = new_temp;
8222 std::swap (vec_then_clause, vec_else_clause);
8224 else
8226 vec_compare = make_ssa_name (vec_cmp_type);
8227 new_stmt
8228 = gimple_build_assign (vec_compare, bitop2,
8229 vec_cond_lhs, new_temp);
8230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8234 new_temp = make_ssa_name (vec_dest);
8235 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8236 vec_compare, vec_then_clause,
8237 vec_else_clause);
8238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8239 if (slp_node)
8240 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8243 if (slp_node)
8244 continue;
8246 if (j == 0)
8247 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8248 else
8249 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8251 prev_stmt_info = vinfo_for_stmt (new_stmt);
8254 vec_oprnds0.release ();
8255 vec_oprnds1.release ();
8256 vec_oprnds2.release ();
8257 vec_oprnds3.release ();
8259 return true;
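/* Editorial sketch, never called by the vectorizer: the bitop rewrites
   that vectorizable_condition above and vectorizable_comparison below use
   for boolean (mask) operands, checked on scalar 0/1 values.  LT and LE
   are obtained from GT and GE by swapping the operands first.  */

static bool
vect_check_mask_compare_identities (void)
{
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b)
      {
	if ((a > b) != (a & !b))	/* GT: a AND (NOT b).  */
	  return false;
	if ((a >= b) != (a | !b))	/* GE: a OR (NOT b).  */
	  return false;
	if ((a != b) != (a ^ b))	/* NE: a XOR b.  */
	  return false;
	if ((a == b) != !(a ^ b))	/* EQ: NOT (a XOR b).  */
	  return false;
      }
  return true;
}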
8262 /* vectorizable_comparison.
8264 Check if STMT is a comparison expression that can be vectorized.
8265 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8266 comparison, put it in VEC_STMT, and insert it at GSI.
8268 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8270 static bool
8271 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8272 gimple **vec_stmt, tree reduc_def,
8273 slp_tree slp_node)
8275 tree lhs, rhs1, rhs2;
8276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8277 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8278 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8279 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8280 tree new_temp;
8281 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8282 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8283 int ndts = 2;
8284 unsigned nunits;
8285 int ncopies;
8286 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8287 stmt_vec_info prev_stmt_info = NULL;
8288 int i, j;
8289 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8290 vec<tree> vec_oprnds0 = vNULL;
8291 vec<tree> vec_oprnds1 = vNULL;
8292 gimple *def_stmt;
8293 tree mask_type;
8294 tree mask;
8296 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8297 return false;
8299 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8300 return false;
8302 mask_type = vectype;
8303 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8305 if (slp_node)
8306 ncopies = 1;
8307 else
8308 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8310 gcc_assert (ncopies >= 1);
8311 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8312 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8313 && reduc_def))
8314 return false;
8316 if (STMT_VINFO_LIVE_P (stmt_info))
8318 if (dump_enabled_p ())
8319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8320 "value used after loop.\n");
8321 return false;
8324 if (!is_gimple_assign (stmt))
8325 return false;
8327 code = gimple_assign_rhs_code (stmt);
8329 if (TREE_CODE_CLASS (code) != tcc_comparison)
8330 return false;
8332 rhs1 = gimple_assign_rhs1 (stmt);
8333 rhs2 = gimple_assign_rhs2 (stmt);
8335 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8336 &dts[0], &vectype1))
8337 return false;
8339 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8340 &dts[1], &vectype2))
8341 return false;
8343 if (vectype1 && vectype2
8344 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8345 return false;
8347 vectype = vectype1 ? vectype1 : vectype2;
8349 /* Invariant comparison. */
8350 if (!vectype)
8352 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8353 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8354 return false;
8356 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8357 return false;
8359 /* Can't compare mask and non-mask types. */
8360 if (vectype1 && vectype2
8361 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8362 return false;
8364 /* Boolean values may have another representation in vectors
8365 and therefore we prefer bit operations over comparison for
8366 them (which also works for scalar masks). We store opcodes
8367 to use in bitop1 and bitop2. Statement is vectorized as
8368 BITOP2 (rhs1 BITOP1 rhs2) or
8369 rhs1 BITOP2 (BITOP1 rhs2)
8370 depending on bitop1 and bitop2 arity. */
8371 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8373 if (code == GT_EXPR)
8375 bitop1 = BIT_NOT_EXPR;
8376 bitop2 = BIT_AND_EXPR;
8378 else if (code == GE_EXPR)
8380 bitop1 = BIT_NOT_EXPR;
8381 bitop2 = BIT_IOR_EXPR;
8383 else if (code == LT_EXPR)
8385 bitop1 = BIT_NOT_EXPR;
8386 bitop2 = BIT_AND_EXPR;
8387 std::swap (rhs1, rhs2);
8388 std::swap (dts[0], dts[1]);
8390 else if (code == LE_EXPR)
8392 bitop1 = BIT_NOT_EXPR;
8393 bitop2 = BIT_IOR_EXPR;
8394 std::swap (rhs1, rhs2);
8395 std::swap (dts[0], dts[1]);
8397 else
8399 bitop1 = BIT_XOR_EXPR;
8400 if (code == EQ_EXPR)
8401 bitop2 = BIT_NOT_EXPR;
8405 if (!vec_stmt)
8407 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8408 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8409 dts, ndts, NULL, NULL);
8410 if (bitop1 == NOP_EXPR)
8411 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8412 else
8414 machine_mode mode = TYPE_MODE (vectype);
8415 optab optab;
8417 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8418 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8419 return false;
8421 if (bitop2 != NOP_EXPR)
8423 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8424 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8425 return false;
8427 return true;
8431 /* Transform. */
8432 if (!slp_node)
8434 vec_oprnds0.create (1);
8435 vec_oprnds1.create (1);
8438 /* Handle def. */
8439 lhs = gimple_assign_lhs (stmt);
8440 mask = vect_create_destination_var (lhs, mask_type);
8442 /* Handle cmp expr. */
8443 for (j = 0; j < ncopies; j++)
8445 gassign *new_stmt = NULL;
8446 if (j == 0)
8448 if (slp_node)
8450 auto_vec<tree, 2> ops;
8451 auto_vec<vec<tree>, 2> vec_defs;
8453 ops.safe_push (rhs1);
8454 ops.safe_push (rhs2);
8455 vect_get_slp_defs (ops, slp_node, &vec_defs);
8456 vec_oprnds1 = vec_defs.pop ();
8457 vec_oprnds0 = vec_defs.pop ();
8459 else
8461 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8462 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8465 else
8467 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8468 vec_oprnds0.pop ());
8469 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8470 vec_oprnds1.pop ());
8473 if (!slp_node)
8475 vec_oprnds0.quick_push (vec_rhs1);
8476 vec_oprnds1.quick_push (vec_rhs2);
8479 /* Arguments are ready. Create the new vector stmt. */
8480 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8482 vec_rhs2 = vec_oprnds1[i];
8484 new_temp = make_ssa_name (mask);
8485 if (bitop1 == NOP_EXPR)
8487 new_stmt = gimple_build_assign (new_temp, code,
8488 vec_rhs1, vec_rhs2);
8489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8491 else
8493 if (bitop1 == BIT_NOT_EXPR)
8494 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8495 else
8496 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8497 vec_rhs2);
8498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8499 if (bitop2 != NOP_EXPR)
8501 tree res = make_ssa_name (mask);
8502 if (bitop2 == BIT_NOT_EXPR)
8503 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8504 else
8505 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8506 new_temp);
8507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8510 if (slp_node)
8511 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8514 if (slp_node)
8515 continue;
8517 if (j == 0)
8518 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8519 else
8520 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8522 prev_stmt_info = vinfo_for_stmt (new_stmt);
8525 vec_oprnds0.release ();
8526 vec_oprnds1.release ();
8528 return true;
8531 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8532 can handle all live statements in the node. Otherwise return true
8533 if STMT is not live or if vectorizable_live_operation can handle it.
8534 GSI and VEC_STMT are as for vectorizable_live_operation. */
8536 static bool
8537 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8538 slp_tree slp_node, gimple **vec_stmt)
8540 if (slp_node)
8542 gimple *slp_stmt;
8543 unsigned int i;
8544 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8546 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8547 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8548 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8549 vec_stmt))
8550 return false;
8553 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8554 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8555 return false;
8557 return true;
8560 /* Make sure the statement is vectorizable. */
8562 bool
8563 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8564 slp_instance node_instance)
8566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8567 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8568 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8569 bool ok;
8570 gimple *pattern_stmt;
8571 gimple_seq pattern_def_seq;
8573 if (dump_enabled_p ())
8575 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8576 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8579 if (gimple_has_volatile_ops (stmt))
8581 if (dump_enabled_p ())
8582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8583 "not vectorized: stmt has volatile operands\n");
8585 return false;
8588 /* Skip stmts that do not need to be vectorized. In loops this is expected
8589 to include:
8590 - the COND_EXPR which is the loop exit condition
8591 - any LABEL_EXPRs in the loop
8592 - computations that are used only for array indexing or loop control.
8593 In basic blocks we only analyze statements that are a part of some SLP
8594 instance, and therefore all the statements are relevant.
8596 A pattern statement needs to be analyzed instead of the original
8597 statement if the original statement is not relevant. Otherwise, we
8598 analyze both statements. In basic blocks we are called from some SLP
8599 instance traversal; there we do not analyze pattern stmts instead,
8600 since the pattern stmts will already be part of the SLP instance. */
8602 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8603 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8604 && !STMT_VINFO_LIVE_P (stmt_info))
8606 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8607 && pattern_stmt
8608 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8609 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8611 /* Analyze PATTERN_STMT instead of the original stmt. */
8612 stmt = pattern_stmt;
8613 stmt_info = vinfo_for_stmt (pattern_stmt);
8614 if (dump_enabled_p ())
8616 dump_printf_loc (MSG_NOTE, vect_location,
8617 "==> examining pattern statement: ");
8618 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8621 else
8623 if (dump_enabled_p ())
8624 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8626 return true;
8629 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8630 && node == NULL
8631 && pattern_stmt
8632 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8633 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8635 /* Analyze PATTERN_STMT too. */
8636 if (dump_enabled_p ())
8638 dump_printf_loc (MSG_NOTE, vect_location,
8639 "==> examining pattern statement: ");
8640 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8643 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8644 node_instance))
8645 return false;
8648 if (is_pattern_stmt_p (stmt_info)
8649 && node == NULL
8650 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8652 gimple_stmt_iterator si;
8654 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8656 gimple *pattern_def_stmt = gsi_stmt (si);
8657 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8658 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8660 /* Analyze def stmt of STMT if it's a pattern stmt. */
8661 if (dump_enabled_p ())
8663 dump_printf_loc (MSG_NOTE, vect_location,
8664 "==> examining pattern def statement: ");
8665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8668 if (!vect_analyze_stmt (pattern_def_stmt,
8669 need_to_vectorize, node, node_instance))
8670 return false;
8675 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8677 case vect_internal_def:
8678 break;
8680 case vect_reduction_def:
8681 case vect_nested_cycle:
8682 gcc_assert (!bb_vinfo
8683 && (relevance == vect_used_in_outer
8684 || relevance == vect_used_in_outer_by_reduction
8685 || relevance == vect_used_by_reduction
8686 || relevance == vect_unused_in_scope
8687 || relevance == vect_used_only_live));
8688 break;
8690 case vect_induction_def:
8691 gcc_assert (!bb_vinfo);
8692 break;
8694 case vect_constant_def:
8695 case vect_external_def:
8696 case vect_unknown_def_type:
8697 default:
8698 gcc_unreachable ();
8701 if (STMT_VINFO_RELEVANT_P (stmt_info))
8703 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8704 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8705 || (is_gimple_call (stmt)
8706 && gimple_call_lhs (stmt) == NULL_TREE));
8707 *need_to_vectorize = true;
8710 if (PURE_SLP_STMT (stmt_info) && !node)
8712 dump_printf_loc (MSG_NOTE, vect_location,
8713 "handled only by SLP analysis\n");
8714 return true;
8717 ok = true;
8718 if (!bb_vinfo
8719 && (STMT_VINFO_RELEVANT_P (stmt_info)
8720 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8721 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8722 || vectorizable_conversion (stmt, NULL, NULL, node)
8723 || vectorizable_shift (stmt, NULL, NULL, node)
8724 || vectorizable_operation (stmt, NULL, NULL, node)
8725 || vectorizable_assignment (stmt, NULL, NULL, node)
8726 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8727 || vectorizable_call (stmt, NULL, NULL, node)
8728 || vectorizable_store (stmt, NULL, NULL, node)
8729 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8730 || vectorizable_induction (stmt, NULL, NULL, node)
8731 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8732 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8733 else
8735 if (bb_vinfo)
8736 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8737 || vectorizable_conversion (stmt, NULL, NULL, node)
8738 || vectorizable_shift (stmt, NULL, NULL, node)
8739 || vectorizable_operation (stmt, NULL, NULL, node)
8740 || vectorizable_assignment (stmt, NULL, NULL, node)
8741 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8742 || vectorizable_call (stmt, NULL, NULL, node)
8743 || vectorizable_store (stmt, NULL, NULL, node)
8744 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8745 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8748 if (!ok)
8750 if (dump_enabled_p ())
8752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8753 "not vectorized: relevant stmt not ");
8754 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8755 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8758 return false;
8761 if (bb_vinfo)
8762 return true;
8764 /* Stmts that are (also) "live" (i.e., used outside of the loop)
8765 need extra handling, except for vectorizable reductions. */
8766 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8767 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8769 if (dump_enabled_p ())
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8772 "not vectorized: live stmt not supported: ");
8773 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8776 return false;
8779 return true;
8783 /* Function vect_transform_stmt.
8785 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8787 bool
8788 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8789 bool *grouped_store, slp_tree slp_node,
8790 slp_instance slp_node_instance)
8792 bool is_store = false;
8793 gimple *vec_stmt = NULL;
8794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8795 bool done;
8797 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8798 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8800 switch (STMT_VINFO_TYPE (stmt_info))
8802 case type_demotion_vec_info_type:
8803 case type_promotion_vec_info_type:
8804 case type_conversion_vec_info_type:
8805 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8806 gcc_assert (done);
8807 break;
8809 case induc_vec_info_type:
8810 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8811 gcc_assert (done);
8812 break;
8814 case shift_vec_info_type:
8815 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8816 gcc_assert (done);
8817 break;
8819 case op_vec_info_type:
8820 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8821 gcc_assert (done);
8822 break;
8824 case assignment_vec_info_type:
8825 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8826 gcc_assert (done);
8827 break;
8829 case load_vec_info_type:
8830 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8831 slp_node_instance);
8832 gcc_assert (done);
8833 break;
8835 case store_vec_info_type:
8836 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8837 gcc_assert (done);
8838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8840 /* In case of interleaving, the whole chain is vectorized when the
8841 last store in the chain is reached. Store stmts before the last
8842 one are skipped, and their stmt_vec_info shouldn't be freed
8843 meanwhile. */
8844 *grouped_store = true;
8845 if (STMT_VINFO_VEC_STMT (stmt_info))
8846 is_store = true;
8848 else
8849 is_store = true;
8850 break;
8852 case condition_vec_info_type:
8853 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8854 gcc_assert (done);
8855 break;
8857 case comparison_vec_info_type:
8858 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8859 gcc_assert (done);
8860 break;
8862 case call_vec_info_type:
8863 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8864 stmt = gsi_stmt (*gsi);
8865 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8866 is_store = true;
8867 break;
8869 case call_simd_clone_vec_info_type:
8870 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8871 stmt = gsi_stmt (*gsi);
8872 break;
8874 case reduc_vec_info_type:
8875 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8876 slp_node_instance);
8877 gcc_assert (done);
8878 break;
8880 default:
8881 if (!STMT_VINFO_LIVE_P (stmt_info))
8883 if (dump_enabled_p ())
8884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8885 "stmt not supported.\n");
8886 gcc_unreachable ();
8890 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8891 This would break hybrid SLP vectorization. */
8892 if (slp_node)
8893 gcc_assert (!vec_stmt
8894 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8896 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8897 is being vectorized, but outside the immediately enclosing loop. */
8898 if (vec_stmt
8899 && STMT_VINFO_LOOP_VINFO (stmt_info)
8900 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8901 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8902 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8903 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8904 || STMT_VINFO_RELEVANT (stmt_info) ==
8905 vect_used_in_outer_by_reduction))
8907 struct loop *innerloop = LOOP_VINFO_LOOP (
8908 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8909 imm_use_iterator imm_iter;
8910 use_operand_p use_p;
8911 tree scalar_dest;
8912 gimple *exit_phi;
8914 if (dump_enabled_p ())
8915 dump_printf_loc (MSG_NOTE, vect_location,
8916 "Record the vdef for outer-loop vectorization.\n");
8918 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8919 (to be used when vectorizing outer-loop stmts that use the DEF of
8920 STMT). */
8921 if (gimple_code (stmt) == GIMPLE_PHI)
8922 scalar_dest = PHI_RESULT (stmt);
8923 else
8924 scalar_dest = gimple_assign_lhs (stmt);
8926 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8928 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8930 exit_phi = USE_STMT (use_p);
8931 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8936 /* Handle stmts whose DEF is used outside the loop-nest that is
8937 being vectorized. */
8938 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8940 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8941 gcc_assert (done);
8944 if (vec_stmt)
8945 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8947 return is_store;
8951 /* Remove a group of stores (for SLP or interleaving), free their
8952 stmt_vec_info. */
8954 void
8955 vect_remove_stores (gimple *first_stmt)
8957 gimple *next = first_stmt;
8958 gimple *tmp;
8959 gimple_stmt_iterator next_si;
8961 while (next)
8963 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8965 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8966 if (is_pattern_stmt_p (stmt_info))
8967 next = STMT_VINFO_RELATED_STMT (stmt_info);
8968 /* Free the attached stmt_vec_info and remove the stmt. */
8969 next_si = gsi_for_stmt (next);
8970 unlink_stmt_vdef (next);
8971 gsi_remove (&next_si, true);
8972 release_defs (next);
8973 free_stmt_vec_info (next);
8974 next = tmp;
8979 /* Function new_stmt_vec_info.
8981 Create and initialize a new stmt_vec_info struct for STMT. */
8983 stmt_vec_info
8984 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8986 stmt_vec_info res;
8987 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8989 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8990 STMT_VINFO_STMT (res) = stmt;
8991 res->vinfo = vinfo;
8992 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8993 STMT_VINFO_LIVE_P (res) = false;
8994 STMT_VINFO_VECTYPE (res) = NULL;
8995 STMT_VINFO_VEC_STMT (res) = NULL;
8996 STMT_VINFO_VECTORIZABLE (res) = true;
8997 STMT_VINFO_IN_PATTERN_P (res) = false;
8998 STMT_VINFO_RELATED_STMT (res) = NULL;
8999 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9000 STMT_VINFO_DATA_REF (res) = NULL;
9001 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9002 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9004 if (gimple_code (stmt) == GIMPLE_PHI
9005 && is_loop_header_bb_p (gimple_bb (stmt)))
9006 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9007 else
9008 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9010 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9011 STMT_SLP_TYPE (res) = loop_vect;
9012 STMT_VINFO_NUM_SLP_USES (res) = 0;
9014 GROUP_FIRST_ELEMENT (res) = NULL;
9015 GROUP_NEXT_ELEMENT (res) = NULL;
9016 GROUP_SIZE (res) = 0;
9017 GROUP_STORE_COUNT (res) = 0;
9018 GROUP_GAP (res) = 0;
9019 GROUP_SAME_DR_STMT (res) = NULL;
9021 return res;
9025 /* Create the stmt_vec_info vector. */
9027 void
9028 init_stmt_vec_info_vec (void)
9030 gcc_assert (!stmt_vec_info_vec.exists ());
9031 stmt_vec_info_vec.create (50);
9035 /* Free the stmt_vec_info vector. */
9037 void
9038 free_stmt_vec_info_vec (void)
9040 unsigned int i;
9041 stmt_vec_info info;
9042 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9043 if (info != NULL)
9044 free_stmt_vec_info (STMT_VINFO_STMT (info));
9045 gcc_assert (stmt_vec_info_vec.exists ());
9046 stmt_vec_info_vec.release ();
9050 /* Free stmt vectorization related info. */
9052 void
9053 free_stmt_vec_info (gimple *stmt)
9055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9057 if (!stmt_info)
9058 return;
9060 /* Check if this statement has a related "pattern stmt"
9061 (introduced by the vectorizer during the pattern recognition
9062 pass). Free the pattern's stmt_vec_info and the def stmts'
9063 stmt_vec_infos too. */
9064 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9066 stmt_vec_info patt_info
9067 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9068 if (patt_info)
9070 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9071 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9072 gimple_set_bb (patt_stmt, NULL);
9073 tree lhs = gimple_get_lhs (patt_stmt);
9074 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9075 release_ssa_name (lhs);
9076 if (seq)
9078 gimple_stmt_iterator si;
9079 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9081 gimple *seq_stmt = gsi_stmt (si);
9082 gimple_set_bb (seq_stmt, NULL);
9083 lhs = gimple_get_lhs (seq_stmt);
9084 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9085 release_ssa_name (lhs);
9086 free_stmt_vec_info (seq_stmt);
9089 free_stmt_vec_info (patt_stmt);
9093 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9094 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9095 set_vinfo_for_stmt (stmt, NULL);
9096 free (stmt_info);
9100 /* Function get_vectype_for_scalar_type_and_size.
9102 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9103 by the target. */
9105 static tree
9106 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9108 tree orig_scalar_type = scalar_type;
9109 scalar_mode inner_mode;
9110 machine_mode simd_mode;
9111 int nunits;
9112 tree vectype;
9114 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9115 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9116 return NULL_TREE;
9118 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9120 /* For vector types of elements whose mode precision doesn't
9121 match their type's precision we use an element type of mode
9122 precision. The vectorization routines will have to make sure
9123 they support the proper result truncation/extension.
9124 We also make sure to build vector types with INTEGER_TYPE
9125 component type only. */
9126 if (INTEGRAL_TYPE_P (scalar_type)
9127 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9128 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9129 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9130 TYPE_UNSIGNED (scalar_type));
9132 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9133 When the component mode passes the above test simply use a type
9134 corresponding to that mode. The theory is that any use that
9135 would cause problems with this will disable vectorization anyway. */
9136 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9137 && !INTEGRAL_TYPE_P (scalar_type))
9138 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9140 /* We can't build a vector type of elements with alignment bigger than
9141 their size. */
9142 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9143 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9144 TYPE_UNSIGNED (scalar_type));
9146 /* If we fell back to using the mode, fail if there was
9147 no scalar type for it. */
9148 if (scalar_type == NULL_TREE)
9149 return NULL_TREE;
9151 /* If no size was supplied use the mode the target prefers. Otherwise
9152 look up a vector mode of the specified size. */
9153 if (size == 0)
9154 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9155 else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
9156 return NULL_TREE;
9157 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9158 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9159 if (nunits < 1)
9160 return NULL_TREE;
9162 vectype = build_vector_type (scalar_type, nunits);
9164 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9165 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9166 return NULL_TREE;
9168 /* Re-attach the address-space qualifier if we canonicalized the scalar
9169 type. */
9170 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9171 return build_qualified_type
9172 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9174 return vectype;
9177 unsigned int current_vector_size;
9179 /* Function get_vectype_for_scalar_type.
9181 Returns the vector type corresponding to SCALAR_TYPE as supported
9182 by the target. */
9184 tree
9185 get_vectype_for_scalar_type (tree scalar_type)
9187 tree vectype;
9188 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9189 current_vector_size);
9190 if (vectype
9191 && current_vector_size == 0)
9192 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9193 return vectype;
9196 /* Function get_mask_type_for_scalar_type.
9198 Returns the mask type corresponding to a result of comparison
9199 of vectors of the specified SCALAR_TYPE as supported by the target. */
9201 tree
9202 get_mask_type_for_scalar_type (tree scalar_type)
9204 tree vectype = get_vectype_for_scalar_type (scalar_type);
9206 if (!vectype)
9207 return NULL;
9209 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9210 current_vector_size);
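/* An illustrative, target-dependent example (not taken from the sources):
   with a current_vector_size of 16 bytes and SCALAR_TYPE int,
   get_vectype_for_scalar_type returns a 4-element integer vector and the
   mask type built here has 4 boolean elements whose width the target
   chooses - typically integer-sized booleans, or 1-bit booleans on
   targets with real mask registers.  */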
9213 /* Function get_same_sized_vectype
9215 Returns a vector type corresponding to SCALAR_TYPE, with the same
9216 size as VECTOR_TYPE, if supported by the target. */
9218 tree
9219 get_same_sized_vectype (tree scalar_type, tree vector_type)
9221 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9222 return build_same_sized_truth_vector_type (vector_type);
9224 return get_vectype_for_scalar_type_and_size
9225 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9228 /* Function vect_is_simple_use.
9230 Input:
9231 VINFO - the vect info of the loop or basic block that is being vectorized.
9232 OPERAND - operand in the loop or bb.
9233 Output:
9234 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9235 DT - the type of definition
9237 Returns whether a stmt with OPERAND can be vectorized.
9238 For loops, supportable operands are constants, loop invariants, and operands
9239 that are defined by the current iteration of the loop. Unsupportable
9240 operands are those that are defined by a previous iteration of the loop (as
9241 is the case in reduction/induction computations).
9242 For basic blocks, supportable operands are constants and bb invariants.
9243 For now, operands defined outside the basic block are not supported. */
9245 bool
9246 vect_is_simple_use (tree operand, vec_info *vinfo,
9247 gimple **def_stmt, enum vect_def_type *dt)
9249 *def_stmt = NULL;
9250 *dt = vect_unknown_def_type;
9252 if (dump_enabled_p ())
9254 dump_printf_loc (MSG_NOTE, vect_location,
9255 "vect_is_simple_use: operand ");
9256 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9257 dump_printf (MSG_NOTE, "\n");
9260 if (CONSTANT_CLASS_P (operand))
9262 *dt = vect_constant_def;
9263 return true;
9266 if (is_gimple_min_invariant (operand))
9268 *dt = vect_external_def;
9269 return true;
9272 if (TREE_CODE (operand) != SSA_NAME)
9274 if (dump_enabled_p ())
9275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9276 "not ssa-name.\n");
9277 return false;
9280 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9282 *dt = vect_external_def;
9283 return true;
9286 *def_stmt = SSA_NAME_DEF_STMT (operand);
9287 if (dump_enabled_p ())
9289 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9290 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9293 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9294 *dt = vect_external_def;
9295 else
9297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9298 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9301 if (dump_enabled_p ())
9303 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9304 switch (*dt)
9306 case vect_uninitialized_def:
9307 dump_printf (MSG_NOTE, "uninitialized\n");
9308 break;
9309 case vect_constant_def:
9310 dump_printf (MSG_NOTE, "constant\n");
9311 break;
9312 case vect_external_def:
9313 dump_printf (MSG_NOTE, "external\n");
9314 break;
9315 case vect_internal_def:
9316 dump_printf (MSG_NOTE, "internal\n");
9317 break;
9318 case vect_induction_def:
9319 dump_printf (MSG_NOTE, "induction\n");
9320 break;
9321 case vect_reduction_def:
9322 dump_printf (MSG_NOTE, "reduction\n");
9323 break;
9324 case vect_double_reduction_def:
9325 dump_printf (MSG_NOTE, "double reduction\n");
9326 break;
9327 case vect_nested_cycle:
9328 dump_printf (MSG_NOTE, "nested cycle\n");
9329 break;
9330 case vect_unknown_def_type:
9331 dump_printf (MSG_NOTE, "unknown\n");
9332 break;
9336 if (*dt == vect_unknown_def_type)
9338 if (dump_enabled_p ())
9339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9340 "Unsupported pattern.\n");
9341 return false;
9344 switch (gimple_code (*def_stmt))
9346 case GIMPLE_PHI:
9347 case GIMPLE_ASSIGN:
9348 case GIMPLE_CALL:
9349 break;
9350 default:
9351 if (dump_enabled_p ())
9352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9353 "unsupported defining stmt:\n");
9354 return false;
9357 return true;
9360 /* Function vect_is_simple_use.
9362 Same as vect_is_simple_use but also determines the vector operand
9363 type of OPERAND and stores it to *VECTYPE. If the definition of
9364 OPERAND is vect_uninitialized_def, vect_constant_def or
9365 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9366 is responsible for computing the best-suited vector type for the
9367 scalar operand. */
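/* Sketch of the intended use: for the constant operand in a[i] * 3
   this overload sets *DT to vect_constant_def and *VECTYPE to
   NULL_TREE, and the caller typically falls back to the vector type
   it already chose for the multiplication; for an operand defined by
   a statement in the vectorized region, *VECTYPE is taken from that
   statement's stmt_vec_info.  */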
9369 bool
9370 vect_is_simple_use (tree operand, vec_info *vinfo,
9371 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9373 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9374 return false;
9376 /* Now get a vector type if the def is internal, otherwise supply
9377 NULL_TREE and leave it up to the caller to figure out a proper
9378 type for the use stmt. */
9379 if (*dt == vect_internal_def
9380 || *dt == vect_induction_def
9381 || *dt == vect_reduction_def
9382 || *dt == vect_double_reduction_def
9383 || *dt == vect_nested_cycle)
9385 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9387 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9388 && !STMT_VINFO_RELEVANT (stmt_info)
9389 && !STMT_VINFO_LIVE_P (stmt_info))
9390 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9392 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9393 gcc_assert (*vectype != NULL_TREE);
9395 else if (*dt == vect_uninitialized_def
9396 || *dt == vect_constant_def
9397 || *dt == vect_external_def)
9398 *vectype = NULL_TREE;
9399 else
9400 gcc_unreachable ();
9402 return true;
9406 /* Function supportable_widening_operation
9408 Check whether an operation represented by the code CODE is a
9409 widening operation that is supported by the target platform in
9410 vector form (i.e., when operating on arguments of type VECTYPE_IN
9411 producing a result of type VECTYPE_OUT).
9413 Widening operations we currently support are NOP (CONVERT), FLOAT,
9414 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if
9415 these operations are supported by the target platform either directly
9416 (via vector tree-codes), or via target builtins.
9418 Output:
9419 - CODE1 and CODE2 are codes of vector operations to be used when
9420 vectorizing the operation, if available.
9421 - MULTI_STEP_CVT determines the number of required intermediate steps in
9422 case of multi-step conversion (like char->short->int - in that case
9423 MULTI_STEP_CVT will be 1).
9424 - INTERM_TYPES contains the intermediate type required to perform the
9425 widening operation (short in the above example). */
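/* A typical single-step example (illustrative only):

     c[i] = (int) a[i] * (int) b[i];   with a, b short[] and c int[]

   is a WIDEN_MULT that can be vectorized using VEC_WIDEN_MULT_LO_EXPR
   and VEC_WIDEN_MULT_HI_EXPR, each pair of input vectors of shorts
   yielding two result vectors of ints; no intermediate type is needed,
   so MULTI_STEP_CVT stays 0 and INTERM_TYPES stays empty.  */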
9427 bool
9428 supportable_widening_operation (enum tree_code code, gimple *stmt,
9429 tree vectype_out, tree vectype_in,
9430 enum tree_code *code1, enum tree_code *code2,
9431 int *multi_step_cvt,
9432 vec<tree> *interm_types)
9434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9435 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9436 struct loop *vect_loop = NULL;
9437 machine_mode vec_mode;
9438 enum insn_code icode1, icode2;
9439 optab optab1, optab2;
9440 tree vectype = vectype_in;
9441 tree wide_vectype = vectype_out;
9442 enum tree_code c1, c2;
9443 int i;
9444 tree prev_type, intermediate_type;
9445 machine_mode intermediate_mode, prev_mode;
9446 optab optab3, optab4;
9448 *multi_step_cvt = 0;
9449 if (loop_info)
9450 vect_loop = LOOP_VINFO_LOOP (loop_info);
9452 switch (code)
9454 case WIDEN_MULT_EXPR:
9455 /* The result of a vectorized widening operation usually requires
9456 two vectors (because the widened results do not fit into one vector).
9457 The generated vector results would normally be expected to be
9458 generated in the same order as in the original scalar computation,
9459 i.e. if 8 results are generated in each vector iteration, they are
9460 to be organized as follows:
9461 vect1: [res1,res2,res3,res4],
9462 vect2: [res5,res6,res7,res8].
9464 However, in the special case that the result of the widening
9465 operation is used in a reduction computation only, the order doesn't
9466 matter (because when vectorizing a reduction we change the order of
9467 the computation). Some targets can take advantage of this and
9468 generate more efficient code. For example, targets like Altivec,
9469 that support widen_mult using a sequence of {mult_even,mult_odd}
9470 generate the following vectors:
9471 vect1: [res1,res3,res5,res7],
9472 vect2: [res2,res4,res6,res8].
9474 When vectorizing outer-loops, we execute the inner-loop sequentially
9475 (each vectorized inner-loop iteration contributes to VF outer-loop
9476 iterations in parallel). We therefore don't allow changing the
9477 order of the computation in the inner-loop during outer-loop
9478 vectorization. */
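/* Illustrative example of the reduction case described above:

     for (i = 0; i < n; i++)
       sum += (int) a[i] * (int) b[i];

   Only the final sum matters, so producing the products in the
   even/odd order [res1,res3,res5,res7], [res2,res4,res6,res8] is
   just as good as the in-order lo/hi form.  */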
9479 /* TODO: Another case in which order doesn't *really* matter is when we
9480 widen and then contract again, e.g. (short)((int)x * y >> 8).
9481 Normally, pack_trunc performs an even/odd permute, whereas the
9482 repack from an even/odd expansion would be an interleave, which
9483 would be significantly simpler for e.g. AVX2. */
9484 /* In any case, in order to avoid duplicating the code below, recurse
9485 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9486 are properly set up for the caller. If we fail, we'll continue with
9487 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9488 if (vect_loop
9489 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9490 && !nested_in_vect_loop_p (vect_loop, stmt)
9491 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9492 stmt, vectype_out, vectype_in,
9493 code1, code2, multi_step_cvt,
9494 interm_types))
9496 /* Elements in a vector with the vect_used_by_reduction property cannot
9497 be reordered if the use chain with this property does not have the
9498 same operation. One such example is s += a * b, where elements
9499 in a and b cannot be reordered. Here we check if the vector defined
9500 by STMT is only directly used in the reduction statement. */
9501 tree lhs = gimple_assign_lhs (stmt);
9502 use_operand_p dummy;
9503 gimple *use_stmt;
9504 stmt_vec_info use_stmt_info = NULL;
9505 if (single_imm_use (lhs, &dummy, &use_stmt)
9506 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9507 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9508 return true;
9510 c1 = VEC_WIDEN_MULT_LO_EXPR;
9511 c2 = VEC_WIDEN_MULT_HI_EXPR;
9512 break;
9514 case DOT_PROD_EXPR:
9515 c1 = DOT_PROD_EXPR;
9516 c2 = DOT_PROD_EXPR;
9517 break;
9519 case SAD_EXPR:
9520 c1 = SAD_EXPR;
9521 c2 = SAD_EXPR;
9522 break;
9524 case VEC_WIDEN_MULT_EVEN_EXPR:
9525 /* Support the recursion induced just above. */
9526 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9527 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9528 break;
9530 case WIDEN_LSHIFT_EXPR:
9531 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9532 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9533 break;
9535 CASE_CONVERT:
9536 c1 = VEC_UNPACK_LO_EXPR;
9537 c2 = VEC_UNPACK_HI_EXPR;
9538 break;
9540 case FLOAT_EXPR:
9541 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9542 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9543 break;
9545 case FIX_TRUNC_EXPR:
9546 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9547 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9548 computing the operation. */
9549 return false;
9551 default:
9552 gcc_unreachable ();
9555 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9556 std::swap (c1, c2);
9558 if (code == FIX_TRUNC_EXPR)
9560 /* The signedness is determined from the output operand. */
9561 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9562 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9564 else
9566 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9567 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9570 if (!optab1 || !optab2)
9571 return false;
9573 vec_mode = TYPE_MODE (vectype);
9574 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9575 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9576 return false;
9578 *code1 = c1;
9579 *code2 = c2;
9581 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9582 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9583 /* For scalar masks we may have different boolean
9584 vector types having the same QImode. Thus we
9585 add an additional check on the number of elements. */
9586 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9587 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9588 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
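/* E.g. on a target with integer mask modes (such as AVX-512), a
   4-element and an 8-element boolean vector may both have QImode,
   which is why the mode comparison above is supplemented by the
   explicit element-count check for boolean vector types.  */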
9590 /* Check if it's a multi-step conversion that can be done using intermediate
9591 types. */
9593 prev_type = vectype;
9594 prev_mode = vec_mode;
9596 if (!CONVERT_EXPR_CODE_P (code))
9597 return false;
9599 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9600 intermediate steps in the promotion sequence. We try
9601 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9602 not. */
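/* For example (assuming 16-byte vectors), widening chars to ints
   goes V16QI -> V8HI -> V4SI: the loop below pushes the vector of
   shorts as the single intermediate type, sets *MULTI_STEP_CVT to 1,
   and returns true once the unpack from the intermediate mode
   produces the wide V4SI result.  */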
9603 interm_types->create (MAX_INTERM_CVT_STEPS);
9604 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9606 intermediate_mode = insn_data[icode1].operand[0].mode;
9607 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9609 intermediate_type
9610 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9611 current_vector_size);
9612 if (intermediate_mode != TYPE_MODE (intermediate_type))
9613 return false;
9615 else
9616 intermediate_type
9617 = lang_hooks.types.type_for_mode (intermediate_mode,
9618 TYPE_UNSIGNED (prev_type));
9620 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9621 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9623 if (!optab3 || !optab4
9624 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9625 || insn_data[icode1].operand[0].mode != intermediate_mode
9626 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9627 || insn_data[icode2].operand[0].mode != intermediate_mode
9628 || ((icode1 = optab_handler (optab3, intermediate_mode))
9629 == CODE_FOR_nothing)
9630 || ((icode2 = optab_handler (optab4, intermediate_mode))
9631 == CODE_FOR_nothing))
9632 break;
9634 interm_types->quick_push (intermediate_type);
9635 (*multi_step_cvt)++;
9637 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9638 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9639 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9640 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9641 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9643 prev_type = intermediate_type;
9644 prev_mode = intermediate_mode;
9647 interm_types->release ();
9648 return false;
9652 /* Function supportable_narrowing_operation
9654 Check whether an operation represented by the code CODE is a
9655 narrowing operation that is supported by the target platform in
9656 vector form (i.e., when operating on arguments of type VECTYPE_IN
9657 and producing a result of type VECTYPE_OUT).
9659 Narrowing operations we currently support are NOP (CONVERT) and
9660 FIX_TRUNC. This function checks if these operations are supported by
9661 the target platform directly via vector tree-codes.
9663 Output:
9664 - CODE1 is the code of a vector operation to be used when
9665 vectorizing the operation, if available.
9666 - MULTI_STEP_CVT determines the number of required intermediate steps in
9667 case of multi-step conversion (like int->short->char - in that case
9668 MULTI_STEP_CVT will be 1).
9669 - INTERM_TYPES contains the intermediate type required to perform the
9670 narrowing operation (short in the above example). */
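/* A typical single-step example (illustrative only):

     b[i] = (short) a[i];   with a int[] and b short[]

   maps to VEC_PACK_TRUNC_EXPR, two vectors of ints packing into one
   vector of shorts.  Truncating ints to chars would additionally need
   the vector-of-shorts intermediate type, giving MULTI_STEP_CVT == 1.  */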
9672 bool
9673 supportable_narrowing_operation (enum tree_code code,
9674 tree vectype_out, tree vectype_in,
9675 enum tree_code *code1, int *multi_step_cvt,
9676 vec<tree> *interm_types)
9678 machine_mode vec_mode;
9679 enum insn_code icode1;
9680 optab optab1, interm_optab;
9681 tree vectype = vectype_in;
9682 tree narrow_vectype = vectype_out;
9683 enum tree_code c1;
9684 tree intermediate_type, prev_type;
9685 machine_mode intermediate_mode, prev_mode;
9686 int i;
9687 bool uns;
9689 *multi_step_cvt = 0;
9690 switch (code)
9692 CASE_CONVERT:
9693 c1 = VEC_PACK_TRUNC_EXPR;
9694 break;
9696 case FIX_TRUNC_EXPR:
9697 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9698 break;
9700 case FLOAT_EXPR:
9701 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9702 tree code and optabs used for computing the operation. */
9703 return false;
9705 default:
9706 gcc_unreachable ();
9709 if (code == FIX_TRUNC_EXPR)
9710 /* The signedness is determined from the output operand. */
9711 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9712 else
9713 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9715 if (!optab1)
9716 return false;
9718 vec_mode = TYPE_MODE (vectype);
9719 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9720 return false;
9722 *code1 = c1;
9724 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9725 /* For scalar masks we may have different boolean
9726 vector types having the same QImode. Thus we
9727 add an additional check on the number of elements. */
9728 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9729 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9730 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9732 /* Check if it's a multi-step conversion that can be done using intermediate
9733 types. */
9734 prev_mode = vec_mode;
9735 prev_type = vectype;
9736 if (code == FIX_TRUNC_EXPR)
9737 uns = TYPE_UNSIGNED (vectype_out);
9738 else
9739 uns = TYPE_UNSIGNED (vectype);
9741 /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to integer
9742 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
9743 costly than signed. */
9744 if (code == FIX_TRUNC_EXPR && uns)
9746 enum insn_code icode2;
9748 intermediate_type
9749 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9750 interm_optab
9751 = optab_for_tree_code (c1, intermediate_type, optab_default);
9752 if (interm_optab != unknown_optab
9753 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9754 && insn_data[icode1].operand[0].mode
9755 == insn_data[icode2].operand[0].mode)
9757 uns = false;
9758 optab1 = interm_optab;
9759 icode1 = icode2;
9763 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9764 intermediate steps in the demotion sequence. We try
9765 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9766 interm_types->create (MAX_INTERM_CVT_STEPS);
9767 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9769 intermediate_mode = insn_data[icode1].operand[0].mode;
9770 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9772 intermediate_type
9773 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9774 current_vector_size);
9775 if (intermediate_mode != TYPE_MODE (intermediate_type))
9776 return false;
9778 else
9779 intermediate_type
9780 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9781 interm_optab
9782 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9783 optab_default);
9784 if (!interm_optab
9785 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9786 || insn_data[icode1].operand[0].mode != intermediate_mode
9787 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9788 == CODE_FOR_nothing))
9789 break;
9791 interm_types->quick_push (intermediate_type);
9792 (*multi_step_cvt)++;
9794 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9795 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9796 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9797 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9799 prev_mode = intermediate_mode;
9800 prev_type = intermediate_type;
9801 optab1 = interm_optab;
9804 interm_types->release ();
9805 return false;