[official-gcc.git] gcc/tree-vect-stmts.c (PR tree-optimization/78496)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
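/* Illustrative usage sketch (not part of the original file): a caller
   costing NCOPIES vector operations in the loop body might write

     unsigned cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                       stmt_info, 0, vect_body);

   which either pushes a stmt_info_for_cost entry onto BODY_COST_VEC for
   later processing or, if BODY_COST_VEC is NULL, hands the cost directly
   to the target model via add_stmt_cost.  */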
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT, in the loop represented by LOOP_VINFO, is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
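/* Illustrative example (assumed scalar loop, not from the original file):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     <-- has a vdef, so *relevant = vect_used_in_scope
         sum = sum + b[i];    <-- 'sum' is used after the loop, so *live_p = true
       }
     ... = sum;

   Both statements above are reported as relevant/live, whereas the
   increment of 'i' on its own has no vdef and no use outside the loop,
   so it is not reported here.  */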
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
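/* Illustrative example (not from the original file): for the store

     a[i_1] = x_2;

   a USE of x_2 is the stored value, so the function returns true, while
   a USE of i_1 only feeds the ARRAY_REF index and the function returns
   false.  */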
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that it is of one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
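/* Illustrative example of case 1 (not from the original file): for

     a[i_1] = x_2;

   process_use called with USE == i_1 returns true without marking
   anything, because i_1 is only used for address computation, whereas
   with USE == x_2 the defining statement of x_2 is marked with RELEVANT
   and pushed onto WORKLIST (assuming it is defined inside the loop).  */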
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
583 vect_mark_relevant (worklist, def_stmt, relevant, false);
584 return true;
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
592 for i...
593 for j...
594 1. T0 = i + j
595 2. T1 = a[T0]
597 3. j = j + 1
599 Stmts 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
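/* A rough sketch of the worklist algorithm implemented below
   (illustrative, simplified):

     worklist = all stmts/phis for which vect_stmt_relevant_p holds;
     while (!worklist.empty ())
       {
         stmt = worklist.pop ();
         for each USE operand of stmt
           process_use (stmt, USE, ...);   <-- may push the def stmt
       }

   so relevance propagates backwards from stores, live values and control
   statements to the statements that feed them.  */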
604 bool
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
609 unsigned int nbbs = loop->num_nodes;
610 gimple_stmt_iterator si;
611 gimple *stmt;
612 unsigned int i;
613 stmt_vec_info stmt_vinfo;
614 basic_block bb;
615 gimple *phi;
616 bool live_p;
617 enum vect_relevant relevant;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE, vect_location,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec<gimple *, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
628 bb = bbs[i];
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 phi = gsi_stmt (si);
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
638 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
639 vect_mark_relevant (&worklist, phi, relevant, live_p);
641 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
643 stmt = gsi_stmt (si);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
650 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
651 vect_mark_relevant (&worklist, stmt, relevant, live_p);
655 /* 2. Process_worklist */
656 while (worklist.length () > 0)
658 use_operand_p use_p;
659 ssa_op_iter iter;
661 stmt = worklist.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
670 of STMT. */
671 stmt_vinfo = vinfo_for_stmt (stmt);
672 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
687 case vect_reduction_def:
688 gcc_assert (relevant != vect_unused_in_scope);
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_scope
691 && relevant != vect_used_by_reduction
692 && relevant != vect_used_only_live)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of reduction.\n");
697 return false;
699 break;
701 case vect_nested_cycle:
702 if (relevant != vect_unused_in_scope
703 && relevant != vect_used_in_outer_by_reduction
704 && relevant != vect_used_in_outer)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "unsupported use of nested cycle.\n");
710 return false;
712 break;
714 case vect_double_reduction_def:
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_by_reduction
717 && relevant != vect_used_only_live)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of double reduction.\n");
723 return false;
725 break;
727 default:
728 break;
731 if (is_pattern_stmt_p (stmt_vinfo))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt))
738 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
739 tree op = gimple_assign_rhs1 (stmt);
741 i = 1;
742 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
744 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
745 relevant, &worklist, false)
746 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
747 relevant, &worklist, false))
748 return false;
749 i = 2;
751 for (; i < gimple_num_ops (stmt); i++)
753 op = gimple_op (stmt, i);
754 if (TREE_CODE (op) == SSA_NAME
755 && !process_use (stmt, op, loop_vinfo, relevant,
756 &worklist, false))
757 return false;
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, relevant,
766 &worklist, false))
767 return false;
771 else
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, relevant,
776 &worklist, false))
777 return false;
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
782 gather_scatter_info gs_info;
783 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
784 gcc_unreachable ();
785 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
786 &worklist, true))
787 return false;
789 } /* while worklist */
791 return true;
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
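/* Worked example (illustrative): for NCOPIES == 2 with DT[0] a constant
   operand and DT[1] an internal def, the code below records one
   vector_stmt in the prologue (building the invariant vector) and two
   vector_stmt entries in the loop body.  */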
801 void
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 stmt_vector_for_cost *prologue_cost_vec,
805 stmt_vector_for_cost *body_cost_vec)
807 int i;
808 int inside_cost = 0, prologue_cost = 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info))
812 return;
814 /* FORNOW: Assuming maximum 2 args per stmts. */
815 for (i = 0; i < 2; i++)
816 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
817 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
818 stmt_info, 0, vect_prologue);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
822 stmt_info, 0, vect_body);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE, vect_location,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
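/* Worked example (illustrative): a single-step promotion (PWR == 0) is
   charged vect_pow2 (1) == 2 vec_promote_demote stmts, a two-step
   promotion (PWR == 1) is charged 2 + 4 == 6, while the corresponding
   demotions are charged 1 and 1 + 2 == 3 respectively.  */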
836 static void
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
838 enum vect_def_type *dt, int pwr)
840 int i, tmp;
841 int inside_cost = 0, prologue_cost = 0;
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
844 void *target_cost_data;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info))
848 return;
850 if (loop_vinfo)
851 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
852 else
853 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
855 for (i = 0; i < pwr + 1; i++)
857 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
858 (i + 1) : i;
859 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
860 vec_promote_demote, stmt_info, 0,
861 vect_body);
864 /* FORNOW: Assuming maximum 2 args per stmts. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
881 void
882 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
883 vect_memory_access_type memory_access_type,
884 enum vect_def_type dt, slp_tree slp_node,
885 stmt_vector_for_cost *prologue_cost_vec,
886 stmt_vector_for_cost *body_cost_vec)
888 unsigned int inside_cost = 0, prologue_cost = 0;
889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
891 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
893 if (dt == vect_constant_def || dt == vect_external_def)
894 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
895 stmt_info, 0, vect_prologue);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node && grouped_access_p)
901 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
902 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
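/* Worked example (illustrative): for GROUP_SIZE == 4 and NCOPIES == 2 the
   computation below charges ncopies * ceil_log2 (group_size) * group_size
   == 2 * 2 * 4 == 16 vec_perm stmts for the interleaving.  */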
914 if (first_stmt_p
915 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
917 /* Uses high and low interleave or shuffle operations for each
918 needed permute. */
919 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
920 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
921 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
922 stmt_info, 0, vect_body);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE, vect_location,
926 "vect_model_store_cost: strided group_size = %d .\n",
927 group_size);
930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
931 /* Costs of the stores. */
932 if (memory_access_type == VMAT_ELEMENTWISE
933 || memory_access_type == VMAT_GATHER_SCATTER)
934 /* N scalar stores plus extracting the elements. */
935 inside_cost += record_stmt_cost (body_cost_vec,
936 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
937 scalar_store, stmt_info, 0, vect_body);
938 else
939 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
941 if (memory_access_type == VMAT_ELEMENTWISE
942 || memory_access_type == VMAT_STRIDED_SLP)
943 inside_cost += record_stmt_cost (body_cost_vec,
944 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
945 vec_to_scalar, stmt_info, 0, vect_body);
947 if (dump_enabled_p ())
948 dump_printf_loc (MSG_NOTE, vect_location,
949 "vect_model_store_cost: inside_cost = %d, "
950 "prologue_cost = %d .\n", inside_cost, prologue_cost);
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (struct data_reference *dr, int ncopies,
957 unsigned int *inside_cost,
958 stmt_vector_for_cost *body_cost_vec)
960 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
961 gimple *stmt = DR_STMT (dr);
962 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
964 switch (alignment_support_scheme)
966 case dr_aligned:
968 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
969 vector_store, stmt_info, 0,
970 vect_body);
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE, vect_location,
974 "vect_model_store_cost: aligned.\n");
975 break;
978 case dr_unaligned_supported:
980 /* Here, we assign an additional cost for the unaligned store. */
981 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
982 unaligned_store, stmt_info,
983 DR_MISALIGNMENT (dr), vect_body);
984 if (dump_enabled_p ())
985 dump_printf_loc (MSG_NOTE, vect_location,
986 "vect_model_store_cost: unaligned supported by "
987 "hardware.\n");
988 break;
991 case dr_unaligned_unsupported:
993 *inside_cost = VECT_MAX_COST;
995 if (dump_enabled_p ())
996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
997 "vect_model_store_cost: unsupported access.\n");
998 break;
1001 default:
1002 gcc_unreachable ();
1007 /* Function vect_model_load_cost
1009 Models cost for loads. In the case of grouped accesses, one access has
1010 the overhead of the grouped access attributed to it. Since unaligned
1011 accesses are supported for loads, we also account for the costs of the
1012 access scheme chosen. */
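/* Worked example (illustrative): for a V4SI load with NCOPIES == 2 and
   VMAT_ELEMENTWISE access, the code below charges 2 * 4 == 8 scalar_load
   stmts plus 2 vec_construct stmts; for a contiguous aligned access it
   instead charges 2 vector_load stmts via vect_get_load_cost.  */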
1014 void
1015 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1016 vect_memory_access_type memory_access_type,
1017 slp_tree slp_node,
1018 stmt_vector_for_cost *prologue_cost_vec,
1019 stmt_vector_for_cost *body_cost_vec)
1021 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1022 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1023 unsigned int inside_cost = 0, prologue_cost = 0;
1024 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1026 /* Grouped loads read all elements in the group at once,
1027 so we want the DR for the first statement. */
1028 if (!slp_node && grouped_access_p)
1030 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1031 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1034 /* True if we should include any once-per-group costs as well as
1035 the cost of the statement itself. For SLP we only get called
1036 once per group anyhow. */
1037 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1039 /* We assume that the cost of a single load-lanes instruction is
1040 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1041 access is instead being provided by a load-and-permute operation,
1042 include the cost of the permutes. */
1043 if (first_stmt_p
1044 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1046 /* Uses even and odd extract operations or shuffle operations
1047 for each needed permute. */
1048 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1049 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1050 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1051 stmt_info, 0, vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_load_cost: strided group_size = %d .\n",
1056 group_size);
1059 /* The loads themselves. */
1060 if (memory_access_type == VMAT_ELEMENTWISE
1061 || memory_access_type == VMAT_GATHER_SCATTER)
1063 /* N scalar loads plus gathering them into a vector. */
1064 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1065 inside_cost += record_stmt_cost (body_cost_vec,
1066 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1067 scalar_load, stmt_info, 0, vect_body);
1069 else
1070 vect_get_load_cost (dr, ncopies, first_stmt_p,
1071 &inside_cost, &prologue_cost,
1072 prologue_cost_vec, body_cost_vec, true);
1073 if (memory_access_type == VMAT_ELEMENTWISE
1074 || memory_access_type == VMAT_STRIDED_SLP)
1075 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1076 stmt_info, 0, vect_body);
1078 if (dump_enabled_p ())
1079 dump_printf_loc (MSG_NOTE, vect_location,
1080 "vect_model_load_cost: inside_cost = %d, "
1081 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1085 /* Calculate cost of DR's memory access. */
1086 void
1087 vect_get_load_cost (struct data_reference *dr, int ncopies,
1088 bool add_realign_cost, unsigned int *inside_cost,
1089 unsigned int *prologue_cost,
1090 stmt_vector_for_cost *prologue_cost_vec,
1091 stmt_vector_for_cost *body_cost_vec,
1092 bool record_prologue_costs)
1094 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1095 gimple *stmt = DR_STMT (dr);
1096 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1098 switch (alignment_support_scheme)
1100 case dr_aligned:
1102 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1103 stmt_info, 0, vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: aligned.\n");
1109 break;
1111 case dr_unaligned_supported:
1113 /* Here, we assign an additional cost for the unaligned load. */
1114 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1115 unaligned_load, stmt_info,
1116 DR_MISALIGNMENT (dr), vect_body);
1118 if (dump_enabled_p ())
1119 dump_printf_loc (MSG_NOTE, vect_location,
1120 "vect_model_load_cost: unaligned supported by "
1121 "hardware.\n");
1123 break;
1125 case dr_explicit_realign:
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1128 vector_load, stmt_info, 0, vect_body);
1129 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1130 vec_perm, stmt_info, 0, vect_body);
1132 /* FIXME: If the misalignment remains fixed across the iterations of
1133 the containing loop, the following cost should be added to the
1134 prologue costs. */
1135 if (targetm.vectorize.builtin_mask_for_load)
1136 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1137 stmt_info, 0, vect_body);
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location,
1141 "vect_model_load_cost: explicit realign\n");
1143 break;
1145 case dr_explicit_realign_optimized:
1147 if (dump_enabled_p ())
1148 dump_printf_loc (MSG_NOTE, vect_location,
1149 "vect_model_load_cost: unaligned software "
1150 "pipelined.\n");
1152 /* Unaligned software pipeline has a load of an address, an initial
1153 load, and possibly a mask operation to "prime" the loop. However,
1154 if this is an access in a group of loads, which provide grouped
1155 access, then the above cost should only be considered for one
1156 access in the group. Inside the loop, there is a load op
1157 and a realignment op. */
1159 if (add_realign_cost && record_prologue_costs)
1161 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1162 vector_stmt, stmt_info,
1163 0, vect_prologue);
1164 if (targetm.vectorize.builtin_mask_for_load)
1165 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1166 vector_stmt, stmt_info,
1167 0, vect_prologue);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1171 stmt_info, 0, vect_body);
1172 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1173 stmt_info, 0, vect_body);
1175 if (dump_enabled_p ())
1176 dump_printf_loc (MSG_NOTE, vect_location,
1177 "vect_model_load_cost: explicit realign optimized"
1178 "\n");
1180 break;
1183 case dr_unaligned_unsupported:
1185 *inside_cost = VECT_MAX_COST;
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1189 "vect_model_load_cost: unsupported access.\n");
1190 break;
1193 default:
1194 gcc_unreachable ();
1198 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1199 the loop preheader for the vectorized stmt STMT. */
1201 static void
1202 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1204 if (gsi)
1205 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1206 else
1208 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1211 if (loop_vinfo)
1213 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1214 basic_block new_bb;
1215 edge pe;
1217 if (nested_in_vect_loop_p (loop, stmt))
1218 loop = loop->inner;
1220 pe = loop_preheader_edge (loop);
1221 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1222 gcc_assert (!new_bb);
1224 else
1226 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1227 basic_block bb;
1228 gimple_stmt_iterator gsi_bb_start;
1230 gcc_assert (bb_vinfo);
1231 bb = BB_VINFO_BB (bb_vinfo);
1232 gsi_bb_start = gsi_after_labels (bb);
1233 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1237 if (dump_enabled_p ())
1239 dump_printf_loc (MSG_NOTE, vect_location,
1240 "created new init_stmt: ");
1241 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1245 /* Function vect_init_vector.
1247 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1248 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1249 a vector type, a vector with all elements equal to VAL is created first.
1250 Place the initialization at GSI if it is not NULL. Otherwise, place the
1251 initialization at the loop preheader.
1252 Return the DEF of INIT_STMT.
1253 It will be used in the vectorization of STMT. */
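/* Illustrative example (not from the original file): calling

     vect_init_vector (stmt, build_int_cst (integer_type_node, 5),
                       v4si_type, NULL);

   where v4si_type is assumed to be a 4 x int vector type, emits something
   like

     cst_1 = { 5, 5, 5, 5 };

   on the loop preheader edge and returns the SSA name cst_1.  */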
1255 tree
1256 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1258 gimple *init_stmt;
1259 tree new_temp;
1261 /* We abuse this function to push something to an SSA name with initial 'val'. */
1262 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1264 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1265 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1267 /* Scalar boolean value should be transformed into
1268 all zeros or all ones value before building a vector. */
1269 if (VECTOR_BOOLEAN_TYPE_P (type))
1271 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1272 tree false_val = build_zero_cst (TREE_TYPE (type));
1274 if (CONSTANT_CLASS_P (val))
1275 val = integer_zerop (val) ? false_val : true_val;
1276 else
1278 new_temp = make_ssa_name (TREE_TYPE (type));
1279 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1280 val, true_val, false_val);
1281 vect_init_vector_1 (stmt, init_stmt, gsi);
1282 val = new_temp;
1285 else if (CONSTANT_CLASS_P (val))
1286 val = fold_convert (TREE_TYPE (type), val);
1287 else
1289 new_temp = make_ssa_name (TREE_TYPE (type));
1290 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1291 init_stmt = gimple_build_assign (new_temp,
1292 fold_build1 (VIEW_CONVERT_EXPR,
1293 TREE_TYPE (type),
1294 val));
1295 else
1296 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1297 vect_init_vector_1 (stmt, init_stmt, gsi);
1298 val = new_temp;
1301 val = build_vector_from_val (type, val);
1304 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1305 init_stmt = gimple_build_assign (new_temp, val);
1306 vect_init_vector_1 (stmt, init_stmt, gsi);
1307 return new_temp;
1310 /* Function vect_get_vec_def_for_operand_1.
1312 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1313 DT that will be used in the vectorized stmt. */
1315 tree
1316 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1318 tree vec_oprnd;
1319 gimple *vec_stmt;
1320 stmt_vec_info def_stmt_info = NULL;
1322 switch (dt)
1324 /* operand is a constant or a loop invariant. */
1325 case vect_constant_def:
1326 case vect_external_def:
1327 /* Code should use vect_get_vec_def_for_operand. */
1328 gcc_unreachable ();
1330 /* operand is defined inside the loop. */
1331 case vect_internal_def:
1333 /* Get the def from the vectorized stmt. */
1334 def_stmt_info = vinfo_for_stmt (def_stmt);
1336 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1337 /* Get vectorized pattern statement. */
1338 if (!vec_stmt
1339 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1340 && !STMT_VINFO_RELEVANT (def_stmt_info))
1341 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1342 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1343 gcc_assert (vec_stmt);
1344 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1345 vec_oprnd = PHI_RESULT (vec_stmt);
1346 else if (is_gimple_call (vec_stmt))
1347 vec_oprnd = gimple_call_lhs (vec_stmt);
1348 else
1349 vec_oprnd = gimple_assign_lhs (vec_stmt);
1350 return vec_oprnd;
1353 /* operand is defined by a loop header phi - reduction */
1354 case vect_reduction_def:
1355 case vect_double_reduction_def:
1356 case vect_nested_cycle:
1357 /* Code should use get_initial_def_for_reduction. */
1358 gcc_unreachable ();
1360 /* operand is defined by loop-header phi - induction. */
1361 case vect_induction_def:
1363 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1365 /* Get the def from the vectorized stmt. */
1366 def_stmt_info = vinfo_for_stmt (def_stmt);
1367 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1368 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1369 vec_oprnd = PHI_RESULT (vec_stmt);
1370 else
1371 vec_oprnd = gimple_get_lhs (vec_stmt);
1372 return vec_oprnd;
1375 default:
1376 gcc_unreachable ();
1381 /* Function vect_get_vec_def_for_operand.
1383 OP is an operand in STMT. This function returns a (vector) def that will be
1384 used in the vectorized stmt for STMT.
1386 In the case that OP is an SSA_NAME which is defined in the loop, then
1387 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1389 In case OP is an invariant or constant, a new stmt that creates a vector def
1390 needs to be introduced. VECTYPE may be used to specify a required type for
1391 the vector invariant. */
1393 tree
1394 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1396 gimple *def_stmt;
1397 enum vect_def_type dt;
1398 bool is_simple_use;
1399 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1400 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1402 if (dump_enabled_p ())
1404 dump_printf_loc (MSG_NOTE, vect_location,
1405 "vect_get_vec_def_for_operand: ");
1406 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1407 dump_printf (MSG_NOTE, "\n");
1410 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1411 gcc_assert (is_simple_use);
1412 if (def_stmt && dump_enabled_p ())
1414 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1415 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1418 if (dt == vect_constant_def || dt == vect_external_def)
1420 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1421 tree vector_type;
1423 if (vectype)
1424 vector_type = vectype;
1425 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1426 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1427 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1428 else
1429 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1431 gcc_assert (vector_type);
1432 return vect_init_vector (stmt, op, vector_type, NULL);
1434 else
1435 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1439 /* Function vect_get_vec_def_for_stmt_copy
1441 Return a vector-def for an operand. This function is used when the
1442 vectorized stmt to be created (by the caller to this function) is a "copy"
1443 created in case the vectorized result cannot fit in one vector, and several
1444 copies of the vector-stmt are required. In this case the vector-def is
1445 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1446 of the stmt that defines VEC_OPRND.
1447 DT is the type of the vector def VEC_OPRND.
1449 Context:
1450 In case the vectorization factor (VF) is bigger than the number
1451 of elements that can fit in a vectype (nunits), we have to generate
1452 more than one vector stmt to vectorize the scalar stmt. This situation
1453 arises when there are multiple data-types operated upon in the loop; the
1454 smallest data-type determines the VF, and as a result, when vectorizing
1455 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1456 vector stmt (each computing a vector of 'nunits' results, and together
1457 computing 'VF' results in each iteration). This function is called when
1458 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1459 which VF=16 and nunits=4, so the number of copies required is 4):
1461 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1463 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1464 VS1.1: vx.1 = memref1 VS1.2
1465 VS1.2: vx.2 = memref2 VS1.3
1466 VS1.3: vx.3 = memref3
1468 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1469 VSnew.1: vz1 = vx.1 + ... VSnew.2
1470 VSnew.2: vz2 = vx.2 + ... VSnew.3
1471 VSnew.3: vz3 = vx.3 + ...
1473 The vectorization of S1 is explained in vectorizable_load.
1474 The vectorization of S2:
1475 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1476 the function 'vect_get_vec_def_for_operand' is called to
1477 get the relevant vector-def for each operand of S2. For operand x it
1478 returns the vector-def 'vx.0'.
1480 To create the remaining copies of the vector-stmt (VSnew.j), this
1481 function is called to get the relevant vector-def for each operand. It is
1482 obtained from the respective VS1.j stmt, which is recorded in the
1483 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1485 For example, to obtain the vector-def 'vx.1' in order to create the
1486 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1487 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1488 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1489 and return its def ('vx.1').
1490 Overall, to create the above sequence this function will be called 3 times:
1491 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1492 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1493 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1495 tree
1496 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1498 gimple *vec_stmt_for_operand;
1499 stmt_vec_info def_stmt_info;
1501 /* Do nothing; can reuse same def. */
1502 if (dt == vect_external_def || dt == vect_constant_def )
1503 return vec_oprnd;
1505 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1506 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1507 gcc_assert (def_stmt_info);
1508 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1509 gcc_assert (vec_stmt_for_operand);
1510 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1511 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1512 else
1513 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1514 return vec_oprnd;
1518 /* Get vectorized definitions for the operands to create a copy of an original
1519 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1521 static void
1522 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1523 vec<tree> *vec_oprnds0,
1524 vec<tree> *vec_oprnds1)
1526 tree vec_oprnd = vec_oprnds0->pop ();
1528 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1529 vec_oprnds0->quick_push (vec_oprnd);
1531 if (vec_oprnds1 && vec_oprnds1->length ())
1533 vec_oprnd = vec_oprnds1->pop ();
1534 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1535 vec_oprnds1->quick_push (vec_oprnd);
1540 /* Get vectorized definitions for OP0 and OP1.
1541 REDUC_INDEX is the index of the reduction operand in case of reduction,
1542 and -1 otherwise. */
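/* Typical usage sketch (illustrative): a vectorizable_* routine with two
   operands might call

     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        slp_node, -1);

   For an SLP node this fills both vectors with all the defs at once;
   otherwise a single def per operand is pushed and the defs for further
   copies are obtained with vect_get_vec_defs_for_stmt_copy.  */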
1544 void
1545 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1,
1548 slp_tree slp_node, int reduc_index)
1550 if (slp_node)
1552 int nops = (op1 == NULL_TREE) ? 1 : 2;
1553 auto_vec<tree> ops (nops);
1554 auto_vec<vec<tree> > vec_defs (nops);
1556 ops.quick_push (op0);
1557 if (op1)
1558 ops.quick_push (op1);
1560 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1562 *vec_oprnds0 = vec_defs[0];
1563 if (op1)
1564 *vec_oprnds1 = vec_defs[1];
1566 else
1568 tree vec_oprnd;
1570 vec_oprnds0->create (1);
1571 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1572 vec_oprnds0->quick_push (vec_oprnd);
1574 if (op1)
1576 vec_oprnds1->create (1);
1577 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1578 vec_oprnds1->quick_push (vec_oprnd);
1584 /* Function vect_finish_stmt_generation.
1586 Insert a new stmt. */
1588 void
1589 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1590 gimple_stmt_iterator *gsi)
1592 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1593 vec_info *vinfo = stmt_info->vinfo;
1595 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1597 if (!gsi_end_p (*gsi)
1598 && gimple_has_mem_ops (vec_stmt))
1600 gimple *at_stmt = gsi_stmt (*gsi);
1601 tree vuse = gimple_vuse (at_stmt);
1602 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1604 tree vdef = gimple_vdef (at_stmt);
1605 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1606 /* If we have an SSA vuse and insert a store, update virtual
1607 SSA form to avoid triggering the renamer. Do so only
1608 if we can easily see all uses - which is what almost always
1609 happens with the way vectorized stmts are inserted. */
1610 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1611 && ((is_gimple_assign (vec_stmt)
1612 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1613 || (is_gimple_call (vec_stmt)
1614 && !(gimple_call_flags (vec_stmt)
1615 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1617 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1618 gimple_set_vdef (vec_stmt, new_vdef);
1619 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1623 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1625 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1627 if (dump_enabled_p ())
1629 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1630 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1633 gimple_set_location (vec_stmt, gimple_location (stmt));
1635 /* While EH edges will generally prevent vectorization, stmt might
1636 e.g. be in a must-not-throw region. Ensure newly created stmts
1637 that could throw are part of the same region. */
1638 int lp_nr = lookup_stmt_eh_lp (stmt);
1639 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1640 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1643 /* We want to vectorize a call to combined function CFN with function
1644 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1645 as the types of all inputs. Check whether this is possible using
1646 an internal function, returning its code if so or IFN_LAST if not. */
1648 static internal_fn
1649 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1650 tree vectype_out, tree vectype_in)
1652 internal_fn ifn;
1653 if (internal_fn_p (cfn))
1654 ifn = as_internal_fn (cfn);
1655 else
1656 ifn = associated_internal_fn (fndecl);
1657 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1659 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1660 if (info.vectorizable)
1662 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1663 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1664 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1665 OPTIMIZE_FOR_SPEED))
1666 return ifn;
1669 return IFN_LAST;
1673 static tree permute_vec_elements (tree, tree, tree, gimple *,
1674 gimple_stmt_iterator *);
1676 /* STMT is a non-strided load or store, meaning that it accesses
1677 elements with a known constant step. Return -1 if that step
1678 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1680 static int
1681 compare_step_with_zero (gimple *stmt)
1683 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1684 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1685 tree step;
1686 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1687 step = STMT_VINFO_DR_STEP (stmt_info);
1688 else
1689 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1690 return tree_int_cst_compare (step, size_zero_node);
1693 /* If the target supports a permute mask that reverses the elements in
1694 a vector of type VECTYPE, return that mask, otherwise return null. */
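/* For example (illustrative): for a 4-element vector the selector built
   below is { 3, 2, 1, 0 }, i.e. element I of the result is taken from
   element NUNITS - 1 - I of the input.  */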
1696 static tree
1697 perm_mask_for_reverse (tree vectype)
1699 int i, nunits;
1700 unsigned char *sel;
1702 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1703 sel = XALLOCAVEC (unsigned char, nunits);
1705 for (i = 0; i < nunits; ++i)
1706 sel[i] = nunits - 1 - i;
1708 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1709 return NULL_TREE;
1710 return vect_gen_perm_mask_checked (vectype, sel);
1713 /* A subroutine of get_load_store_type, with a subset of the same
1714 arguments. Handle the case where STMT is part of a grouped load
1715 or store.
1717 For stores, the statements in the group are all consecutive
1718 and there is no gap at the end. For loads, the statements in the
1719 group might not be consecutive; there can be gaps between statements
1720 as well as at the end. */
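/* Illustrative example (assumed access pattern, not from the original
   file): for loads of a[4*i], a[4*i+1] and a[4*i+2] the group has
   GROUP_SIZE 4 with a gap of 1 at the end, so vectorized accesses may
   read past the last scalar element; this is the "overrun" handled below
   by peeling for gaps or by relying on an aligned access.  */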
1722 static bool
1723 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1724 vec_load_store_type vls_type,
1725 vect_memory_access_type *memory_access_type)
1727 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1728 vec_info *vinfo = stmt_info->vinfo;
1729 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1730 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1731 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1732 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1733 bool single_element_p = (stmt == first_stmt
1734 && !GROUP_NEXT_ELEMENT (stmt_info));
1735 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1736 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738 /* True if the vectorized statements would access beyond the last
1739 statement in the group. */
1740 bool overrun_p = false;
1742 /* True if we can cope with such overrun by peeling for gaps, so that
1743 there is at least one final scalar iteration after the vector loop. */
1744 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1746 /* There can only be a gap at the end of the group if the stride is
1747 known at compile time. */
1748 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1750 /* Stores can't yet have gaps. */
1751 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1753 if (slp)
1755 if (STMT_VINFO_STRIDED_P (stmt_info))
1757 /* Try to use consecutive accesses of GROUP_SIZE elements,
1758 separated by the stride, until we have a complete vector.
1759 Fall back to scalar accesses if that isn't possible. */
1760 if (nunits % group_size == 0)
1761 *memory_access_type = VMAT_STRIDED_SLP;
1762 else
1763 *memory_access_type = VMAT_ELEMENTWISE;
1765 else
1767 overrun_p = loop_vinfo && gap != 0;
1768 if (overrun_p && vls_type != VLS_LOAD)
1770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1771 "Grouped store with gaps requires"
1772 " non-consecutive accesses\n");
1773 return false;
1775 /* If the access is aligned an overrun is fine. */
1776 if (overrun_p
1777 && aligned_access_p
1778 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1779 overrun_p = false;
1780 if (overrun_p && !can_overrun_p)
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1784 "Peeling for outer loop is not supported\n");
1785 return false;
1787 *memory_access_type = VMAT_CONTIGUOUS;
1790 else
1792 /* We can always handle this case using elementwise accesses,
1793 but see if something more efficient is available. */
1794 *memory_access_type = VMAT_ELEMENTWISE;
1796 /* If there is a gap at the end of the group then these optimizations
1797 would access excess elements in the last iteration. */
1798 bool would_overrun_p = (gap != 0);
1799 /* If the access is aligned, an overrun is fine, but only if the
1800 overrun is not inside an unused vector (i.e. if the gap is as
1801 large as or larger than a vector). */
1802 if (would_overrun_p
1803 && gap < nunits
1804 && aligned_access_p
1805 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1806 would_overrun_p = false;
1807 if (!STMT_VINFO_STRIDED_P (stmt_info)
1808 && (can_overrun_p || !would_overrun_p)
1809 && compare_step_with_zero (stmt) > 0)
1811 /* First try using LOAD/STORE_LANES. */
1812 if (vls_type == VLS_LOAD
1813 ? vect_load_lanes_supported (vectype, group_size)
1814 : vect_store_lanes_supported (vectype, group_size))
1816 *memory_access_type = VMAT_LOAD_STORE_LANES;
1817 overrun_p = would_overrun_p;
1820 /* If that fails, try using permuting loads or stores. */
1821 if (*memory_access_type == VMAT_ELEMENTWISE
1822 && (vls_type == VLS_LOAD
1823 ? vect_grouped_load_supported (vectype, single_element_p,
1824 group_size)
1825 : vect_grouped_store_supported (vectype, group_size)))
1827 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1828 overrun_p = would_overrun_p;
1833 if (vls_type != VLS_LOAD && first_stmt == stmt)
1835 /* STMT is the leader of the group. Check the operands of all the
1836 stmts of the group. */
1837 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1838 while (next_stmt)
1840 gcc_assert (gimple_assign_single_p (next_stmt));
1841 tree op = gimple_assign_rhs1 (next_stmt);
1842 gimple *def_stmt;
1843 enum vect_def_type dt;
1844 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1846 if (dump_enabled_p ())
1847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1848 "use not simple.\n");
1849 return false;
1851 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1855 if (overrun_p)
1857 gcc_assert (can_overrun_p);
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "Data access with gaps requires scalar "
1861 "epilogue loop\n");
1862 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1865 return true;
1868 /* A subroutine of get_load_store_type, with a subset of the same
1869 arguments. Handle the case where STMT is a load or store that
1870 accesses consecutive elements with a negative step. */
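/* Example (a sketch, not from the sources): a loop that reads a[n-i]
   walks backwards through memory.  If the target can reverse a vector
   (see perm_mask_for_reverse) the access becomes a contiguous access in
   the downward direction followed by an element reversal
   (VMAT_CONTIGUOUS_REVERSE); otherwise we fall back to
   VMAT_ELEMENTWISE.  */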
1872 static vect_memory_access_type
1873 get_negative_load_store_type (gimple *stmt, tree vectype,
1874 vec_load_store_type vls_type,
1875 unsigned int ncopies)
1877 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1878 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1879 dr_alignment_support alignment_support_scheme;
1881 if (ncopies > 1)
1883 if (dump_enabled_p ())
1884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1885 "multiple types with negative step.\n");
1886 return VMAT_ELEMENTWISE;
1889 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1890 if (alignment_support_scheme != dr_aligned
1891 && alignment_support_scheme != dr_unaligned_supported)
1893 if (dump_enabled_p ())
1894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1895 "negative step but alignment required.\n");
1896 return VMAT_ELEMENTWISE;
1899 if (vls_type == VLS_STORE_INVARIANT)
1901 if (dump_enabled_p ())
1902 dump_printf_loc (MSG_NOTE, vect_location,
1903 "negative step with invariant source;"
1904 " no permute needed.\n");
1905 return VMAT_CONTIGUOUS_DOWN;
1908 if (!perm_mask_for_reverse (vectype))
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 "negative step and reversing not supported.\n");
1913 return VMAT_ELEMENTWISE;
1916 return VMAT_CONTIGUOUS_REVERSE;
1919 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1920 if there is a memory access type that the vectorized form can use,
1921 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1922 or scatters, fill in GS_INFO accordingly.
1924 SLP says whether we're performing SLP rather than loop vectorization.
1925 VECTYPE is the vector type that the vectorized statements will use.
1926 NCOPIES is the number of vector statements that will be needed. */
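/* The decision order implemented below: gather/scatter accesses are
   handled first, then grouped accesses, then non-grouped strided
   accesses (always elementwise), and finally the simple cases in which
   the step is negative, zero (an invariant load) or positive
   (contiguous).  */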
1928 static bool
1929 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1930 vec_load_store_type vls_type, unsigned int ncopies,
1931 vect_memory_access_type *memory_access_type,
1932 gather_scatter_info *gs_info)
1934 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1935 vec_info *vinfo = stmt_info->vinfo;
1936 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1937 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1939 *memory_access_type = VMAT_GATHER_SCATTER;
1940 gimple *def_stmt;
1941 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1942 gcc_unreachable ();
1943 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1944 &gs_info->offset_dt,
1945 &gs_info->offset_vectype))
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1949 "%s index use not simple.\n",
1950 vls_type == VLS_LOAD ? "gather" : "scatter");
1951 return false;
1954 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1956 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1957 memory_access_type))
1958 return false;
1960 else if (STMT_VINFO_STRIDED_P (stmt_info))
1962 gcc_assert (!slp);
1963 *memory_access_type = VMAT_ELEMENTWISE;
1965 else
1967 int cmp = compare_step_with_zero (stmt);
1968 if (cmp < 0)
1969 *memory_access_type = get_negative_load_store_type
1970 (stmt, vectype, vls_type, ncopies);
1971 else if (cmp == 0)
1973 gcc_assert (vls_type == VLS_LOAD);
1974 *memory_access_type = VMAT_INVARIANT;
1976 else
1977 *memory_access_type = VMAT_CONTIGUOUS;
1980 /* FIXME: At the moment the cost model seems to underestimate the
1981 cost of using elementwise accesses. This check preserves the
1982 traditional behavior until that can be fixed. */
1983 if (*memory_access_type == VMAT_ELEMENTWISE
1984 && !STMT_VINFO_STRIDED_P (stmt_info))
1986 if (dump_enabled_p ())
1987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1988 "not falling back to elementwise accesses\n");
1989 return false;
1991 return true;
1994 /* Function vectorizable_mask_load_store.
1996 Check if STMT performs a conditional load or store that can be vectorized.
1997 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1998 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1999 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
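/* The vectorized form produced below is an internal-function call:
   IFN_MASK_LOAD (pointer, alignment, mask) for loads and
   IFN_MASK_STORE (pointer, alignment, mask, stored value) for stores.
   Masked gathers and scatters instead call the target builtin recorded
   in GS_INFO.  */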
2001 static bool
2002 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2003 gimple **vec_stmt, slp_tree slp_node)
2005 tree vec_dest = NULL;
2006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2007 stmt_vec_info prev_stmt_info;
2008 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2009 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2010 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2011 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2012 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2013 tree rhs_vectype = NULL_TREE;
2014 tree mask_vectype;
2015 tree elem_type;
2016 gimple *new_stmt;
2017 tree dummy;
2018 tree dataref_ptr = NULL_TREE;
2019 gimple *ptr_incr;
2020 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2021 int ncopies;
2022 int i, j;
2023 bool inv_p;
2024 gather_scatter_info gs_info;
2025 vec_load_store_type vls_type;
2026 tree mask;
2027 gimple *def_stmt;
2028 enum vect_def_type dt;
2030 if (slp_node != NULL)
2031 return false;
2033 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2034 gcc_assert (ncopies >= 1);
2036 mask = gimple_call_arg (stmt, 2);
2038 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2039 return false;
2041 /* FORNOW. This restriction should be relaxed. */
2042 if (nested_in_vect_loop && ncopies > 1)
2044 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2046 "multiple types in nested loop.");
2047 return false;
2050 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2051 return false;
2053 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2054 && ! vec_stmt)
2055 return false;
2057 if (!STMT_VINFO_DATA_REF (stmt_info))
2058 return false;
2060 elem_type = TREE_TYPE (vectype);
2062 if (TREE_CODE (mask) != SSA_NAME)
2063 return false;
2065 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2066 return false;
2068 if (!mask_vectype)
2069 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2071 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2072 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2073 return false;
2075 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2077 tree rhs = gimple_call_arg (stmt, 3);
2078 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2079 return false;
2080 if (dt == vect_constant_def || dt == vect_external_def)
2081 vls_type = VLS_STORE_INVARIANT;
2082 else
2083 vls_type = VLS_STORE;
2085 else
2086 vls_type = VLS_LOAD;
2088 vect_memory_access_type memory_access_type;
2089 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2090 &memory_access_type, &gs_info))
2091 return false;
2093 if (memory_access_type == VMAT_GATHER_SCATTER)
2095 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2096 tree masktype
2097 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2098 if (TREE_CODE (masktype) == INTEGER_TYPE)
2100 if (dump_enabled_p ())
2101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2102 "masked gather with integer mask not supported.");
2103 return false;
2106 else if (memory_access_type != VMAT_CONTIGUOUS)
2108 if (dump_enabled_p ())
2109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2110 "unsupported access type for masked %s.\n",
2111 vls_type == VLS_LOAD ? "load" : "store");
2112 return false;
2114 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2115 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2116 TYPE_MODE (mask_vectype),
2117 vls_type == VLS_LOAD)
2118 || (rhs_vectype
2119 && !useless_type_conversion_p (vectype, rhs_vectype)))
2120 return false;
2122 if (!vec_stmt) /* transformation not required. */
2124 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2125 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2126 if (vls_type == VLS_LOAD)
2127 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2128 NULL, NULL, NULL);
2129 else
2130 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2131 dt, NULL, NULL, NULL);
2132 return true;
2134 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2136 /* Transform. */
2138 if (memory_access_type == VMAT_GATHER_SCATTER)
2140 tree vec_oprnd0 = NULL_TREE, op;
2141 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2142 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2143 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2144 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2145 tree mask_perm_mask = NULL_TREE;
2146 edge pe = loop_preheader_edge (loop);
2147 gimple_seq seq;
2148 basic_block new_bb;
2149 enum { NARROW, NONE, WIDEN } modifier;
2150 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2152 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2153 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2154 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2155 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2156 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2157 scaletype = TREE_VALUE (arglist);
2158 gcc_checking_assert (types_compatible_p (srctype, rettype)
2159 && types_compatible_p (srctype, masktype));
2161 if (nunits == gather_off_nunits)
2162 modifier = NONE;
2163 else if (nunits == gather_off_nunits / 2)
2165 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2166 modifier = WIDEN;
2168 for (i = 0; i < gather_off_nunits; ++i)
2169 sel[i] = i | nunits;
2171 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2173 else if (nunits == gather_off_nunits * 2)
2175 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2176 modifier = NARROW;
2178 for (i = 0; i < nunits; ++i)
2179 sel[i] = i < gather_off_nunits
2180 ? i : i + nunits - gather_off_nunits;
2182 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2183 ncopies *= 2;
2184 for (i = 0; i < nunits; ++i)
2185 sel[i] = i | gather_off_nunits;
2186 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2188 else
2189 gcc_unreachable ();
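/* Roughly: NONE means the offset vector and the data vector have the
   same number of elements.  WIDEN means the offset vector has twice as
   many elements, so odd copies simply reuse its high half via
   PERM_MASK.  NARROW means the data vector has twice as many elements,
   so each result is assembled from two gathers (hence ncopies * 2) and
   odd copies take the high half of the mask via MASK_PERM_MASK.  */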
2191 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2193 ptr = fold_convert (ptrtype, gs_info.base);
2194 if (!is_gimple_min_invariant (ptr))
2196 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2197 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2198 gcc_assert (!new_bb);
2201 scale = build_int_cst (scaletype, gs_info.scale);
2203 prev_stmt_info = NULL;
2204 for (j = 0; j < ncopies; ++j)
2206 if (modifier == WIDEN && (j & 1))
2207 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2208 perm_mask, stmt, gsi);
2209 else if (j == 0)
2210 op = vec_oprnd0
2211 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2212 else
2213 op = vec_oprnd0
2214 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2216 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2218 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2219 == TYPE_VECTOR_SUBPARTS (idxtype));
2220 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2221 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2222 new_stmt
2223 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2224 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2225 op = var;
2228 if (mask_perm_mask && (j & 1))
2229 mask_op = permute_vec_elements (mask_op, mask_op,
2230 mask_perm_mask, stmt, gsi);
2231 else
2233 if (j == 0)
2234 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2235 else
2237 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2238 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2241 mask_op = vec_mask;
2242 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2244 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2245 == TYPE_VECTOR_SUBPARTS (masktype));
2246 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2247 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2248 new_stmt
2249 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2251 mask_op = var;
2255 new_stmt
2256 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2257 scale);
2259 if (!useless_type_conversion_p (vectype, rettype))
2261 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2262 == TYPE_VECTOR_SUBPARTS (rettype));
2263 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2264 gimple_call_set_lhs (new_stmt, op);
2265 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2266 var = make_ssa_name (vec_dest);
2267 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2268 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2270 else
2272 var = make_ssa_name (vec_dest, new_stmt);
2273 gimple_call_set_lhs (new_stmt, var);
2276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2278 if (modifier == NARROW)
2280 if ((j & 1) == 0)
2282 prev_res = var;
2283 continue;
2285 var = permute_vec_elements (prev_res, var,
2286 perm_mask, stmt, gsi);
2287 new_stmt = SSA_NAME_DEF_STMT (var);
2290 if (prev_stmt_info == NULL)
2291 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2292 else
2293 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2294 prev_stmt_info = vinfo_for_stmt (new_stmt);
2297 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2298 from the IL. */
2299 if (STMT_VINFO_RELATED_STMT (stmt_info))
2301 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2302 stmt_info = vinfo_for_stmt (stmt);
2304 tree lhs = gimple_call_lhs (stmt);
2305 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2306 set_vinfo_for_stmt (new_stmt, stmt_info);
2307 set_vinfo_for_stmt (stmt, NULL);
2308 STMT_VINFO_STMT (stmt_info) = new_stmt;
2309 gsi_replace (gsi, new_stmt, true);
2310 return true;
2312 else if (vls_type != VLS_LOAD)
2314 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2315 prev_stmt_info = NULL;
2316 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2317 for (i = 0; i < ncopies; i++)
2319 unsigned align, misalign;
2321 if (i == 0)
2323 tree rhs = gimple_call_arg (stmt, 3);
2324 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2325 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2326 /* We should have caught mismatched types earlier. */
2327 gcc_assert (useless_type_conversion_p (vectype,
2328 TREE_TYPE (vec_rhs)));
2329 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2330 NULL_TREE, &dummy, gsi,
2331 &ptr_incr, false, &inv_p);
2332 gcc_assert (!inv_p);
2334 else
2336 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2337 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2338 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2339 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2340 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2341 TYPE_SIZE_UNIT (vectype));
2344 align = TYPE_ALIGN_UNIT (vectype);
2345 if (aligned_access_p (dr))
2346 misalign = 0;
2347 else if (DR_MISALIGNMENT (dr) == -1)
2349 align = TYPE_ALIGN_UNIT (elem_type);
2350 misalign = 0;
2352 else
2353 misalign = DR_MISALIGNMENT (dr);
2354 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2355 misalign);
2356 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2357 misalign ? least_bit_hwi (misalign) : align);
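/* A note on the constant built just above: the second argument of the
   masked store (and load) IFN records the alignment that may be
   assumed, in bytes -- the full vector alignment when the access is
   known to be aligned, the element alignment when the misalignment is
   unknown (DR_MISALIGNMENT == -1), and otherwise the largest power of
   two dividing the known misalignment (least_bit_hwi).  */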
2358 new_stmt
2359 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2360 ptr, vec_mask, vec_rhs);
2361 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2362 if (i == 0)
2363 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2364 else
2365 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2366 prev_stmt_info = vinfo_for_stmt (new_stmt);
2369 else
2371 tree vec_mask = NULL_TREE;
2372 prev_stmt_info = NULL;
2373 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2374 for (i = 0; i < ncopies; i++)
2376 unsigned align, misalign;
2378 if (i == 0)
2380 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2381 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2382 NULL_TREE, &dummy, gsi,
2383 &ptr_incr, false, &inv_p);
2384 gcc_assert (!inv_p);
2386 else
2388 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2389 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2390 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2391 TYPE_SIZE_UNIT (vectype));
2394 align = TYPE_ALIGN_UNIT (vectype);
2395 if (aligned_access_p (dr))
2396 misalign = 0;
2397 else if (DR_MISALIGNMENT (dr) == -1)
2399 align = TYPE_ALIGN_UNIT (elem_type);
2400 misalign = 0;
2402 else
2403 misalign = DR_MISALIGNMENT (dr);
2404 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2405 misalign);
2406 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2407 misalign ? least_bit_hwi (misalign) : align);
2408 new_stmt
2409 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2410 ptr, vec_mask);
2411 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2413 if (i == 0)
2414 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2415 else
2416 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2417 prev_stmt_info = vinfo_for_stmt (new_stmt);
2421 if (vls_type == VLS_LOAD)
2423 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2424 from the IL. */
2425 if (STMT_VINFO_RELATED_STMT (stmt_info))
2427 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2428 stmt_info = vinfo_for_stmt (stmt);
2430 tree lhs = gimple_call_lhs (stmt);
2431 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2432 set_vinfo_for_stmt (new_stmt, stmt_info);
2433 set_vinfo_for_stmt (stmt, NULL);
2434 STMT_VINFO_STMT (stmt_info) = new_stmt;
2435 gsi_replace (gsi, new_stmt, true);
2438 return true;
2441 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
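/* The strategy used below: view-convert each operand to a vector of
   chars, reverse the bytes within every word using a single
   VEC_PERM_EXPR, and view-convert the result back to the original
   vector type.  This is only done when the target supports the
   required byte permutation (can_vec_perm_p).  */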
2443 static bool
2444 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2445 gimple **vec_stmt, slp_tree slp_node,
2446 tree vectype_in, enum vect_def_type *dt)
2448 tree op, vectype;
2449 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2450 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2451 unsigned ncopies, nunits;
2453 op = gimple_call_arg (stmt, 0);
2454 vectype = STMT_VINFO_VECTYPE (stmt_info);
2455 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2457 /* Multiple types in SLP are handled by creating the appropriate number of
2458 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2459 case of SLP. */
2460 if (slp_node)
2461 ncopies = 1;
2462 else
2463 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2465 gcc_assert (ncopies >= 1);
2467 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2468 if (! char_vectype)
2469 return false;
2471 unsigned char *elts
2472 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2473 unsigned char *elt = elts;
2474 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2475 for (unsigned i = 0; i < nunits; ++i)
2476 for (unsigned j = 0; j < word_bytes; ++j)
2477 *elt++ = (i + 1) * word_bytes - j - 1;
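/* For example, for bswap32 on a vector of four 32-bit elements the byte
   selector built above is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */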
2479 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2480 return false;
2482 if (! vec_stmt)
2484 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2485 if (dump_enabled_p ())
2486 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2487 "\n");
2488 if (! PURE_SLP_STMT (stmt_info))
2490 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2491 1, vector_stmt, stmt_info, 0, vect_prologue);
2492 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2493 ncopies, vec_perm, stmt_info, 0, vect_body);
2495 return true;
2498 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2499 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2500 telts[i] = build_int_cst (char_type_node, elts[i]);
2501 tree bswap_vconst = build_vector (char_vectype, telts);
2503 /* Transform. */
2504 vec<tree> vec_oprnds = vNULL;
2505 gimple *new_stmt = NULL;
2506 stmt_vec_info prev_stmt_info = NULL;
2507 for (unsigned j = 0; j < ncopies; j++)
2509 /* Handle uses. */
2510 if (j == 0)
2511 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2512 else
2513 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2515 /* Arguments are ready. Create the new vector stmt. */
2516 unsigned i;
2517 tree vop;
2518 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2520 tree tem = make_ssa_name (char_vectype);
2521 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2522 char_vectype, vop));
2523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2524 tree tem2 = make_ssa_name (char_vectype);
2525 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2526 tem, tem, bswap_vconst);
2527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2528 tem = make_ssa_name (vectype);
2529 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2530 vectype, tem2));
2531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2532 if (slp_node)
2533 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2536 if (slp_node)
2537 continue;
2539 if (j == 0)
2540 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2541 else
2542 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2544 prev_stmt_info = vinfo_for_stmt (new_stmt);
2547 vec_oprnds.release ();
2548 return true;
2551 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2552 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2553 in a single step. On success, store the binary pack code in
2554 *CONVERT_CODE. */
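/* Illustrative use (see vectorizable_call below, types chosen for the
   example): with vectype_in V4DI and vectype_out V8SI, two vectorized
   calls produce V4DI halves that a single statement using the code
   stored in *CONVERT_CODE (typically VEC_PACK_TRUNC_EXPR) combines
   into one V8SI result.  */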
2556 static bool
2557 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2558 tree_code *convert_code)
2560 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2561 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2562 return false;
2564 tree_code code;
2565 int multi_step_cvt = 0;
2566 auto_vec <tree, 8> interm_types;
2567 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2568 &code, &multi_step_cvt,
2569 &interm_types)
2570 || multi_step_cvt)
2571 return false;
2573 *convert_code = code;
2574 return true;
2577 /* Function vectorizable_call.
2579 Check if GS performs a function call that can be vectorized.
2580 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2581 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2582 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2584 static bool
2585 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2586 slp_tree slp_node)
2588 gcall *stmt;
2589 tree vec_dest;
2590 tree scalar_dest;
2591 tree op, type;
2592 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2593 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2594 tree vectype_out, vectype_in;
2595 int nunits_in;
2596 int nunits_out;
2597 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2598 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2599 vec_info *vinfo = stmt_info->vinfo;
2600 tree fndecl, new_temp, rhs_type;
2601 gimple *def_stmt;
2602 enum vect_def_type dt[3]
2603 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2604 gimple *new_stmt = NULL;
2605 int ncopies, j;
2606 vec<tree> vargs = vNULL;
2607 enum { NARROW, NONE, WIDEN } modifier;
2608 size_t i, nargs;
2609 tree lhs;
2611 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2612 return false;
2614 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2615 && ! vec_stmt)
2616 return false;
2618 /* Is GS a vectorizable call? */
2619 stmt = dyn_cast <gcall *> (gs);
2620 if (!stmt)
2621 return false;
2623 if (gimple_call_internal_p (stmt)
2624 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2625 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2626 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2627 slp_node);
2629 if (gimple_call_lhs (stmt) == NULL_TREE
2630 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2631 return false;
2633 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2635 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2637 /* Process function arguments. */
2638 rhs_type = NULL_TREE;
2639 vectype_in = NULL_TREE;
2640 nargs = gimple_call_num_args (stmt);
2642 /* Bail out if the function has more than three arguments; we do not have
2643 interesting builtin functions to vectorize with more than two arguments
2644 except for fma. A call with no arguments is not supported either. */
2645 if (nargs == 0 || nargs > 3)
2646 return false;
2648 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2649 if (gimple_call_internal_p (stmt)
2650 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2652 nargs = 0;
2653 rhs_type = unsigned_type_node;
2656 for (i = 0; i < nargs; i++)
2658 tree opvectype;
2660 op = gimple_call_arg (stmt, i);
2662 /* We can only handle calls with arguments of the same type. */
2663 if (rhs_type
2664 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2666 if (dump_enabled_p ())
2667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2668 "argument types differ.\n");
2669 return false;
2671 if (!rhs_type)
2672 rhs_type = TREE_TYPE (op);
2674 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2676 if (dump_enabled_p ())
2677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2678 "use not simple.\n");
2679 return false;
2682 if (!vectype_in)
2683 vectype_in = opvectype;
2684 else if (opvectype
2685 && opvectype != vectype_in)
2687 if (dump_enabled_p ())
2688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2689 "argument vector types differ.\n");
2690 return false;
2693 /* If all arguments are external or constant defs use a vector type with
2694 the same size as the output vector type. */
2695 if (!vectype_in)
2696 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2697 if (vec_stmt)
2698 gcc_assert (vectype_in);
2699 if (!vectype_in)
2701 if (dump_enabled_p ())
2703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2704 "no vectype for scalar type ");
2705 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2706 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2709 return false;
2712 /* FORNOW */
2713 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2714 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2715 if (nunits_in == nunits_out / 2)
2716 modifier = NARROW;
2717 else if (nunits_out == nunits_in)
2718 modifier = NONE;
2719 else if (nunits_out == nunits_in / 2)
2720 modifier = WIDEN;
2721 else
2722 return false;
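/* Illustrative example (not from the sources): a call taking double
   arguments and producing int results might have vectype_in V2DF and
   vectype_out V4SI, so nunits_in 2 and nunits_out 4 and hence
   modifier == NARROW; equal counts give NONE, and a result with half
   as many (wider) elements gives WIDEN.  */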
2724 /* We only handle functions that do not read or clobber memory. */
2725 if (gimple_vuse (stmt))
2727 if (dump_enabled_p ())
2728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2729 "function reads from or writes to memory.\n");
2730 return false;
2733 /* For now, we only vectorize functions if a target-specific builtin
2734 is available. TODO -- in some cases, it might be profitable to
2735 insert the calls for pieces of the vector, in order to be able
2736 to vectorize other operations in the loop. */
2737 fndecl = NULL_TREE;
2738 internal_fn ifn = IFN_LAST;
2739 combined_fn cfn = gimple_call_combined_fn (stmt);
2740 tree callee = gimple_call_fndecl (stmt);
2742 /* First try using an internal function. */
2743 tree_code convert_code = ERROR_MARK;
2744 if (cfn != CFN_LAST
2745 && (modifier == NONE
2746 || (modifier == NARROW
2747 && simple_integer_narrowing (vectype_out, vectype_in,
2748 &convert_code))))
2749 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2750 vectype_in);
2752 /* If that fails, try asking for a target-specific built-in function. */
2753 if (ifn == IFN_LAST)
2755 if (cfn != CFN_LAST)
2756 fndecl = targetm.vectorize.builtin_vectorized_function
2757 (cfn, vectype_out, vectype_in);
2758 else
2759 fndecl = targetm.vectorize.builtin_md_vectorized_function
2760 (callee, vectype_out, vectype_in);
2763 if (ifn == IFN_LAST && !fndecl)
2765 if (cfn == CFN_GOMP_SIMD_LANE
2766 && !slp_node
2767 && loop_vinfo
2768 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2769 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2770 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2771 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2773 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2774 { 0, 1, 2, ... vf - 1 } vector. */
2775 gcc_assert (nargs == 0);
2777 else if (modifier == NONE
2778 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2779 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2780 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2781 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2782 vectype_in, dt);
2783 else
2785 if (dump_enabled_p ())
2786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2787 "function is not vectorizable.\n");
2788 return false;
2792 if (slp_node)
2793 ncopies = 1;
2794 else if (modifier == NARROW && ifn == IFN_LAST)
2795 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2796 else
2797 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2799 /* Sanity check: make sure that at least one copy of the vectorized stmt
2800 needs to be generated. */
2801 gcc_assert (ncopies >= 1);
2803 if (!vec_stmt) /* transformation not required. */
2805 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2806 if (dump_enabled_p ())
2807 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2808 "\n");
2809 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2810 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2811 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2812 vec_promote_demote, stmt_info, 0, vect_body);
2814 return true;
2817 /* Transform. */
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2822 /* Handle def. */
2823 scalar_dest = gimple_call_lhs (stmt);
2824 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2826 prev_stmt_info = NULL;
2827 if (modifier == NONE || ifn != IFN_LAST)
2829 tree prev_res = NULL_TREE;
2830 for (j = 0; j < ncopies; ++j)
2832 /* Build argument list for the vectorized call. */
2833 if (j == 0)
2834 vargs.create (nargs);
2835 else
2836 vargs.truncate (0);
2838 if (slp_node)
2840 auto_vec<vec<tree> > vec_defs (nargs);
2841 vec<tree> vec_oprnds0;
2843 for (i = 0; i < nargs; i++)
2844 vargs.quick_push (gimple_call_arg (stmt, i));
2845 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2846 vec_oprnds0 = vec_defs[0];
2848 /* Arguments are ready. Create the new vector stmt. */
2849 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2851 size_t k;
2852 for (k = 0; k < nargs; k++)
2854 vec<tree> vec_oprndsk = vec_defs[k];
2855 vargs[k] = vec_oprndsk[i];
2857 if (modifier == NARROW)
2859 tree half_res = make_ssa_name (vectype_in);
2860 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2861 gimple_call_set_lhs (new_stmt, half_res);
2862 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2863 if ((i & 1) == 0)
2865 prev_res = half_res;
2866 continue;
2868 new_temp = make_ssa_name (vec_dest);
2869 new_stmt = gimple_build_assign (new_temp, convert_code,
2870 prev_res, half_res);
2872 else
2874 if (ifn != IFN_LAST)
2875 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2876 else
2877 new_stmt = gimple_build_call_vec (fndecl, vargs);
2878 new_temp = make_ssa_name (vec_dest, new_stmt);
2879 gimple_call_set_lhs (new_stmt, new_temp);
2881 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2882 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2885 for (i = 0; i < nargs; i++)
2887 vec<tree> vec_oprndsi = vec_defs[i];
2888 vec_oprndsi.release ();
2890 continue;
2893 for (i = 0; i < nargs; i++)
2895 op = gimple_call_arg (stmt, i);
2896 if (j == 0)
2897 vec_oprnd0
2898 = vect_get_vec_def_for_operand (op, stmt);
2899 else
2901 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2902 vec_oprnd0
2903 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2906 vargs.quick_push (vec_oprnd0);
2909 if (gimple_call_internal_p (stmt)
2910 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2912 tree *v = XALLOCAVEC (tree, nunits_out);
2913 int k;
2914 for (k = 0; k < nunits_out; ++k)
2915 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2916 tree cst = build_vector (vectype_out, v);
2917 tree new_var
2918 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2919 gimple *init_stmt = gimple_build_assign (new_var, cst);
2920 vect_init_vector_1 (stmt, init_stmt, NULL);
2921 new_temp = make_ssa_name (vec_dest);
2922 new_stmt = gimple_build_assign (new_temp, new_var);
2924 else if (modifier == NARROW)
2926 tree half_res = make_ssa_name (vectype_in);
2927 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2928 gimple_call_set_lhs (new_stmt, half_res);
2929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2930 if ((j & 1) == 0)
2932 prev_res = half_res;
2933 continue;
2935 new_temp = make_ssa_name (vec_dest);
2936 new_stmt = gimple_build_assign (new_temp, convert_code,
2937 prev_res, half_res);
2939 else
2941 if (ifn != IFN_LAST)
2942 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2943 else
2944 new_stmt = gimple_build_call_vec (fndecl, vargs);
2945 new_temp = make_ssa_name (vec_dest, new_stmt);
2946 gimple_call_set_lhs (new_stmt, new_temp);
2948 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2950 if (j == (modifier == NARROW ? 1 : 0))
2951 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2952 else
2953 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2955 prev_stmt_info = vinfo_for_stmt (new_stmt);
2958 else if (modifier == NARROW)
2960 for (j = 0; j < ncopies; ++j)
2962 /* Build argument list for the vectorized call. */
2963 if (j == 0)
2964 vargs.create (nargs * 2);
2965 else
2966 vargs.truncate (0);
2968 if (slp_node)
2970 auto_vec<vec<tree> > vec_defs (nargs);
2971 vec<tree> vec_oprnds0;
2973 for (i = 0; i < nargs; i++)
2974 vargs.quick_push (gimple_call_arg (stmt, i));
2975 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2976 vec_oprnds0 = vec_defs[0];
2978 /* Arguments are ready. Create the new vector stmt. */
2979 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2981 size_t k;
2982 vargs.truncate (0);
2983 for (k = 0; k < nargs; k++)
2985 vec<tree> vec_oprndsk = vec_defs[k];
2986 vargs.quick_push (vec_oprndsk[i]);
2987 vargs.quick_push (vec_oprndsk[i + 1]);
2989 if (ifn != IFN_LAST)
2990 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2991 else
2992 new_stmt = gimple_build_call_vec (fndecl, vargs);
2993 new_temp = make_ssa_name (vec_dest, new_stmt);
2994 gimple_call_set_lhs (new_stmt, new_temp);
2995 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2996 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2999 for (i = 0; i < nargs; i++)
3001 vec<tree> vec_oprndsi = vec_defs[i];
3002 vec_oprndsi.release ();
3004 continue;
3007 for (i = 0; i < nargs; i++)
3009 op = gimple_call_arg (stmt, i);
3010 if (j == 0)
3012 vec_oprnd0
3013 = vect_get_vec_def_for_operand (op, stmt);
3014 vec_oprnd1
3015 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3017 else
3019 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3020 vec_oprnd0
3021 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3022 vec_oprnd1
3023 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3026 vargs.quick_push (vec_oprnd0);
3027 vargs.quick_push (vec_oprnd1);
3030 new_stmt = gimple_build_call_vec (fndecl, vargs);
3031 new_temp = make_ssa_name (vec_dest, new_stmt);
3032 gimple_call_set_lhs (new_stmt, new_temp);
3033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3035 if (j == 0)
3036 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3037 else
3038 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3040 prev_stmt_info = vinfo_for_stmt (new_stmt);
3043 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3045 else
3046 /* No current target implements this case. */
3047 return false;
3049 vargs.release ();
3051 /* The call in STMT might prevent it from being removed in dce.
3052 However, we cannot remove it here, because of the way the ssa name
3053 it defines is mapped to the new definition. So just replace the
3054 rhs of the statement with something harmless. */
3056 if (slp_node)
3057 return true;
3059 type = TREE_TYPE (scalar_dest);
3060 if (is_pattern_stmt_p (stmt_info))
3061 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3062 else
3063 lhs = gimple_call_lhs (stmt);
3065 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3066 set_vinfo_for_stmt (new_stmt, stmt_info);
3067 set_vinfo_for_stmt (stmt, NULL);
3068 STMT_VINFO_STMT (stmt_info) = new_stmt;
3069 gsi_replace (gsi, new_stmt, false);
3071 return true;
3075 struct simd_call_arg_info
3077 tree vectype;
3078 tree op;
3079 HOST_WIDE_INT linear_step;
3080 enum vect_def_type dt;
3081 unsigned int align;
3082 bool simd_lane_linear;
3085 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3086 is linear within a simd lane (but not within the whole loop), note it in
3087 *ARGINFO. */
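/* A sketch of the pattern recognized below (all names invented for the
   example): given
     _lane = GOMP_SIMD_LANE (simduid);
     _off = _lane * 8;
     op = base + _off;   (a POINTER_PLUS_EXPR with an invariant base)
   the argument is linear within a simd lane with linear_step 8, and
   the base and step are what get recorded in *ARGINFO.  */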
3089 static void
3090 vect_simd_lane_linear (tree op, struct loop *loop,
3091 struct simd_call_arg_info *arginfo)
3093 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3095 if (!is_gimple_assign (def_stmt)
3096 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3097 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3098 return;
3100 tree base = gimple_assign_rhs1 (def_stmt);
3101 HOST_WIDE_INT linear_step = 0;
3102 tree v = gimple_assign_rhs2 (def_stmt);
3103 while (TREE_CODE (v) == SSA_NAME)
3105 tree t;
3106 def_stmt = SSA_NAME_DEF_STMT (v);
3107 if (is_gimple_assign (def_stmt))
3108 switch (gimple_assign_rhs_code (def_stmt))
3110 case PLUS_EXPR:
3111 t = gimple_assign_rhs2 (def_stmt);
3112 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3113 return;
3114 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3115 v = gimple_assign_rhs1 (def_stmt);
3116 continue;
3117 case MULT_EXPR:
3118 t = gimple_assign_rhs2 (def_stmt);
3119 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3120 return;
3121 linear_step = tree_to_shwi (t);
3122 v = gimple_assign_rhs1 (def_stmt);
3123 continue;
3124 CASE_CONVERT:
3125 t = gimple_assign_rhs1 (def_stmt);
3126 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3127 || (TYPE_PRECISION (TREE_TYPE (v))
3128 < TYPE_PRECISION (TREE_TYPE (t))))
3129 return;
3130 if (!linear_step)
3131 linear_step = 1;
3132 v = t;
3133 continue;
3134 default:
3135 return;
3137 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3138 && loop->simduid
3139 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3140 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3141 == loop->simduid))
3143 if (!linear_step)
3144 linear_step = 1;
3145 arginfo->linear_step = linear_step;
3146 arginfo->op = base;
3147 arginfo->simd_lane_linear = true;
3148 return;
3153 /* Function vectorizable_simd_clone_call.
3155 Check if STMT performs a function call that can be vectorized
3156 by calling a simd clone of the function.
3157 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3158 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3159 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
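/* In other words (an informal summary): a call to a function that has
   simd clones (e.g. one declared with "#pragma omp declare simd") can
   be replaced by one call per copy to the most suitable clone in
   NODE->simd_clones, with each argument packed into vectors, split
   across several vector arguments, or passed as a uniform or linear
   scalar, as dictated by the clone's signature.  */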
3161 static bool
3162 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3163 gimple **vec_stmt, slp_tree slp_node)
3165 tree vec_dest;
3166 tree scalar_dest;
3167 tree op, type;
3168 tree vec_oprnd0 = NULL_TREE;
3169 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3170 tree vectype;
3171 unsigned int nunits;
3172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3174 vec_info *vinfo = stmt_info->vinfo;
3175 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3176 tree fndecl, new_temp;
3177 gimple *def_stmt;
3178 gimple *new_stmt = NULL;
3179 int ncopies, j;
3180 auto_vec<simd_call_arg_info> arginfo;
3181 vec<tree> vargs = vNULL;
3182 size_t i, nargs;
3183 tree lhs, rtype, ratype;
3184 vec<constructor_elt, va_gc> *ret_ctor_elts;
3186 /* Is STMT a vectorizable call? */
3187 if (!is_gimple_call (stmt))
3188 return false;
3190 fndecl = gimple_call_fndecl (stmt);
3191 if (fndecl == NULL_TREE)
3192 return false;
3194 struct cgraph_node *node = cgraph_node::get (fndecl);
3195 if (node == NULL || node->simd_clones == NULL)
3196 return false;
3198 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3199 return false;
3201 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3202 && ! vec_stmt)
3203 return false;
3205 if (gimple_call_lhs (stmt)
3206 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3207 return false;
3209 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3211 vectype = STMT_VINFO_VECTYPE (stmt_info);
3213 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3214 return false;
3216 /* FORNOW */
3217 if (slp_node)
3218 return false;
3220 /* Process function arguments. */
3221 nargs = gimple_call_num_args (stmt);
3223 /* Bail out if the function has zero arguments. */
3224 if (nargs == 0)
3225 return false;
3227 arginfo.reserve (nargs, true);
3229 for (i = 0; i < nargs; i++)
3231 simd_call_arg_info thisarginfo;
3232 affine_iv iv;
3234 thisarginfo.linear_step = 0;
3235 thisarginfo.align = 0;
3236 thisarginfo.op = NULL_TREE;
3237 thisarginfo.simd_lane_linear = false;
3239 op = gimple_call_arg (stmt, i);
3240 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3241 &thisarginfo.vectype)
3242 || thisarginfo.dt == vect_uninitialized_def)
3244 if (dump_enabled_p ())
3245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3246 "use not simple.\n");
3247 return false;
3250 if (thisarginfo.dt == vect_constant_def
3251 || thisarginfo.dt == vect_external_def)
3252 gcc_assert (thisarginfo.vectype == NULL_TREE);
3253 else
3254 gcc_assert (thisarginfo.vectype != NULL_TREE);
3256 /* For linear arguments, the analysis phase should have saved
3257 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3258 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3259 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3261 gcc_assert (vec_stmt);
3262 thisarginfo.linear_step
3263 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3264 thisarginfo.op
3265 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3266 thisarginfo.simd_lane_linear
3267 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3268 == boolean_true_node);
3269 /* If the loop has been peeled for alignment, we need to adjust it. */
3270 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3271 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3272 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3274 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3275 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3276 tree opt = TREE_TYPE (thisarginfo.op);
3277 bias = fold_convert (TREE_TYPE (step), bias);
3278 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3279 thisarginfo.op
3280 = fold_build2 (POINTER_TYPE_P (opt)
3281 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3282 thisarginfo.op, bias);
3285 else if (!vec_stmt
3286 && thisarginfo.dt != vect_constant_def
3287 && thisarginfo.dt != vect_external_def
3288 && loop_vinfo
3289 && TREE_CODE (op) == SSA_NAME
3290 && simple_iv (loop, loop_containing_stmt (stmt), op,
3291 &iv, false)
3292 && tree_fits_shwi_p (iv.step))
3294 thisarginfo.linear_step = tree_to_shwi (iv.step);
3295 thisarginfo.op = iv.base;
3297 else if ((thisarginfo.dt == vect_constant_def
3298 || thisarginfo.dt == vect_external_def)
3299 && POINTER_TYPE_P (TREE_TYPE (op)))
3300 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3301 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3302 linear too. */
3303 if (POINTER_TYPE_P (TREE_TYPE (op))
3304 && !thisarginfo.linear_step
3305 && !vec_stmt
3306 && thisarginfo.dt != vect_constant_def
3307 && thisarginfo.dt != vect_external_def
3308 && loop_vinfo
3309 && !slp_node
3310 && TREE_CODE (op) == SSA_NAME)
3311 vect_simd_lane_linear (op, loop, &thisarginfo);
3313 arginfo.quick_push (thisarginfo);
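/* Choose the best clone.  The scoring below prefers clones whose
   simdlen matches the vectorization factor (a shorter clone pays 1024
   per missing doubling), penalizes inbranch clones (2048, and for now
   ultimately skips them) and any target-reported badness (times 512),
   and adds 64 for each vector argument that is actually uniform or
   linear; clones with incompatible arguments are skipped altogether.  */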
3316 unsigned int badness = 0;
3317 struct cgraph_node *bestn = NULL;
3318 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3319 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3320 else
3321 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3322 n = n->simdclone->next_clone)
3324 unsigned int this_badness = 0;
3325 if (n->simdclone->simdlen
3326 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3327 || n->simdclone->nargs != nargs)
3328 continue;
3329 if (n->simdclone->simdlen
3330 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3331 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3332 - exact_log2 (n->simdclone->simdlen)) * 1024;
3333 if (n->simdclone->inbranch)
3334 this_badness += 2048;
3335 int target_badness = targetm.simd_clone.usable (n);
3336 if (target_badness < 0)
3337 continue;
3338 this_badness += target_badness * 512;
3339 /* FORNOW: Code to pass the mask argument still needs to be added. */
3340 if (n->simdclone->inbranch)
3341 continue;
3342 for (i = 0; i < nargs; i++)
3344 switch (n->simdclone->args[i].arg_type)
3346 case SIMD_CLONE_ARG_TYPE_VECTOR:
3347 if (!useless_type_conversion_p
3348 (n->simdclone->args[i].orig_type,
3349 TREE_TYPE (gimple_call_arg (stmt, i))))
3350 i = -1;
3351 else if (arginfo[i].dt == vect_constant_def
3352 || arginfo[i].dt == vect_external_def
3353 || arginfo[i].linear_step)
3354 this_badness += 64;
3355 break;
3356 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3357 if (arginfo[i].dt != vect_constant_def
3358 && arginfo[i].dt != vect_external_def)
3359 i = -1;
3360 break;
3361 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3362 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3363 if (arginfo[i].dt == vect_constant_def
3364 || arginfo[i].dt == vect_external_def
3365 || (arginfo[i].linear_step
3366 != n->simdclone->args[i].linear_step))
3367 i = -1;
3368 break;
3369 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3370 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3371 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3372 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3373 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3374 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3375 /* FORNOW */
3376 i = -1;
3377 break;
3378 case SIMD_CLONE_ARG_TYPE_MASK:
3379 gcc_unreachable ();
3381 if (i == (size_t) -1)
3382 break;
3383 if (n->simdclone->args[i].alignment > arginfo[i].align)
3385 i = -1;
3386 break;
3388 if (arginfo[i].align)
3389 this_badness += (exact_log2 (arginfo[i].align)
3390 - exact_log2 (n->simdclone->args[i].alignment));
3392 if (i == (size_t) -1)
3393 continue;
3394 if (bestn == NULL || this_badness < badness)
3396 bestn = n;
3397 badness = this_badness;
3401 if (bestn == NULL)
3402 return false;
3404 for (i = 0; i < nargs; i++)
3405 if ((arginfo[i].dt == vect_constant_def
3406 || arginfo[i].dt == vect_external_def)
3407 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3409 arginfo[i].vectype
3410 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3411 i)));
3412 if (arginfo[i].vectype == NULL
3413 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3414 > bestn->simdclone->simdlen))
3415 return false;
3418 fndecl = bestn->decl;
3419 nunits = bestn->simdclone->simdlen;
3420 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3422 /* If the function isn't const, only allow it in simd loops where the user
3423 has asserted that at least nunits consecutive iterations can be
3424 performed using SIMD instructions. */
3425 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3426 && gimple_vuse (stmt))
3427 return false;
3429 /* Sanity check: make sure that at least one copy of the vectorized stmt
3430 needs to be generated. */
3431 gcc_assert (ncopies >= 1);
3433 if (!vec_stmt) /* transformation not required. */
3435 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3436 for (i = 0; i < nargs; i++)
3437 if ((bestn->simdclone->args[i].arg_type
3438 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3439 || (bestn->simdclone->args[i].arg_type
3440 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3442 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3443 + 1);
3444 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3445 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3446 ? size_type_node : TREE_TYPE (arginfo[i].op);
3447 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3448 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3449 tree sll = arginfo[i].simd_lane_linear
3450 ? boolean_true_node : boolean_false_node;
3451 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3453 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3454 if (dump_enabled_p ())
3455 dump_printf_loc (MSG_NOTE, vect_location,
3456 "=== vectorizable_simd_clone_call ===\n");
3457 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3458 return true;
3461 /** Transform. **/
3463 if (dump_enabled_p ())
3464 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3466 /* Handle def. */
3467 scalar_dest = gimple_call_lhs (stmt);
3468 vec_dest = NULL_TREE;
3469 rtype = NULL_TREE;
3470 ratype = NULL_TREE;
3471 if (scalar_dest)
3473 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3474 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3475 if (TREE_CODE (rtype) == ARRAY_TYPE)
3477 ratype = rtype;
3478 rtype = TREE_TYPE (ratype);
3482 prev_stmt_info = NULL;
3483 for (j = 0; j < ncopies; ++j)
3485 /* Build argument list for the vectorized call. */
3486 if (j == 0)
3487 vargs.create (nargs);
3488 else
3489 vargs.truncate (0);
3491 for (i = 0; i < nargs; i++)
3493 unsigned int k, l, m, o;
3494 tree atype;
3495 op = gimple_call_arg (stmt, i);
3496 switch (bestn->simdclone->args[i].arg_type)
3498 case SIMD_CLONE_ARG_TYPE_VECTOR:
3499 atype = bestn->simdclone->args[i].vector_type;
3500 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3501 for (m = j * o; m < (j + 1) * o; m++)
3503 if (TYPE_VECTOR_SUBPARTS (atype)
3504 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3506 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3507 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3508 / TYPE_VECTOR_SUBPARTS (atype));
3509 gcc_assert ((k & (k - 1)) == 0);
3510 if (m == 0)
3511 vec_oprnd0
3512 = vect_get_vec_def_for_operand (op, stmt);
3513 else
3515 vec_oprnd0 = arginfo[i].op;
3516 if ((m & (k - 1)) == 0)
3517 vec_oprnd0
3518 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3519 vec_oprnd0);
3521 arginfo[i].op = vec_oprnd0;
3522 vec_oprnd0
3523 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3524 size_int (prec),
3525 bitsize_int ((m & (k - 1)) * prec));
3526 new_stmt
3527 = gimple_build_assign (make_ssa_name (atype),
3528 vec_oprnd0);
3529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3530 vargs.safe_push (gimple_assign_lhs (new_stmt));
3532 else
3534 k = (TYPE_VECTOR_SUBPARTS (atype)
3535 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3536 gcc_assert ((k & (k - 1)) == 0);
3537 vec<constructor_elt, va_gc> *ctor_elts;
3538 if (k != 1)
3539 vec_alloc (ctor_elts, k);
3540 else
3541 ctor_elts = NULL;
3542 for (l = 0; l < k; l++)
3544 if (m == 0 && l == 0)
3545 vec_oprnd0
3546 = vect_get_vec_def_for_operand (op, stmt);
3547 else
3548 vec_oprnd0
3549 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3550 arginfo[i].op);
3551 arginfo[i].op = vec_oprnd0;
3552 if (k == 1)
3553 break;
3554 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3555 vec_oprnd0);
3557 if (k == 1)
3558 vargs.safe_push (vec_oprnd0);
3559 else
3561 vec_oprnd0 = build_constructor (atype, ctor_elts);
3562 new_stmt
3563 = gimple_build_assign (make_ssa_name (atype),
3564 vec_oprnd0);
3565 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3566 vargs.safe_push (gimple_assign_lhs (new_stmt));
3570 break;
3571 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3572 vargs.safe_push (op);
3573 break;
3574 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3575 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3576 if (j == 0)
3578 gimple_seq stmts;
3579 arginfo[i].op
3580 = force_gimple_operand (arginfo[i].op, &stmts, true,
3581 NULL_TREE);
3582 if (stmts != NULL)
3584 basic_block new_bb;
3585 edge pe = loop_preheader_edge (loop);
3586 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3587 gcc_assert (!new_bb);
3589 if (arginfo[i].simd_lane_linear)
3591 vargs.safe_push (arginfo[i].op);
3592 break;
3594 tree phi_res = copy_ssa_name (op);
3595 gphi *new_phi = create_phi_node (phi_res, loop->header);
3596 set_vinfo_for_stmt (new_phi,
3597 new_stmt_vec_info (new_phi, loop_vinfo));
3598 add_phi_arg (new_phi, arginfo[i].op,
3599 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3600 enum tree_code code
3601 = POINTER_TYPE_P (TREE_TYPE (op))
3602 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3603 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3604 ? sizetype : TREE_TYPE (op);
3605 widest_int cst
3606 = wi::mul (bestn->simdclone->args[i].linear_step,
3607 ncopies * nunits);
3608 tree tcst = wide_int_to_tree (type, cst);
3609 tree phi_arg = copy_ssa_name (op);
3610 new_stmt
3611 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3612 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3613 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3614 set_vinfo_for_stmt (new_stmt,
3615 new_stmt_vec_info (new_stmt, loop_vinfo));
3616 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3617 UNKNOWN_LOCATION);
3618 arginfo[i].op = phi_res;
3619 vargs.safe_push (phi_res);
3621 else
3623 enum tree_code code
3624 = POINTER_TYPE_P (TREE_TYPE (op))
3625 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3626 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3627 ? sizetype : TREE_TYPE (op);
3628 widest_int cst
3629 = wi::mul (bestn->simdclone->args[i].linear_step,
3630 j * nunits);
3631 tree tcst = wide_int_to_tree (type, cst);
3632 new_temp = make_ssa_name (TREE_TYPE (op));
3633 new_stmt = gimple_build_assign (new_temp, code,
3634 arginfo[i].op, tcst);
3635 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3636 vargs.safe_push (new_temp);
3638 break;
3639 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3640 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3641 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3642 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3643 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3644 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3645 default:
3646 gcc_unreachable ();
3650 new_stmt = gimple_build_call_vec (fndecl, vargs);
3651 if (vec_dest)
3653 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3654 if (ratype)
3655 new_temp = create_tmp_var (ratype);
3656 else if (TYPE_VECTOR_SUBPARTS (vectype)
3657 == TYPE_VECTOR_SUBPARTS (rtype))
3658 new_temp = make_ssa_name (vec_dest, new_stmt);
3659 else
3660 new_temp = make_ssa_name (rtype, new_stmt);
3661 gimple_call_set_lhs (new_stmt, new_temp);
3663 vect_finish_stmt_generation (stmt, new_stmt, gsi);
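/* The value returned by the clone may not match VECTYPE directly.  If the
   clone produces more lanes per call than VECTYPE holds, the result is
   split below into pieces of VECTYPE (via BIT_FIELD_REFs, or MEM_REFs
   when the clone returns an array, i.e. RATYPE is set).  If it produces
   fewer lanes, the results of several calls are instead accumulated into
   a CONSTRUCTOR of VECTYPE.  */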
3665 if (vec_dest)
3667 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3669 unsigned int k, l;
3670 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3671 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3672 gcc_assert ((k & (k - 1)) == 0);
3673 for (l = 0; l < k; l++)
3675 tree t;
3676 if (ratype)
3678 t = build_fold_addr_expr (new_temp);
3679 t = build2 (MEM_REF, vectype, t,
3680 build_int_cst (TREE_TYPE (t),
3681 l * prec / BITS_PER_UNIT));
3683 else
3684 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3685 size_int (prec), bitsize_int (l * prec));
3686 new_stmt
3687 = gimple_build_assign (make_ssa_name (vectype), t);
3688 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3689 if (j == 0 && l == 0)
3690 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3691 else
3692 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3694 prev_stmt_info = vinfo_for_stmt (new_stmt);
3697 if (ratype)
3699 tree clobber = build_constructor (ratype, NULL);
3700 TREE_THIS_VOLATILE (clobber) = 1;
3701 new_stmt = gimple_build_assign (new_temp, clobber);
3702 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3704 continue;
3706 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3708 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3709 / TYPE_VECTOR_SUBPARTS (rtype));
3710 gcc_assert ((k & (k - 1)) == 0);
3711 if ((j & (k - 1)) == 0)
3712 vec_alloc (ret_ctor_elts, k);
3713 if (ratype)
3715 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3716 for (m = 0; m < o; m++)
3718 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3719 size_int (m), NULL_TREE, NULL_TREE);
3720 new_stmt
3721 = gimple_build_assign (make_ssa_name (rtype), tem);
3722 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3723 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3724 gimple_assign_lhs (new_stmt));
3726 tree clobber = build_constructor (ratype, NULL);
3727 TREE_THIS_VOLATILE (clobber) = 1;
3728 new_stmt = gimple_build_assign (new_temp, clobber);
3729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3731 else
3732 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3733 if ((j & (k - 1)) != k - 1)
3734 continue;
3735 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3736 new_stmt
3737 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3740 if ((unsigned) j == k - 1)
3741 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3742 else
3743 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3745 prev_stmt_info = vinfo_for_stmt (new_stmt);
3746 continue;
3748 else if (ratype)
3750 tree t = build_fold_addr_expr (new_temp);
3751 t = build2 (MEM_REF, vectype, t,
3752 build_int_cst (TREE_TYPE (t), 0));
3753 new_stmt
3754 = gimple_build_assign (make_ssa_name (vec_dest), t);
3755 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3756 tree clobber = build_constructor (ratype, NULL);
3757 TREE_THIS_VOLATILE (clobber) = 1;
3758 vect_finish_stmt_generation (stmt,
3759 gimple_build_assign (new_temp,
3760 clobber), gsi);
3764 if (j == 0)
3765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3766 else
3767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3769 prev_stmt_info = vinfo_for_stmt (new_stmt);
3772 vargs.release ();
3774 /* The call in STMT might prevent it from being removed in DCE.
3775 We cannot, however, remove it here, due to the way the SSA name
3776 it defines is mapped to the new definition. So just replace the
3777 rhs of the statement with something harmless. */
3779 if (slp_node)
3780 return true;
3782 if (scalar_dest)
3784 type = TREE_TYPE (scalar_dest);
3785 if (is_pattern_stmt_p (stmt_info))
3786 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3787 else
3788 lhs = gimple_call_lhs (stmt);
3789 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3791 else
3792 new_stmt = gimple_build_nop ();
3793 set_vinfo_for_stmt (new_stmt, stmt_info);
3794 set_vinfo_for_stmt (stmt, NULL);
3795 STMT_VINFO_STMT (stmt_info) = new_stmt;
3796 gsi_replace (gsi, new_stmt, true);
3797 unlink_stmt_vdef (stmt);
3799 return true;
3803 /* Function vect_gen_widened_results_half
3805 Create a vector stmt whose code is CODE, whose number of arguments is
3806 OP_TYPE, and whose result variable is VEC_DEST; its arguments are
3807 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3808 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3809 needs to be created (DECL is a function-decl of a target-builtin).
3810 STMT is the original scalar stmt that we are vectorizing. */
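/* Illustration: for a WIDEN_MULT_EXPR the caller typically obtains the
   code pair VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR from
   supportable_widening_operation () and invokes this helper once per
   code to build the two halves of the widened result.  */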
3812 static gimple *
3813 vect_gen_widened_results_half (enum tree_code code,
3814 tree decl,
3815 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3816 tree vec_dest, gimple_stmt_iterator *gsi,
3817 gimple *stmt)
3819 gimple *new_stmt;
3820 tree new_temp;
3822 /* Generate half of the widened result: */
3823 if (code == CALL_EXPR)
3825 /* Target specific support */
3826 if (op_type == binary_op)
3827 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3828 else
3829 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3830 new_temp = make_ssa_name (vec_dest, new_stmt);
3831 gimple_call_set_lhs (new_stmt, new_temp);
3833 else
3835 /* Generic support */
3836 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3837 if (op_type != binary_op)
3838 vec_oprnd1 = NULL;
3839 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3840 new_temp = make_ssa_name (vec_dest, new_stmt);
3841 gimple_assign_set_lhs (new_stmt, new_temp);
3843 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3845 return new_stmt;
3849 /* Get vectorized definitions for loop-based vectorization. For the first
3850 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3851 the scalar operand), and for the rest we get a copy with
3852 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3853 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3854 The vectors are collected into VEC_OPRNDS. */
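/* Small worked example: if the caller passes MULTI_STEP_CVT == 1, this
   invocation pushes two vector defs and the recursive call pushes two
   more, so four defs end up in VEC_OPRNDS; in general the function
   collects 2 * (MULTI_STEP_CVT + 1) defs.  */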
3856 static void
3857 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3858 vec<tree> *vec_oprnds, int multi_step_cvt)
3860 tree vec_oprnd;
3862 /* Get first vector operand. */
3863 /* All the vector operands except the very first one (that is, the scalar
3864 operand) are stmt copies. */
3865 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3866 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3867 else
3868 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3870 vec_oprnds->quick_push (vec_oprnd);
3872 /* Get second vector operand. */
3873 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3874 vec_oprnds->quick_push (vec_oprnd);
3876 *oprnd = vec_oprnd;
3878 /* For conversion in multiple steps, continue to get operands
3879 recursively. */
3880 if (multi_step_cvt)
3881 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3885 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3886 For multi-step conversions, store the resulting vectors and call the function
3887 recursively. */
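/* Illustration: with four input vectors and one remaining step, the first
   pass packs pairs (0,1) and (2,3) into two intermediate vectors, and the
   recursive call then packs those two into the single final vector using
   VEC_PACK_TRUNC_EXPR.  */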
3889 static void
3890 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3891 int multi_step_cvt, gimple *stmt,
3892 vec<tree> vec_dsts,
3893 gimple_stmt_iterator *gsi,
3894 slp_tree slp_node, enum tree_code code,
3895 stmt_vec_info *prev_stmt_info)
3897 unsigned int i;
3898 tree vop0, vop1, new_tmp, vec_dest;
3899 gimple *new_stmt;
3900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3902 vec_dest = vec_dsts.pop ();
3904 for (i = 0; i < vec_oprnds->length (); i += 2)
3906 /* Create demotion operation. */
3907 vop0 = (*vec_oprnds)[i];
3908 vop1 = (*vec_oprnds)[i + 1];
3909 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3910 new_tmp = make_ssa_name (vec_dest, new_stmt);
3911 gimple_assign_set_lhs (new_stmt, new_tmp);
3912 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3914 if (multi_step_cvt)
3915 /* Store the resulting vector for next recursive call. */
3916 (*vec_oprnds)[i/2] = new_tmp;
3917 else
3919 /* This is the last step of the conversion sequence. Store the
3920 vectors in SLP_NODE or in vector info of the scalar statement
3921 (or in STMT_VINFO_RELATED_STMT chain). */
3922 if (slp_node)
3923 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3924 else
3926 if (!*prev_stmt_info)
3927 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3928 else
3929 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3931 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3936 /* For multi-step demotion operations we first generate demotion operations
3937 from the source type to the intermediate types, and then combine the
3938 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3939 type. */
3940 if (multi_step_cvt)
3942 /* At each level of recursion we have half of the operands we had at the
3943 previous level. */
3944 vec_oprnds->truncate ((i+1)/2);
3945 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3946 stmt, vec_dsts, gsi, slp_node,
3947 VEC_PACK_TRUNC_EXPR,
3948 prev_stmt_info);
3951 vec_dsts.quick_push (vec_dest);
3955 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3956 and VEC_OPRNDS1 (for binary operations). For multi-step conversions, store
3957 the resulting vectors and call the function recursively. */
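/* Illustration: each input vector produces two result vectors, one per
   half of the widened elements (built with CODE1/DECL1 and CODE2/DECL2),
   so on return VEC_OPRNDS0 is twice as long, with the two halves of each
   original vector stored next to each other.  */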
3959 static void
3960 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3961 vec<tree> *vec_oprnds1,
3962 gimple *stmt, tree vec_dest,
3963 gimple_stmt_iterator *gsi,
3964 enum tree_code code1,
3965 enum tree_code code2, tree decl1,
3966 tree decl2, int op_type)
3968 int i;
3969 tree vop0, vop1, new_tmp1, new_tmp2;
3970 gimple *new_stmt1, *new_stmt2;
3971 vec<tree> vec_tmp = vNULL;
3973 vec_tmp.create (vec_oprnds0->length () * 2);
3974 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3976 if (op_type == binary_op)
3977 vop1 = (*vec_oprnds1)[i];
3978 else
3979 vop1 = NULL_TREE;
3981 /* Generate the two halves of promotion operation. */
3982 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3983 op_type, vec_dest, gsi, stmt);
3984 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3985 op_type, vec_dest, gsi, stmt);
3986 if (is_gimple_call (new_stmt1))
3988 new_tmp1 = gimple_call_lhs (new_stmt1);
3989 new_tmp2 = gimple_call_lhs (new_stmt2);
3991 else
3993 new_tmp1 = gimple_assign_lhs (new_stmt1);
3994 new_tmp2 = gimple_assign_lhs (new_stmt2);
3997 /* Store the results for the next step. */
3998 vec_tmp.quick_push (new_tmp1);
3999 vec_tmp.quick_push (new_tmp2);
4002 vec_oprnds0->release ();
4003 *vec_oprnds0 = vec_tmp;
4007 /* Check if STMT performs a conversion operation, that can be vectorized.
4008 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4009 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4010 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4012 static bool
4013 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4014 gimple **vec_stmt, slp_tree slp_node)
4016 tree vec_dest;
4017 tree scalar_dest;
4018 tree op0, op1 = NULL_TREE;
4019 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4020 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4021 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4022 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4023 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4024 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4025 tree new_temp;
4026 gimple *def_stmt;
4027 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4028 gimple *new_stmt = NULL;
4029 stmt_vec_info prev_stmt_info;
4030 int nunits_in;
4031 int nunits_out;
4032 tree vectype_out, vectype_in;
4033 int ncopies, i, j;
4034 tree lhs_type, rhs_type;
4035 enum { NARROW, NONE, WIDEN } modifier;
4036 vec<tree> vec_oprnds0 = vNULL;
4037 vec<tree> vec_oprnds1 = vNULL;
4038 tree vop0;
4039 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4040 vec_info *vinfo = stmt_info->vinfo;
4041 int multi_step_cvt = 0;
4042 vec<tree> interm_types = vNULL;
4043 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4044 int op_type;
4045 machine_mode rhs_mode;
4046 unsigned short fltsz;
4048 /* Is STMT a vectorizable conversion? */
4050 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4051 return false;
4053 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4054 && ! vec_stmt)
4055 return false;
4057 if (!is_gimple_assign (stmt))
4058 return false;
4060 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4061 return false;
4063 code = gimple_assign_rhs_code (stmt);
4064 if (!CONVERT_EXPR_CODE_P (code)
4065 && code != FIX_TRUNC_EXPR
4066 && code != FLOAT_EXPR
4067 && code != WIDEN_MULT_EXPR
4068 && code != WIDEN_LSHIFT_EXPR)
4069 return false;
4071 op_type = TREE_CODE_LENGTH (code);
4073 /* Check types of lhs and rhs. */
4074 scalar_dest = gimple_assign_lhs (stmt);
4075 lhs_type = TREE_TYPE (scalar_dest);
4076 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4078 op0 = gimple_assign_rhs1 (stmt);
4079 rhs_type = TREE_TYPE (op0);
4081 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4082 && !((INTEGRAL_TYPE_P (lhs_type)
4083 && INTEGRAL_TYPE_P (rhs_type))
4084 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4085 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4086 return false;
4088 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4089 && ((INTEGRAL_TYPE_P (lhs_type)
4090 && (TYPE_PRECISION (lhs_type)
4091 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4092 || (INTEGRAL_TYPE_P (rhs_type)
4093 && (TYPE_PRECISION (rhs_type)
4094 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4096 if (dump_enabled_p ())
4097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4098 "type conversion to/from bit-precision unsupported."
4099 "\n");
4100 return false;
4103 /* Check the operands of the operation. */
4104 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4106 if (dump_enabled_p ())
4107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4108 "use not simple.\n");
4109 return false;
4111 if (op_type == binary_op)
4113 bool ok;
4115 op1 = gimple_assign_rhs2 (stmt);
4116 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4117 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4118 OP1. */
4119 if (CONSTANT_CLASS_P (op0))
4120 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4121 else
4122 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4124 if (!ok)
4126 if (dump_enabled_p ())
4127 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4128 "use not simple.\n");
4129 return false;
4133 /* If op0 is an external or constant def, use a vector type of
4134 the same size as the output vector type. */
4135 if (!vectype_in)
4136 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4137 if (vec_stmt)
4138 gcc_assert (vectype_in);
4139 if (!vectype_in)
4141 if (dump_enabled_p ())
4143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4144 "no vectype for scalar type ");
4145 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4146 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4149 return false;
4152 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4153 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4155 if (dump_enabled_p ())
4157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4158 "can't convert between boolean and non "
4159 "boolean vectors");
4160 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4161 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4164 return false;
4167 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4168 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4169 if (nunits_in < nunits_out)
4170 modifier = NARROW;
4171 else if (nunits_out == nunits_in)
4172 modifier = NONE;
4173 else
4174 modifier = WIDEN;
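/* For example, assuming 128-bit vectors: a char -> int conversion has
   vectype_in V16QI and vectype_out V4SI, so nunits_in (16) > nunits_out (4)
   and the modifier is WIDEN; the reverse conversion is NARROW, while an
   int -> float conversion (V4SI -> V4SF) is NONE.  */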
4176 /* Multiple types in SLP are handled by creating the appropriate number of
4177 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4178 case of SLP. */
4179 if (slp_node)
4180 ncopies = 1;
4181 else if (modifier == NARROW)
4182 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4183 else
4184 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4186 /* Sanity check: make sure that at least one copy of the vectorized stmt
4187 needs to be generated. */
4188 gcc_assert (ncopies >= 1);
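/* Example, assuming 128-bit vectors: with VF == 8 an int -> short
   conversion has nunits_in == 4 (V4SI) and nunits_out == 8 (V8HI), so the
   modifier is NARROW and ncopies == VF / nunits_out == 1, that single
   vector stmt consuming two V4SI inputs.  */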
4190 /* Supportable by target? */
4191 switch (modifier)
4193 case NONE:
4194 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4195 return false;
4196 if (supportable_convert_operation (code, vectype_out, vectype_in,
4197 &decl1, &code1))
4198 break;
4199 /* FALLTHRU */
4200 unsupported:
4201 if (dump_enabled_p ())
4202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4203 "conversion not supported by target.\n");
4204 return false;
4206 case WIDEN:
4207 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4208 &code1, &code2, &multi_step_cvt,
4209 &interm_types))
4211 /* Binary widening operation can only be supported directly by the
4212 architecture. */
4213 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4214 break;
4217 if (code != FLOAT_EXPR
4218 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4219 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4220 goto unsupported;
4222 rhs_mode = TYPE_MODE (rhs_type);
4223 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4224 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4225 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4226 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4228 cvt_type
4229 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4230 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4231 if (cvt_type == NULL_TREE)
4232 goto unsupported;
4234 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4236 if (!supportable_convert_operation (code, vectype_out,
4237 cvt_type, &decl1, &codecvt1))
4238 goto unsupported;
4240 else if (!supportable_widening_operation (code, stmt, vectype_out,
4241 cvt_type, &codecvt1,
4242 &codecvt2, &multi_step_cvt,
4243 &interm_types))
4244 continue;
4245 else
4246 gcc_assert (multi_step_cvt == 0);
4248 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4249 vectype_in, &code1, &code2,
4250 &multi_step_cvt, &interm_types))
4251 break;
4254 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4255 goto unsupported;
4257 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4258 codecvt2 = ERROR_MARK;
4259 else
4261 multi_step_cvt++;
4262 interm_types.safe_push (cvt_type);
4263 cvt_type = NULL_TREE;
4265 break;
4267 case NARROW:
4268 gcc_assert (op_type == unary_op);
4269 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4270 &code1, &multi_step_cvt,
4271 &interm_types))
4272 break;
4274 if (code != FIX_TRUNC_EXPR
4275 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4276 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4277 goto unsupported;
4279 rhs_mode = TYPE_MODE (rhs_type);
4280 cvt_type
4281 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4282 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4283 if (cvt_type == NULL_TREE)
4284 goto unsupported;
4285 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4286 &decl1, &codecvt1))
4287 goto unsupported;
4288 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4289 &code1, &multi_step_cvt,
4290 &interm_types))
4291 break;
4292 goto unsupported;
4294 default:
4295 gcc_unreachable ();
4298 if (!vec_stmt) /* transformation not required. */
4300 if (dump_enabled_p ())
4301 dump_printf_loc (MSG_NOTE, vect_location,
4302 "=== vectorizable_conversion ===\n");
4303 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4305 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4306 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4308 else if (modifier == NARROW)
4310 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4311 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4313 else
4315 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4316 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4318 interm_types.release ();
4319 return true;
4322 /** Transform. **/
4323 if (dump_enabled_p ())
4324 dump_printf_loc (MSG_NOTE, vect_location,
4325 "transform conversion. ncopies = %d.\n", ncopies);
4327 if (op_type == binary_op)
4329 if (CONSTANT_CLASS_P (op0))
4330 op0 = fold_convert (TREE_TYPE (op1), op0);
4331 else if (CONSTANT_CLASS_P (op1))
4332 op1 = fold_convert (TREE_TYPE (op0), op1);
4335 /* In case of multi-step conversion, we first generate conversion operations
4336 to the intermediate types, and then from those types to the final one.
4337 We create vector destinations for the intermediate types (TYPES) received
4338 from supportable_*_operation, and store them in the correct order
4339 for future use in vect_create_vectorized_*_stmts (). */
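/* The code below therefore creates MULTI_STEP_CVT + 1 destination
   variables: one for the final (or CVT_TYPE) result and one for each
   intermediate type.  */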
4340 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4341 vec_dest = vect_create_destination_var (scalar_dest,
4342 (cvt_type && modifier == WIDEN)
4343 ? cvt_type : vectype_out);
4344 vec_dsts.quick_push (vec_dest);
4346 if (multi_step_cvt)
4348 for (i = interm_types.length () - 1;
4349 interm_types.iterate (i, &intermediate_type); i--)
4351 vec_dest = vect_create_destination_var (scalar_dest,
4352 intermediate_type);
4353 vec_dsts.quick_push (vec_dest);
4357 if (cvt_type)
4358 vec_dest = vect_create_destination_var (scalar_dest,
4359 modifier == WIDEN
4360 ? vectype_out : cvt_type);
4362 if (!slp_node)
4364 if (modifier == WIDEN)
4366 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4367 if (op_type == binary_op)
4368 vec_oprnds1.create (1);
4370 else if (modifier == NARROW)
4371 vec_oprnds0.create (
4372 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4374 else if (code == WIDEN_LSHIFT_EXPR)
4375 vec_oprnds1.create (slp_node->vec_stmts_size);
4377 last_oprnd = op0;
4378 prev_stmt_info = NULL;
4379 switch (modifier)
4381 case NONE:
4382 for (j = 0; j < ncopies; j++)
4384 if (j == 0)
4385 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4386 -1);
4387 else
4388 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4390 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4392 /* Arguments are ready, create the new vector stmt. */
4393 if (code1 == CALL_EXPR)
4395 new_stmt = gimple_build_call (decl1, 1, vop0);
4396 new_temp = make_ssa_name (vec_dest, new_stmt);
4397 gimple_call_set_lhs (new_stmt, new_temp);
4399 else
4401 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4402 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4403 new_temp = make_ssa_name (vec_dest, new_stmt);
4404 gimple_assign_set_lhs (new_stmt, new_temp);
4407 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4408 if (slp_node)
4409 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4410 else
4412 if (!prev_stmt_info)
4413 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4414 else
4415 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4416 prev_stmt_info = vinfo_for_stmt (new_stmt);
4420 break;
4422 case WIDEN:
4423 /* In case the vectorization factor (VF) is bigger than the number
4424 of elements that we can fit in a vectype (nunits), we have to
4425 generate more than one vector stmt, i.e., we need to "unroll"
4426 the vector stmt by a factor VF/nunits. */
4427 for (j = 0; j < ncopies; j++)
4429 /* Handle uses. */
4430 if (j == 0)
4432 if (slp_node)
4434 if (code == WIDEN_LSHIFT_EXPR)
4436 unsigned int k;
4438 vec_oprnd1 = op1;
4439 /* Store vec_oprnd1 for every vector stmt to be created
4440 for SLP_NODE. We check during the analysis that all
4441 the shift arguments are the same. */
4442 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4443 vec_oprnds1.quick_push (vec_oprnd1);
4445 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4446 slp_node, -1);
4448 else
4449 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4450 &vec_oprnds1, slp_node, -1);
4452 else
4454 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4455 vec_oprnds0.quick_push (vec_oprnd0);
4456 if (op_type == binary_op)
4458 if (code == WIDEN_LSHIFT_EXPR)
4459 vec_oprnd1 = op1;
4460 else
4461 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4462 vec_oprnds1.quick_push (vec_oprnd1);
4466 else
4468 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4469 vec_oprnds0.truncate (0);
4470 vec_oprnds0.quick_push (vec_oprnd0);
4471 if (op_type == binary_op)
4473 if (code == WIDEN_LSHIFT_EXPR)
4474 vec_oprnd1 = op1;
4475 else
4476 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4477 vec_oprnd1);
4478 vec_oprnds1.truncate (0);
4479 vec_oprnds1.quick_push (vec_oprnd1);
4483 /* Arguments are ready. Create the new vector stmts. */
4484 for (i = multi_step_cvt; i >= 0; i--)
4486 tree this_dest = vec_dsts[i];
4487 enum tree_code c1 = code1, c2 = code2;
4488 if (i == 0 && codecvt2 != ERROR_MARK)
4490 c1 = codecvt1;
4491 c2 = codecvt2;
4493 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4494 &vec_oprnds1,
4495 stmt, this_dest, gsi,
4496 c1, c2, decl1, decl2,
4497 op_type);
4500 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4502 if (cvt_type)
4504 if (codecvt1 == CALL_EXPR)
4506 new_stmt = gimple_build_call (decl1, 1, vop0);
4507 new_temp = make_ssa_name (vec_dest, new_stmt);
4508 gimple_call_set_lhs (new_stmt, new_temp);
4510 else
4512 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4513 new_temp = make_ssa_name (vec_dest);
4514 new_stmt = gimple_build_assign (new_temp, codecvt1,
4515 vop0);
4518 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4520 else
4521 new_stmt = SSA_NAME_DEF_STMT (vop0);
4523 if (slp_node)
4524 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4525 else
4527 if (!prev_stmt_info)
4528 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4529 else
4530 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4531 prev_stmt_info = vinfo_for_stmt (new_stmt);
4536 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4537 break;
4539 case NARROW:
4540 /* In case the vectorization factor (VF) is bigger than the number
4541 of elements that we can fit in a vectype (nunits), we have to
4542 generate more than one vector stmt, i.e., we need to "unroll"
4543 the vector stmt by a factor VF/nunits. */
4544 for (j = 0; j < ncopies; j++)
4546 /* Handle uses. */
4547 if (slp_node)
4548 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4549 slp_node, -1);
4550 else
4552 vec_oprnds0.truncate (0);
4553 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4554 vect_pow2 (multi_step_cvt) - 1);
4557 /* Arguments are ready. Create the new vector stmts. */
4558 if (cvt_type)
4559 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4561 if (codecvt1 == CALL_EXPR)
4563 new_stmt = gimple_build_call (decl1, 1, vop0);
4564 new_temp = make_ssa_name (vec_dest, new_stmt);
4565 gimple_call_set_lhs (new_stmt, new_temp);
4567 else
4569 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4570 new_temp = make_ssa_name (vec_dest);
4571 new_stmt = gimple_build_assign (new_temp, codecvt1,
4572 vop0);
4575 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4576 vec_oprnds0[i] = new_temp;
4579 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4580 stmt, vec_dsts, gsi,
4581 slp_node, code1,
4582 &prev_stmt_info);
4585 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4586 break;
4589 vec_oprnds0.release ();
4590 vec_oprnds1.release ();
4591 interm_types.release ();
4593 return true;
4597 /* Function vectorizable_assignment.
4599 Check if STMT performs an assignment (copy) that can be vectorized.
4600 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4601 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4602 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4604 static bool
4605 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4606 gimple **vec_stmt, slp_tree slp_node)
4608 tree vec_dest;
4609 tree scalar_dest;
4610 tree op;
4611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4612 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4613 tree new_temp;
4614 gimple *def_stmt;
4615 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4616 int ncopies;
4617 int i, j;
4618 vec<tree> vec_oprnds = vNULL;
4619 tree vop;
4620 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4621 vec_info *vinfo = stmt_info->vinfo;
4622 gimple *new_stmt = NULL;
4623 stmt_vec_info prev_stmt_info = NULL;
4624 enum tree_code code;
4625 tree vectype_in;
4627 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4628 return false;
4630 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4631 && ! vec_stmt)
4632 return false;
4634 /* Is vectorizable assignment? */
4635 if (!is_gimple_assign (stmt))
4636 return false;
4638 scalar_dest = gimple_assign_lhs (stmt);
4639 if (TREE_CODE (scalar_dest) != SSA_NAME)
4640 return false;
4642 code = gimple_assign_rhs_code (stmt);
4643 if (gimple_assign_single_p (stmt)
4644 || code == PAREN_EXPR
4645 || CONVERT_EXPR_CODE_P (code))
4646 op = gimple_assign_rhs1 (stmt);
4647 else
4648 return false;
4650 if (code == VIEW_CONVERT_EXPR)
4651 op = TREE_OPERAND (op, 0);
4653 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4654 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4656 /* Multiple types in SLP are handled by creating the appropriate number of
4657 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4658 case of SLP. */
4659 if (slp_node)
4660 ncopies = 1;
4661 else
4662 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4664 gcc_assert (ncopies >= 1);
4666 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4668 if (dump_enabled_p ())
4669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4670 "use not simple.\n");
4671 return false;
4674 /* We can handle NOP_EXPR conversions that do not change the number
4675 of elements or the vector size. */
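/* E.g. a conversion between int and unsigned int lanes (both V4SI,
   assuming 128-bit vectors) qualifies, whereas int -> short changes the
   lane count or vector size and is rejected here; such cases are handled
   as real conversions in vectorizable_conversion instead.  */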
4676 if ((CONVERT_EXPR_CODE_P (code)
4677 || code == VIEW_CONVERT_EXPR)
4678 && (!vectype_in
4679 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4680 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4681 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4682 return false;
4684 /* We do not handle bit-precision changes. */
4685 if ((CONVERT_EXPR_CODE_P (code)
4686 || code == VIEW_CONVERT_EXPR)
4687 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4688 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4689 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4690 || ((TYPE_PRECISION (TREE_TYPE (op))
4691 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4692 /* But a conversion that does not change the bit-pattern is ok. */
4693 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4694 > TYPE_PRECISION (TREE_TYPE (op)))
4695 && TYPE_UNSIGNED (TREE_TYPE (op)))
4696 /* Conversion between boolean types of different sizes is
4697 a simple assignment in case their vectypes are the same
4698 boolean vectors. */
4699 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4700 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4702 if (dump_enabled_p ())
4703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4704 "type conversion to/from bit-precision "
4705 "unsupported.\n");
4706 return false;
4709 if (!vec_stmt) /* transformation not required. */
4711 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_NOTE, vect_location,
4714 "=== vectorizable_assignment ===\n");
4715 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4716 return true;
4719 /** Transform. **/
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4723 /* Handle def. */
4724 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4726 /* Handle use. */
4727 for (j = 0; j < ncopies; j++)
4729 /* Handle uses. */
4730 if (j == 0)
4731 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4732 else
4733 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4735 /* Arguments are ready. Create the new vector stmt. */
4736 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4738 if (CONVERT_EXPR_CODE_P (code)
4739 || code == VIEW_CONVERT_EXPR)
4740 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4741 new_stmt = gimple_build_assign (vec_dest, vop);
4742 new_temp = make_ssa_name (vec_dest, new_stmt);
4743 gimple_assign_set_lhs (new_stmt, new_temp);
4744 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4745 if (slp_node)
4746 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4749 if (slp_node)
4750 continue;
4752 if (j == 0)
4753 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4754 else
4755 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4757 prev_stmt_info = vinfo_for_stmt (new_stmt);
4760 vec_oprnds.release ();
4761 return true;
4765 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4766 either as shift by a scalar or by a vector. */
4768 bool
4769 vect_supportable_shift (enum tree_code code, tree scalar_type)
4772 machine_mode vec_mode;
4773 optab optab;
4774 int icode;
4775 tree vectype;
4777 vectype = get_vectype_for_scalar_type (scalar_type);
4778 if (!vectype)
4779 return false;
4781 optab = optab_for_tree_code (code, vectype, optab_scalar);
4782 if (!optab
4783 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4785 optab = optab_for_tree_code (code, vectype, optab_vector);
4786 if (!optab
4787 || (optab_handler (optab, TYPE_MODE (vectype))
4788 == CODE_FOR_nothing))
4789 return false;
4792 vec_mode = TYPE_MODE (vectype);
4793 icode = (int) optab_handler (optab, vec_mode);
4794 if (icode == CODE_FOR_nothing)
4795 return false;
4797 return true;
4801 /* Function vectorizable_shift.
4803 Check if STMT performs a shift operation that can be vectorized.
4804 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4805 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4806 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4808 static bool
4809 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4810 gimple **vec_stmt, slp_tree slp_node)
4812 tree vec_dest;
4813 tree scalar_dest;
4814 tree op0, op1 = NULL;
4815 tree vec_oprnd1 = NULL_TREE;
4816 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4817 tree vectype;
4818 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4819 enum tree_code code;
4820 machine_mode vec_mode;
4821 tree new_temp;
4822 optab optab;
4823 int icode;
4824 machine_mode optab_op2_mode;
4825 gimple *def_stmt;
4826 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4827 gimple *new_stmt = NULL;
4828 stmt_vec_info prev_stmt_info;
4829 int nunits_in;
4830 int nunits_out;
4831 tree vectype_out;
4832 tree op1_vectype;
4833 int ncopies;
4834 int j, i;
4835 vec<tree> vec_oprnds0 = vNULL;
4836 vec<tree> vec_oprnds1 = vNULL;
4837 tree vop0, vop1;
4838 unsigned int k;
4839 bool scalar_shift_arg = true;
4840 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4841 vec_info *vinfo = stmt_info->vinfo;
4842 int vf;
4844 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4845 return false;
4847 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4848 && ! vec_stmt)
4849 return false;
4851 /* Is STMT a vectorizable binary/unary operation? */
4852 if (!is_gimple_assign (stmt))
4853 return false;
4855 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4856 return false;
4858 code = gimple_assign_rhs_code (stmt);
4860 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4861 || code == RROTATE_EXPR))
4862 return false;
4864 scalar_dest = gimple_assign_lhs (stmt);
4865 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4866 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4867 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4869 if (dump_enabled_p ())
4870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4871 "bit-precision shifts not supported.\n");
4872 return false;
4875 op0 = gimple_assign_rhs1 (stmt);
4876 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4878 if (dump_enabled_p ())
4879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4880 "use not simple.\n");
4881 return false;
4883 /* If op0 is an external or constant def, use a vector type with
4884 the same size as the output vector type. */
4885 if (!vectype)
4886 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4887 if (vec_stmt)
4888 gcc_assert (vectype);
4889 if (!vectype)
4891 if (dump_enabled_p ())
4892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4893 "no vectype for scalar type\n");
4894 return false;
4897 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4898 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4899 if (nunits_out != nunits_in)
4900 return false;
4902 op1 = gimple_assign_rhs2 (stmt);
4903 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4905 if (dump_enabled_p ())
4906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4907 "use not simple.\n");
4908 return false;
4911 if (loop_vinfo)
4912 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4913 else
4914 vf = 1;
4916 /* Multiple types in SLP are handled by creating the appropriate number of
4917 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4918 case of SLP. */
4919 if (slp_node)
4920 ncopies = 1;
4921 else
4922 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4924 gcc_assert (ncopies >= 1);
4926 /* Determine whether the shift amount is a vector, or scalar. If the
4927 shift/rotate amount is a vector, use the vector/vector shift optabs. */
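/* E.g. x[i] << 3, or x[i] << n with loop-invariant n, can use the scalar
   form of the shift amount, whereas x[i] << y[i] needs a vector shift
   amount and hence the vector/vector optab.  */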
4929 if ((dt[1] == vect_internal_def
4930 || dt[1] == vect_induction_def)
4931 && !slp_node)
4932 scalar_shift_arg = false;
4933 else if (dt[1] == vect_constant_def
4934 || dt[1] == vect_external_def
4935 || dt[1] == vect_internal_def)
4937 /* In SLP, we need to check whether the shift count is the same for
4938 all statements; in loops, if it is a constant or invariant, it is
4939 always a scalar shift. */
4940 if (slp_node)
4942 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4943 gimple *slpstmt;
4945 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4946 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4947 scalar_shift_arg = false;
4950 /* If the shift amount is computed by a pattern stmt we cannot
4951 use the scalar amount directly, so give up and use a vector
4952 shift. */
4953 if (dt[1] == vect_internal_def)
4955 gimple *def = SSA_NAME_DEF_STMT (op1);
4956 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4957 scalar_shift_arg = false;
4960 else
4962 if (dump_enabled_p ())
4963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4964 "operand mode requires invariant argument.\n");
4965 return false;
4968 /* Vector shifted by vector. */
4969 if (!scalar_shift_arg)
4971 optab = optab_for_tree_code (code, vectype, optab_vector);
4972 if (dump_enabled_p ())
4973 dump_printf_loc (MSG_NOTE, vect_location,
4974 "vector/vector shift/rotate found.\n");
4976 if (!op1_vectype)
4977 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4978 if (op1_vectype == NULL_TREE
4979 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4983 "unusable type for last operand in"
4984 " vector/vector shift/rotate.\n");
4985 return false;
4988 /* See if the machine has a vector shifted by scalar insn and, if not,
4989 then see if it has a vector shifted by vector insn. */
4990 else
4992 optab = optab_for_tree_code (code, vectype, optab_scalar);
4993 if (optab
4994 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_NOTE, vect_location,
4998 "vector/scalar shift/rotate found.\n");
5000 else
5002 optab = optab_for_tree_code (code, vectype, optab_vector);
5003 if (optab
5004 && (optab_handler (optab, TYPE_MODE (vectype))
5005 != CODE_FOR_nothing))
5007 scalar_shift_arg = false;
5009 if (dump_enabled_p ())
5010 dump_printf_loc (MSG_NOTE, vect_location,
5011 "vector/vector shift/rotate found.\n");
5013 /* Unlike the other binary operators, shifts/rotates can have an
5014 rhs of type int rather than of the same type as the lhs, so
5015 make sure the scalar is of the right type if we are dealing
5016 with vectors of long long/long/short/char. */
5017 if (dt[1] == vect_constant_def)
5018 op1 = fold_convert (TREE_TYPE (vectype), op1);
5019 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5020 TREE_TYPE (op1)))
5022 if (slp_node
5023 && TYPE_MODE (TREE_TYPE (vectype))
5024 != TYPE_MODE (TREE_TYPE (op1)))
5026 if (dump_enabled_p ())
5027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5028 "unusable type for last operand in"
5029 " vector/vector shift/rotate.\n");
5030 return false;
5032 if (vec_stmt && !slp_node)
5034 op1 = fold_convert (TREE_TYPE (vectype), op1);
5035 op1 = vect_init_vector (stmt, op1,
5036 TREE_TYPE (vectype), NULL);
5043 /* Supportable by target? */
5044 if (!optab)
5046 if (dump_enabled_p ())
5047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5048 "no optab.\n");
5049 return false;
5051 vec_mode = TYPE_MODE (vectype);
5052 icode = (int) optab_handler (optab, vec_mode);
5053 if (icode == CODE_FOR_nothing)
5055 if (dump_enabled_p ())
5056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5057 "op not supported by target.\n");
5058 /* Check only during analysis. */
5059 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5060 || (vf < vect_min_worthwhile_factor (code)
5061 && !vec_stmt))
5062 return false;
5063 if (dump_enabled_p ())
5064 dump_printf_loc (MSG_NOTE, vect_location,
5065 "proceeding using word mode.\n");
5068 /* Worthwhile without SIMD support? Check only during analysis. */
5069 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5070 && vf < vect_min_worthwhile_factor (code)
5071 && !vec_stmt)
5073 if (dump_enabled_p ())
5074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5075 "not worthwhile without SIMD support.\n");
5076 return false;
5079 if (!vec_stmt) /* transformation not required. */
5081 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5082 if (dump_enabled_p ())
5083 dump_printf_loc (MSG_NOTE, vect_location,
5084 "=== vectorizable_shift ===\n");
5085 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5086 return true;
5089 /** Transform. **/
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_NOTE, vect_location,
5093 "transform binary/unary operation.\n");
5095 /* Handle def. */
5096 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5098 prev_stmt_info = NULL;
5099 for (j = 0; j < ncopies; j++)
5101 /* Handle uses. */
5102 if (j == 0)
5104 if (scalar_shift_arg)
5106 /* Vector shl and shr insn patterns can be defined with scalar
5107 operand 2 (shift operand). In this case, use constant or loop
5108 invariant op1 directly, without extending it to vector mode
5109 first. */
5110 optab_op2_mode = insn_data[icode].operand[2].mode;
5111 if (!VECTOR_MODE_P (optab_op2_mode))
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_NOTE, vect_location,
5115 "operand 1 using scalar mode.\n");
5116 vec_oprnd1 = op1;
5117 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5118 vec_oprnds1.quick_push (vec_oprnd1);
5119 if (slp_node)
5121 /* Store vec_oprnd1 for every vector stmt to be created
5122 for SLP_NODE. We check during the analysis that all
5123 the shift arguments are the same.
5124 TODO: Allow different constants for different vector
5125 stmts generated for an SLP instance. */
5126 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5127 vec_oprnds1.quick_push (vec_oprnd1);
5132 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5133 (a special case for certain kinds of vector shifts); otherwise,
5134 operand 1 should be of a vector type (the usual case). */
5135 if (vec_oprnd1)
5136 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5137 slp_node, -1);
5138 else
5139 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5140 slp_node, -1);
5142 else
5143 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5145 /* Arguments are ready. Create the new vector stmt. */
5146 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5148 vop1 = vec_oprnds1[i];
5149 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5150 new_temp = make_ssa_name (vec_dest, new_stmt);
5151 gimple_assign_set_lhs (new_stmt, new_temp);
5152 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5153 if (slp_node)
5154 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5157 if (slp_node)
5158 continue;
5160 if (j == 0)
5161 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5162 else
5163 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5164 prev_stmt_info = vinfo_for_stmt (new_stmt);
5167 vec_oprnds0.release ();
5168 vec_oprnds1.release ();
5170 return true;
5174 /* Function vectorizable_operation.
5176 Check if STMT performs a binary, unary or ternary operation that can
5177 be vectorized.
5178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5179 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5182 static bool
5183 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5184 gimple **vec_stmt, slp_tree slp_node)
5186 tree vec_dest;
5187 tree scalar_dest;
5188 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5190 tree vectype;
5191 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5192 enum tree_code code;
5193 machine_mode vec_mode;
5194 tree new_temp;
5195 int op_type;
5196 optab optab;
5197 bool target_support_p;
5198 gimple *def_stmt;
5199 enum vect_def_type dt[3]
5200 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5201 gimple *new_stmt = NULL;
5202 stmt_vec_info prev_stmt_info;
5203 int nunits_in;
5204 int nunits_out;
5205 tree vectype_out;
5206 int ncopies;
5207 int j, i;
5208 vec<tree> vec_oprnds0 = vNULL;
5209 vec<tree> vec_oprnds1 = vNULL;
5210 vec<tree> vec_oprnds2 = vNULL;
5211 tree vop0, vop1, vop2;
5212 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5213 vec_info *vinfo = stmt_info->vinfo;
5214 int vf;
5216 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5217 return false;
5219 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5220 && ! vec_stmt)
5221 return false;
5223 /* Is STMT a vectorizable binary/unary operation? */
5224 if (!is_gimple_assign (stmt))
5225 return false;
5227 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5228 return false;
5230 code = gimple_assign_rhs_code (stmt);
5232 /* For pointer addition, we should use the normal plus for
5233 the vector addition. */
5234 if (code == POINTER_PLUS_EXPR)
5235 code = PLUS_EXPR;
5237 /* Support only unary, binary and ternary operations. */
5238 op_type = TREE_CODE_LENGTH (code);
5239 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5241 if (dump_enabled_p ())
5242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5243 "num. args = %d (not unary/binary/ternary op).\n",
5244 op_type);
5245 return false;
5248 scalar_dest = gimple_assign_lhs (stmt);
5249 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5251 /* Most operations cannot handle bit-precision types without extra
5252 truncations. */
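/* E.g. arithmetic on a bit-field type such as 'int x : 3' would need an
   extra truncation after every vector operation to stay within 3 bits;
   only the bitwise AND/IOR/XOR cases excepted below are safe without
   one.  */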
5253 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5254 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5255 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5256 /* Exception are bitwise binary operations. */
5257 && code != BIT_IOR_EXPR
5258 && code != BIT_XOR_EXPR
5259 && code != BIT_AND_EXPR)
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5263 "bit-precision arithmetic not supported.\n");
5264 return false;
5267 op0 = gimple_assign_rhs1 (stmt);
5268 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5270 if (dump_enabled_p ())
5271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5272 "use not simple.\n");
5273 return false;
5275 /* If op0 is an external or constant def, use a vector type with
5276 the same size as the output vector type. */
5277 if (!vectype)
5279 /* For a boolean type we cannot determine the vectype from an
5280 invariant value (we don't know whether it is a vector
5281 of booleans or a vector of integers). We use the output
5282 vectype because operations on booleans don't change the
5283 type. */
5284 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5286 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5288 if (dump_enabled_p ())
5289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5290 "not supported operation on bool value.\n");
5291 return false;
5293 vectype = vectype_out;
5295 else
5296 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5298 if (vec_stmt)
5299 gcc_assert (vectype);
5300 if (!vectype)
5302 if (dump_enabled_p ())
5304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5305 "no vectype for scalar type ");
5306 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5307 TREE_TYPE (op0));
5308 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5311 return false;
5314 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5315 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5316 if (nunits_out != nunits_in)
5317 return false;
5319 if (op_type == binary_op || op_type == ternary_op)
5321 op1 = gimple_assign_rhs2 (stmt);
5322 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5324 if (dump_enabled_p ())
5325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5326 "use not simple.\n");
5327 return false;
5330 if (op_type == ternary_op)
5332 op2 = gimple_assign_rhs3 (stmt);
5333 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5335 if (dump_enabled_p ())
5336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5337 "use not simple.\n");
5338 return false;
5342 if (loop_vinfo)
5343 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5344 else
5345 vf = 1;
5347 /* Multiple types in SLP are handled by creating the appropriate number of
5348 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5349 case of SLP. */
5350 if (slp_node)
5351 ncopies = 1;
5352 else
5353 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5355 gcc_assert (ncopies >= 1);
5357 /* Shifts are handled in vectorizable_shift (). */
5358 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5359 || code == RROTATE_EXPR)
5360 return false;
5362 /* Supportable by target? */
5364 vec_mode = TYPE_MODE (vectype);
5365 if (code == MULT_HIGHPART_EXPR)
5366 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5367 else
5369 optab = optab_for_tree_code (code, vectype, optab_default);
5370 if (!optab)
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5374 "no optab.\n");
5375 return false;
5377 target_support_p = (optab_handler (optab, vec_mode)
5378 != CODE_FOR_nothing);
5381 if (!target_support_p)
5383 if (dump_enabled_p ())
5384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5385 "op not supported by target.\n");
5386 /* Check only during analysis. */
5387 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5388 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5389 return false;
5390 if (dump_enabled_p ())
5391 dump_printf_loc (MSG_NOTE, vect_location,
5392 "proceeding using word mode.\n");
5395 /* Worthwhile without SIMD support? Check only during analysis. */
5396 if (!VECTOR_MODE_P (vec_mode)
5397 && !vec_stmt
5398 && vf < vect_min_worthwhile_factor (code))
5400 if (dump_enabled_p ())
5401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5402 "not worthwhile without SIMD support.\n");
5403 return false;
5406 if (!vec_stmt) /* transformation not required. */
5408 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5409 if (dump_enabled_p ())
5410 dump_printf_loc (MSG_NOTE, vect_location,
5411 "=== vectorizable_operation ===\n");
5412 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5413 return true;
5416 /** Transform. **/
5418 if (dump_enabled_p ())
5419 dump_printf_loc (MSG_NOTE, vect_location,
5420 "transform binary/unary operation.\n");
5422 /* Handle def. */
5423 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5425 /* In case the vectorization factor (VF) is bigger than the number
5426 of elements that we can fit in a vectype (nunits), we have to generate
5427 more than one vector stmt, i.e., we need to "unroll" the
5428 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5429 from one copy of the vector stmt to the next, in the field
5430 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5431 stages to find the correct vector defs to be used when vectorizing
5432 stmts that use the defs of the current stmt. The example below
5433 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5434 we need to create 4 vectorized stmts):
5436 before vectorization:
5437 RELATED_STMT VEC_STMT
5438 S1: x = memref - -
5439 S2: z = x + 1 - -
5441 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5442 there):
5443 RELATED_STMT VEC_STMT
5444 VS1_0: vx0 = memref0 VS1_1 -
5445 VS1_1: vx1 = memref1 VS1_2 -
5446 VS1_2: vx2 = memref2 VS1_3 -
5447 VS1_3: vx3 = memref3 - -
5448 S1: x = load - VS1_0
5449 S2: z = x + 1 - -
5451 step2: vectorize stmt S2 (done here):
5452 To vectorize stmt S2 we first need to find the relevant vector
5453 def for the first operand 'x'. This is, as usual, obtained from
5454 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5455 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5456 relevant vector def 'vx0'. Having found 'vx0' we can generate
5457 the vector stmt VS2_0, and as usual, record it in the
5458 STMT_VINFO_VEC_STMT of stmt S2.
5459 When creating the second copy (VS2_1), we obtain the relevant vector
5460 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5461 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5462 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5463 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5464 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5465 chain of stmts and pointers:
5466 RELATED_STMT VEC_STMT
5467 VS1_0: vx0 = memref0 VS1_1 -
5468 VS1_1: vx1 = memref1 VS1_2 -
5469 VS1_2: vx2 = memref2 VS1_3 -
5470 VS1_3: vx3 = memref3 - -
5471 S1: x = load - VS1_0
5472 VS2_0: vz0 = vx0 + v1 VS2_1 -
5473 VS2_1: vz1 = vx1 + v1 VS2_2 -
5474 VS2_2: vz2 = vx2 + v1 VS2_3 -
5475 VS2_3: vz3 = vx3 + v1 - -
5476 S2: z = x + 1 - VS2_0 */
5478 prev_stmt_info = NULL;
5479 for (j = 0; j < ncopies; j++)
5481 /* Handle uses. */
5482 if (j == 0)
5484 if (op_type == binary_op || op_type == ternary_op)
5485 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5486 slp_node, -1);
5487 else
5488 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5489 slp_node, -1);
5490 if (op_type == ternary_op)
5491 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5492 slp_node, -1);
5494 else
5496 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5497 if (op_type == ternary_op)
5499 tree vec_oprnd = vec_oprnds2.pop ();
5500 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5501 vec_oprnd));
5505 /* Arguments are ready. Create the new vector stmt. */
5506 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5508 vop1 = ((op_type == binary_op || op_type == ternary_op)
5509 ? vec_oprnds1[i] : NULL_TREE);
5510 vop2 = ((op_type == ternary_op)
5511 ? vec_oprnds2[i] : NULL_TREE);
5512 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5513 new_temp = make_ssa_name (vec_dest, new_stmt);
5514 gimple_assign_set_lhs (new_stmt, new_temp);
5515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5516 if (slp_node)
5517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5520 if (slp_node)
5521 continue;
5523 if (j == 0)
5524 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5525 else
5526 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5527 prev_stmt_info = vinfo_for_stmt (new_stmt);
5530 vec_oprnds0.release ();
5531 vec_oprnds1.release ();
5532 vec_oprnds2.release ();
5534 return true;
5537 /* A helper function to ensure data reference DR's base alignment
5538 for STMT_INFO. */
5540 static void
5541 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5543 if (!dr->aux)
5544 return;
5546 if (DR_VECT_AUX (dr)->base_misaligned)
5548 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5549 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5551 if (decl_in_symtab_p (base_decl))
5552 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5553 else
5555 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5556 DECL_USER_ALIGN (base_decl) = 1;
5558 DR_VECT_AUX (dr)->base_misaligned = false;
5563 /* Function get_group_alias_ptr_type.
5565 Return the alias type for the group starting at FIRST_STMT. */
5567 static tree
5568 get_group_alias_ptr_type (gimple *first_stmt)
5570 struct data_reference *first_dr, *next_dr;
5571 gimple *next_stmt;
5573 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5574 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5575 while (next_stmt)
5577 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5578 if (get_alias_set (DR_REF (first_dr))
5579 != get_alias_set (DR_REF (next_dr)))
5581 if (dump_enabled_p ())
5582 dump_printf_loc (MSG_NOTE, vect_location,
5583 "conflicting alias set types.\n");
5584 return ptr_type_node;
5586 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5588 return reference_alias_ptr_type (DR_REF (first_dr));
5592 /* Function vectorizable_store.
5594 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5595 can be vectorized.
5596 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5597 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5598 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5600 static bool
5601 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5602 slp_tree slp_node)
5604 tree scalar_dest;
5605 tree data_ref;
5606 tree op;
5607 tree vec_oprnd = NULL_TREE;
5608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5609 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5610 tree elem_type;
5611 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5612 struct loop *loop = NULL;
5613 machine_mode vec_mode;
5614 tree dummy;
5615 enum dr_alignment_support alignment_support_scheme;
5616 gimple *def_stmt;
5617 enum vect_def_type dt;
5618 stmt_vec_info prev_stmt_info = NULL;
5619 tree dataref_ptr = NULL_TREE;
5620 tree dataref_offset = NULL_TREE;
5621 gimple *ptr_incr = NULL;
5622 int ncopies;
5623 int j;
5624 gimple *next_stmt, *first_stmt;
5625 bool grouped_store;
5626 unsigned int group_size, i;
5627 vec<tree> oprnds = vNULL;
5628 vec<tree> result_chain = vNULL;
5629 bool inv_p;
5630 tree offset = NULL_TREE;
5631 vec<tree> vec_oprnds = vNULL;
5632 bool slp = (slp_node != NULL);
5633 unsigned int vec_num;
5634 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5635 vec_info *vinfo = stmt_info->vinfo;
5636 tree aggr_type;
5637 gather_scatter_info gs_info;
5638 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5639 gimple *new_stmt;
5640 int vf;
5641 vec_load_store_type vls_type;
5642 tree ref_type;
5644 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5645 return false;
5647 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5648 && ! vec_stmt)
5649 return false;
5651 /* Is vectorizable store? */
5653 if (!is_gimple_assign (stmt))
5654 return false;
5656 scalar_dest = gimple_assign_lhs (stmt);
5657 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5658 && is_pattern_stmt_p (stmt_info))
5659 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5660 if (TREE_CODE (scalar_dest) != ARRAY_REF
5661 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5662 && TREE_CODE (scalar_dest) != INDIRECT_REF
5663 && TREE_CODE (scalar_dest) != COMPONENT_REF
5664 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5665 && TREE_CODE (scalar_dest) != REALPART_EXPR
5666 && TREE_CODE (scalar_dest) != MEM_REF)
5667 return false;
5669 /* Cannot have hybrid store SLP -- that would mean storing to the
5670 same location twice. */
5671 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5673 gcc_assert (gimple_assign_single_p (stmt));
5675 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5676 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5678 if (loop_vinfo)
5680 loop = LOOP_VINFO_LOOP (loop_vinfo);
5681 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5683 else
5684 vf = 1;
5686 /* Multiple types in SLP are handled by creating the appropriate number of
5687 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5688 case of SLP. */
5689 if (slp)
5690 ncopies = 1;
5691 else
5692 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5694 gcc_assert (ncopies >= 1);
5696 /* FORNOW. This restriction should be relaxed. */
5697 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5699 if (dump_enabled_p ())
5700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5701 "multiple types in nested loop.\n");
5702 return false;
5705 op = gimple_assign_rhs1 (stmt);
5707 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5709 if (dump_enabled_p ())
5710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5711 "use not simple.\n");
5712 return false;
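/* Classify the store: when the rhs is a constant or is defined outside
   the loop, the same value is stored on every iteration (e.g. an array
   initialization with 0); the classification is passed to
   get_load_store_type below.  */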
5715 if (dt == vect_constant_def || dt == vect_external_def)
5716 vls_type = VLS_STORE_INVARIANT;
5717 else
5718 vls_type = VLS_STORE;
5720 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5721 return false;
5723 elem_type = TREE_TYPE (vectype);
5724 vec_mode = TYPE_MODE (vectype);
5726 /* FORNOW. In some cases can vectorize even if data-type not supported
5727 (e.g. array initialization with 0). */
5728 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5729 return false;
5731 if (!STMT_VINFO_DATA_REF (stmt_info))
5732 return false;
5734 vect_memory_access_type memory_access_type;
5735 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5736 &memory_access_type, &gs_info))
5737 return false;
5739 if (!vec_stmt) /* transformation not required. */
5741 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5742 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5743 /* The SLP costs are calculated during SLP analysis. */
5744 if (!PURE_SLP_STMT (stmt_info))
5745 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5746 NULL, NULL, NULL);
5747 return true;
5749 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5751 /** Transform. **/
5753 ensure_base_align (stmt_info, dr);
5755 if (memory_access_type == VMAT_GATHER_SCATTER)
5757 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5758 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5759 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5760 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5761 edge pe = loop_preheader_edge (loop);
5762 gimple_seq seq;
5763 basic_block new_bb;
5764 enum { NARROW, NONE, WIDEN } modifier;
5765 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5767 if (nunits == (unsigned int) scatter_off_nunits)
5768 modifier = NONE;
5769 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5771 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5772 modifier = WIDEN;
5774 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5775 sel[i] = i | nunits;
5777 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5778 gcc_assert (perm_mask != NULL_TREE);
5780 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5782 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5783 modifier = NARROW;
5785 for (i = 0; i < (unsigned int) nunits; ++i)
5786 sel[i] = i | scatter_off_nunits;
5788 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5789 gcc_assert (perm_mask != NULL_TREE);
5790 ncopies *= 2;
5792 else
5793 gcc_unreachable ();
5795 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5796 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5797 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5798 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5799 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5800 scaletype = TREE_VALUE (arglist);
5802 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5803 && TREE_CODE (rettype) == VOID_TYPE);
5805 ptr = fold_convert (ptrtype, gs_info.base);
5806 if (!is_gimple_min_invariant (ptr))
5808 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5809 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5810 gcc_assert (!new_bb);
5813 /* Currently we support only unconditional scatter stores,
5814 so mask should be all ones. */
5815 mask = build_int_cst (masktype, -1);
5816 mask = vect_init_vector (stmt, mask, masktype, NULL);
5818 scale = build_int_cst (scaletype, gs_info.scale);
5820 prev_stmt_info = NULL;
5821 for (j = 0; j < ncopies; ++j)
5823 if (j == 0)
5825 src = vec_oprnd1
5826 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5827 op = vec_oprnd0
5828 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5830 else if (modifier != NONE && (j & 1))
5832 if (modifier == WIDEN)
5834 src = vec_oprnd1
5835 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5836 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5837 stmt, gsi);
5839 else if (modifier == NARROW)
5841 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5842 stmt, gsi);
5843 op = vec_oprnd0
5844 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5845 vec_oprnd0);
5847 else
5848 gcc_unreachable ();
5850 else
5852 src = vec_oprnd1
5853 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5854 op = vec_oprnd0
5855 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5856 vec_oprnd0);
5859 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5861 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5862 == TYPE_VECTOR_SUBPARTS (srctype));
5863 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5864 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5865 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5866 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5867 src = var;
5870 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5872 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5873 == TYPE_VECTOR_SUBPARTS (idxtype));
5874 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5875 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5876 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5878 op = var;
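/* Emit the scatter call: conceptually, for every lane i enabled by MASK
   (all ones here, i.e. unconditionally) the element SRC[i] is stored at
   *(PTR + OP[i] * SCALE).  */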
5881 new_stmt
5882 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5886 if (prev_stmt_info == NULL)
5887 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5888 else
5889 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5890 prev_stmt_info = vinfo_for_stmt (new_stmt);
5892 return true;
5895 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5896 if (grouped_store)
5898 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5899 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5900 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5902 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5904 /* FORNOW */
5905 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5907 /* We vectorize all the stmts of the interleaving group when we
5908 reach the last stmt in the group. */
5909 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5910 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5911 && !slp)
5913 *vec_stmt = NULL;
5914 return true;
5917 if (slp)
5919 grouped_store = false;
5920 /* VEC_NUM is the number of vect stmts to be created for this
5921 group. */
5922 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5923 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5924 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5925 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5926 op = gimple_assign_rhs1 (first_stmt);
5928 else
5929 /* VEC_NUM is the number of vect stmts to be created for this
5930 group. */
5931 vec_num = group_size;
5933 ref_type = get_group_alias_ptr_type (first_stmt);
5935 else
5937 first_stmt = stmt;
5938 first_dr = dr;
5939 group_size = vec_num = 1;
5940 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5943 if (dump_enabled_p ())
5944 dump_printf_loc (MSG_NOTE, vect_location,
5945 "transform store. ncopies = %d\n", ncopies);
5947 if (memory_access_type == VMAT_ELEMENTWISE
5948 || memory_access_type == VMAT_STRIDED_SLP)
5950 gimple_stmt_iterator incr_gsi;
5951 bool insert_after;
5952 gimple *incr;
5953 tree offvar;
5954 tree ivstep;
5955 tree running_off;
5956 gimple_seq stmts = NULL;
5957 tree stride_base, stride_step, alias_off;
5958 tree vec_oprnd;
5959 unsigned int g;
5961 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5963 stride_base
5964 = fold_build_pointer_plus
5965 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5966 size_binop (PLUS_EXPR,
5967 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5968 convert_to_ptrofftype (DR_INIT (first_dr))));
5969 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5971 /* For a store with a loop-invariant stride that is not a power of 2
5972 (i.e. not a grouped access) like so:
5974 for (i = 0; i < n; i += stride)
5975 array[i] = ...;
5977 we generate a new induction variable and new stores from
5978 the components of the (vectorized) rhs:
5980 for (j = 0; ; j += VF*stride)
5981 vectemp = ...;
5982 tmp1 = vectemp[0];
5983 array[j] = tmp1;
5984 tmp2 = vectemp[1];
5985 array[j + stride] = tmp2;
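/* As a concrete illustration, assume a four-element vector type and no
   SLP: nstores is 4, so the inner loop below extracts vectemp[0]
   .. vectemp[3] with BIT_FIELD_REFs and stores them to array[j],
   array[j + stride], array[j + 2*stride] and array[j + 3*stride], while
   the induction variable created further down advances by VF * stride
   per vector iteration.  */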
5989 unsigned nstores = nunits;
5990 unsigned lnel = 1;
5991 tree ltype = elem_type;
5992 if (slp)
5994 if (group_size < nunits
5995 && nunits % group_size == 0)
5997 nstores = nunits / group_size;
5998 lnel = group_size;
5999 ltype = build_vector_type (elem_type, group_size);
6001 else if (group_size >= nunits
6002 && group_size % nunits == 0)
6004 nstores = 1;
6005 lnel = nunits;
6006 ltype = vectype;
6008 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6009 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6012 ivstep = stride_step;
6013 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6014 build_int_cst (TREE_TYPE (ivstep), vf));
6016 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6018 create_iv (stride_base, ivstep, NULL,
6019 loop, &incr_gsi, insert_after,
6020 &offvar, NULL);
6021 incr = gsi_stmt (incr_gsi);
6022 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6024 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6025 if (stmts)
6026 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6028 prev_stmt_info = NULL;
6029 alias_off = build_int_cst (ref_type, 0);
6030 next_stmt = first_stmt;
6031 for (g = 0; g < group_size; g++)
6033 running_off = offvar;
6034 if (g)
6036 tree size = TYPE_SIZE_UNIT (ltype);
6037 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6038 size);
6039 tree newoff = copy_ssa_name (running_off, NULL);
6040 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6041 running_off, pos);
6042 vect_finish_stmt_generation (stmt, incr, gsi);
6043 running_off = newoff;
6045 unsigned int group_el = 0;
6046 unsigned HOST_WIDE_INT
6047 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6048 for (j = 0; j < ncopies; j++)
6050 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6051 and first_stmt == stmt. */
6052 if (j == 0)
6054 if (slp)
6056 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6057 slp_node, -1);
6058 vec_oprnd = vec_oprnds[0];
6060 else
6062 gcc_assert (gimple_assign_single_p (next_stmt));
6063 op = gimple_assign_rhs1 (next_stmt);
6064 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6067 else
6069 if (slp)
6070 vec_oprnd = vec_oprnds[j];
6071 else
6073 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6074 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6078 for (i = 0; i < nstores; i++)
6080 tree newref, newoff;
6081 gimple *incr, *assign;
6082 tree size = TYPE_SIZE (ltype);
6083 /* Extract the i'th component. */
6084 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6085 bitsize_int (i), size);
6086 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6087 size, pos);
6089 elem = force_gimple_operand_gsi (gsi, elem, true,
6090 NULL_TREE, true,
6091 GSI_SAME_STMT);
6093 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6094 group_el * elsz);
6095 newref = build2 (MEM_REF, ltype,
6096 running_off, this_off);
6098 /* And store it to *running_off. */
6099 assign = gimple_build_assign (newref, elem);
6100 vect_finish_stmt_generation (stmt, assign, gsi);
6102 group_el += lnel;
6103 if (! slp
6104 || group_el == group_size)
6106 newoff = copy_ssa_name (running_off, NULL);
6107 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6108 running_off, stride_step);
6109 vect_finish_stmt_generation (stmt, incr, gsi);
6111 running_off = newoff;
6112 group_el = 0;
6114 if (g == group_size - 1
6115 && !slp)
6117 if (j == 0 && i == 0)
6118 STMT_VINFO_VEC_STMT (stmt_info)
6119 = *vec_stmt = assign;
6120 else
6121 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6122 prev_stmt_info = vinfo_for_stmt (assign);
6126 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6127 if (slp)
6128 break;
6131 vec_oprnds.release ();
6132 return true;
6135 auto_vec<tree> dr_chain (group_size);
6136 oprnds.create (group_size);
6138 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6139 gcc_assert (alignment_support_scheme);
6140 /* Targets with store-lane instructions must not require explicit
6141 realignment. */
6142 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6143 || alignment_support_scheme == dr_aligned
6144 || alignment_support_scheme == dr_unaligned_supported);
6146 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6147 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6148 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
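/* For store-lanes the pointer is advanced over the whole group at once,
   so the aggregate written per iteration is an array of vec_num * nunits
   scalar elements; otherwise a single vector is written at a time.  */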
6150 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6151 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6152 else
6153 aggr_type = vectype;
6155 /* In case the vectorization factor (VF) is bigger than the number
6156 of elements that we can fit in a vectype (nunits), we have to generate
6157 more than one vector stmt - i.e. - we need to "unroll" the
6158 vector stmt by a factor VF/nunits. For more details see documentation in
6159 vect_get_vec_def_for_stmt_copy. */
6161 /* In case of interleaving (non-unit grouped access):
6163 S1: &base + 2 = x2
6164 S2: &base = x0
6165 S3: &base + 1 = x1
6166 S4: &base + 3 = x3
6168 We create vectorized stores starting from the base address (the access of
6169 the first stmt in the chain, S2 in the above example) when the last store stmt
6170 of the chain (S4) is reached:
6172 VS1: &base = vx2
6173 VS2: &base + vec_size*1 = vx0
6174 VS3: &base + vec_size*2 = vx1
6175 VS4: &base + vec_size*3 = vx3
6177 Then permutation statements are generated:
6179 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6180 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6183 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6184 (the order of the data-refs in the output of vect_permute_store_chain
6185 corresponds to the order of scalar stmts in the interleaving chain - see
6186 the documentation of vect_permute_store_chain()).
6188 In case of both multiple types and interleaving, the above vector stores and
6189 permutation stmts are created for every copy. The result vector stmts are
6190 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6191 STMT_VINFO_RELATED_STMT for the next copies.
6194 prev_stmt_info = NULL;
6195 for (j = 0; j < ncopies; j++)
6198 if (j == 0)
6200 if (slp)
6202 /* Get vectorized arguments for SLP_NODE. */
6203 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6204 NULL, slp_node, -1);
6206 vec_oprnd = vec_oprnds[0];
6208 else
6210 /* For interleaved stores we collect vectorized defs for all the
6211 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6212 used as an input to vect_permute_store_chain(), and OPRNDS as
6213 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6215 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6216 OPRNDS are of size 1. */
6217 next_stmt = first_stmt;
6218 for (i = 0; i < group_size; i++)
6220 /* Since gaps are not supported for interleaved stores,
6221 GROUP_SIZE is the exact number of stmts in the chain.
6222 Therefore, NEXT_STMT can't be NULL_TREE. If
6223 there is no interleaving, GROUP_SIZE is 1, and only one
6224 iteration of the loop will be executed. */
6225 gcc_assert (next_stmt
6226 && gimple_assign_single_p (next_stmt));
6227 op = gimple_assign_rhs1 (next_stmt);
6229 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6230 dr_chain.quick_push (vec_oprnd);
6231 oprnds.quick_push (vec_oprnd);
6232 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6236 /* We should have caught mismatched types earlier. */
6237 gcc_assert (useless_type_conversion_p (vectype,
6238 TREE_TYPE (vec_oprnd)));
6239 bool simd_lane_access_p
6240 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6241 if (simd_lane_access_p
6242 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6243 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6244 && integer_zerop (DR_OFFSET (first_dr))
6245 && integer_zerop (DR_INIT (first_dr))
6246 && alias_sets_conflict_p (get_alias_set (aggr_type),
6247 get_alias_set (TREE_TYPE (ref_type))))
6249 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6250 dataref_offset = build_int_cst (ref_type, 0);
6251 inv_p = false;
6253 else
6254 dataref_ptr
6255 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6256 simd_lane_access_p ? loop : NULL,
6257 offset, &dummy, gsi, &ptr_incr,
6258 simd_lane_access_p, &inv_p);
6259 gcc_assert (bb_vinfo || !inv_p);
6261 else
6263 /* For interleaved stores we created vectorized defs for all the
6264 defs stored in OPRNDS in the previous iteration (previous copy).
6265 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6266 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6267 next copy.
6268 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6269 OPRNDS are of size 1. */
6270 for (i = 0; i < group_size; i++)
6272 op = oprnds[i];
6273 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6274 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6275 dr_chain[i] = vec_oprnd;
6276 oprnds[i] = vec_oprnd;
6278 if (dataref_offset)
6279 dataref_offset
6280 = int_const_binop (PLUS_EXPR, dataref_offset,
6281 TYPE_SIZE_UNIT (aggr_type));
6282 else
6283 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6284 TYPE_SIZE_UNIT (aggr_type));
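/* On targets with store-lanes instructions (e.g. ARM vst2/vst3/vst4) the
   whole group is combined into one array object and written by a single
   STORE_LANES call, which performs the interleaving itself; in the else
   branch below, grouped stores are instead interleaved explicitly by
   vect_permute_store_chain and the vectors are stored one by one.  */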
6287 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6289 tree vec_array;
6291 /* Combine all the vectors into an array. */
6292 vec_array = create_vector_array (vectype, vec_num);
6293 for (i = 0; i < vec_num; i++)
6295 vec_oprnd = dr_chain[i];
6296 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6299 /* Emit:
6300 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6301 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6302 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6303 gimple_call_set_lhs (new_stmt, data_ref);
6304 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6306 else
6308 new_stmt = NULL;
6309 if (grouped_store)
6311 if (j == 0)
6312 result_chain.create (group_size);
6313 /* Permute. */
6314 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6315 &result_chain);
6318 next_stmt = first_stmt;
6319 for (i = 0; i < vec_num; i++)
6321 unsigned align, misalign;
6323 if (i > 0)
6324 /* Bump the vector pointer. */
6325 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6326 stmt, NULL_TREE);
6328 if (slp)
6329 vec_oprnd = vec_oprnds[i];
6330 else if (grouped_store)
6331 /* For grouped stores vectorized defs are interleaved in
6332 vect_permute_store_chain(). */
6333 vec_oprnd = result_chain[i];
6335 data_ref = fold_build2 (MEM_REF, vectype,
6336 dataref_ptr,
6337 dataref_offset
6338 ? dataref_offset
6339 : build_int_cst (ref_type, 0));
6340 align = TYPE_ALIGN_UNIT (vectype);
6341 if (aligned_access_p (first_dr))
6342 misalign = 0;
6343 else if (DR_MISALIGNMENT (first_dr) == -1)
6345 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6346 align = TYPE_ALIGN_UNIT (elem_type);
6347 else
6348 align = get_object_alignment (DR_REF (first_dr))
6349 / BITS_PER_UNIT;
6350 misalign = 0;
6351 TREE_TYPE (data_ref)
6352 = build_aligned_type (TREE_TYPE (data_ref),
6353 align * BITS_PER_UNIT);
6355 else
6357 TREE_TYPE (data_ref)
6358 = build_aligned_type (TREE_TYPE (data_ref),
6359 TYPE_ALIGN (elem_type));
6360 misalign = DR_MISALIGNMENT (first_dr);
6362 if (dataref_offset == NULL_TREE
6363 && TREE_CODE (dataref_ptr) == SSA_NAME)
6364 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6365 misalign);
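/* A negative-step (reverse) access has to store the vector elements in
   the opposite order, so the rhs is first permuted with a mask that
   reverses the elements (e.g. { 3, 2, 1, 0 } for a four-element
   vector).  */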
6367 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6369 tree perm_mask = perm_mask_for_reverse (vectype);
6370 tree perm_dest
6371 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6372 vectype);
6373 tree new_temp = make_ssa_name (perm_dest);
6375 /* Generate the permute statement. */
6376 gimple *perm_stmt
6377 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6378 vec_oprnd, perm_mask);
6379 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6381 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6382 vec_oprnd = new_temp;
6385 /* Arguments are ready. Create the new vector stmt. */
6386 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6387 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6389 if (slp)
6390 continue;
6392 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6393 if (!next_stmt)
6394 break;
6397 if (!slp)
6399 if (j == 0)
6400 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6401 else
6402 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6403 prev_stmt_info = vinfo_for_stmt (new_stmt);
6407 oprnds.release ();
6408 result_chain.release ();
6409 vec_oprnds.release ();
6411 return true;
6414 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6415 VECTOR_CST mask. No checks are made that the target platform supports the
6416 mask, so callers may wish to test can_vec_perm_p separately, or use
6417 vect_gen_perm_mask_checked. */
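/* For example, SEL = { 3, 2, 1, 0 } for a four-element VECTYPE yields the
   VECTOR_CST { 3, 2, 1, 0 }; used as the third operand of a VEC_PERM_EXPR
   whose two input operands are both some vector X, it selects from the
   concatenation of the operands and thus produces X with its elements
   reversed.  */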
6419 tree
6420 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6422 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6423 int i, nunits;
6425 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6427 mask_elt_type = lang_hooks.types.type_for_mode
6428 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6429 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6431 mask_elts = XALLOCAVEC (tree, nunits);
6432 for (i = nunits - 1; i >= 0; i--)
6433 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6434 mask_vec = build_vector (mask_type, mask_elts);
6436 return mask_vec;
6439 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6440 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6442 tree
6443 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6445 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6446 return vect_gen_perm_mask_any (vectype, sel);
6449 /* Given vector variables X and Y that were generated for the scalar
6450 STMT, generate instructions to permute the vector elements of X and Y
6451 using permutation mask MASK_VEC, insert them at *GSI and return the
6452 permuted vector variable. */
6454 static tree
6455 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6456 gimple_stmt_iterator *gsi)
6458 tree vectype = TREE_TYPE (x);
6459 tree perm_dest, data_ref;
6460 gimple *perm_stmt;
6462 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6463 data_ref = make_ssa_name (perm_dest);
6465 /* Generate the permute statement. */
6466 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6467 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6469 return data_ref;
6472 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6473 inserting them on the loop's preheader edge. Returns true if we
6474 were successful in doing so (and thus STMT can then be moved),
6475 otherwise returns false. */
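/* For example, if STMT is a load whose address is computed inside LOOP
   but only from loop-invariant SSA names, the defining statements of
   those address computations can be moved to the preheader edge, after
   which STMT itself no longer depends on anything defined inside LOOP.  */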
6477 static bool
6478 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6480 ssa_op_iter i;
6481 tree op;
6482 bool any = false;
6484 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6486 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6487 if (!gimple_nop_p (def_stmt)
6488 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6490 /* Make sure we don't need to recurse. While we could do
6491 so in simple cases, when there are more complex use webs
6492 we don't have an easy way to preserve stmt order to fulfil
6493 dependencies within them. */
6494 tree op2;
6495 ssa_op_iter i2;
6496 if (gimple_code (def_stmt) == GIMPLE_PHI)
6497 return false;
6498 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6500 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6501 if (!gimple_nop_p (def_stmt2)
6502 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6503 return false;
6505 any = true;
6509 if (!any)
6510 return true;
6512 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6514 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6515 if (!gimple_nop_p (def_stmt)
6516 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6518 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6519 gsi_remove (&gsi, false);
6520 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6524 return true;
6527 /* vectorizable_load.
6529 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6530 can be vectorized.
6531 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6532 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6533 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6535 static bool
6536 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6537 slp_tree slp_node, slp_instance slp_node_instance)
6539 tree scalar_dest;
6540 tree vec_dest = NULL;
6541 tree data_ref = NULL;
6542 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6543 stmt_vec_info prev_stmt_info;
6544 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6545 struct loop *loop = NULL;
6546 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6547 bool nested_in_vect_loop = false;
6548 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6549 tree elem_type;
6550 tree new_temp;
6551 machine_mode mode;
6552 gimple *new_stmt = NULL;
6553 tree dummy;
6554 enum dr_alignment_support alignment_support_scheme;
6555 tree dataref_ptr = NULL_TREE;
6556 tree dataref_offset = NULL_TREE;
6557 gimple *ptr_incr = NULL;
6558 int ncopies;
6559 int i, j, group_size, group_gap_adj;
6560 tree msq = NULL_TREE, lsq;
6561 tree offset = NULL_TREE;
6562 tree byte_offset = NULL_TREE;
6563 tree realignment_token = NULL_TREE;
6564 gphi *phi = NULL;
6565 vec<tree> dr_chain = vNULL;
6566 bool grouped_load = false;
6567 gimple *first_stmt;
6568 gimple *first_stmt_for_drptr = NULL;
6569 bool inv_p;
6570 bool compute_in_loop = false;
6571 struct loop *at_loop;
6572 int vec_num;
6573 bool slp = (slp_node != NULL);
6574 bool slp_perm = false;
6575 enum tree_code code;
6576 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6577 int vf;
6578 tree aggr_type;
6579 gather_scatter_info gs_info;
6580 vec_info *vinfo = stmt_info->vinfo;
6581 tree ref_type;
6583 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6584 return false;
6586 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6587 && ! vec_stmt)
6588 return false;
6590 /* Is vectorizable load? */
6591 if (!is_gimple_assign (stmt))
6592 return false;
6594 scalar_dest = gimple_assign_lhs (stmt);
6595 if (TREE_CODE (scalar_dest) != SSA_NAME)
6596 return false;
6598 code = gimple_assign_rhs_code (stmt);
6599 if (code != ARRAY_REF
6600 && code != BIT_FIELD_REF
6601 && code != INDIRECT_REF
6602 && code != COMPONENT_REF
6603 && code != IMAGPART_EXPR
6604 && code != REALPART_EXPR
6605 && code != MEM_REF
6606 && TREE_CODE_CLASS (code) != tcc_declaration)
6607 return false;
6609 if (!STMT_VINFO_DATA_REF (stmt_info))
6610 return false;
6612 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6613 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6615 if (loop_vinfo)
6617 loop = LOOP_VINFO_LOOP (loop_vinfo);
6618 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6619 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6621 else
6622 vf = 1;
6624 /* Multiple types in SLP are handled by creating the appropriate number of
6625 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6626 case of SLP. */
6627 if (slp)
6628 ncopies = 1;
6629 else
6630 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6632 gcc_assert (ncopies >= 1);
6634 /* FORNOW. This restriction should be relaxed. */
6635 if (nested_in_vect_loop && ncopies > 1)
6637 if (dump_enabled_p ())
6638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6639 "multiple types in nested loop.\n");
6640 return false;
6643 /* Invalidate assumptions made by dependence analysis when vectorization
6644 on the unrolled body effectively re-orders stmts. */
6645 if (ncopies > 1
6646 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6647 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6648 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6650 if (dump_enabled_p ())
6651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6652 "cannot perform implicit CSE when unrolling "
6653 "with negative dependence distance\n");
6654 return false;
6657 elem_type = TREE_TYPE (vectype);
6658 mode = TYPE_MODE (vectype);
6660 /* FORNOW. In some cases can vectorize even if data-type not supported
6661 (e.g. data copies). */
6662 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6664 if (dump_enabled_p ())
6665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6666 "Aligned load, but unsupported type.\n");
6667 return false;
6670 /* Check if the load is a part of an interleaving chain. */
6671 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6673 grouped_load = true;
6674 /* FORNOW */
6675 gcc_assert (!nested_in_vect_loop);
6676 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6678 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6679 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6681 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6682 slp_perm = true;
6684 /* Invalidate assumptions made by dependence analysis when vectorization
6685 on the unrolled body effectively re-orders stmts. */
6686 if (!PURE_SLP_STMT (stmt_info)
6687 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6688 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6689 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6691 if (dump_enabled_p ())
6692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6693 "cannot perform implicit CSE when performing "
6694 "group loads with negative dependence distance\n");
6695 return false;
6698 /* Similarly, when the stmt is a load that is both part of an SLP
6699 instance and a loop-vectorized stmt via the same-dr mechanism,
6700 we have to give up. */
6701 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6702 && (STMT_SLP_TYPE (stmt_info)
6703 != STMT_SLP_TYPE (vinfo_for_stmt
6704 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6706 if (dump_enabled_p ())
6707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6708 "conflicting SLP types for CSEd load\n");
6709 return false;
6713 vect_memory_access_type memory_access_type;
6714 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6715 &memory_access_type, &gs_info))
6716 return false;
6718 if (!vec_stmt) /* transformation not required. */
6720 if (!slp)
6721 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6722 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6723 /* The SLP costs are calculated during SLP analysis. */
6724 if (!PURE_SLP_STMT (stmt_info))
6725 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6726 NULL, NULL, NULL);
6727 return true;
6730 if (!slp)
6731 gcc_assert (memory_access_type
6732 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6734 if (dump_enabled_p ())
6735 dump_printf_loc (MSG_NOTE, vect_location,
6736 "transform load. ncopies = %d\n", ncopies);
6738 /** Transform. **/
6740 ensure_base_align (stmt_info, dr);
6742 if (memory_access_type == VMAT_GATHER_SCATTER)
6744 tree vec_oprnd0 = NULL_TREE, op;
6745 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6746 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6747 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6748 edge pe = loop_preheader_edge (loop);
6749 gimple_seq seq;
6750 basic_block new_bb;
6751 enum { NARROW, NONE, WIDEN } modifier;
6752 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6754 if (nunits == gather_off_nunits)
6755 modifier = NONE;
6756 else if (nunits == gather_off_nunits / 2)
6758 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6759 modifier = WIDEN;
6761 for (i = 0; i < gather_off_nunits; ++i)
6762 sel[i] = i | nunits;
6764 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6766 else if (nunits == gather_off_nunits * 2)
6768 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6769 modifier = NARROW;
6771 for (i = 0; i < nunits; ++i)
6772 sel[i] = i < gather_off_nunits
6773 ? i : i + nunits - gather_off_nunits;
6775 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6776 ncopies *= 2;
6778 else
6779 gcc_unreachable ();
6781 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6782 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6783 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6784 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6785 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6786 scaletype = TREE_VALUE (arglist);
6787 gcc_checking_assert (types_compatible_p (srctype, rettype));
6789 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6791 ptr = fold_convert (ptrtype, gs_info.base);
6792 if (!is_gimple_min_invariant (ptr))
6794 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6795 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6796 gcc_assert (!new_bb);
6799 /* Currently we support only unconditional gather loads,
6800 so mask should be all ones. */
6801 if (TREE_CODE (masktype) == INTEGER_TYPE)
6802 mask = build_int_cst (masktype, -1);
6803 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6805 mask = build_int_cst (TREE_TYPE (masktype), -1);
6806 mask = build_vector_from_val (masktype, mask);
6807 mask = vect_init_vector (stmt, mask, masktype, NULL);
6809 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6811 REAL_VALUE_TYPE r;
6812 long tmp[6];
6813 for (j = 0; j < 6; ++j)
6814 tmp[j] = -1;
6815 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6816 mask = build_real (TREE_TYPE (masktype), r);
6817 mask = build_vector_from_val (masktype, mask);
6818 mask = vect_init_vector (stmt, mask, masktype, NULL);
6820 else
6821 gcc_unreachable ();
6823 scale = build_int_cst (scaletype, gs_info.scale);
6825 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6826 merge = build_int_cst (TREE_TYPE (rettype), 0);
6827 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6829 REAL_VALUE_TYPE r;
6830 long tmp[6];
6831 for (j = 0; j < 6; ++j)
6832 tmp[j] = 0;
6833 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6834 merge = build_real (TREE_TYPE (rettype), r);
6836 else
6837 gcc_unreachable ();
6838 merge = build_vector_from_val (rettype, merge);
6839 merge = vect_init_vector (stmt, merge, rettype, NULL);
6841 prev_stmt_info = NULL;
6842 for (j = 0; j < ncopies; ++j)
6844 if (modifier == WIDEN && (j & 1))
6845 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6846 perm_mask, stmt, gsi);
6847 else if (j == 0)
6848 op = vec_oprnd0
6849 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6850 else
6851 op = vec_oprnd0
6852 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6854 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6856 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6857 == TYPE_VECTOR_SUBPARTS (idxtype));
6858 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6859 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6860 new_stmt
6861 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6862 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6863 op = var;
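/* Emit the gather call: conceptually, each lane i of the result is loaded
   from *(PTR + OP[i] * SCALE) when the corresponding MASK lane is set and
   is taken from MERGE otherwise; with the all-ones mask built above every
   lane is loaded.  */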
6866 new_stmt
6867 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6869 if (!useless_type_conversion_p (vectype, rettype))
6871 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6872 == TYPE_VECTOR_SUBPARTS (rettype));
6873 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6874 gimple_call_set_lhs (new_stmt, op);
6875 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6876 var = make_ssa_name (vec_dest);
6877 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6878 new_stmt
6879 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6881 else
6883 var = make_ssa_name (vec_dest, new_stmt);
6884 gimple_call_set_lhs (new_stmt, var);
6887 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6889 if (modifier == NARROW)
6891 if ((j & 1) == 0)
6893 prev_res = var;
6894 continue;
6896 var = permute_vec_elements (prev_res, var,
6897 perm_mask, stmt, gsi);
6898 new_stmt = SSA_NAME_DEF_STMT (var);
6901 if (prev_stmt_info == NULL)
6902 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6903 else
6904 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6905 prev_stmt_info = vinfo_for_stmt (new_stmt);
6907 return true;
6910 if (memory_access_type == VMAT_ELEMENTWISE
6911 || memory_access_type == VMAT_STRIDED_SLP)
6913 gimple_stmt_iterator incr_gsi;
6914 bool insert_after;
6915 gimple *incr;
6916 tree offvar;
6917 tree ivstep;
6918 tree running_off;
6919 vec<constructor_elt, va_gc> *v = NULL;
6920 gimple_seq stmts = NULL;
6921 tree stride_base, stride_step, alias_off;
6923 gcc_assert (!nested_in_vect_loop);
6925 if (slp && grouped_load)
6927 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6928 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6929 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6930 ref_type = get_group_alias_ptr_type (first_stmt);
6932 else
6934 first_stmt = stmt;
6935 first_dr = dr;
6936 group_size = 1;
6937 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6940 stride_base
6941 = fold_build_pointer_plus
6942 (DR_BASE_ADDRESS (first_dr),
6943 size_binop (PLUS_EXPR,
6944 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6945 convert_to_ptrofftype (DR_INIT (first_dr))));
6946 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6948 /* For a load with a loop-invariant stride that is not a power of 2
6949 (i.e. not a grouped access) like so:
6951 for (i = 0; i < n; i += stride)
6952 ... = array[i];
6954 we generate a new induction variable and new accesses to
6955 form a new vector (or vectors, depending on ncopies):
6957 for (j = 0; ; j += VF*stride)
6958 tmp1 = array[j];
6959 tmp2 = array[j + stride];
6961 vectemp = {tmp1, tmp2, ...}
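/* As an illustration, assume a four-element vector type and no SLP:
   nloads is 4, so each copy below performs four scalar loads from
   array[j], array[j + stride], array[j + 2*stride] and
   array[j + 3*stride] and assembles them into one vector with a
   CONSTRUCTOR, while j advances by VF * stride (the MULT_EXPR just
   below).  */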
6964 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6965 build_int_cst (TREE_TYPE (stride_step), vf));
6967 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6969 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6970 loop, &incr_gsi, insert_after,
6971 &offvar, NULL);
6972 incr = gsi_stmt (incr_gsi);
6973 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6975 stride_step = force_gimple_operand (unshare_expr (stride_step),
6976 &stmts, true, NULL_TREE);
6977 if (stmts)
6978 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6980 prev_stmt_info = NULL;
6981 running_off = offvar;
6982 alias_off = build_int_cst (ref_type, 0);
6983 int nloads = nunits;
6984 int lnel = 1;
6985 tree ltype = TREE_TYPE (vectype);
6986 tree lvectype = vectype;
6987 auto_vec<tree> dr_chain;
6988 if (memory_access_type == VMAT_STRIDED_SLP)
6990 if (group_size < nunits)
6992 /* Avoid emitting a constructor of vector elements by performing
6993 the loads using an integer type of the same size,
6994 constructing a vector of those and then re-interpreting it
6995 as the original vector type. This works around the fact
6996 that the vec_init optab was only designed for scalar
6997 element modes and thus expansion goes through memory.
6998 This avoids a huge runtime penalty due to the general
6999 inability to perform store forwarding from smaller stores
7000 to a larger load. */
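/* For instance, a group of two 16-bit elements loaded into an
   eight-element vector of short can instead be read as four 32-bit
   integers, assembled into a four-element integer vector and then
   VIEW_CONVERTed to the original vector type.  */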
7001 unsigned lsize
7002 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7003 enum machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7004 enum machine_mode vmode = mode_for_vector (elmode,
7005 nunits / group_size);
7006 /* If we can't construct such a vector fall back to
7007 element loads of the original vector type. */
7008 if (VECTOR_MODE_P (vmode)
7009 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7011 nloads = nunits / group_size;
7012 lnel = group_size;
7013 ltype = build_nonstandard_integer_type (lsize, 1);
7014 lvectype = build_vector_type (ltype, nloads);
7017 else
7019 nloads = 1;
7020 lnel = nunits;
7021 ltype = vectype;
7023 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7025 if (slp)
7027 /* For SLP permutation support we need to load the whole group,
7028 not only the number of vector stmts the permutation result
7029 fits in. */
7030 if (slp_perm)
7032 ncopies = (group_size * vf + nunits - 1) / nunits;
7033 dr_chain.create (ncopies);
7035 else
7036 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7038 int group_el = 0;
7039 unsigned HOST_WIDE_INT
7040 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7041 for (j = 0; j < ncopies; j++)
7043 if (nloads > 1)
7044 vec_alloc (v, nloads);
7045 for (i = 0; i < nloads; i++)
7047 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7048 group_el * elsz);
7049 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7050 build2 (MEM_REF, ltype,
7051 running_off, this_off));
7052 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7053 if (nloads > 1)
7054 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7055 gimple_assign_lhs (new_stmt));
7057 group_el += lnel;
7058 if (! slp
7059 || group_el == group_size)
7061 tree newoff = copy_ssa_name (running_off);
7062 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7063 running_off, stride_step);
7064 vect_finish_stmt_generation (stmt, incr, gsi);
7066 running_off = newoff;
7067 group_el = 0;
7070 if (nloads > 1)
7072 tree vec_inv = build_constructor (lvectype, v);
7073 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7074 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7075 if (lvectype != vectype)
7077 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7078 VIEW_CONVERT_EXPR,
7079 build1 (VIEW_CONVERT_EXPR,
7080 vectype, new_temp));
7081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7085 if (slp)
7087 if (slp_perm)
7088 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7089 else
7090 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7092 else
7094 if (j == 0)
7095 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7096 else
7097 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7098 prev_stmt_info = vinfo_for_stmt (new_stmt);
7101 if (slp_perm)
7103 unsigned n_perms;
7104 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7105 slp_node_instance, false, &n_perms);
7107 return true;
7110 if (grouped_load)
7112 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7113 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7114 /* For SLP vectorization we directly vectorize a subchain
7115 without permutation. */
7116 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7117 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7118 /* For BB vectorization always use the first stmt to base
7119 the data ref pointer on. */
7120 if (bb_vinfo)
7121 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7123 /* Check if the chain of loads is already vectorized. */
7124 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7125 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7126 ??? But we can only do so if there is exactly one
7127 as we have no way to get at the rest. Leave the CSE
7128 opportunity alone.
7129 ??? With the group load eventually participating
7130 in multiple different permutations (having multiple
7131 slp nodes which refer to the same group) the CSE
7132 is even wrong code. See PR56270. */
7133 && !slp)
7135 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7136 return true;
7138 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7139 group_gap_adj = 0;
7141 /* VEC_NUM is the number of vect stmts to be created for this group. */
7142 if (slp)
7144 grouped_load = false;
7145 /* For SLP permutation support we need to load the whole group,
7146 not only the number of vector stmts the permutation result
7147 fits in. */
7148 if (slp_perm)
7149 vec_num = (group_size * vf + nunits - 1) / nunits;
7150 else
7151 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7152 group_gap_adj = vf * group_size - nunits * vec_num;
7154 else
7155 vec_num = group_size;
7157 ref_type = get_group_alias_ptr_type (first_stmt);
7159 else
7161 first_stmt = stmt;
7162 first_dr = dr;
7163 group_size = vec_num = 1;
7164 group_gap_adj = 0;
7165 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7168 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7169 gcc_assert (alignment_support_scheme);
7170 /* Targets with load-lane instructions must not require explicit
7171 realignment. */
7172 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7173 || alignment_support_scheme == dr_aligned
7174 || alignment_support_scheme == dr_unaligned_supported);
7176 /* In case the vectorization factor (VF) is bigger than the number
7177 of elements that we can fit in a vectype (nunits), we have to generate
7178 more than one vector stmt - i.e. - we need to "unroll" the
7179 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7180 from one copy of the vector stmt to the next, in the field
7181 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7182 stages to find the correct vector defs to be used when vectorizing
7183 stmts that use the defs of the current stmt. The example below
7184 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7185 need to create 4 vectorized stmts):
7187 before vectorization:
7188 RELATED_STMT VEC_STMT
7189 S1: x = memref - -
7190 S2: z = x + 1 - -
7192 step 1: vectorize stmt S1:
7193 We first create the vector stmt VS1_0, and, as usual, record a
7194 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7195 Next, we create the vector stmt VS1_1, and record a pointer to
7196 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7197 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7198 stmts and pointers:
7199 RELATED_STMT VEC_STMT
7200 VS1_0: vx0 = memref0 VS1_1 -
7201 VS1_1: vx1 = memref1 VS1_2 -
7202 VS1_2: vx2 = memref2 VS1_3 -
7203 VS1_3: vx3 = memref3 - -
7204 S1: x = load - VS1_0
7205 S2: z = x + 1 - -
7207 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7208 information we recorded in RELATED_STMT field is used to vectorize
7209 stmt S2. */
7211 /* In case of interleaving (non-unit grouped access):
7213 S1: x2 = &base + 2
7214 S2: x0 = &base
7215 S3: x1 = &base + 1
7216 S4: x3 = &base + 3
7218 Vectorized loads are created in the order of memory accesses
7219 starting from the access of the first stmt of the chain:
7221 VS1: vx0 = &base
7222 VS2: vx1 = &base + vec_size*1
7223 VS3: vx3 = &base + vec_size*2
7224 VS4: vx4 = &base + vec_size*3
7226 Then permutation statements are generated:
7228 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7229 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7232 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7233 (the order of the data-refs in the output of vect_permute_load_chain
7234 corresponds to the order of scalar stmts in the interleaving chain - see
7235 the documentation of vect_permute_load_chain()).
7236 The generation of permutation stmts and recording them in
7237 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7239 In case of both multiple types and interleaving, the vector loads and
7240 permutation stmts above are created for every copy. The result vector
7241 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7242 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7244 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7245 on a target that supports unaligned accesses (dr_unaligned_supported)
7246 we generate the following code:
7247 p = initial_addr;
7248 indx = 0;
7249 loop {
7250 p = p + indx * vectype_size;
7251 vec_dest = *(p);
7252 indx = indx + 1;
7255 Otherwise, the data reference is potentially unaligned on a target that
7256 does not support unaligned accesses (dr_explicit_realign_optimized) -
7257 then generate the following code, in which the data in each iteration is
7258 obtained by two vector loads, one from the previous iteration, and one
7259 from the current iteration:
7260 p1 = initial_addr;
7261 msq_init = *(floor(p1))
7262 p2 = initial_addr + VS - 1;
7263 realignment_token = call target_builtin;
7264 indx = 0;
7265 loop {
7266 p2 = p2 + indx * vectype_size
7267 lsq = *(floor(p2))
7268 vec_dest = realign_load (msq, lsq, realignment_token)
7269 indx = indx + 1;
7270 msq = lsq;
7271 } */
7273 /* If the misalignment remains the same throughout the execution of the
7274 loop, we can create the init_addr and permutation mask at the loop
7275 preheader. Otherwise, they need to be created inside the loop.
7276 This can only occur when vectorizing memory accesses in the inner-loop
7277 nested within an outer-loop that is being vectorized. */
7279 if (nested_in_vect_loop
7280 && (TREE_INT_CST_LOW (DR_STEP (dr))
7281 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7283 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7284 compute_in_loop = true;
7287 if ((alignment_support_scheme == dr_explicit_realign_optimized
7288 || alignment_support_scheme == dr_explicit_realign)
7289 && !compute_in_loop)
7291 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7292 alignment_support_scheme, NULL_TREE,
7293 &at_loop);
7294 if (alignment_support_scheme == dr_explicit_realign_optimized)
7296 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7297 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7298 size_one_node);
7301 else
7302 at_loop = loop;
7304 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7305 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7307 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7308 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7309 else
7310 aggr_type = vectype;
7312 prev_stmt_info = NULL;
7313 for (j = 0; j < ncopies; j++)
7315 /* 1. Create the vector or array pointer update chain. */
7316 if (j == 0)
7318 bool simd_lane_access_p
7319 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7320 if (simd_lane_access_p
7321 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7322 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7323 && integer_zerop (DR_OFFSET (first_dr))
7324 && integer_zerop (DR_INIT (first_dr))
7325 && alias_sets_conflict_p (get_alias_set (aggr_type),
7326 get_alias_set (TREE_TYPE (ref_type)))
7327 && (alignment_support_scheme == dr_aligned
7328 || alignment_support_scheme == dr_unaligned_supported))
7330 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7331 dataref_offset = build_int_cst (ref_type, 0);
7332 inv_p = false;
7334 else if (first_stmt_for_drptr
7335 && first_stmt != first_stmt_for_drptr)
7337 dataref_ptr
7338 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7339 at_loop, offset, &dummy, gsi,
7340 &ptr_incr, simd_lane_access_p,
7341 &inv_p, byte_offset);
7342 /* Adjust the pointer by the difference to first_stmt. */
7343 data_reference_p ptrdr
7344 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7345 tree diff = fold_convert (sizetype,
7346 size_binop (MINUS_EXPR,
7347 DR_INIT (first_dr),
7348 DR_INIT (ptrdr)));
7349 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7350 stmt, diff);
7352 else
7353 dataref_ptr
7354 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7355 offset, &dummy, gsi, &ptr_incr,
7356 simd_lane_access_p, &inv_p,
7357 byte_offset);
7359 else if (dataref_offset)
7360 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7361 TYPE_SIZE_UNIT (aggr_type));
7362 else
7363 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7364 TYPE_SIZE_UNIT (aggr_type));
7366 if (grouped_load || slp_perm)
7367 dr_chain.create (vec_num);
7369 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7371 tree vec_array;
7373 vec_array = create_vector_array (vectype, vec_num);
7375 /* Emit:
7376 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
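/* LOAD_LANES de-interleaves the group while loading: roughly, for a
   two-element group { a[2i], a[2i+1] } the first vector extracted below
   receives the a[2i] elements and the second the a[2i+1] elements, much
   like a vld2-style instruction would.  */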
7377 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7378 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7379 gimple_call_set_lhs (new_stmt, vec_array);
7380 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7382 /* Extract each vector into an SSA_NAME. */
7383 for (i = 0; i < vec_num; i++)
7385 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7386 vec_array, i);
7387 dr_chain.quick_push (new_temp);
7390 /* Record the mapping between SSA_NAMEs and statements. */
7391 vect_record_grouped_load_vectors (stmt, dr_chain);
7393 else
7395 for (i = 0; i < vec_num; i++)
7397 if (i > 0)
7398 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7399 stmt, NULL_TREE);
7401 /* 2. Create the vector-load in the loop. */
7402 switch (alignment_support_scheme)
7404 case dr_aligned:
7405 case dr_unaligned_supported:
7407 unsigned int align, misalign;
7409 data_ref
7410 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7411 dataref_offset
7412 ? dataref_offset
7413 : build_int_cst (ref_type, 0));
7414 align = TYPE_ALIGN_UNIT (vectype);
7415 if (alignment_support_scheme == dr_aligned)
7417 gcc_assert (aligned_access_p (first_dr));
7418 misalign = 0;
7420 else if (DR_MISALIGNMENT (first_dr) == -1)
7422 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7423 align = TYPE_ALIGN_UNIT (elem_type);
7424 else
7425 align = (get_object_alignment (DR_REF (first_dr))
7426 / BITS_PER_UNIT);
7427 misalign = 0;
7428 TREE_TYPE (data_ref)
7429 = build_aligned_type (TREE_TYPE (data_ref),
7430 align * BITS_PER_UNIT);
7432 else
7434 TREE_TYPE (data_ref)
7435 = build_aligned_type (TREE_TYPE (data_ref),
7436 TYPE_ALIGN (elem_type));
7437 misalign = DR_MISALIGNMENT (first_dr);
7439 if (dataref_offset == NULL_TREE
7440 && TREE_CODE (dataref_ptr) == SSA_NAME)
7441 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7442 align, misalign);
7443 break;
7445 case dr_explicit_realign:
7447 tree ptr, bump;
7449 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7451 if (compute_in_loop)
7452 msq = vect_setup_realignment (first_stmt, gsi,
7453 &realignment_token,
7454 dr_explicit_realign,
7455 dataref_ptr, NULL);
7457 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7458 ptr = copy_ssa_name (dataref_ptr);
7459 else
7460 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7461 new_stmt = gimple_build_assign
7462 (ptr, BIT_AND_EXPR, dataref_ptr,
7463 build_int_cst
7464 (TREE_TYPE (dataref_ptr),
7465 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7466 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7467 data_ref
7468 = build2 (MEM_REF, vectype, ptr,
7469 build_int_cst (ref_type, 0));
7470 vec_dest = vect_create_destination_var (scalar_dest,
7471 vectype);
7472 new_stmt = gimple_build_assign (vec_dest, data_ref);
7473 new_temp = make_ssa_name (vec_dest, new_stmt);
7474 gimple_assign_set_lhs (new_stmt, new_temp);
7475 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7476 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7477 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7478 msq = new_temp;
7480 bump = size_binop (MULT_EXPR, vs,
7481 TYPE_SIZE_UNIT (elem_type));
7482 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7483 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7484 new_stmt = gimple_build_assign
7485 (NULL_TREE, BIT_AND_EXPR, ptr,
7486 build_int_cst
7487 (TREE_TYPE (ptr),
7488 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7489 ptr = copy_ssa_name (ptr, new_stmt);
7490 gimple_assign_set_lhs (new_stmt, ptr);
7491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7492 data_ref
7493 = build2 (MEM_REF, vectype, ptr,
7494 build_int_cst (ref_type, 0));
7495 break;
7497 case dr_explicit_realign_optimized:
7498 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7499 new_temp = copy_ssa_name (dataref_ptr);
7500 else
7501 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7502 new_stmt = gimple_build_assign
7503 (new_temp, BIT_AND_EXPR, dataref_ptr,
7504 build_int_cst
7505 (TREE_TYPE (dataref_ptr),
7506 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7508 data_ref
7509 = build2 (MEM_REF, vectype, new_temp,
7510 build_int_cst (ref_type, 0));
7511 break;
7512 default:
7513 gcc_unreachable ();
7515 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7516 new_stmt = gimple_build_assign (vec_dest, data_ref);
7517 new_temp = make_ssa_name (vec_dest, new_stmt);
7518 gimple_assign_set_lhs (new_stmt, new_temp);
7519 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7521 /* 3. Handle explicit realignment if necessary/supported.
7522 Create in loop:
7523 vec_dest = realign_load (msq, lsq, realignment_token) */
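/* Roughly, realign_load selects from the concatenation of the two aligned
   vectors the elements that form the unaligned vector; e.g. for a
   misalignment of one element the result is
     { msq[1], ..., msq[N-1], lsq[0] }.  */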
7524 if (alignment_support_scheme == dr_explicit_realign_optimized
7525 || alignment_support_scheme == dr_explicit_realign)
7527 lsq = gimple_assign_lhs (new_stmt);
7528 if (!realignment_token)
7529 realignment_token = dataref_ptr;
7530 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7531 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7532 msq, lsq, realignment_token);
7533 new_temp = make_ssa_name (vec_dest, new_stmt);
7534 gimple_assign_set_lhs (new_stmt, new_temp);
7535 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7537 if (alignment_support_scheme == dr_explicit_realign_optimized)
7539 gcc_assert (phi);
7540 if (i == vec_num - 1 && j == ncopies - 1)
7541 add_phi_arg (phi, lsq,
7542 loop_latch_edge (containing_loop),
7543 UNKNOWN_LOCATION);
7544 msq = lsq;
7548 /* 4. Handle invariant-load. */
7549 if (inv_p && !bb_vinfo)
7551 gcc_assert (!grouped_load);
7552 /* If we have versioned for aliasing or the loop doesn't
7553 have any data dependencies that would preclude this,
7554 then we are sure this is a loop invariant load and
7555 thus we can insert it on the preheader edge. */
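/* A sketch of the effect: for
     for (i = 0; i < n; i++)
       a[i] = *q;
   where *q is known not to alias the vectorized accesses, the scalar load
   is emitted once on the preheader edge and the vector operand is then
   built (splatted) from its result by vect_init_vector below.  */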
7556 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7557 && !nested_in_vect_loop
7558 && hoist_defs_of_uses (stmt, loop))
7560 if (dump_enabled_p ())
7562 dump_printf_loc (MSG_NOTE, vect_location,
7563 "hoisting out of the vectorized "
7564 "loop: ");
7565 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7567 tree tem = copy_ssa_name (scalar_dest);
7568 gsi_insert_on_edge_immediate
7569 (loop_preheader_edge (loop),
7570 gimple_build_assign (tem,
7571 unshare_expr
7572 (gimple_assign_rhs1 (stmt))));
7573 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7574 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7575 set_vinfo_for_stmt (new_stmt,
7576 new_stmt_vec_info (new_stmt, vinfo));
7578 else
7580 gimple_stmt_iterator gsi2 = *gsi;
7581 gsi_next (&gsi2);
7582 new_temp = vect_init_vector (stmt, scalar_dest,
7583 vectype, &gsi2);
7584 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7588 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7590 tree perm_mask = perm_mask_for_reverse (vectype);
7591 new_temp = permute_vec_elements (new_temp, new_temp,
7592 perm_mask, stmt, gsi);
7593 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7596 /* Collect vector loads and later create their permutation in
7597 vect_transform_grouped_load (). */
7598 if (grouped_load || slp_perm)
7599 dr_chain.quick_push (new_temp);
7601 /* Store vector loads in the corresponding SLP_NODE. */
7602 if (slp && !slp_perm)
7603 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7605 /* Bump the vector pointer to account for a gap or for excess
7606 elements loaded for a permuted SLP load. */
7607 if (group_gap_adj != 0)
7609 bool ovf;
7610 tree bump
7611 = wide_int_to_tree (sizetype,
7612 wi::smul (TYPE_SIZE_UNIT (elem_type),
7613 group_gap_adj, &ovf));
7614 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7615 stmt, bump);
7619 if (slp && !slp_perm)
7620 continue;
7622 if (slp_perm)
7624 unsigned n_perms;
7625 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7626 slp_node_instance, false,
7627 &n_perms))
7629 dr_chain.release ();
7630 return false;
7633 else
7635 if (grouped_load)
7637 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7638 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7639 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7641 else
7643 if (j == 0)
7644 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7645 else
7646 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7647 prev_stmt_info = vinfo_for_stmt (new_stmt);
7650 dr_chain.release ();
7653 return true;
7656 /* Function vect_is_simple_cond.
7658 Input:
7659 LOOP - the loop that is being vectorized.
7660 COND - Condition that is checked for simple use.
7662 Output:
7663 *COMP_VECTYPE - the vector type for the comparison.
7665 Returns whether a COND can be vectorized. Checks whether
7666 condition operands are supportable using vect_is_simple_use. */
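/* For example, COND may be a comparison such as a_1 < b_2 whose operands
   are SSA names or invariants, or, in the mask case, a boolean SSA name
   that is itself defined by a comparison.  */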
7668 static bool
7669 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7671 tree lhs, rhs;
7672 enum vect_def_type dt;
7673 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7675 /* Mask case. */
7676 if (TREE_CODE (cond) == SSA_NAME
7677 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7679 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7680 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7681 &dt, comp_vectype)
7682 || !*comp_vectype
7683 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7684 return false;
7685 return true;
7688 if (!COMPARISON_CLASS_P (cond))
7689 return false;
7691 lhs = TREE_OPERAND (cond, 0);
7692 rhs = TREE_OPERAND (cond, 1);
7694 if (TREE_CODE (lhs) == SSA_NAME)
7696 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7697 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7698 return false;
7700 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7701 && TREE_CODE (lhs) != FIXED_CST)
7702 return false;
7704 if (TREE_CODE (rhs) == SSA_NAME)
7706 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7707 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7708 return false;
7710 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7711 && TREE_CODE (rhs) != FIXED_CST)
7712 return false;
7714 if (vectype1 && vectype2
7715 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7716 return false;
7718 *comp_vectype = vectype1 ? vectype1 : vectype2;
7719 return true;
7722 /* vectorizable_condition.
7724 Check if STMT is a conditional modify expression that can be vectorized.
7725 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7726 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7727 at GSI.
7729 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7730 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7731 the else clause if it is 2).
7733 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
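/* For example (a sketch): a scalar statement
     x_5 = a_1 < b_2 ? c_3 : d_4;
   is replaced by a vector statement of the form
     vx_5 = VEC_COND_EXPR <va_1 < vb_2, vc_3, vd_4>;
   modulo the bit-operation lowering applied to boolean operands below.  */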
7735 bool
7736 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7737 gimple **vec_stmt, tree reduc_def, int reduc_index,
7738 slp_tree slp_node)
7740 tree scalar_dest = NULL_TREE;
7741 tree vec_dest = NULL_TREE;
7742 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7743 tree then_clause, else_clause;
7744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7745 tree comp_vectype = NULL_TREE;
7746 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7747 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7748 tree vec_compare;
7749 tree new_temp;
7750 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7751 enum vect_def_type dt, dts[4];
7752 int ncopies;
7753 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7754 stmt_vec_info prev_stmt_info = NULL;
7755 int i, j;
7756 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7757 vec<tree> vec_oprnds0 = vNULL;
7758 vec<tree> vec_oprnds1 = vNULL;
7759 vec<tree> vec_oprnds2 = vNULL;
7760 vec<tree> vec_oprnds3 = vNULL;
7761 tree vec_cmp_type;
7762 bool masked = false;
7764 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7765 return false;
7767 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7769 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7770 return false;
7772 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7773 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7774 && reduc_def))
7775 return false;
7777 /* FORNOW: not yet supported. */
7778 if (STMT_VINFO_LIVE_P (stmt_info))
7780 if (dump_enabled_p ())
7781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7782 "value used after loop.\n");
7783 return false;
7787 /* Is this a vectorizable conditional operation? */
7788 if (!is_gimple_assign (stmt))
7789 return false;
7791 code = gimple_assign_rhs_code (stmt);
7793 if (code != COND_EXPR)
7794 return false;
7796 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7797 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7798 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7800 if (slp_node)
7801 ncopies = 1;
7802 else
7803 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7805 gcc_assert (ncopies >= 1);
7806 if (reduc_index && ncopies > 1)
7807 return false; /* FORNOW */
7809 cond_expr = gimple_assign_rhs1 (stmt);
7810 then_clause = gimple_assign_rhs2 (stmt);
7811 else_clause = gimple_assign_rhs3 (stmt);
7813 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7814 || !comp_vectype)
7815 return false;
7817 gimple *def_stmt;
7818 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7819 &vectype1))
7820 return false;
7821 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7822 &vectype2))
7823 return false;
7825 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7826 return false;
7828 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7829 return false;
7831 masked = !COMPARISON_CLASS_P (cond_expr);
7832 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7834 if (vec_cmp_type == NULL_TREE)
7835 return false;
7837 cond_code = TREE_CODE (cond_expr);
7838 if (!masked)
7840 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7841 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7844 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7846 /* Boolean values may have another representation in vectors
7847 and therefore we prefer bit operations over comparison for
7848 them (which also works for scalar masks). We store opcodes
7849 to use in bitop1 and bitop2. Statement is vectorized as
7850 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7851 depending on bitop1 and bitop2 arity. */
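/* For example, with boolean operands a_1 > b_2 is computed as
   a_1 & ~b_2 (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR) and
   a_1 != b_2 simply as a_1 ^ b_2.  */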
7852 switch (cond_code)
7854 case GT_EXPR:
7855 bitop1 = BIT_NOT_EXPR;
7856 bitop2 = BIT_AND_EXPR;
7857 break;
7858 case GE_EXPR:
7859 bitop1 = BIT_NOT_EXPR;
7860 bitop2 = BIT_IOR_EXPR;
7861 break;
7862 case LT_EXPR:
7863 bitop1 = BIT_NOT_EXPR;
7864 bitop2 = BIT_AND_EXPR;
7865 std::swap (cond_expr0, cond_expr1);
7866 break;
7867 case LE_EXPR:
7868 bitop1 = BIT_NOT_EXPR;
7869 bitop2 = BIT_IOR_EXPR;
7870 std::swap (cond_expr0, cond_expr1);
7871 break;
7872 case NE_EXPR:
7873 bitop1 = BIT_XOR_EXPR;
7874 break;
7875 case EQ_EXPR:
7876 bitop1 = BIT_XOR_EXPR;
7877 bitop2 = BIT_NOT_EXPR;
7878 break;
7879 default:
7880 return false;
7882 cond_code = SSA_NAME;
7885 if (!vec_stmt)
7887 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7888 if (bitop1 != NOP_EXPR)
7890 machine_mode mode = TYPE_MODE (comp_vectype);
7891 optab optab;
7893 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7894 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7895 return false;
7897 if (bitop2 != NOP_EXPR)
7899 optab = optab_for_tree_code (bitop2, comp_vectype,
7900 optab_default);
7901 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7902 return false;
7905 return expand_vec_cond_expr_p (vectype, comp_vectype,
7906 cond_code);
7909 /* Transform. */
7911 if (!slp_node)
7913 vec_oprnds0.create (1);
7914 vec_oprnds1.create (1);
7915 vec_oprnds2.create (1);
7916 vec_oprnds3.create (1);
7919 /* Handle def. */
7920 scalar_dest = gimple_assign_lhs (stmt);
7921 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7923 /* Handle cond expr. */
7924 for (j = 0; j < ncopies; j++)
7926 gassign *new_stmt = NULL;
7927 if (j == 0)
7929 if (slp_node)
7931 auto_vec<tree, 4> ops;
7932 auto_vec<vec<tree>, 4> vec_defs;
7934 if (masked)
7935 ops.safe_push (cond_expr);
7936 else
7938 ops.safe_push (cond_expr0);
7939 ops.safe_push (cond_expr1);
7941 ops.safe_push (then_clause);
7942 ops.safe_push (else_clause);
7943 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7944 vec_oprnds3 = vec_defs.pop ();
7945 vec_oprnds2 = vec_defs.pop ();
7946 if (!masked)
7947 vec_oprnds1 = vec_defs.pop ();
7948 vec_oprnds0 = vec_defs.pop ();
7950 else
7952 gimple *gtemp;
7953 if (masked)
7955 vec_cond_lhs
7956 = vect_get_vec_def_for_operand (cond_expr, stmt,
7957 comp_vectype);
7958 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7959 &gtemp, &dts[0]);
7961 else
7963 vec_cond_lhs
7964 = vect_get_vec_def_for_operand (cond_expr0,
7965 stmt, comp_vectype);
7966 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
7968 vec_cond_rhs
7969 = vect_get_vec_def_for_operand (cond_expr1,
7970 stmt, comp_vectype);
7971 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
7973 if (reduc_index == 1)
7974 vec_then_clause = reduc_def;
7975 else
7977 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7978 stmt);
7979 vect_is_simple_use (then_clause, loop_vinfo,
7980 &gtemp, &dts[2]);
7982 if (reduc_index == 2)
7983 vec_else_clause = reduc_def;
7984 else
7986 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7987 stmt);
7988 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7992 else
7994 vec_cond_lhs
7995 = vect_get_vec_def_for_stmt_copy (dts[0],
7996 vec_oprnds0.pop ());
7997 if (!masked)
7998 vec_cond_rhs
7999 = vect_get_vec_def_for_stmt_copy (dts[1],
8000 vec_oprnds1.pop ());
8002 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8003 vec_oprnds2.pop ());
8004 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8005 vec_oprnds3.pop ());
8008 if (!slp_node)
8010 vec_oprnds0.quick_push (vec_cond_lhs);
8011 if (!masked)
8012 vec_oprnds1.quick_push (vec_cond_rhs);
8013 vec_oprnds2.quick_push (vec_then_clause);
8014 vec_oprnds3.quick_push (vec_else_clause);
8017 /* Arguments are ready. Create the new vector stmt. */
8018 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8020 vec_then_clause = vec_oprnds2[i];
8021 vec_else_clause = vec_oprnds3[i];
8023 if (masked)
8024 vec_compare = vec_cond_lhs;
8025 else
8027 vec_cond_rhs = vec_oprnds1[i];
8028 if (bitop1 == NOP_EXPR)
8029 vec_compare = build2 (cond_code, vec_cmp_type,
8030 vec_cond_lhs, vec_cond_rhs);
8031 else
8033 new_temp = make_ssa_name (vec_cmp_type);
8034 if (bitop1 == BIT_NOT_EXPR)
8035 new_stmt = gimple_build_assign (new_temp, bitop1,
8036 vec_cond_rhs);
8037 else
8038 new_stmt
8039 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8040 vec_cond_rhs);
8041 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8042 if (bitop2 == NOP_EXPR)
8043 vec_compare = new_temp;
8044 else if (bitop2 == BIT_NOT_EXPR)
8046 /* Instead of doing ~x ? y : z do x ? z : y. */
8047 vec_compare = new_temp;
8048 std::swap (vec_then_clause, vec_else_clause);
8050 else
8052 vec_compare = make_ssa_name (vec_cmp_type);
8053 new_stmt
8054 = gimple_build_assign (vec_compare, bitop2,
8055 vec_cond_lhs, new_temp);
8056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8060 new_temp = make_ssa_name (vec_dest);
8061 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8062 vec_compare, vec_then_clause,
8063 vec_else_clause);
8064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8065 if (slp_node)
8066 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8069 if (slp_node)
8070 continue;
8072 if (j == 0)
8073 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8074 else
8075 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8077 prev_stmt_info = vinfo_for_stmt (new_stmt);
8080 vec_oprnds0.release ();
8081 vec_oprnds1.release ();
8082 vec_oprnds2.release ();
8083 vec_oprnds3.release ();
8085 return true;
8088 /* vectorizable_comparison.
8090 Check if STMT is a comparison expression that can be vectorized.
8091 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8092 comparison, put it in VEC_STMT, and insert it at GSI.
8094 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
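/* For example (a sketch): a scalar statement
     mask_5 = a_1 < b_2;
   becomes a vector comparison
     vmask_5 = va_1 < vb_2;
   whose result has a vector boolean (mask) type, unless boolean operands
   make the bit-operation lowering below preferable.  */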
8096 static bool
8097 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8098 gimple **vec_stmt, tree reduc_def,
8099 slp_tree slp_node)
8101 tree lhs, rhs1, rhs2;
8102 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8103 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8104 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8105 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8106 tree new_temp;
8107 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8108 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8109 unsigned nunits;
8110 int ncopies;
8111 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8112 stmt_vec_info prev_stmt_info = NULL;
8113 int i, j;
8114 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8115 vec<tree> vec_oprnds0 = vNULL;
8116 vec<tree> vec_oprnds1 = vNULL;
8117 gimple *def_stmt;
8118 tree mask_type;
8119 tree mask;
8121 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8122 return false;
8124 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8125 return false;
8127 mask_type = vectype;
8128 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8130 if (slp_node)
8131 ncopies = 1;
8132 else
8133 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8135 gcc_assert (ncopies >= 1);
8136 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8137 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8138 && reduc_def))
8139 return false;
8141 if (STMT_VINFO_LIVE_P (stmt_info))
8143 if (dump_enabled_p ())
8144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8145 "value used after loop.\n");
8146 return false;
8149 if (!is_gimple_assign (stmt))
8150 return false;
8152 code = gimple_assign_rhs_code (stmt);
8154 if (TREE_CODE_CLASS (code) != tcc_comparison)
8155 return false;
8157 rhs1 = gimple_assign_rhs1 (stmt);
8158 rhs2 = gimple_assign_rhs2 (stmt);
8160 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8161 &dts[0], &vectype1))
8162 return false;
8164 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8165 &dts[1], &vectype2))
8166 return false;
8168 if (vectype1 && vectype2
8169 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8170 return false;
8172 vectype = vectype1 ? vectype1 : vectype2;
8174 /* Invariant comparison. */
8175 if (!vectype)
8177 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8178 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8179 return false;
8181 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8182 return false;
8184 /* Can't compare mask and non-mask types. */
8185 if (vectype1 && vectype2
8186 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8187 return false;
8189 /* Boolean values may have another representation in vectors
8190 and therefore we prefer bit operations over comparison for
8191 them (which also works for scalar masks). We store opcodes
8192 to use in bitop1 and bitop2. Statement is vectorized as
8193 BITOP2 (rhs1 BITOP1 rhs2) or
8194 rhs1 BITOP2 (BITOP1 rhs2)
8195 depending on bitop1 and bitop2 arity. */
8196 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8198 if (code == GT_EXPR)
8200 bitop1 = BIT_NOT_EXPR;
8201 bitop2 = BIT_AND_EXPR;
8203 else if (code == GE_EXPR)
8205 bitop1 = BIT_NOT_EXPR;
8206 bitop2 = BIT_IOR_EXPR;
8208 else if (code == LT_EXPR)
8210 bitop1 = BIT_NOT_EXPR;
8211 bitop2 = BIT_AND_EXPR;
8212 std::swap (rhs1, rhs2);
8213 std::swap (dts[0], dts[1]);
8215 else if (code == LE_EXPR)
8217 bitop1 = BIT_NOT_EXPR;
8218 bitop2 = BIT_IOR_EXPR;
8219 std::swap (rhs1, rhs2);
8220 std::swap (dts[0], dts[1]);
8222 else
8224 bitop1 = BIT_XOR_EXPR;
8225 if (code == EQ_EXPR)
8226 bitop2 = BIT_NOT_EXPR;
8230 if (!vec_stmt)
8232 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8233 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8234 dts, NULL, NULL);
8235 if (bitop1 == NOP_EXPR)
8236 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8237 else
8239 machine_mode mode = TYPE_MODE (vectype);
8240 optab optab;
8242 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8243 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8244 return false;
8246 if (bitop2 != NOP_EXPR)
8248 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8249 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8250 return false;
8252 return true;
8256 /* Transform. */
8257 if (!slp_node)
8259 vec_oprnds0.create (1);
8260 vec_oprnds1.create (1);
8263 /* Handle def. */
8264 lhs = gimple_assign_lhs (stmt);
8265 mask = vect_create_destination_var (lhs, mask_type);
8267 /* Handle cmp expr. */
8268 for (j = 0; j < ncopies; j++)
8270 gassign *new_stmt = NULL;
8271 if (j == 0)
8273 if (slp_node)
8275 auto_vec<tree, 2> ops;
8276 auto_vec<vec<tree>, 2> vec_defs;
8278 ops.safe_push (rhs1);
8279 ops.safe_push (rhs2);
8280 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8281 vec_oprnds1 = vec_defs.pop ();
8282 vec_oprnds0 = vec_defs.pop ();
8284 else
8286 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8287 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8290 else
8292 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8293 vec_oprnds0.pop ());
8294 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8295 vec_oprnds1.pop ());
8298 if (!slp_node)
8300 vec_oprnds0.quick_push (vec_rhs1);
8301 vec_oprnds1.quick_push (vec_rhs2);
8304 /* Arguments are ready. Create the new vector stmt. */
8305 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8307 vec_rhs2 = vec_oprnds1[i];
8309 new_temp = make_ssa_name (mask);
8310 if (bitop1 == NOP_EXPR)
8312 new_stmt = gimple_build_assign (new_temp, code,
8313 vec_rhs1, vec_rhs2);
8314 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8316 else
8318 if (bitop1 == BIT_NOT_EXPR)
8319 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8320 else
8321 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8322 vec_rhs2);
8323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8324 if (bitop2 != NOP_EXPR)
8326 tree res = make_ssa_name (mask);
8327 if (bitop2 == BIT_NOT_EXPR)
8328 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8329 else
8330 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8331 new_temp);
8332 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8335 if (slp_node)
8336 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8339 if (slp_node)
8340 continue;
8342 if (j == 0)
8343 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8344 else
8345 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8347 prev_stmt_info = vinfo_for_stmt (new_stmt);
8350 vec_oprnds0.release ();
8351 vec_oprnds1.release ();
8353 return true;
8356 /* Make sure the statement is vectorizable. */
8358 bool
8359 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8362 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8363 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8364 bool ok;
8365 tree scalar_type, vectype;
8366 gimple *pattern_stmt;
8367 gimple_seq pattern_def_seq;
8369 if (dump_enabled_p ())
8371 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8372 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8375 if (gimple_has_volatile_ops (stmt))
8377 if (dump_enabled_p ())
8378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8379 "not vectorized: stmt has volatile operands\n");
8381 return false;
8384 /* Skip stmts that do not need to be vectorized. In loops this is expected
8385 to include:
8386 - the COND_EXPR which is the loop exit condition
8387 - any LABEL_EXPRs in the loop
8388 - computations that are used only for array indexing or loop control.
8389 In basic blocks we only analyze statements that are a part of some SLP
8390 instance, therefore, all the statements are relevant.
8392 Pattern statement needs to be analyzed instead of the original statement
8393 if the original statement is not relevant. Otherwise, we analyze both
8394 statements. In basic blocks we are called from some SLP instance
8395 traversal, so we don't analyze pattern stmts instead of the original; the
8396 pattern stmts will already be part of an SLP instance. */
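/* For example, the pattern recognizer may have replaced
     a_T = (TYPE) a_t;
     b_T = (TYPE) b_t;
     prod_T = a_T * b_T;
   by the single pattern statement
     prod_T = WIDEN_MULT_EXPR <a_t, b_t>;
   and the analysis below then examines the pattern statement instead of
   (or in addition to) the originals.  */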
8398 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8399 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8400 && !STMT_VINFO_LIVE_P (stmt_info))
8402 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8403 && pattern_stmt
8404 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8405 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8407 /* Analyze PATTERN_STMT instead of the original stmt. */
8408 stmt = pattern_stmt;
8409 stmt_info = vinfo_for_stmt (pattern_stmt);
8410 if (dump_enabled_p ())
8412 dump_printf_loc (MSG_NOTE, vect_location,
8413 "==> examining pattern statement: ");
8414 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8417 else
8419 if (dump_enabled_p ())
8420 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8422 return true;
8425 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8426 && node == NULL
8427 && pattern_stmt
8428 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8429 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8431 /* Analyze PATTERN_STMT too. */
8432 if (dump_enabled_p ())
8434 dump_printf_loc (MSG_NOTE, vect_location,
8435 "==> examining pattern statement: ");
8436 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8439 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8440 return false;
8443 if (is_pattern_stmt_p (stmt_info)
8444 && node == NULL
8445 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8447 gimple_stmt_iterator si;
8449 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8451 gimple *pattern_def_stmt = gsi_stmt (si);
8452 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8453 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8455 /* Analyze def stmt of STMT if it's a pattern stmt. */
8456 if (dump_enabled_p ())
8458 dump_printf_loc (MSG_NOTE, vect_location,
8459 "==> examining pattern def statement: ");
8460 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8463 if (!vect_analyze_stmt (pattern_def_stmt,
8464 need_to_vectorize, node))
8465 return false;
8470 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8472 case vect_internal_def:
8473 break;
8475 case vect_reduction_def:
8476 case vect_nested_cycle:
8477 gcc_assert (!bb_vinfo
8478 && (relevance == vect_used_in_outer
8479 || relevance == vect_used_in_outer_by_reduction
8480 || relevance == vect_used_by_reduction
8481 || relevance == vect_unused_in_scope
8482 || relevance == vect_used_only_live));
8483 break;
8485 case vect_induction_def:
8486 case vect_constant_def:
8487 case vect_external_def:
8488 case vect_unknown_def_type:
8489 default:
8490 gcc_unreachable ();
8493 if (bb_vinfo)
8495 gcc_assert (PURE_SLP_STMT (stmt_info));
8497 /* Memory accesses already got their vector type assigned
8498 in vect_analyze_data_refs. */
8499 if (! STMT_VINFO_DATA_REF (stmt_info))
8501 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8502 if (dump_enabled_p ())
8504 dump_printf_loc (MSG_NOTE, vect_location,
8505 "get vectype for scalar type: ");
8506 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8507 dump_printf (MSG_NOTE, "\n");
8510 vectype = get_vectype_for_scalar_type (scalar_type);
8511 if (!vectype)
8513 if (dump_enabled_p ())
8515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8516 "not SLPed: unsupported data-type ");
8517 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8518 scalar_type);
8519 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8521 return false;
8524 if (dump_enabled_p ())
8526 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8527 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8528 dump_printf (MSG_NOTE, "\n");
8531 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8535 if (STMT_VINFO_RELEVANT_P (stmt_info))
8537 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8538 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8539 || (is_gimple_call (stmt)
8540 && gimple_call_lhs (stmt) == NULL_TREE));
8541 *need_to_vectorize = true;
8544 if (PURE_SLP_STMT (stmt_info) && !node)
8546 dump_printf_loc (MSG_NOTE, vect_location,
8547 "handled only by SLP analysis\n");
8548 return true;
8551 ok = true;
8552 if (!bb_vinfo
8553 && (STMT_VINFO_RELEVANT_P (stmt_info)
8554 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8555 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8556 || vectorizable_conversion (stmt, NULL, NULL, node)
8557 || vectorizable_shift (stmt, NULL, NULL, node)
8558 || vectorizable_operation (stmt, NULL, NULL, node)
8559 || vectorizable_assignment (stmt, NULL, NULL, node)
8560 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8561 || vectorizable_call (stmt, NULL, NULL, node)
8562 || vectorizable_store (stmt, NULL, NULL, node)
8563 || vectorizable_reduction (stmt, NULL, NULL, node)
8564 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8565 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8566 else
8568 if (bb_vinfo)
8569 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8570 || vectorizable_conversion (stmt, NULL, NULL, node)
8571 || vectorizable_shift (stmt, NULL, NULL, node)
8572 || vectorizable_operation (stmt, NULL, NULL, node)
8573 || vectorizable_assignment (stmt, NULL, NULL, node)
8574 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8575 || vectorizable_call (stmt, NULL, NULL, node)
8576 || vectorizable_store (stmt, NULL, NULL, node)
8577 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8578 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8581 if (!ok)
8583 if (dump_enabled_p ())
8585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8586 "not vectorized: relevant stmt not ");
8587 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8588 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8591 return false;
8594 if (bb_vinfo)
8595 return true;
8597 /* Stmts that are (also) "live" (i.e. that are used out of the loop)
8598 need extra handling, except for vectorizable reductions. */
8599 if (STMT_VINFO_LIVE_P (stmt_info)
8600 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8601 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8603 if (!ok)
8605 if (dump_enabled_p ())
8607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8608 "not vectorized: live stmt not ");
8609 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8610 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8613 return false;
8616 return true;
8620 /* Function vect_transform_stmt.
8622 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8624 bool
8625 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8626 bool *grouped_store, slp_tree slp_node,
8627 slp_instance slp_node_instance)
8629 bool is_store = false;
8630 gimple *vec_stmt = NULL;
8631 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8632 bool done;
8634 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8635 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8637 switch (STMT_VINFO_TYPE (stmt_info))
8639 case type_demotion_vec_info_type:
8640 case type_promotion_vec_info_type:
8641 case type_conversion_vec_info_type:
8642 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8643 gcc_assert (done);
8644 break;
8646 case induc_vec_info_type:
8647 gcc_assert (!slp_node);
8648 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8649 gcc_assert (done);
8650 break;
8652 case shift_vec_info_type:
8653 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8654 gcc_assert (done);
8655 break;
8657 case op_vec_info_type:
8658 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8659 gcc_assert (done);
8660 break;
8662 case assignment_vec_info_type:
8663 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8664 gcc_assert (done);
8665 break;
8667 case load_vec_info_type:
8668 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8669 slp_node_instance);
8670 gcc_assert (done);
8671 break;
8673 case store_vec_info_type:
8674 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8675 gcc_assert (done);
8676 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8678 /* In case of interleaving, the whole chain is vectorized when the
8679 last store in the chain is reached. Store stmts before the last
8680 one are skipped, and there vec_stmt_info shouldn't be freed
8681 meanwhile. */
8682 *grouped_store = true;
8683 if (STMT_VINFO_VEC_STMT (stmt_info))
8684 is_store = true;
8686 else
8687 is_store = true;
8688 break;
8690 case condition_vec_info_type:
8691 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8692 gcc_assert (done);
8693 break;
8695 case comparison_vec_info_type:
8696 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8697 gcc_assert (done);
8698 break;
8700 case call_vec_info_type:
8701 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8702 stmt = gsi_stmt (*gsi);
8703 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8704 is_store = true;
8705 break;
8707 case call_simd_clone_vec_info_type:
8708 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8709 stmt = gsi_stmt (*gsi);
8710 break;
8712 case reduc_vec_info_type:
8713 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8714 gcc_assert (done);
8715 break;
8717 default:
8718 if (!STMT_VINFO_LIVE_P (stmt_info))
8720 if (dump_enabled_p ())
8721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8722 "stmt not supported.\n");
8723 gcc_unreachable ();
8727 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8728 This would break hybrid SLP vectorization. */
8729 if (slp_node)
8730 gcc_assert (!vec_stmt
8731 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8733 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8734 is being vectorized, but outside the immediately enclosing loop. */
8735 if (vec_stmt
8736 && STMT_VINFO_LOOP_VINFO (stmt_info)
8737 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8738 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8739 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8740 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8741 || STMT_VINFO_RELEVANT (stmt_info) ==
8742 vect_used_in_outer_by_reduction))
8744 struct loop *innerloop = LOOP_VINFO_LOOP (
8745 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8746 imm_use_iterator imm_iter;
8747 use_operand_p use_p;
8748 tree scalar_dest;
8749 gimple *exit_phi;
8751 if (dump_enabled_p ())
8752 dump_printf_loc (MSG_NOTE, vect_location,
8753 "Record the vdef for outer-loop vectorization.\n");
8755 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8756 (to be used when vectorizing outer-loop stmts that use the DEF of
8757 STMT). */
8758 if (gimple_code (stmt) == GIMPLE_PHI)
8759 scalar_dest = PHI_RESULT (stmt);
8760 else
8761 scalar_dest = gimple_assign_lhs (stmt);
8763 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8765 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8767 exit_phi = USE_STMT (use_p);
8768 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8773 /* Handle stmts whose DEF is used outside the loop-nest that is
8774 being vectorized. */
8775 if (slp_node)
8777 gimple *slp_stmt;
8778 int i;
8779 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8781 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8782 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8783 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8785 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8786 &vec_stmt);
8787 gcc_assert (done);
8791 else if (STMT_VINFO_LIVE_P (stmt_info)
8792 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8794 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8795 gcc_assert (done);
8798 if (vec_stmt)
8799 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8801 return is_store;
8805 /* Remove a group of stores (for SLP or interleaving), free their
8806 stmt_vec_info. */
8808 void
8809 vect_remove_stores (gimple *first_stmt)
8811 gimple *next = first_stmt;
8812 gimple *tmp;
8813 gimple_stmt_iterator next_si;
8815 while (next)
8817 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8819 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8820 if (is_pattern_stmt_p (stmt_info))
8821 next = STMT_VINFO_RELATED_STMT (stmt_info);
8822 /* Free the attached stmt_vec_info and remove the stmt. */
8823 next_si = gsi_for_stmt (next);
8824 unlink_stmt_vdef (next);
8825 gsi_remove (&next_si, true);
8826 release_defs (next);
8827 free_stmt_vec_info (next);
8828 next = tmp;
8833 /* Function new_stmt_vec_info.
8835 Create and initialize a new stmt_vec_info struct for STMT. */
8837 stmt_vec_info
8838 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8840 stmt_vec_info res;
8841 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8843 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8844 STMT_VINFO_STMT (res) = stmt;
8845 res->vinfo = vinfo;
8846 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8847 STMT_VINFO_LIVE_P (res) = false;
8848 STMT_VINFO_VECTYPE (res) = NULL;
8849 STMT_VINFO_VEC_STMT (res) = NULL;
8850 STMT_VINFO_VECTORIZABLE (res) = true;
8851 STMT_VINFO_IN_PATTERN_P (res) = false;
8852 STMT_VINFO_RELATED_STMT (res) = NULL;
8853 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8854 STMT_VINFO_DATA_REF (res) = NULL;
8855 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8856 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8858 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8859 STMT_VINFO_DR_OFFSET (res) = NULL;
8860 STMT_VINFO_DR_INIT (res) = NULL;
8861 STMT_VINFO_DR_STEP (res) = NULL;
8862 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8864 if (gimple_code (stmt) == GIMPLE_PHI
8865 && is_loop_header_bb_p (gimple_bb (stmt)))
8866 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8867 else
8868 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8870 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8871 STMT_SLP_TYPE (res) = loop_vect;
8872 STMT_VINFO_NUM_SLP_USES (res) = 0;
8874 GROUP_FIRST_ELEMENT (res) = NULL;
8875 GROUP_NEXT_ELEMENT (res) = NULL;
8876 GROUP_SIZE (res) = 0;
8877 GROUP_STORE_COUNT (res) = 0;
8878 GROUP_GAP (res) = 0;
8879 GROUP_SAME_DR_STMT (res) = NULL;
8881 return res;
8885 /* Create the stmt_vec_info vector. */
8887 void
8888 init_stmt_vec_info_vec (void)
8890 gcc_assert (!stmt_vec_info_vec.exists ());
8891 stmt_vec_info_vec.create (50);
8895 /* Free the stmt_vec_info vector. */
8897 void
8898 free_stmt_vec_info_vec (void)
8900 unsigned int i;
8901 stmt_vec_info info;
8902 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8903 if (info != NULL)
8904 free_stmt_vec_info (STMT_VINFO_STMT (info));
8905 gcc_assert (stmt_vec_info_vec.exists ());
8906 stmt_vec_info_vec.release ();
8910 /* Free stmt vectorization related info. */
8912 void
8913 free_stmt_vec_info (gimple *stmt)
8915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8917 if (!stmt_info)
8918 return;
8920 /* Check if this statement has a related "pattern stmt"
8921 (introduced by the vectorizer during the pattern recognition
8922 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8923 too. */
8924 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8926 stmt_vec_info patt_info
8927 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8928 if (patt_info)
8930 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8931 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8932 gimple_set_bb (patt_stmt, NULL);
8933 tree lhs = gimple_get_lhs (patt_stmt);
8934 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8935 release_ssa_name (lhs);
8936 if (seq)
8938 gimple_stmt_iterator si;
8939 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8941 gimple *seq_stmt = gsi_stmt (si);
8942 gimple_set_bb (seq_stmt, NULL);
8943 lhs = gimple_get_lhs (seq_stmt);
8944 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8945 release_ssa_name (lhs);
8946 free_stmt_vec_info (seq_stmt);
8949 free_stmt_vec_info (patt_stmt);
8953 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8954 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8955 set_vinfo_for_stmt (stmt, NULL);
8956 free (stmt_info);
8960 /* Function get_vectype_for_scalar_type_and_size.
8962 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8963 by the target. */
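/* For example (a sketch): for a 4-byte int and SIZE 16 this yields a
   4-element integer vector type, provided the target supports a 16-byte
   vector mode; with SIZE 0 the number of elements follows from the
   target's preferred SIMD mode instead.  */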
8965 static tree
8966 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8968 tree orig_scalar_type = scalar_type;
8969 machine_mode inner_mode = TYPE_MODE (scalar_type);
8970 machine_mode simd_mode;
8971 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8972 int nunits;
8973 tree vectype;
8975 if (nbytes == 0)
8976 return NULL_TREE;
8978 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8979 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8980 return NULL_TREE;
8982 /* For vector types of elements whose mode precision doesn't
8983 match their type's precision we use an element type of mode
8984 precision. The vectorization routines will have to make sure
8985 they support the proper result truncation/extension.
8986 We also make sure to build vector types with INTEGER_TYPE
8987 component type only. */
8988 if (INTEGRAL_TYPE_P (scalar_type)
8989 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8990 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8991 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8992 TYPE_UNSIGNED (scalar_type));
8994 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8995 When the component mode passes the above test simply use a type
8996 corresponding to that mode. The theory is that any use that
8997 would cause problems with this will disable vectorization anyway. */
8998 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8999 && !INTEGRAL_TYPE_P (scalar_type))
9000 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9002 /* We can't build a vector type of elements with alignment bigger than
9003 their size. */
9004 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9005 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9006 TYPE_UNSIGNED (scalar_type));
9008 /* If we fell back to using the mode, fail if there was
9009 no scalar type for it. */
9010 if (scalar_type == NULL_TREE)
9011 return NULL_TREE;
9013 /* If no size was supplied use the mode the target prefers. Otherwise
9014 look up a vector mode of the specified size. */
9015 if (size == 0)
9016 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9017 else
9018 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9019 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9020 if (nunits <= 1)
9021 return NULL_TREE;
9023 vectype = build_vector_type (scalar_type, nunits);
9025 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9026 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9027 return NULL_TREE;
9029 /* Re-attach the address-space qualifier if we canonicalized the scalar
9030 type. */
9031 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9032 return build_qualified_type
9033 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9035 return vectype;
9038 unsigned int current_vector_size;
9040 /* Function get_vectype_for_scalar_type.
9042 Returns the vector type corresponding to SCALAR_TYPE as supported
9043 by the target. */
9045 tree
9046 get_vectype_for_scalar_type (tree scalar_type)
9048 tree vectype;
9049 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9050 current_vector_size);
9051 if (vectype
9052 && current_vector_size == 0)
9053 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9054 return vectype;
9057 /* Function get_mask_type_for_scalar_type.
9059 Returns the mask type corresponding to a result of comparison
9060 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9062 tree
9063 get_mask_type_for_scalar_type (tree scalar_type)
9065 tree vectype = get_vectype_for_scalar_type (scalar_type);
9067 if (!vectype)
9068 return NULL;
9070 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9071 current_vector_size);
9074 /* Function get_same_sized_vectype
9076 Returns a vector type corresponding to SCALAR_TYPE with the same size
9077 as VECTOR_TYPE, if supported by the target. */
9079 tree
9080 get_same_sized_vectype (tree scalar_type, tree vector_type)
9082 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9083 return build_same_sized_truth_vector_type (vector_type);
9085 return get_vectype_for_scalar_type_and_size
9086 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9089 /* Function vect_is_simple_use.
9091 Input:
9092 VINFO - the vect info of the loop or basic block that is being vectorized.
9093 OPERAND - operand in the loop or bb.
9094 Output:
9095 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9096 DT - the type of definition
9098 Returns whether a stmt with OPERAND can be vectorized.
9099 For loops, supportable operands are constants, loop invariants, and operands
9100 that are defined by the current iteration of the loop. Unsupportable
9101 operands are those that are defined by a previous iteration of the loop (as
9102 is the case in reduction/induction computations).
9103 For basic blocks, supportable operands are constants and bb invariants.
9104 For now, operands defined outside the basic block are not supported. */
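/* For example, in
     for (i = 0; i < n; i++)
       a[i] = b[i] * c;
   the operand c is defined outside the loop (vect_external_def), a
   constant multiplier would be vect_constant_def, and the value loaded
   from b[i] is defined inside the loop (vect_internal_def).  */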
9106 bool
9107 vect_is_simple_use (tree operand, vec_info *vinfo,
9108 gimple **def_stmt, enum vect_def_type *dt)
9110 *def_stmt = NULL;
9111 *dt = vect_unknown_def_type;
9113 if (dump_enabled_p ())
9115 dump_printf_loc (MSG_NOTE, vect_location,
9116 "vect_is_simple_use: operand ");
9117 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9118 dump_printf (MSG_NOTE, "\n");
9121 if (CONSTANT_CLASS_P (operand))
9123 *dt = vect_constant_def;
9124 return true;
9127 if (is_gimple_min_invariant (operand))
9129 *dt = vect_external_def;
9130 return true;
9133 if (TREE_CODE (operand) != SSA_NAME)
9135 if (dump_enabled_p ())
9136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9137 "not ssa-name.\n");
9138 return false;
9141 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9143 *dt = vect_external_def;
9144 return true;
9147 *def_stmt = SSA_NAME_DEF_STMT (operand);
9148 if (dump_enabled_p ())
9150 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9151 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9154 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9155 *dt = vect_external_def;
9156 else
9158 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9159 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9162 if (dump_enabled_p ())
9164 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9165 switch (*dt)
9167 case vect_uninitialized_def:
9168 dump_printf (MSG_NOTE, "uninitialized\n");
9169 break;
9170 case vect_constant_def:
9171 dump_printf (MSG_NOTE, "constant\n");
9172 break;
9173 case vect_external_def:
9174 dump_printf (MSG_NOTE, "external\n");
9175 break;
9176 case vect_internal_def:
9177 dump_printf (MSG_NOTE, "internal\n");
9178 break;
9179 case vect_induction_def:
9180 dump_printf (MSG_NOTE, "induction\n");
9181 break;
9182 case vect_reduction_def:
9183 dump_printf (MSG_NOTE, "reduction\n");
9184 break;
9185 case vect_double_reduction_def:
9186 dump_printf (MSG_NOTE, "double reduction\n");
9187 break;
9188 case vect_nested_cycle:
9189 dump_printf (MSG_NOTE, "nested cycle\n");
9190 break;
9191 case vect_unknown_def_type:
9192 dump_printf (MSG_NOTE, "unknown\n");
9193 break;
9197 if (*dt == vect_unknown_def_type)
9199 if (dump_enabled_p ())
9200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9201 "Unsupported pattern.\n");
9202 return false;
9205 switch (gimple_code (*def_stmt))
9207 case GIMPLE_PHI:
9208 case GIMPLE_ASSIGN:
9209 case GIMPLE_CALL:
9210 break;
9211 default:
9212 if (dump_enabled_p ())
9213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9214 "unsupported defining stmt:\n");
9215 return false;
9218 return true;
9221 /* Function vect_is_simple_use.
9223 Same as vect_is_simple_use but also determines the vector operand
9224 type of OPERAND and stores it to *VECTYPE. If the definition of
9225 OPERAND is vect_uninitialized_def, vect_constant_def or
9226 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9227 is responsible for computing the best suited vector type for the
9228 scalar operand. */
9230 bool
9231 vect_is_simple_use (tree operand, vec_info *vinfo,
9232 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9234 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9235 return false;
9237 /* Now get a vector type if the def is internal, otherwise supply
9238 NULL_TREE and leave it up to the caller to figure out a proper
9239 type for the use stmt. */
9240 if (*dt == vect_internal_def
9241 || *dt == vect_induction_def
9242 || *dt == vect_reduction_def
9243 || *dt == vect_double_reduction_def
9244 || *dt == vect_nested_cycle)
9246 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9248 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9249 && !STMT_VINFO_RELEVANT (stmt_info)
9250 && !STMT_VINFO_LIVE_P (stmt_info))
9251 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9253 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9254 gcc_assert (*vectype != NULL_TREE);
9256 else if (*dt == vect_uninitialized_def
9257 || *dt == vect_constant_def
9258 || *dt == vect_external_def)
9259 *vectype = NULL_TREE;
9260 else
9261 gcc_unreachable ();
9263 return true;
9267 /* Function supportable_widening_operation
9269 Check whether an operation represented by the code CODE is a
9270 widening operation that is supported by the target platform in
9271 vector form (i.e., when operating on arguments of type VECTYPE_IN
9272 producing a result of type VECTYPE_OUT).
9274 Widening operations we currently support are NOP (CONVERT), FLOAT, WIDEN_MULT,
9275 WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if these operations are supported
9276 by the target platform either directly (via vector tree-codes), or via
9277 target builtins.
9279 Output:
9280 - CODE1 and CODE2 are codes of vector operations to be used when
9281 vectorizing the operation, if available.
9282 - MULTI_STEP_CVT determines the number of required intermediate steps in
9283 case of multi-step conversion (like char->short->int - in that case
9284 MULTI_STEP_CVT will be 1).
9285 - INTERM_TYPES contains the intermediate type required to perform the
9286 widening operation (short in the above example). */
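/* For example (a sketch): widening a char vector to int is performed as
   char->short->int, each step via VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR
   (or a target equivalent); MULTI_STEP_CVT is then 1 and INTERM_TYPES
   holds the intermediate short vector type.  */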
9288 bool
9289 supportable_widening_operation (enum tree_code code, gimple *stmt,
9290 tree vectype_out, tree vectype_in,
9291 enum tree_code *code1, enum tree_code *code2,
9292 int *multi_step_cvt,
9293 vec<tree> *interm_types)
9295 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9296 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9297 struct loop *vect_loop = NULL;
9298 machine_mode vec_mode;
9299 enum insn_code icode1, icode2;
9300 optab optab1, optab2;
9301 tree vectype = vectype_in;
9302 tree wide_vectype = vectype_out;
9303 enum tree_code c1, c2;
9304 int i;
9305 tree prev_type, intermediate_type;
9306 machine_mode intermediate_mode, prev_mode;
9307 optab optab3, optab4;
9309 *multi_step_cvt = 0;
9310 if (loop_info)
9311 vect_loop = LOOP_VINFO_LOOP (loop_info);
9313 switch (code)
9315 case WIDEN_MULT_EXPR:
9316 /* The result of a vectorized widening operation usually requires
9317 two vectors (because the widened results do not fit into one vector).
9318 The generated vector results would normally be expected to be
9319 generated in the same order as in the original scalar computation,
9320 i.e. if 8 results are generated in each vector iteration, they are
9321 to be organized as follows:
9322 vect1: [res1,res2,res3,res4],
9323 vect2: [res5,res6,res7,res8].
9325 However, in the special case that the result of the widening
9326 operation is used in a reduction computation only, the order doesn't
9327 matter (because when vectorizing a reduction we change the order of
9328 the computation). Some targets can take advantage of this and
9329 generate more efficient code. For example, targets like Altivec,
9330 that support widen_mult using a sequence of {mult_even,mult_odd}
9331 generate the following vectors:
9332 vect1: [res1,res3,res5,res7],
9333 vect2: [res2,res4,res6,res8].
9335 When vectorizing outer-loops, we execute the inner-loop sequentially
9336 (each vectorized inner-loop iteration contributes to VF outer-loop
9337 iterations in parallel). We therefore don't allow the order of the
9338 computation in the inner-loop to be changed during outer-loop
9339 vectorization. */
9340 /* TODO: Another case in which order doesn't *really* matter is when we
9341 widen and then contract again, e.g. (short)((int)x * y >> 8).
9342 Normally, pack_trunc performs an even/odd permute, whereas the
9343 repack from an even/odd expansion would be an interleave, which
9344 would be significantly simpler for e.g. AVX2. */
9345 /* In any case, in order to avoid duplicating the code below, recurse
9346 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9347 are properly set up for the caller. If we fail, we'll continue with
9348 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9349 if (vect_loop
9350 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9351 && !nested_in_vect_loop_p (vect_loop, stmt)
9352 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9353 stmt, vectype_out, vectype_in,
9354 code1, code2, multi_step_cvt,
9355 interm_types))
9356 {
9357 /* Elements in a vector with the vect_used_by_reduction property cannot
9358 be reordered if the use chain with this property does not have the
9359 same operation. One such example is s += a * b, where elements
9360 in a and b cannot be reordered. Here we check if the vector defined
9361 by STMT is only directly used in the reduction statement. */
9362 tree lhs = gimple_assign_lhs (stmt);
9363 use_operand_p dummy;
9364 gimple *use_stmt;
9365 stmt_vec_info use_stmt_info = NULL;
9366 if (single_imm_use (lhs, &dummy, &use_stmt)
9367 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9368 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9369 return true;
9370 }
9371 c1 = VEC_WIDEN_MULT_LO_EXPR;
9372 c2 = VEC_WIDEN_MULT_HI_EXPR;
9373 break;
9375 case DOT_PROD_EXPR:
9376 c1 = DOT_PROD_EXPR;
9377 c2 = DOT_PROD_EXPR;
9378 break;
9380 case SAD_EXPR:
9381 c1 = SAD_EXPR;
9382 c2 = SAD_EXPR;
9383 break;
9385 case VEC_WIDEN_MULT_EVEN_EXPR:
9386 /* Support the recursion induced just above. */
9387 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9388 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9389 break;
9391 case WIDEN_LSHIFT_EXPR:
9392 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9393 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9394 break;
9396 CASE_CONVERT:
9397 c1 = VEC_UNPACK_LO_EXPR;
9398 c2 = VEC_UNPACK_HI_EXPR;
9399 break;
9401 case FLOAT_EXPR:
9402 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9403 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9404 break;
9406 case FIX_TRUNC_EXPR:
9407 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9408 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9409 computing the operation. */
9410 return false;
9412 default:
9413 gcc_unreachable ();
9414 }
9416 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9417 std::swap (c1, c2);
9419 if (code == FIX_TRUNC_EXPR)
9420 {
9421 /* The signedness is determined from the output operand. */
9422 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9423 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9424 }
9425 else
9426 {
9427 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9428 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9429 }
9431 if (!optab1 || !optab2)
9432 return false;
9434 vec_mode = TYPE_MODE (vectype);
9435 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9436 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9437 return false;
9439 *code1 = c1;
9440 *code2 = c2;
9442 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9443 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9444 /* For scalar masks we may have different boolean
9445 vector types having the same QImode. Thus we
9446 additionally check the number of elements. */
9447 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9448 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9449 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9451 /* Check if it's a multi-step conversion that can be done using intermediate
9452 types. */
9454 prev_type = vectype;
9455 prev_mode = vec_mode;
9457 if (!CONVERT_EXPR_CODE_P (code))
9458 return false;
9460 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9461 intermediate steps in the promotion sequence. We try
9462 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9463 not. */
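/* Worked example (illustrative only, assuming a target that provides
   QI->HI and HI->SI vector unpacks): promoting a V16QImode input to a
   V4SImode result needs one intermediate step, so on success the loop
   below leaves

     *multi_step_cvt    == 1
     (*interm_types)[0] == the V8HImode (short) vector type
     *code1 / *code2    == the QI->HI codes chosen above

   which matches the char->short->int example in the function comment.  */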
9464 interm_types->create (MAX_INTERM_CVT_STEPS);
9465 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9466 {
9467 intermediate_mode = insn_data[icode1].operand[0].mode;
9468 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9469 {
9470 intermediate_type
9471 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9472 current_vector_size);
9473 if (intermediate_mode != TYPE_MODE (intermediate_type))
9474 return false;
9475 }
9476 else
9477 intermediate_type
9478 = lang_hooks.types.type_for_mode (intermediate_mode,
9479 TYPE_UNSIGNED (prev_type));
9481 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9482 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9484 if (!optab3 || !optab4
9485 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9486 || insn_data[icode1].operand[0].mode != intermediate_mode
9487 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9488 || insn_data[icode2].operand[0].mode != intermediate_mode
9489 || ((icode1 = optab_handler (optab3, intermediate_mode))
9490 == CODE_FOR_nothing)
9491 || ((icode2 = optab_handler (optab4, intermediate_mode))
9492 == CODE_FOR_nothing))
9493 break;
9495 interm_types->quick_push (intermediate_type);
9496 (*multi_step_cvt)++;
9498 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9499 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9500 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9501 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9502 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9504 prev_type = intermediate_type;
9505 prev_mode = intermediate_mode;
9506 }
9508 interm_types->release ();
9509 return false;
9510 }
9513 /* Function supportable_narrowing_operation
9515 Check whether an operation represented by the code CODE is a
9516 narrowing operation that is supported by the target platform in
9517 vector form (i.e., when operating on arguments of type VECTYPE_IN
9518 and producing a result of type VECTYPE_OUT).
9520 Narrowing operations we currently support are NOP (CONVERT) and
9521 FIX_TRUNC. This function checks if these operations are supported by
9522 the target platform directly via vector tree-codes.
9524 Output:
9525 - CODE1 is the code of a vector operation to be used when
9526 vectorizing the operation, if available.
9527 - MULTI_STEP_CVT determines the number of required intermediate steps in
9528 case of multi-step conversion (like int->short->char - in that case
9529 MULTI_STEP_CVT will be 1).
9530 - INTERM_TYPES contains the intermediate type required to perform the
9531 narrowing operation (short in the above example). */
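/* Example (illustrative sketch; the variable names are hypothetical):
   a caller might query this routine along these lines.

     enum tree_code code1;
     int multi_step_cvt;
     vec<tree> interm_types = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					  &code1, &multi_step_cvt,
					  &interm_types))
       {
	 ... emit CODE1, packing through INTERM_TYPES when MULTI_STEP_CVT > 0 ...
       }
     interm_types.release ();  */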
9533 bool
9534 supportable_narrowing_operation (enum tree_code code,
9535 tree vectype_out, tree vectype_in,
9536 enum tree_code *code1, int *multi_step_cvt,
9537 vec<tree> *interm_types)
9538 {
9539 machine_mode vec_mode;
9540 enum insn_code icode1;
9541 optab optab1, interm_optab;
9542 tree vectype = vectype_in;
9543 tree narrow_vectype = vectype_out;
9544 enum tree_code c1;
9545 tree intermediate_type, prev_type;
9546 machine_mode intermediate_mode, prev_mode;
9547 int i;
9548 bool uns;
9550 *multi_step_cvt = 0;
9551 switch (code)
9552 {
9553 CASE_CONVERT:
9554 c1 = VEC_PACK_TRUNC_EXPR;
9555 break;
9557 case FIX_TRUNC_EXPR:
9558 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9559 break;
9561 case FLOAT_EXPR:
9562 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9563 tree code and optabs used for computing the operation. */
9564 return false;
9566 default:
9567 gcc_unreachable ();
9568 }
9570 if (code == FIX_TRUNC_EXPR)
9571 /* The signedness is determined from the output operand. */
9572 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9573 else
9574 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9576 if (!optab1)
9577 return false;
9579 vec_mode = TYPE_MODE (vectype);
9580 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9581 return false;
9583 *code1 = c1;
9585 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9586 /* For scalar masks we may have different boolean
9587 vector types having the same QImode. Thus we
9588 additionally check the number of elements. */
9589 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9590 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9591 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9593 /* Check if it's a multi-step conversion that can be done using intermediate
9594 types. */
9595 prev_mode = vec_mode;
9596 prev_type = vectype;
9597 if (code == FIX_TRUNC_EXPR)
9598 uns = TYPE_UNSIGNED (vectype_out);
9599 else
9600 uns = TYPE_UNSIGNED (vectype);
9602 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9603 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9604 costly than signed. */
9605 if (code == FIX_TRUNC_EXPR && uns)
9606 {
9607 enum insn_code icode2;
9609 intermediate_type
9610 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9611 interm_optab
9612 = optab_for_tree_code (c1, intermediate_type, optab_default);
9613 if (interm_optab != unknown_optab
9614 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9615 && insn_data[icode1].operand[0].mode
9616 == insn_data[icode2].operand[0].mode)
9617 {
9618 uns = false;
9619 optab1 = interm_optab;
9620 icode1 = icode2;
9621 }
9622 }
9624 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9625 intermediate steps in the narrowing sequence. We try
9626 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
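/* Worked example (illustrative only, assuming a target that provides
   SI->HI and HI->QI vector pack_trunc): demoting a V4SImode input to a
   V16QImode result needs one intermediate step, so on success the loop
   below leaves

     *multi_step_cvt    == 1
     (*interm_types)[0] == the V8HImode (short) vector type

   which matches the int->short->char example in the function comment.  */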
9627 interm_types->create (MAX_INTERM_CVT_STEPS);
9628 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9629 {
9630 intermediate_mode = insn_data[icode1].operand[0].mode;
9631 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9632 {
9633 intermediate_type
9634 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9635 current_vector_size);
9636 if (intermediate_mode != TYPE_MODE (intermediate_type))
9637 return false;
9638 }
9639 else
9640 intermediate_type
9641 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9642 interm_optab
9643 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9644 optab_default);
9645 if (!interm_optab
9646 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9647 || insn_data[icode1].operand[0].mode != intermediate_mode
9648 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9649 == CODE_FOR_nothing))
9650 break;
9652 interm_types->quick_push (intermediate_type);
9653 (*multi_step_cvt)++;
9655 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9656 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9657 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9658 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9660 prev_mode = intermediate_mode;
9661 prev_type = intermediate_type;
9662 optab1 = interm_optab;
9663 }
9665 interm_types->release ();
9666 return false;