1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "dumpfile.h"
27 #include "tm.h"
28 #include "ggc.h"
29 #include "tree.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "recog.h" /* FIXME: for insn_data */
37 #include "optabs.h"
38 #include "diagnostic-core.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
42 /* For lang_hooks.types.type_for_mode. */
43 #include "langhooks.h"
45 /* Return the vectorized type for the given statement. */
47 tree
48 stmt_vectype (struct _stmt_vec_info *stmt_info)
50 return STMT_VINFO_VECTYPE (stmt_info);
53 /* Return TRUE iff the given statement is in an inner loop relative to
54 the loop being vectorized. */
55 bool
56 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
58 gimple stmt = STMT_VINFO_STMT (stmt_info);
59 basic_block bb = gimple_bb (stmt);
60 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
61 struct loop* loop;
63 if (!loop_vinfo)
64 return false;
66 loop = LOOP_VINFO_LOOP (loop_vinfo);
68 return (bb->loop_father == loop->inner);
71 /* Record the cost of a statement, either by directly informing the
72 target model or by saving it in a vector for later processing.
73 Return a preliminary estimate of the statement's cost. */
75 unsigned
76 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
77 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
78 int misalign, enum vect_cost_model_location where)
80 if (body_cost_vec)
82 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
83 add_stmt_info_to_vec (body_cost_vec, count, kind,
84 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
85 misalign);
86 return (unsigned)
87 (builtin_vectorization_cost (kind, vectype, misalign) * count);
90 else
92 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
93 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
94 void *target_cost_data;
96 if (loop_vinfo)
97 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
98 else
99 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
101 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
102 misalign, where);
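/* As a rough illustration of the two paths above (not an exhaustive
   description of the cost hooks): when BODY_COST_VEC is non-null the
   cost is merely queued via add_stmt_info_to_vec for later processing,
   and the value returned is the simple estimate
   builtin_vectorization_cost (KIND, VECTYPE, MISALIGN) * COUNT, e.g.
   COUNT == 4 with a per-statement cost of 1 gives 4; when BODY_COST_VEC
   is null the target's add_stmt_cost data is updated immediately.  */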
106 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 static tree
109 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
111 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
112 "vect_array");
115 /* ARRAY is an array of vectors created by create_vector_array.
116 Return an SSA_NAME for the vector in index N. The reference
117 is part of the vectorization of STMT and the vector is associated
118 with scalar destination SCALAR_DEST. */
120 static tree
121 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
122 tree array, unsigned HOST_WIDE_INT n)
124 tree vect_type, vect, vect_name, array_ref;
125 gimple new_stmt;
127 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
128 vect_type = TREE_TYPE (TREE_TYPE (array));
129 vect = vect_create_destination_var (scalar_dest, vect_type);
130 array_ref = build4 (ARRAY_REF, vect_type, array,
131 build_int_cst (size_type_node, n),
132 NULL_TREE, NULL_TREE);
134 new_stmt = gimple_build_assign (vect, array_ref);
135 vect_name = make_ssa_name (vect, new_stmt);
136 gimple_assign_set_lhs (new_stmt, vect_name);
137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
139 return vect_name;
142 /* ARRAY is an array of vectors created by create_vector_array.
143 Emit code to store SSA_NAME VECT in index N of the array.
144 The store is part of the vectorization of STMT. */
146 static void
147 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
148 tree array, unsigned HOST_WIDE_INT n)
150 tree array_ref;
151 gimple new_stmt;
153 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
154 build_int_cst (size_type_node, n),
155 NULL_TREE, NULL_TREE);
157 new_stmt = gimple_build_assign (array_ref, vect);
158 vect_finish_stmt_generation (stmt, new_stmt, gsi);
161 /* PTR is a pointer to an array of type TYPE. Return a representation
162 of *PTR. The memory reference replaces those in FIRST_DR
163 (and its group). */
165 static tree
166 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
168 tree mem_ref, alias_ptr_type;
170 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
171 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
172 /* Arrays have the same alignment as their type. */
173 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
174 return mem_ref;
177 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
179 /* Function vect_mark_relevant.
181 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 static void
184 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
185 enum vect_relevant relevant, bool live_p,
186 bool used_in_pattern)
188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
189 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
190 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
191 gimple pattern_stmt;
193 if (dump_enabled_p ())
194 dump_printf_loc (MSG_NOTE, vect_location,
195 "mark relevant %d, live %d.", relevant, live_p);
197 /* If this stmt is an original stmt in a pattern, we might need to mark its
198 related pattern stmt instead of the original stmt. However, such stmts
 199    may have their own uses that are not in any pattern; in such cases the
200 stmt itself should be marked. */
201 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
203 bool found = false;
204 if (!used_in_pattern)
206 imm_use_iterator imm_iter;
207 use_operand_p use_p;
208 gimple use_stmt;
209 tree lhs;
210 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
213 if (is_gimple_assign (stmt))
214 lhs = gimple_assign_lhs (stmt);
215 else
216 lhs = gimple_call_lhs (stmt);
 218          /* This use is an out-of-pattern use; if LHS has other uses that are
219 pattern uses, we should mark the stmt itself, and not the pattern
220 stmt. */
221 if (TREE_CODE (lhs) == SSA_NAME)
222 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
224 if (is_gimple_debug (USE_STMT (use_p)))
225 continue;
226 use_stmt = USE_STMT (use_p);
228 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
229 continue;
231 if (vinfo_for_stmt (use_stmt)
232 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
234 found = true;
235 break;
240 if (!found)
242 /* This is the last stmt in a sequence that was detected as a
243 pattern that can potentially be vectorized. Don't mark the stmt
244 as relevant/live because it's not going to be vectorized.
245 Instead mark the pattern-stmt that replaces it. */
247 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
249 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE, vect_location,
251 "last stmt in pattern. don't mark"
252 " relevant/live.");
253 stmt_info = vinfo_for_stmt (pattern_stmt);
254 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
255 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
256 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
257 stmt = pattern_stmt;
261 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
262 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
263 STMT_VINFO_RELEVANT (stmt_info) = relevant;
265 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
266 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_NOTE, vect_location,
270 "already marked relevant/live.");
271 return;
274 worklist->safe_push (stmt);
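/* A rough example of the pattern handling above: if STMT is the last
   statement of a recognized pattern and its LHS has no uses outside
   that pattern, STMT itself will not be vectorized, so the related
   pattern statement (STMT_VINFO_RELATED_STMT) is marked and pushed on
   the worklist in its place; if the LHS does have out-of-pattern uses,
   STMT keeps the relevant/live marks itself.  */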
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
 286    - it is a control stmt in the loop (other than the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt))
313 if (dump_enabled_p ())
314 dump_printf_loc (MSG_NOTE, vect_location,
315 "vec_stmt_relevant_p: stmt has vdefs.");
316 *relevant = vect_used_in_scope;
319 /* uses outside the loop. */
320 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
322 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
324 basic_block bb = gimple_bb (USE_STMT (use_p));
325 if (!flow_bb_inside_loop_p (loop, bb))
327 if (dump_enabled_p ())
328 dump_printf_loc (MSG_NOTE, vect_location,
329 "vec_stmt_relevant_p: used out of loop.");
331 if (is_gimple_debug (USE_STMT (use_p)))
332 continue;
334 /* We expect all such uses to be in the loop exit phis
335 (because of loop closed form) */
336 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
337 gcc_assert (bb == single_exit (loop)->dest);
339 *live_p = true;
344 return (*live_p || *relevant);
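/* For illustration: a store "a[i] = x" is relevant because it has a
   vdef; a statement whose only use is in a loop-closed exit phi (its
   value is read after the loop) is marked live; a statement that is
   used neither way stays vect_unused_in_scope and not live.  */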
348 /* Function exist_non_indexing_operands_for_use_p
350 USE is one of the uses attached to STMT. Check if USE is
351 used in STMT for anything other than indexing an array. */
353 static bool
354 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
356 tree operand;
357 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
359 /* USE corresponds to some operand in STMT. If there is no data
360 reference in STMT, then any operand that corresponds to USE
361 is not indexing an array. */
362 if (!STMT_VINFO_DATA_REF (stmt_info))
363 return true;
 365   /* STMT has a data_ref.  FORNOW this means that it is of one of
366 the following forms:
367 -1- ARRAY_REF = var
368 -2- var = ARRAY_REF
369 (This should have been verified in analyze_data_refs).
371 'var' in the second case corresponds to a def, not a use,
372 so USE cannot correspond to any operands that are not used
373 for array indexing.
375 Therefore, all we need to check is if STMT falls into the
376 first case, and whether var corresponds to USE. */
378 if (!gimple_assign_copy_p (stmt))
379 return false;
380 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
381 return false;
382 operand = gimple_assign_rhs1 (stmt);
383 if (TREE_CODE (operand) != SSA_NAME)
384 return false;
386 if (operand == use)
387 return true;
389 return false;
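/* For illustration, given the store "a[i_1] = x_2" (form -1- above):
   a call with USE == x_2 returns true, since x_2 is the stored value
   rather than part of the address computation, whereas a call with
   USE == i_1 returns false because i_1 only appears in the ARRAY_REF
   index.  */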
 394 /* Function process_use.
396 Inputs:
397 - a USE in STMT in a loop represented by LOOP_VINFO
398 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
399 that defined USE. This is done by calling mark_relevant and passing it
400 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
401 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
402 be performed.
404 Outputs:
405 Generally, LIVE_P and RELEVANT are used to define the liveness and
406 relevance info of the DEF_STMT of this USE:
407 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
408 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
409 Exceptions:
410 - case 1: If USE is used only for address computations (e.g. array indexing),
411 which does not need to be directly vectorized, then the liveness/relevance
412 of the respective DEF_STMT is left unchanged.
413 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 414    skip DEF_STMT because it had already been processed.
415 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
416 be modified accordingly.
418 Return true if everything is as expected. Return false otherwise. */
420 static bool
421 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
422 enum vect_relevant relevant, vec<gimple> *worklist,
423 bool force)
425 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
426 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
427 stmt_vec_info dstmt_vinfo;
428 basic_block bb, def_bb;
429 tree def;
430 gimple def_stmt;
431 enum vect_def_type dt;
433 /* case 1: we are only interested in uses that need to be vectorized. Uses
434 that are used for address computation are not considered relevant. */
435 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
436 return true;
438 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
440 if (dump_enabled_p ())
441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
442 "not vectorized: unsupported use in stmt.");
443 return false;
446 if (!def_stmt || gimple_nop_p (def_stmt))
447 return true;
449 def_bb = gimple_bb (def_stmt);
450 if (!flow_bb_inside_loop_p (loop, def_bb))
452 if (dump_enabled_p ())
453 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
454 return true;
457 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
458 DEF_STMT must have already been processed, because this should be the
459 only way that STMT, which is a reduction-phi, was put in the worklist,
460 as there should be no other uses for DEF_STMT in the loop. So we just
461 check that everything is as expected, and we are done. */
462 dstmt_vinfo = vinfo_for_stmt (def_stmt);
463 bb = gimple_bb (stmt);
464 if (gimple_code (stmt) == GIMPLE_PHI
465 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
466 && gimple_code (def_stmt) != GIMPLE_PHI
467 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
468 && bb->loop_father == def_bb->loop_father)
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_NOTE, vect_location,
472 "reduc-stmt defining reduc-phi in the same nest.");
473 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
474 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
475 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
476 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
477 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
478 return true;
481 /* case 3a: outer-loop stmt defining an inner-loop stmt:
482 outer-loop-header-bb:
483 d = def_stmt
484 inner-loop:
485 stmt # use (d)
486 outer-loop-tail-bb:
487 ... */
488 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_NOTE, vect_location,
492 "outer-loop def-stmt defining inner-loop stmt.");
494 switch (relevant)
496 case vect_unused_in_scope:
497 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
498 vect_used_in_scope : vect_unused_in_scope;
499 break;
501 case vect_used_in_outer_by_reduction:
502 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
503 relevant = vect_used_by_reduction;
504 break;
506 case vect_used_in_outer:
507 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
508 relevant = vect_used_in_scope;
509 break;
511 case vect_used_in_scope:
512 break;
514 default:
515 gcc_unreachable ();
519 /* case 3b: inner-loop stmt defining an outer-loop stmt:
520 outer-loop-header-bb:
522 inner-loop:
523 d = def_stmt
524 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
525 stmt # use (d) */
526 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
528 if (dump_enabled_p ())
529 dump_printf_loc (MSG_NOTE, vect_location,
530 "inner-loop def-stmt defining outer-loop stmt.");
532 switch (relevant)
534 case vect_unused_in_scope:
535 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
536 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
537 vect_used_in_outer_by_reduction : vect_unused_in_scope;
538 break;
540 case vect_used_by_reduction:
541 relevant = vect_used_in_outer_by_reduction;
542 break;
544 case vect_used_in_scope:
545 relevant = vect_used_in_outer;
546 break;
548 default:
549 gcc_unreachable ();
553 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
554 is_pattern_stmt_p (stmt_vinfo));
555 return true;
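/* A condensed view of the cross-nest adjustments above (cases 3a/3b):
   for an outer-loop definition used by an inner-loop statement,
   vect_used_in_outer becomes vect_used_in_scope and
   vect_used_in_outer_by_reduction becomes vect_used_by_reduction;
   conversely, for an inner-loop definition used by an outer-loop
   statement, vect_used_in_scope becomes vect_used_in_outer and
   vect_used_by_reduction becomes vect_used_in_outer_by_reduction.  */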
559 /* Function vect_mark_stmts_to_be_vectorized.
561 Not all stmts in the loop need to be vectorized. For example:
563 for i...
564 for j...
565 1. T0 = i + j
566 2. T1 = a[T0]
568 3. j = j + 1
570 Stmt 1 and 3 do not need to be vectorized, because loop control and
571 addressing of vectorized data-refs are handled differently.
573 This pass detects such stmts. */
575 bool
576 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
578 vec<gimple> worklist;
579 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
580 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
581 unsigned int nbbs = loop->num_nodes;
582 gimple_stmt_iterator si;
583 gimple stmt;
584 unsigned int i;
585 stmt_vec_info stmt_vinfo;
586 basic_block bb;
587 gimple phi;
588 bool live_p;
589 enum vect_relevant relevant, tmp_relevant;
590 enum vect_def_type def_type;
592 if (dump_enabled_p ())
593 dump_printf_loc (MSG_NOTE, vect_location,
594 "=== vect_mark_stmts_to_be_vectorized ===");
596 worklist.create (64);
598 /* 1. Init worklist. */
599 for (i = 0; i < nbbs; i++)
601 bb = bbs[i];
602 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
604 phi = gsi_stmt (si);
605 if (dump_enabled_p ())
607 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
608 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
611 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
612 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
614 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
616 stmt = gsi_stmt (si);
617 if (dump_enabled_p ())
619 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
620 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
623 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
624 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
628 /* 2. Process_worklist */
629 while (worklist.length () > 0)
631 use_operand_p use_p;
632 ssa_op_iter iter;
634 stmt = worklist.pop ();
635 if (dump_enabled_p ())
637 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
638 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
641 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
642 (DEF_STMT) as relevant/irrelevant and live/dead according to the
643 liveness and relevance properties of STMT. */
644 stmt_vinfo = vinfo_for_stmt (stmt);
645 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
646 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
648 /* Generally, the liveness and relevance properties of STMT are
649 propagated as is to the DEF_STMTs of its USEs:
650 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
651 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
653 One exception is when STMT has been identified as defining a reduction
654 variable; in this case we set the liveness/relevance as follows:
655 live_p = false
656 relevant = vect_used_by_reduction
657 This is because we distinguish between two kinds of relevant stmts -
658 those that are used by a reduction computation, and those that are
659 (also) used by a regular computation. This allows us later on to
660 identify stmts that are used solely by a reduction, and therefore the
661 order of the results that they produce does not have to be kept. */
663 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
664 tmp_relevant = relevant;
665 switch (def_type)
667 case vect_reduction_def:
668 switch (tmp_relevant)
670 case vect_unused_in_scope:
671 relevant = vect_used_by_reduction;
672 break;
674 case vect_used_by_reduction:
675 if (gimple_code (stmt) == GIMPLE_PHI)
676 break;
677 /* fall through */
679 default:
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
682 "unsupported use of reduction.");
683 worklist.release ();
684 return false;
687 live_p = false;
688 break;
690 case vect_nested_cycle:
691 if (tmp_relevant != vect_unused_in_scope
692 && tmp_relevant != vect_used_in_outer_by_reduction
693 && tmp_relevant != vect_used_in_outer)
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
697 "unsupported use of nested cycle.");
699 worklist.release ();
700 return false;
703 live_p = false;
704 break;
706 case vect_double_reduction_def:
707 if (tmp_relevant != vect_unused_in_scope
708 && tmp_relevant != vect_used_by_reduction)
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
712 "unsupported use of double reduction.");
714 worklist.release ();
715 return false;
718 live_p = false;
719 break;
721 default:
722 break;
725 if (is_pattern_stmt_p (stmt_vinfo))
727 /* Pattern statements are not inserted into the code, so
728 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
729 have to scan the RHS or function arguments instead. */
730 if (is_gimple_assign (stmt))
732 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
733 tree op = gimple_assign_rhs1 (stmt);
735 i = 1;
736 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
738 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
739 live_p, relevant, &worklist, false)
740 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
741 live_p, relevant, &worklist, false))
743 worklist.release ();
744 return false;
746 i = 2;
748 for (; i < gimple_num_ops (stmt); i++)
750 op = gimple_op (stmt, i);
751 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
752 &worklist, false))
754 worklist.release ();
755 return false;
759 else if (is_gimple_call (stmt))
761 for (i = 0; i < gimple_call_num_args (stmt); i++)
763 tree arg = gimple_call_arg (stmt, i);
764 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
765 &worklist, false))
767 worklist.release ();
768 return false;
773 else
774 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
776 tree op = USE_FROM_PTR (use_p);
777 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
778 &worklist, false))
780 worklist.release ();
781 return false;
785 if (STMT_VINFO_GATHER_P (stmt_vinfo))
787 tree off;
788 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
789 gcc_assert (decl);
790 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
791 &worklist, true))
793 worklist.release ();
794 return false;
797 } /* while worklist */
799 worklist.release ();
800 return true;
804 /* Function vect_model_simple_cost.
806 Models cost for simple operations, i.e. those that only emit ncopies of a
807 single op. Right now, this does not account for multiple insns that could
808 be generated for the single vector op. We will handle that shortly. */
810 void
811 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
812 enum vect_def_type *dt,
813 stmt_vector_for_cost *prologue_cost_vec,
814 stmt_vector_for_cost *body_cost_vec)
816 int i;
817 int inside_cost = 0, prologue_cost = 0;
819 /* The SLP costs were already calculated during SLP tree build. */
820 if (PURE_SLP_STMT (stmt_info))
821 return;
823 /* FORNOW: Assuming maximum 2 args per stmts. */
824 for (i = 0; i < 2; i++)
825 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
826 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
827 stmt_info, 0, vect_prologue);
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
831 stmt_info, 0, vect_body);
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .", inside_cost, prologue_cost);
840 /* Model cost for type demotion and promotion operations. PWR is normally
841 zero for single-step promotions and demotions. It will be one if
842 two-step promotion/demotion is required, and so on. Each additional
843 step doubles the number of instructions required. */
845 static void
846 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
847 enum vect_def_type *dt, int pwr)
849 int i, tmp;
850 int inside_cost = 0, prologue_cost = 0;
851 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
852 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
853 void *target_cost_data;
855 /* The SLP costs were already calculated during SLP tree build. */
856 if (PURE_SLP_STMT (stmt_info))
857 return;
859 if (loop_vinfo)
860 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
861 else
862 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
864 for (i = 0; i < pwr + 1; i++)
866 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
867 (i + 1) : i;
868 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
869 vec_promote_demote, stmt_info, 0,
870 vect_body);
873 /* FORNOW: Assuming maximum 2 args per stmts. */
874 for (i = 0; i < 2; i++)
875 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
876 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
877 stmt_info, 0, vect_prologue);
879 if (dump_enabled_p ())
880 dump_printf_loc (MSG_NOTE, vect_location,
881 "vect_model_promotion_demotion_cost: inside_cost = %d, "
882 "prologue_cost = %d .", inside_cost, prologue_cost);
885 /* Function vect_cost_group_size
887 For grouped load or store, return the group_size only if it is the first
888 load or store of a group, else return 1. This ensures that group size is
889 only returned once per group. */
891 static int
892 vect_cost_group_size (stmt_vec_info stmt_info)
894 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
896 if (first_stmt == STMT_VINFO_STMT (stmt_info))
897 return GROUP_SIZE (stmt_info);
899 return 1;
903 /* Function vect_model_store_cost
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
908 void
909 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
910 bool store_lanes_p, enum vect_def_type dt,
911 slp_tree slp_node,
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
915 int group_size;
916 unsigned int inside_cost = 0, prologue_cost = 0;
917 struct data_reference *first_dr;
918 gimple first_stmt;
920 /* The SLP costs were already calculated during SLP tree build. */
921 if (PURE_SLP_STMT (stmt_info))
922 return;
924 if (dt == vect_constant_def || dt == vect_external_def)
925 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
926 stmt_info, 0, vect_prologue);
928 /* Grouped access? */
929 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
931 if (slp_node)
933 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
934 group_size = 1;
936 else
938 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
939 group_size = vect_cost_group_size (stmt_info);
942 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
944 /* Not a grouped access. */
945 else
947 group_size = 1;
948 first_dr = STMT_VINFO_DATA_REF (stmt_info);
951 /* We assume that the cost of a single store-lanes instruction is
952 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
953 access is instead being provided by a permute-and-store operation,
954 include the cost of the permutes. */
955 if (!store_lanes_p && group_size > 1)
957 /* Uses a high and low interleave operation for each needed permute. */
959 int nstmts = ncopies * exact_log2 (group_size) * group_size;
960 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
961 stmt_info, 0, vect_body);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE, vect_location,
965 "vect_model_store_cost: strided group_size = %d .",
966 group_size);
969 /* Costs of the stores. */
970 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE, vect_location,
974 "vect_model_store_cost: inside_cost = %d, "
975 "prologue_cost = %d .", inside_cost, prologue_cost);
979 /* Calculate cost of DR's memory access. */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 unsigned int *inside_cost,
983 stmt_vector_for_cost *body_cost_vec)
985 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986 gimple stmt = DR_STMT (dr);
987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
989 switch (alignment_support_scheme)
991 case dr_aligned:
993 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 vector_store, stmt_info, 0,
995 vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: aligned.");
1000 break;
1003 case dr_unaligned_supported:
1005 /* Here, we assign an additional cost for the unaligned store. */
1006 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 unaligned_store, stmt_info,
1008 DR_MISALIGNMENT (dr), vect_body);
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE, vect_location,
1011 "vect_model_store_cost: unaligned supported by "
1012 "hardware.");
1013 break;
1016 case dr_unaligned_unsupported:
1018 *inside_cost = VECT_MAX_COST;
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022 "vect_model_store_cost: unsupported access.");
1023 break;
1026 default:
1027 gcc_unreachable ();
1032 /* Function vect_model_load_cost
1034 Models cost for loads. In the case of grouped accesses, the last access
1035 has the overhead of the grouped access attributed to it. Since unaligned
1036 accesses are supported for loads, we also account for the costs of the
1037 access scheme chosen. */
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 bool load_lanes_p, slp_tree slp_node,
1042 stmt_vector_for_cost *prologue_cost_vec,
1043 stmt_vector_for_cost *body_cost_vec)
1045 int group_size;
1046 gimple first_stmt;
1047 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1048 unsigned int inside_cost = 0, prologue_cost = 0;
1050 /* The SLP costs were already calculated during SLP tree build. */
1051 if (PURE_SLP_STMT (stmt_info))
1052 return;
1054 /* Grouped accesses? */
1055 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1058 group_size = vect_cost_group_size (stmt_info);
1059 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1061 /* Not a grouped access. */
1062 else
1064 group_size = 1;
1065 first_dr = dr;
1068 /* We assume that the cost of a single load-lanes instruction is
1069 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1070 access is instead being provided by a load-and-permute operation,
1071 include the cost of the permutes. */
1072 if (!load_lanes_p && group_size > 1)
1074       /* Uses an even and odd extract operation for each needed permute.  */
1075 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1076 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1077 stmt_info, 0, vect_body);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_load_cost: strided group_size = %d .",
1082 group_size);
1085 /* The loads themselves. */
1086 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1088 /* N scalar loads plus gathering them into a vector. */
1089 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090 inside_cost += record_stmt_cost (body_cost_vec,
1091 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1092 scalar_load, stmt_info, 0, vect_body);
1093 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1094 stmt_info, 0, vect_body);
1096 else
1097 vect_get_load_cost (first_dr, ncopies,
1098 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1099 || group_size > 1 || slp_node),
1100 &inside_cost, &prologue_cost,
1101 prologue_cost_vec, body_cost_vec, true);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: inside_cost = %d, "
1106 "prologue_cost = %d .", inside_cost, prologue_cost);
1110 /* Calculate cost of DR's memory access. */
1111 void
1112 vect_get_load_cost (struct data_reference *dr, int ncopies,
1113 bool add_realign_cost, unsigned int *inside_cost,
1114 unsigned int *prologue_cost,
1115 stmt_vector_for_cost *prologue_cost_vec,
1116 stmt_vector_for_cost *body_cost_vec,
1117 bool record_prologue_costs)
1119 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1120 gimple stmt = DR_STMT (dr);
1121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 switch (alignment_support_scheme)
1125 case dr_aligned:
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1128 stmt_info, 0, vect_body);
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE, vect_location,
1132 "vect_model_load_cost: aligned.");
1134 break;
1136 case dr_unaligned_supported:
1138 /* Here, we assign an additional cost for the unaligned load. */
1139 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1140 unaligned_load, stmt_info,
1141 DR_MISALIGNMENT (dr), vect_body);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "vect_model_load_cost: unaligned supported by "
1146 "hardware.");
1148 break;
1150 case dr_explicit_realign:
1152 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1153 vector_load, stmt_info, 0, vect_body);
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1155 vec_perm, stmt_info, 0, vect_body);
1157 /* FIXME: If the misalignment remains fixed across the iterations of
1158 the containing loop, the following cost should be added to the
1159 prologue costs. */
1160 if (targetm.vectorize.builtin_mask_for_load)
1161 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1162 stmt_info, 0, vect_body);
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE, vect_location,
1166 "vect_model_load_cost: explicit realign");
1168 break;
1170 case dr_explicit_realign_optimized:
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "vect_model_load_cost: unaligned software "
1175 "pipelined.");
1177 /* Unaligned software pipeline has a load of an address, an initial
1178 load, and possibly a mask operation to "prime" the loop. However,
1179 if this is an access in a group of loads, which provide grouped
1180 access, then the above cost should only be considered for one
1181 access in the group. Inside the loop, there is a load op
1182 and a realignment op. */
1184 if (add_realign_cost && record_prologue_costs)
1186 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1187 vector_stmt, stmt_info,
1188 0, vect_prologue);
1189 if (targetm.vectorize.builtin_mask_for_load)
1190 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1191 vector_stmt, stmt_info,
1192 0, vect_prologue);
1195 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1196 stmt_info, 0, vect_body);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1198 stmt_info, 0, vect_body);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: explicit realign optimized");
1204 break;
1207 case dr_unaligned_unsupported:
1209 *inside_cost = VECT_MAX_COST;
1211 if (dump_enabled_p ())
1212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1213 "vect_model_load_cost: unsupported access.");
1214 break;
1217 default:
1218 gcc_unreachable ();
1222 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1223 the loop preheader for the vectorized stmt STMT. */
1225 static void
1226 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1228 if (gsi)
1229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1230 else
1232 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1233 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1235 if (loop_vinfo)
1237 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1238 basic_block new_bb;
1239 edge pe;
1241 if (nested_in_vect_loop_p (loop, stmt))
1242 loop = loop->inner;
1244 pe = loop_preheader_edge (loop);
1245 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1246 gcc_assert (!new_bb);
1248 else
1250 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1251 basic_block bb;
1252 gimple_stmt_iterator gsi_bb_start;
1254 gcc_assert (bb_vinfo);
1255 bb = BB_VINFO_BB (bb_vinfo);
1256 gsi_bb_start = gsi_after_labels (bb);
1257 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1261 if (dump_enabled_p ())
1263 dump_printf_loc (MSG_NOTE, vect_location,
1264 "created new init_stmt: ");
1265 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1269 /* Function vect_init_vector.
1271 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1272 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1273 vector type a vector with all elements equal to VAL is created first.
1274 Place the initialization at BSI if it is not NULL. Otherwise, place the
1275 initialization at the loop preheader.
1276 Return the DEF of INIT_STMT.
1277 It will be used in the vectorization of STMT. */
1279 tree
1280 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1282 tree new_var;
1283 gimple init_stmt;
1284 tree vec_oprnd;
1285 tree new_temp;
1287 if (TREE_CODE (type) == VECTOR_TYPE
1288 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1290 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 if (CONSTANT_CLASS_P (val))
1293 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1294 else
1296 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1297 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1298 new_temp, val,
1299 NULL_TREE);
1300 vect_init_vector_1 (stmt, init_stmt, gsi);
1301 val = new_temp;
1304 val = build_vector_from_val (type, val);
1307 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1308 init_stmt = gimple_build_assign (new_var, val);
1309 new_temp = make_ssa_name (new_var, init_stmt);
1310 gimple_assign_set_lhs (init_stmt, new_temp);
1311 vect_init_vector_1 (stmt, init_stmt, gsi);
1312 vec_oprnd = gimple_assign_lhs (init_stmt);
1313 return vec_oprnd;
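/* For illustration: called with the scalar constant 5 and a 4-element
   integer vector type (and GSI == NULL), this builds the splat vector
   { 5, 5, 5, 5 }, assigns it to a new "cst_" temporary in the loop
   preheader, and returns the resulting SSA name; a scalar whose type
   differs from the vector element type is converted first.  */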
1317 /* Function vect_get_vec_def_for_operand.
1319 OP is an operand in STMT. This function returns a (vector) def that will be
1320 used in the vectorized stmt for STMT.
1322 In the case that OP is an SSA_NAME which is defined in the loop, then
1323 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1325 In case OP is an invariant or constant, a new stmt that creates a vector def
1326 needs to be introduced. */
1328 tree
1329 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1331 tree vec_oprnd;
1332 gimple vec_stmt;
1333 gimple def_stmt;
1334 stmt_vec_info def_stmt_info = NULL;
1335 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1336 unsigned int nunits;
1337 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1338 tree def;
1339 enum vect_def_type dt;
1340 bool is_simple_use;
1341 tree vector_type;
1343 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_get_vec_def_for_operand: ");
1347 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1350 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1351 &def_stmt, &def, &dt);
1352 gcc_assert (is_simple_use);
1353 if (dump_enabled_p ())
1355 int loc_printed = 0;
1356 if (def)
1358 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1359 loc_printed = 1;
1360 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1362 if (def_stmt)
1364 if (loc_printed)
1365 dump_printf (MSG_NOTE, " def_stmt = ");
1366 else
1367 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1368 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1372 switch (dt)
1374 /* Case 1: operand is a constant. */
1375 case vect_constant_def:
1377 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1378 gcc_assert (vector_type);
1379 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1381 if (scalar_def)
1382 *scalar_def = op;
1384 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1385 if (dump_enabled_p ())
1386 dump_printf_loc (MSG_NOTE, vect_location,
1387 "Create vector_cst. nunits = %d", nunits);
1389 return vect_init_vector (stmt, op, vector_type, NULL);
1392 /* Case 2: operand is defined outside the loop - loop invariant. */
1393 case vect_external_def:
1395 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1396 gcc_assert (vector_type);
1398 if (scalar_def)
1399 *scalar_def = def;
1401 /* Create 'vec_inv = {inv,inv,..,inv}' */
1402 if (dump_enabled_p ())
1403 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");
1405 return vect_init_vector (stmt, def, vector_type, NULL);
1408 /* Case 3: operand is defined inside the loop. */
1409 case vect_internal_def:
1411 if (scalar_def)
1412 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1414 /* Get the def from the vectorized stmt. */
1415 def_stmt_info = vinfo_for_stmt (def_stmt);
1417 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1418 /* Get vectorized pattern statement. */
1419 if (!vec_stmt
1420 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1421 && !STMT_VINFO_RELEVANT (def_stmt_info))
1422 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1423 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1424 gcc_assert (vec_stmt);
1425 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1426 vec_oprnd = PHI_RESULT (vec_stmt);
1427 else if (is_gimple_call (vec_stmt))
1428 vec_oprnd = gimple_call_lhs (vec_stmt);
1429 else
1430 vec_oprnd = gimple_assign_lhs (vec_stmt);
1431 return vec_oprnd;
1434 /* Case 4: operand is defined by a loop header phi - reduction */
1435 case vect_reduction_def:
1436 case vect_double_reduction_def:
1437 case vect_nested_cycle:
1439 struct loop *loop;
1441 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1442 loop = (gimple_bb (def_stmt))->loop_father;
1444 /* Get the def before the loop */
1445 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1446 return get_initial_def_for_reduction (stmt, op, scalar_def);
1449 /* Case 5: operand is defined by loop-header phi - induction. */
1450 case vect_induction_def:
1452 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1454 /* Get the def from the vectorized stmt. */
1455 def_stmt_info = vinfo_for_stmt (def_stmt);
1456 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1457 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1458 vec_oprnd = PHI_RESULT (vec_stmt);
1459 else
1460 vec_oprnd = gimple_get_lhs (vec_stmt);
1461 return vec_oprnd;
1464 default:
1465 gcc_unreachable ();
1470 /* Function vect_get_vec_def_for_stmt_copy
1472 Return a vector-def for an operand. This function is used when the
1473 vectorized stmt to be created (by the caller to this function) is a "copy"
1474 created in case the vectorized result cannot fit in one vector, and several
1475 copies of the vector-stmt are required. In this case the vector-def is
1476 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1477 of the stmt that defines VEC_OPRND.
1478 DT is the type of the vector def VEC_OPRND.
1480 Context:
1481 In case the vectorization factor (VF) is bigger than the number
1482 of elements that can fit in a vectype (nunits), we have to generate
1483 more than one vector stmt to vectorize the scalar stmt. This situation
1484 arises when there are multiple data-types operated upon in the loop; the
1485 smallest data-type determines the VF, and as a result, when vectorizing
1486 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1487 vector stmt (each computing a vector of 'nunits' results, and together
1488 computing 'VF' results in each iteration). This function is called when
1489 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1490 which VF=16 and nunits=4, so the number of copies required is 4):
1492 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1494 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1495 VS1.1: vx.1 = memref1 VS1.2
1496 VS1.2: vx.2 = memref2 VS1.3
1497 VS1.3: vx.3 = memref3
1499 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1500 VSnew.1: vz1 = vx.1 + ... VSnew.2
1501 VSnew.2: vz2 = vx.2 + ... VSnew.3
1502 VSnew.3: vz3 = vx.3 + ...
1504 The vectorization of S1 is explained in vectorizable_load.
1505 The vectorization of S2:
1506 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1507 the function 'vect_get_vec_def_for_operand' is called to
1508 get the relevant vector-def for each operand of S2. For operand x it
1509 returns the vector-def 'vx.0'.
1511 To create the remaining copies of the vector-stmt (VSnew.j), this
1512 function is called to get the relevant vector-def for each operand. It is
1513 obtained from the respective VS1.j stmt, which is recorded in the
1514 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1516 For example, to obtain the vector-def 'vx.1' in order to create the
1517 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1518 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1519 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1520 and return its def ('vx.1').
1521 Overall, to create the above sequence this function will be called 3 times:
1522 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1523 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1524 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1526 tree
1527 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1529 gimple vec_stmt_for_operand;
1530 stmt_vec_info def_stmt_info;
1532 /* Do nothing; can reuse same def. */
1533 if (dt == vect_external_def || dt == vect_constant_def )
1534 return vec_oprnd;
1536 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1537 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1538 gcc_assert (def_stmt_info);
1539 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1540 gcc_assert (vec_stmt_for_operand);
1541 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1542 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1543 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1544 else
1545 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1546 return vec_oprnd;
1550 /* Get vectorized definitions for the operands to create a copy of an original
1551 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1553 static void
1554 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1555 vec<tree> *vec_oprnds0,
1556 vec<tree> *vec_oprnds1)
1558 tree vec_oprnd = vec_oprnds0->pop ();
1560 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1561 vec_oprnds0->quick_push (vec_oprnd);
1563 if (vec_oprnds1 && vec_oprnds1->length ())
1565 vec_oprnd = vec_oprnds1->pop ();
1566 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1567 vec_oprnds1->quick_push (vec_oprnd);
1572 /* Get vectorized definitions for OP0 and OP1.
1573 REDUC_INDEX is the index of reduction operand in case of reduction,
1574 and -1 otherwise. */
1576 void
1577 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1578 vec<tree> *vec_oprnds0,
1579 vec<tree> *vec_oprnds1,
1580 slp_tree slp_node, int reduc_index)
1582 if (slp_node)
1584 int nops = (op1 == NULL_TREE) ? 1 : 2;
1585 vec<tree> ops;
1586 ops.create (nops);
1587 vec<slp_void_p> vec_defs;
1588 vec_defs.create (nops);
1590 ops.quick_push (op0);
1591 if (op1)
1592 ops.quick_push (op1);
1594 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1596 *vec_oprnds0 = *((vec<tree> *) vec_defs[0]);
1597 if (op1)
1598 *vec_oprnds1 = *((vec<tree> *) vec_defs[1]);
1600 ops.release ();
1601 vec_defs.release ();
1603 else
1605 tree vec_oprnd;
1607 vec_oprnds0->create (1);
1608 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1609 vec_oprnds0->quick_push (vec_oprnd);
1611 if (op1)
1613 vec_oprnds1->create (1);
1614 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1615 vec_oprnds1->quick_push (vec_oprnd);
1621 /* Function vect_finish_stmt_generation.
1623 Insert a new stmt. */
1625 void
1626 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1627 gimple_stmt_iterator *gsi)
1629 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1630 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1633 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1635 if (!gsi_end_p (*gsi)
1636 && gimple_has_mem_ops (vec_stmt))
1638 gimple at_stmt = gsi_stmt (*gsi);
1639 tree vuse = gimple_vuse (at_stmt);
1640 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1642 tree vdef = gimple_vdef (at_stmt);
1643 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1644 /* If we have an SSA vuse and insert a store, update virtual
1645 SSA form to avoid triggering the renamer. Do so only
1646 if we can easily see all uses - which is what almost always
1647 happens with the way vectorized stmts are inserted. */
1648 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1649 && ((is_gimple_assign (vec_stmt)
1650 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1651 || (is_gimple_call (vec_stmt)
1652 && !(gimple_call_flags (vec_stmt)
1653 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1655 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1656 gimple_set_vdef (vec_stmt, new_vdef);
1657 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1661 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1663 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1664 bb_vinfo));
1666 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1672 gimple_set_location (vec_stmt, gimple_location (stmt));
1675 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1676 a function declaration if the target has a vectorized version
1677 of the function, or NULL_TREE if the function cannot be vectorized. */
1679 tree
1680 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1682 tree fndecl = gimple_call_fndecl (call);
1684 /* We only handle functions that do not read or clobber memory -- i.e.
1685 const or novops ones. */
1686 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1687 return NULL_TREE;
1689 if (!fndecl
1690 || TREE_CODE (fndecl) != FUNCTION_DECL
1691 || !DECL_BUILT_IN (fndecl))
1692 return NULL_TREE;
1694 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1695 vectype_in);
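/* For illustration: only const/novops built-in calls are considered,
   and the final decision is delegated to
   targetm.vectorize.builtin_vectorized_function; e.g. a call to a math
   built-in such as sqrt may map to a target-provided vector routine
   for the given input/output vector types, otherwise NULL_TREE is
   returned and the call is not vectorized.  */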
1698 /* Function vectorizable_call.
1700 Check if STMT performs a function call that can be vectorized.
1701 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1702 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1703 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1705 static bool
1706 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1707 slp_tree slp_node)
1709 tree vec_dest;
1710 tree scalar_dest;
1711 tree op, type;
1712 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1713 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1714 tree vectype_out, vectype_in;
1715 int nunits_in;
1716 int nunits_out;
1717 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1718 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1719 tree fndecl, new_temp, def, rhs_type;
1720 gimple def_stmt;
1721 enum vect_def_type dt[3]
1722 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1723 gimple new_stmt = NULL;
1724 int ncopies, j;
1725 vec<tree> vargs = vNULL;
1726 enum { NARROW, NONE, WIDEN } modifier;
1727 size_t i, nargs;
1728 tree lhs;
1730 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1731 return false;
1733 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1734 return false;
1736 /* Is STMT a vectorizable call? */
1737 if (!is_gimple_call (stmt))
1738 return false;
1740 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1741 return false;
1743 if (stmt_can_throw_internal (stmt))
1744 return false;
1746 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1748 /* Process function arguments. */
1749 rhs_type = NULL_TREE;
1750 vectype_in = NULL_TREE;
1751 nargs = gimple_call_num_args (stmt);
1753   /* Bail out if the function has more than three arguments; we do not have
1754      interesting builtin functions to vectorize with more than two arguments
1755      except for fma.  A call with no arguments is not handled either.  */
1756 if (nargs == 0 || nargs > 3)
1757 return false;
1759 for (i = 0; i < nargs; i++)
1761 tree opvectype;
1763 op = gimple_call_arg (stmt, i);
1765 /* We can only handle calls with arguments of the same type. */
1766 if (rhs_type
1767 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1769 if (dump_enabled_p ())
1770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1771 "argument types differ.");
1772 return false;
1774 if (!rhs_type)
1775 rhs_type = TREE_TYPE (op);
1777 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1778 &def_stmt, &def, &dt[i], &opvectype))
1780 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1782 "use not simple.");
1783 return false;
1786 if (!vectype_in)
1787 vectype_in = opvectype;
1788 else if (opvectype
1789 && opvectype != vectype_in)
1791 if (dump_enabled_p ())
1792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1793 "argument vector types differ.");
1794 return false;
1797 /* If all arguments are external or constant defs use a vector type with
1798 the same size as the output vector type. */
1799 if (!vectype_in)
1800 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1801 if (vec_stmt)
1802 gcc_assert (vectype_in);
1803 if (!vectype_in)
1805 if (dump_enabled_p ())
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1808 "no vectype for scalar type ");
1809 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1812 return false;
1815 /* FORNOW */
1816 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1817 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1818 if (nunits_in == nunits_out / 2)
1819 modifier = NARROW;
1820 else if (nunits_out == nunits_in)
1821 modifier = NONE;
1822 else if (nunits_out == nunits_in / 2)
1823 modifier = WIDEN;
1824 else
1825 return false;
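/* For illustration: 4 elements per input vector and 8 per output
   vector gives nunits_in == nunits_out / 2, i.e. a narrowing call
   (NARROW); equal element counts give NONE; 8 in and 4 out give
   WIDEN; any other ratio is rejected for now.  */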
1827 /* For now, we only vectorize functions if a target specific builtin
1828 is available. TODO -- in some cases, it might be profitable to
1829 insert the calls for pieces of the vector, in order to be able
1830 to vectorize other operations in the loop. */
1831 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1832 if (fndecl == NULL_TREE)
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "function is not vectorizable.");
1838 return false;
1841 gcc_assert (!gimple_vuse (stmt));
1843 if (slp_node || PURE_SLP_STMT (stmt_info))
1844 ncopies = 1;
1845 else if (modifier == NARROW)
1846 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1847 else
1848 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1850 /* Sanity check: make sure that at least one copy of the vectorized stmt
1851 needs to be generated. */
1852 gcc_assert (ncopies >= 1);
1854 if (!vec_stmt) /* transformation not required. */
1856 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
1859 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1860 return true;
1863 /** Transform. **/
1865 if (dump_enabled_p ())
1866 dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
1868 /* Handle def. */
1869 scalar_dest = gimple_call_lhs (stmt);
1870 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1872 prev_stmt_info = NULL;
1873 switch (modifier)
1875 case NONE:
1876 for (j = 0; j < ncopies; ++j)
1878 /* Build argument list for the vectorized call. */
1879 if (j == 0)
1880 vargs.create (nargs);
1881 else
1882 vargs.truncate (0);
1884 if (slp_node)
1886 vec<slp_void_p> vec_defs;
1887 vec_defs.create (nargs);
1888 vec<tree> vec_oprnds0;
1890 for (i = 0; i < nargs; i++)
1891 vargs.quick_push (gimple_call_arg (stmt, i));
1892 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1893 vec_oprnds0 = *((vec<tree> *) vec_defs[0]);
1895 /* Arguments are ready. Create the new vector stmt. */
1896 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1898 size_t k;
1899 for (k = 0; k < nargs; k++)
1901 vec<tree> vec_oprndsk = *((vec<tree> *) vec_defs[k]);
1902 vargs[k] = vec_oprndsk[i];
1904 new_stmt = gimple_build_call_vec (fndecl, vargs);
1905 new_temp = make_ssa_name (vec_dest, new_stmt);
1906 gimple_call_set_lhs (new_stmt, new_temp);
1907 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1908 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1911 for (i = 0; i < nargs; i++)
1913 vec<tree> vec_oprndsi = *((vec<tree> *) vec_defs[i]);
1914 vec_oprndsi.release ();
1916 vec_defs.release ();
1917 continue;
1920 for (i = 0; i < nargs; i++)
1922 op = gimple_call_arg (stmt, i);
1923 if (j == 0)
1924 vec_oprnd0
1925 = vect_get_vec_def_for_operand (op, stmt, NULL);
1926 else
1928 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1929 vec_oprnd0
1930 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1933 vargs.quick_push (vec_oprnd0);
1936 new_stmt = gimple_build_call_vec (fndecl, vargs);
1937 new_temp = make_ssa_name (vec_dest, new_stmt);
1938 gimple_call_set_lhs (new_stmt, new_temp);
1939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1941 if (j == 0)
1942 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1943 else
1944 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1946 prev_stmt_info = vinfo_for_stmt (new_stmt);
1949 break;
1951 case NARROW:
1952 for (j = 0; j < ncopies; ++j)
1954 /* Build argument list for the vectorized call. */
1955 if (j == 0)
1956 vargs.create (nargs * 2);
1957 else
1958 vargs.truncate (0);
1960 if (slp_node)
1962 vec<slp_void_p> vec_defs;
1963 vec_defs.create (nargs);
1964 vec<tree> vec_oprnds0;
1966 for (i = 0; i < nargs; i++)
1967 vargs.quick_push (gimple_call_arg (stmt, i));
1968 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1969 vec_oprnds0 = *((vec<tree> *) vec_defs[0]);
1971 /* Arguments are ready. Create the new vector stmt. */
1972 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
1974 size_t k;
1975 vargs.truncate (0);
1976 for (k = 0; k < nargs; k++)
1978 vec<tree> vec_oprndsk = *((vec<tree> *) vec_defs[k]);
1979 vargs.quick_push (vec_oprndsk[i]);
1980 vargs.quick_push (vec_oprndsk[i + 1]);
1982 new_stmt = gimple_build_call_vec (fndecl, vargs);
1983 new_temp = make_ssa_name (vec_dest, new_stmt);
1984 gimple_call_set_lhs (new_stmt, new_temp);
1985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1989 for (i = 0; i < nargs; i++)
1991 vec<tree> vec_oprndsi = *((vec<tree> *) vec_defs[i]);
1992 vec_oprndsi.release ();
1994 vec_defs.release ();
1995 continue;
1998 for (i = 0; i < nargs; i++)
2000 op = gimple_call_arg (stmt, i);
2001 if (j == 0)
2003 vec_oprnd0
2004 = vect_get_vec_def_for_operand (op, stmt, NULL);
2005 vec_oprnd1
2006 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2008 else
2010 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2011 vec_oprnd0
2012 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2013 vec_oprnd1
2014 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2017 vargs.quick_push (vec_oprnd0);
2018 vargs.quick_push (vec_oprnd1);
2021 new_stmt = gimple_build_call_vec (fndecl, vargs);
2022 new_temp = make_ssa_name (vec_dest, new_stmt);
2023 gimple_call_set_lhs (new_stmt, new_temp);
2024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2026 if (j == 0)
2027 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2028 else
2029 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2031 prev_stmt_info = vinfo_for_stmt (new_stmt);
2034 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2036 break;
2038 case WIDEN:
2039 /* No current target implements this case. */
2040 return false;
2043 vargs.release ();
2045 /* Update the exception handling table with the vector stmt if necessary. */
2046 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2047 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2049 /* The call in STMT might prevent it from being removed in dead code
2050 elimination (DCE). We cannot remove it here, however, because of the
2051 way the SSA name it defines is mapped to the new definition. So just
2052 replace the rhs of the statement with something harmless. */
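  /* Concretely, the code below turns the original scalar call (say
     lhs_1 = foo (x_2), with illustrative names) into the harmless
     assignment lhs_1 = 0 of the appropriate type, keeping the SSA name
     defined so that DCE can remove it later once it is unused.  */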
2054 if (slp_node)
2055 return true;
2057 type = TREE_TYPE (scalar_dest);
2058 if (is_pattern_stmt_p (stmt_info))
2059 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2060 else
2061 lhs = gimple_call_lhs (stmt);
2062 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2063 set_vinfo_for_stmt (new_stmt, stmt_info);
2064 set_vinfo_for_stmt (stmt, NULL);
2065 STMT_VINFO_STMT (stmt_info) = new_stmt;
2066 gsi_replace (gsi, new_stmt, false);
2067 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2069 return true;
2073 /* Function vect_gen_widened_results_half
2075 Create a vector stmt whose code, operand type, and result variable are
2076 CODE, OP_TYPE, and VEC_DEST, and whose arguments are VEC_OPRND0 and
2077 VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2078 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2079 needs to be created (DECL is a function-decl of a target-builtin).
2080 STMT is the original scalar stmt that we are vectorizing. */
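/* For instance, when widening V8HI operands to V4SI results this helper is
   called twice for each set of input operands: once with the "lo" variant of
   the widening code and once with the "hi" variant, each call producing one
   half of the widened result (see vect_create_vectorized_promotion_stmts).  */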
2082 static gimple
2083 vect_gen_widened_results_half (enum tree_code code,
2084 tree decl,
2085 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2086 tree vec_dest, gimple_stmt_iterator *gsi,
2087 gimple stmt)
2089 gimple new_stmt;
2090 tree new_temp;
2092 /* Generate half of the widened result: */
2093 if (code == CALL_EXPR)
2095 /* Target specific support */
2096 if (op_type == binary_op)
2097 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2098 else
2099 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2100 new_temp = make_ssa_name (vec_dest, new_stmt);
2101 gimple_call_set_lhs (new_stmt, new_temp);
2103 else
2105 /* Generic support */
2106 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2107 if (op_type != binary_op)
2108 vec_oprnd1 = NULL;
2109 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2110 vec_oprnd1);
2111 new_temp = make_ssa_name (vec_dest, new_stmt);
2112 gimple_assign_set_lhs (new_stmt, new_temp);
2114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2116 return new_stmt;
2120 /* Get vectorized definitions for loop-based vectorization. For the first
2121 operand we call vect_get_vec_def_for_operand () (with OPRND containing
2122 the scalar operand), and for the rest we get a copy with
2123 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2124 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2125 The vectors are collected into VEC_OPRNDS. */
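/* As a worked example, a call with MULTI_STEP_CVT == 1 pushes two defs,
   recurses once and pushes two more, so four vector defs end up in
   VEC_OPRNDS; the caller in vectorizable_conversion passes
   vect_pow2 (multi_step_cvt) - 1 and thus collects
   2 * vect_pow2 (multi_step_cvt) defs in total.  */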
2127 static void
2128 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2129 vec<tree> *vec_oprnds, int multi_step_cvt)
2131 tree vec_oprnd;
2133 /* Get first vector operand. */
2134 /* All the vector operands except the very first one (which is the scalar
2135 operand) are stmt copies.
2136 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2137 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2138 else
2139 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2141 vec_oprnds->quick_push (vec_oprnd);
2143 /* Get second vector operand. */
2144 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2145 vec_oprnds->quick_push (vec_oprnd);
2147 *oprnd = vec_oprnd;
2149 /* For conversion in multiple steps, continue to get operands
2150 recursively. */
2151 if (multi_step_cvt)
2152 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2156 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2157 For multi-step conversions store the resulting vectors and call the function
2158 recursively. */
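/* Sketch of a multi-step case: demoting int elements to char goes
   V4SI -> V8HI -> V16QI.  The first pass below pairs up the V4SI operands,
   stores the V8HI results back into VEC_OPRNDS, and the recursive call
   (using VEC_PACK_TRUNC_EXPR) pairs those up again to produce the final
   V16QI vectors.  */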
2160 static void
2161 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2162 int multi_step_cvt, gimple stmt,
2163 vec<tree> vec_dsts,
2164 gimple_stmt_iterator *gsi,
2165 slp_tree slp_node, enum tree_code code,
2166 stmt_vec_info *prev_stmt_info)
2168 unsigned int i;
2169 tree vop0, vop1, new_tmp, vec_dest;
2170 gimple new_stmt;
2171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2173 vec_dest = vec_dsts.pop ();
2175 for (i = 0; i < vec_oprnds->length (); i += 2)
2177 /* Create demotion operation. */
2178 vop0 = (*vec_oprnds)[i];
2179 vop1 = (*vec_oprnds)[i + 1];
2180 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2181 new_tmp = make_ssa_name (vec_dest, new_stmt);
2182 gimple_assign_set_lhs (new_stmt, new_tmp);
2183 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2185 if (multi_step_cvt)
2186 /* Store the resulting vector for next recursive call. */
2187 (*vec_oprnds)[i/2] = new_tmp;
2188 else
2190 /* This is the last step of the conversion sequence. Store the
2191 vectors in SLP_NODE or in vector info of the scalar statement
2192 (or in STMT_VINFO_RELATED_STMT chain). */
2193 if (slp_node)
2194 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2195 else
2197 if (!*prev_stmt_info)
2198 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2199 else
2200 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2202 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2207 /* For multi-step demotion operations we first generate demotion operations
2208 from the source type to the intermediate types, and then combine the
2209 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2210 type. */
2211 if (multi_step_cvt)
2213 /* At each level of recursion we have half of the operands we had at the
2214 previous level. */
2215 vec_oprnds->truncate ((i+1)/2);
2216 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2217 stmt, vec_dsts, gsi, slp_node,
2218 VEC_PACK_TRUNC_EXPR,
2219 prev_stmt_info);
2222 vec_dsts.quick_push (vec_dest);
2226 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2227 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2228 the resulting vectors and call the function recursively. */
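/* Sketch: promoting V8HI operands to V4SI results turns each set of input
   operands into two output vectors via the "lo"/"hi" halves produced by
   vect_gen_widened_results_half; the new SSA names then replace the contents
   of VEC_OPRNDS0 so that a further promotion step can consume them
   directly.  */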
2230 static void
2231 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2232 vec<tree> *vec_oprnds1,
2233 gimple stmt, tree vec_dest,
2234 gimple_stmt_iterator *gsi,
2235 enum tree_code code1,
2236 enum tree_code code2, tree decl1,
2237 tree decl2, int op_type)
2239 int i;
2240 tree vop0, vop1, new_tmp1, new_tmp2;
2241 gimple new_stmt1, new_stmt2;
2242 vec<tree> vec_tmp = vNULL;
2244 vec_tmp.create (vec_oprnds0->length () * 2);
2245 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2247 if (op_type == binary_op)
2248 vop1 = (*vec_oprnds1)[i];
2249 else
2250 vop1 = NULL_TREE;
2252 /* Generate the two halves of promotion operation. */
2253 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2254 op_type, vec_dest, gsi, stmt);
2255 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2256 op_type, vec_dest, gsi, stmt);
2257 if (is_gimple_call (new_stmt1))
2259 new_tmp1 = gimple_call_lhs (new_stmt1);
2260 new_tmp2 = gimple_call_lhs (new_stmt2);
2262 else
2264 new_tmp1 = gimple_assign_lhs (new_stmt1);
2265 new_tmp2 = gimple_assign_lhs (new_stmt2);
2268 /* Store the results for the next step. */
2269 vec_tmp.quick_push (new_tmp1);
2270 vec_tmp.quick_push (new_tmp2);
2273 vec_oprnds0->truncate (0);
2274 *vec_oprnds0 = vec_tmp;
2278 /* Check if STMT performs a conversion operation, that can be vectorized.
2279 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2280 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2281 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
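/* The conversions handled below are plain CONVERT_EXPR/NOP_EXPR casts,
   FIX_TRUNC_EXPR and FLOAT_EXPR, plus the widening operations
   WIDEN_MULT_EXPR and WIDEN_LSHIFT_EXPR.  */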
2283 static bool
2284 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2285 gimple *vec_stmt, slp_tree slp_node)
2287 tree vec_dest;
2288 tree scalar_dest;
2289 tree op0, op1 = NULL_TREE;
2290 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2292 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2293 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2294 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2295 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2296 tree new_temp;
2297 tree def;
2298 gimple def_stmt;
2299 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2300 gimple new_stmt = NULL;
2301 stmt_vec_info prev_stmt_info;
2302 int nunits_in;
2303 int nunits_out;
2304 tree vectype_out, vectype_in;
2305 int ncopies, i, j;
2306 tree lhs_type, rhs_type;
2307 enum { NARROW, NONE, WIDEN } modifier;
2308 vec<tree> vec_oprnds0 = vNULL;
2309 vec<tree> vec_oprnds1 = vNULL;
2310 tree vop0;
2311 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2312 int multi_step_cvt = 0;
2313 vec<tree> vec_dsts = vNULL;
2314 vec<tree> interm_types = vNULL;
2315 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2316 int op_type;
2317 enum machine_mode rhs_mode;
2318 unsigned short fltsz;
2320 /* Is STMT a vectorizable conversion? */
2322 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2323 return false;
2325 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2326 return false;
2328 if (!is_gimple_assign (stmt))
2329 return false;
2331 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2332 return false;
2334 code = gimple_assign_rhs_code (stmt);
2335 if (!CONVERT_EXPR_CODE_P (code)
2336 && code != FIX_TRUNC_EXPR
2337 && code != FLOAT_EXPR
2338 && code != WIDEN_MULT_EXPR
2339 && code != WIDEN_LSHIFT_EXPR)
2340 return false;
2342 op_type = TREE_CODE_LENGTH (code);
2344 /* Check types of lhs and rhs. */
2345 scalar_dest = gimple_assign_lhs (stmt);
2346 lhs_type = TREE_TYPE (scalar_dest);
2347 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2349 op0 = gimple_assign_rhs1 (stmt);
2350 rhs_type = TREE_TYPE (op0);
2352 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2353 && !((INTEGRAL_TYPE_P (lhs_type)
2354 && INTEGRAL_TYPE_P (rhs_type))
2355 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2356 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2357 return false;
2359 if ((INTEGRAL_TYPE_P (lhs_type)
2360 && (TYPE_PRECISION (lhs_type)
2361 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2362 || (INTEGRAL_TYPE_P (rhs_type)
2363 && (TYPE_PRECISION (rhs_type)
2364 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 "type conversion to/from bit-precision unsupported.");
2369 return false;
2372 /* Check the operands of the operation. */
2373 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2374 &def_stmt, &def, &dt[0], &vectype_in))
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2378 "use not simple.");
2379 return false;
2381 if (op_type == binary_op)
2383 bool ok;
2385 op1 = gimple_assign_rhs2 (stmt);
2386 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2387 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2388 OP1. */
2389 if (CONSTANT_CLASS_P (op0))
2390 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2391 &def_stmt, &def, &dt[1], &vectype_in);
2392 else
2393 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2394 &def, &dt[1]);
2396 if (!ok)
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2400 "use not simple.");
2401 return false;
2405 /* If op0 is an external or constant def, use a vector type of
2406 the same size as the output vector type. */
2407 if (!vectype_in)
2408 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2409 if (vec_stmt)
2410 gcc_assert (vectype_in);
2411 if (!vectype_in)
2413 if (dump_enabled_p ())
2415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2416 "no vectype for scalar type ");
2417 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2420 return false;
2423 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2424 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2425 if (nunits_in < nunits_out)
2426 modifier = NARROW;
2427 else if (nunits_out == nunits_in)
2428 modifier = NONE;
2429 else
2430 modifier = WIDEN;
2432 /* Multiple types in SLP are handled by creating the appropriate number of
2433 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2434 case of SLP. */
2435 if (slp_node || PURE_SLP_STMT (stmt_info))
2436 ncopies = 1;
2437 else if (modifier == NARROW)
2438 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2439 else
2440 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2442 /* Sanity check: make sure that at least one copy of the vectorized stmt
2443 needs to be generated. */
2444 gcc_assert (ncopies >= 1);
2446 /* Supportable by target? */
2447 switch (modifier)
2449 case NONE:
2450 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2451 return false;
2452 if (supportable_convert_operation (code, vectype_out, vectype_in,
2453 &decl1, &code1))
2454 break;
2455 /* FALLTHRU */
2456 unsupported:
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "conversion not supported by target.");
2460 return false;
2462 case WIDEN:
2463 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2464 &code1, &code2, &multi_step_cvt,
2465 &interm_types))
2467 /* Binary widening operation can only be supported directly by the
2468 architecture. */
2469 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2470 break;
2473 if (code != FLOAT_EXPR
2474 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2475 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2476 goto unsupported;
2478 rhs_mode = TYPE_MODE (rhs_type);
2479 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2480 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2481 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2482 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2484 cvt_type
2485 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2486 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2487 if (cvt_type == NULL_TREE)
2488 goto unsupported;
2490 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2492 if (!supportable_convert_operation (code, vectype_out,
2493 cvt_type, &decl1, &codecvt1))
2494 goto unsupported;
2496 else if (!supportable_widening_operation (code, stmt, vectype_out,
2497 cvt_type, &codecvt1,
2498 &codecvt2, &multi_step_cvt,
2499 &interm_types))
2500 continue;
2501 else
2502 gcc_assert (multi_step_cvt == 0);
2504 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2505 vectype_in, &code1, &code2,
2506 &multi_step_cvt, &interm_types))
2507 break;
2510 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2511 goto unsupported;
2513 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2514 codecvt2 = ERROR_MARK;
2515 else
2517 multi_step_cvt++;
2518 interm_types.safe_push (cvt_type);
2519 cvt_type = NULL_TREE;
2521 break;
2523 case NARROW:
2524 gcc_assert (op_type == unary_op);
2525 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2526 &code1, &multi_step_cvt,
2527 &interm_types))
2528 break;
2530 if (code != FIX_TRUNC_EXPR
2531 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2532 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2533 goto unsupported;
2535 rhs_mode = TYPE_MODE (rhs_type);
2536 cvt_type
2537 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2538 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2539 if (cvt_type == NULL_TREE)
2540 goto unsupported;
2541 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2542 &decl1, &codecvt1))
2543 goto unsupported;
2544 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2545 &code1, &multi_step_cvt,
2546 &interm_types))
2547 break;
2548 goto unsupported;
2550 default:
2551 gcc_unreachable ();
2554 if (!vec_stmt) /* transformation not required. */
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_NOTE, vect_location,
2558 "=== vectorizable_conversion ===");
2559 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2561 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2562 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2564 else if (modifier == NARROW)
2566 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2567 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2569 else
2571 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2572 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2574 interm_types.release ();
2575 return true;
2578 /** Transform. **/
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_NOTE, vect_location,
2581 "transform conversion. ncopies = %d.", ncopies);
2583 if (op_type == binary_op)
2585 if (CONSTANT_CLASS_P (op0))
2586 op0 = fold_convert (TREE_TYPE (op1), op0);
2587 else if (CONSTANT_CLASS_P (op1))
2588 op1 = fold_convert (TREE_TYPE (op0), op1);
2591 /* In case of multi-step conversion, we first generate conversion operations
2592 to the intermediate types, and then from those types to the final one.
2593 We create vector destinations for the intermediate types (TYPES) received
2594 from supportable_*_operation, and store them in the correct order
2595 for future use in vect_create_vectorized_*_stmts (). */
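  /* For example, a two-step demotion from int to char creates a vector
     destination for the intermediate short vector type in addition to the
     final one; the per-step code below pulls these from VEC_DSTS as the
     partial results are produced.  */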
2596 vec_dsts.create (multi_step_cvt + 1);
2597 vec_dest = vect_create_destination_var (scalar_dest,
2598 (cvt_type && modifier == WIDEN)
2599 ? cvt_type : vectype_out);
2600 vec_dsts.quick_push (vec_dest);
2602 if (multi_step_cvt)
2604 for (i = interm_types.length () - 1;
2605 interm_types.iterate (i, &intermediate_type); i--)
2607 vec_dest = vect_create_destination_var (scalar_dest,
2608 intermediate_type);
2609 vec_dsts.quick_push (vec_dest);
2613 if (cvt_type)
2614 vec_dest = vect_create_destination_var (scalar_dest,
2615 modifier == WIDEN
2616 ? vectype_out : cvt_type);
2618 if (!slp_node)
2620 if (modifier == NONE)
2621 vec_oprnds0.create (1);
2622 else if (modifier == WIDEN)
2624 vec_oprnds0.create (multi_step_cvt ? vect_pow2(multi_step_cvt) : 1);
2625 if (op_type == binary_op)
2626 vec_oprnds1.create (1);
2628 else
2629 vec_oprnds0.create (
2630 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2632 else if (code == WIDEN_LSHIFT_EXPR)
2633 vec_oprnds1.create (slp_node->vec_stmts_size);
2635 last_oprnd = op0;
2636 prev_stmt_info = NULL;
2637 switch (modifier)
2639 case NONE:
2640 for (j = 0; j < ncopies; j++)
2642 if (j == 0)
2643 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2644 -1);
2645 else
2646 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2648 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2650 /* Arguments are ready, create the new vector stmt. */
2651 if (code1 == CALL_EXPR)
2653 new_stmt = gimple_build_call (decl1, 1, vop0);
2654 new_temp = make_ssa_name (vec_dest, new_stmt);
2655 gimple_call_set_lhs (new_stmt, new_temp);
2657 else
2659 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2660 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2661 vop0, NULL);
2662 new_temp = make_ssa_name (vec_dest, new_stmt);
2663 gimple_assign_set_lhs (new_stmt, new_temp);
2666 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2667 if (slp_node)
2668 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2671 if (j == 0)
2672 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2673 else
2674 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2675 prev_stmt_info = vinfo_for_stmt (new_stmt);
2677 break;
2679 case WIDEN:
2680 /* In case the vectorization factor (VF) is bigger than the number
2681 of elements that we can fit in a vectype (nunits), we have to
2682 generate more than one vector stmt, i.e., we need to "unroll"
2683 the vector stmt by a factor VF/nunits. */
2684 for (j = 0; j < ncopies; j++)
2686 /* Handle uses. */
2687 if (j == 0)
2689 if (slp_node)
2691 if (code == WIDEN_LSHIFT_EXPR)
2693 unsigned int k;
2695 vec_oprnd1 = op1;
2696 /* Store vec_oprnd1 for every vector stmt to be created
2697 for SLP_NODE. We check during the analysis that all
2698 the shift arguments are the same. */
2699 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2700 vec_oprnds1.quick_push (vec_oprnd1);
2702 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2703 slp_node, -1);
2705 else
2706 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2707 &vec_oprnds1, slp_node, -1);
2709 else
2711 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2712 vec_oprnds0.quick_push (vec_oprnd0);
2713 if (op_type == binary_op)
2715 if (code == WIDEN_LSHIFT_EXPR)
2716 vec_oprnd1 = op1;
2717 else
2718 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2719 NULL);
2720 vec_oprnds1.quick_push (vec_oprnd1);
2724 else
2726 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2727 vec_oprnds0.truncate (0);
2728 vec_oprnds0.quick_push (vec_oprnd0);
2729 if (op_type == binary_op)
2731 if (code == WIDEN_LSHIFT_EXPR)
2732 vec_oprnd1 = op1;
2733 else
2734 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2735 vec_oprnd1);
2736 vec_oprnds1.truncate (0);
2737 vec_oprnds1.quick_push (vec_oprnd1);
2741 /* Arguments are ready. Create the new vector stmts. */
2742 for (i = multi_step_cvt; i >= 0; i--)
2744 tree this_dest = vec_dsts[i];
2745 enum tree_code c1 = code1, c2 = code2;
2746 if (i == 0 && codecvt2 != ERROR_MARK)
2748 c1 = codecvt1;
2749 c2 = codecvt2;
2751 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2752 &vec_oprnds1,
2753 stmt, this_dest, gsi,
2754 c1, c2, decl1, decl2,
2755 op_type);
2758 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2760 if (cvt_type)
2762 if (codecvt1 == CALL_EXPR)
2764 new_stmt = gimple_build_call (decl1, 1, vop0);
2765 new_temp = make_ssa_name (vec_dest, new_stmt);
2766 gimple_call_set_lhs (new_stmt, new_temp);
2768 else
2770 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2771 new_temp = make_ssa_name (vec_dest, NULL);
2772 new_stmt = gimple_build_assign_with_ops (codecvt1,
2773 new_temp,
2774 vop0, NULL);
2777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2779 else
2780 new_stmt = SSA_NAME_DEF_STMT (vop0);
2782 if (slp_node)
2783 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2784 else
2786 if (!prev_stmt_info)
2787 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2788 else
2789 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2790 prev_stmt_info = vinfo_for_stmt (new_stmt);
2795 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2796 break;
2798 case NARROW:
2799 /* In case the vectorization factor (VF) is bigger than the number
2800 of elements that we can fit in a vectype (nunits), we have to
2801 generate more than one vector stmt, i.e., we need to "unroll"
2802 the vector stmt by a factor VF/nunits. */
2803 for (j = 0; j < ncopies; j++)
2805 /* Handle uses. */
2806 if (slp_node)
2807 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2808 slp_node, -1);
2809 else
2811 vec_oprnds0.truncate (0);
2812 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2813 vect_pow2 (multi_step_cvt) - 1);
2816 /* Arguments are ready. Create the new vector stmts. */
2817 if (cvt_type)
2818 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2820 if (codecvt1 == CALL_EXPR)
2822 new_stmt = gimple_build_call (decl1, 1, vop0);
2823 new_temp = make_ssa_name (vec_dest, new_stmt);
2824 gimple_call_set_lhs (new_stmt, new_temp);
2826 else
2828 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2829 new_temp = make_ssa_name (vec_dest, NULL);
2830 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2831 vop0, NULL);
2834 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2835 vec_oprnds0[i] = new_temp;
2838 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2839 stmt, vec_dsts, gsi,
2840 slp_node, code1,
2841 &prev_stmt_info);
2844 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2845 break;
2848 vec_oprnds0.release ();
2849 vec_oprnds1.release ();
2850 vec_dsts.release ();
2851 interm_types.release ();
2853 return true;
2857 /* Function vectorizable_assignment.
2859 Check if STMT performs an assignment (copy) that can be vectorized.
2860 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2861 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2862 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2864 static bool
2865 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2866 gimple *vec_stmt, slp_tree slp_node)
2868 tree vec_dest;
2869 tree scalar_dest;
2870 tree op;
2871 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2873 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2874 tree new_temp;
2875 tree def;
2876 gimple def_stmt;
2877 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2878 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2879 int ncopies;
2880 int i, j;
2881 vec<tree> vec_oprnds = vNULL;
2882 tree vop;
2883 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2884 gimple new_stmt = NULL;
2885 stmt_vec_info prev_stmt_info = NULL;
2886 enum tree_code code;
2887 tree vectype_in;
2889 /* Multiple types in SLP are handled by creating the appropriate number of
2890 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2891 case of SLP. */
2892 if (slp_node || PURE_SLP_STMT (stmt_info))
2893 ncopies = 1;
2894 else
2895 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2897 gcc_assert (ncopies >= 1);
2899 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2900 return false;
2902 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2903 return false;
2905 /* Is vectorizable assignment? */
2906 if (!is_gimple_assign (stmt))
2907 return false;
2909 scalar_dest = gimple_assign_lhs (stmt);
2910 if (TREE_CODE (scalar_dest) != SSA_NAME)
2911 return false;
2913 code = gimple_assign_rhs_code (stmt);
2914 if (gimple_assign_single_p (stmt)
2915 || code == PAREN_EXPR
2916 || CONVERT_EXPR_CODE_P (code))
2917 op = gimple_assign_rhs1 (stmt);
2918 else
2919 return false;
2921 if (code == VIEW_CONVERT_EXPR)
2922 op = TREE_OPERAND (op, 0);
2924 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2925 &def_stmt, &def, &dt[0], &vectype_in))
2927 if (dump_enabled_p ())
2928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2929 "use not simple.");
2930 return false;
2933 /* We can handle NOP_EXPR conversions that do not change the number
2934 of elements or the vector size. */
2935 if ((CONVERT_EXPR_CODE_P (code)
2936 || code == VIEW_CONVERT_EXPR)
2937 && (!vectype_in
2938 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2939 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2940 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2941 return false;
2943 /* We do not handle bit-precision changes. */
2944 if ((CONVERT_EXPR_CODE_P (code)
2945 || code == VIEW_CONVERT_EXPR)
2946 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2947 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2948 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2949 || ((TYPE_PRECISION (TREE_TYPE (op))
2950 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2951 /* But a conversion that does not change the bit-pattern is ok. */
2952 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2953 > TYPE_PRECISION (TREE_TYPE (op)))
2954 && TYPE_UNSIGNED (TREE_TYPE (op))))
2956 if (dump_enabled_p ())
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2958 "type conversion to/from bit-precision "
2959 "unsupported.");
2960 return false;
2963 if (!vec_stmt) /* transformation not required. */
2965 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2966 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_NOTE, vect_location,
2968 "=== vectorizable_assignment ===");
2969 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2970 return true;
2973 /** Transform. **/
2974 if (dump_enabled_p ())
2975 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
2977 /* Handle def. */
2978 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2980 /* Handle use. */
2981 for (j = 0; j < ncopies; j++)
2983 /* Handle uses. */
2984 if (j == 0)
2985 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2986 else
2987 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2989 /* Arguments are ready. Create the new vector stmt. */
2990 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2992 if (CONVERT_EXPR_CODE_P (code)
2993 || code == VIEW_CONVERT_EXPR)
2994 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2995 new_stmt = gimple_build_assign (vec_dest, vop);
2996 new_temp = make_ssa_name (vec_dest, new_stmt);
2997 gimple_assign_set_lhs (new_stmt, new_temp);
2998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2999 if (slp_node)
3000 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3003 if (slp_node)
3004 continue;
3006 if (j == 0)
3007 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3008 else
3009 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3011 prev_stmt_info = vinfo_for_stmt (new_stmt);
3014 vec_oprnds.release ();
3015 return true;
3019 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3020 either as shift by a scalar or by a vector. */
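/* Typical use: before rewriting an operation into a shift, a caller can
   check vect_supportable_shift (RSHIFT_EXPR, type) to make sure the target
   can vectorize the resulting shift at all, whether by a scalar or by a
   vector shift amount.  */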
3022 bool
3023 vect_supportable_shift (enum tree_code code, tree scalar_type)
3026 enum machine_mode vec_mode;
3027 optab optab;
3028 int icode;
3029 tree vectype;
3031 vectype = get_vectype_for_scalar_type (scalar_type);
3032 if (!vectype)
3033 return false;
3035 optab = optab_for_tree_code (code, vectype, optab_scalar);
3036 if (!optab
3037 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3039 optab = optab_for_tree_code (code, vectype, optab_vector);
3040 if (!optab
3041 || (optab_handler (optab, TYPE_MODE (vectype))
3042 == CODE_FOR_nothing))
3043 return false;
3046 vec_mode = TYPE_MODE (vectype);
3047 icode = (int) optab_handler (optab, vec_mode);
3048 if (icode == CODE_FOR_nothing)
3049 return false;
3051 return true;
3055 /* Function vectorizable_shift.
3057 Check if STMT performs a shift operation that can be vectorized.
3058 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3059 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3060 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3062 static bool
3063 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3064 gimple *vec_stmt, slp_tree slp_node)
3066 tree vec_dest;
3067 tree scalar_dest;
3068 tree op0, op1 = NULL;
3069 tree vec_oprnd1 = NULL_TREE;
3070 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3071 tree vectype;
3072 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3073 enum tree_code code;
3074 enum machine_mode vec_mode;
3075 tree new_temp;
3076 optab optab;
3077 int icode;
3078 enum machine_mode optab_op2_mode;
3079 tree def;
3080 gimple def_stmt;
3081 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3082 gimple new_stmt = NULL;
3083 stmt_vec_info prev_stmt_info;
3084 int nunits_in;
3085 int nunits_out;
3086 tree vectype_out;
3087 tree op1_vectype;
3088 int ncopies;
3089 int j, i;
3090 vec<tree> vec_oprnds0 = vNULL;
3091 vec<tree> vec_oprnds1 = vNULL;
3092 tree vop0, vop1;
3093 unsigned int k;
3094 bool scalar_shift_arg = true;
3095 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3096 int vf;
3098 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3099 return false;
3101 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3102 return false;
3104 /* Is STMT a vectorizable binary/unary operation? */
3105 if (!is_gimple_assign (stmt))
3106 return false;
3108 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3109 return false;
3111 code = gimple_assign_rhs_code (stmt);
3113 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3114 || code == RROTATE_EXPR))
3115 return false;
3117 scalar_dest = gimple_assign_lhs (stmt);
3118 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3119 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3120 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3122 if (dump_enabled_p ())
3123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3124 "bit-precision shifts not supported.");
3125 return false;
3128 op0 = gimple_assign_rhs1 (stmt);
3129 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3130 &def_stmt, &def, &dt[0], &vectype))
3132 if (dump_enabled_p ())
3133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3134 "use not simple.");
3135 return false;
3137 /* If op0 is an external or constant def, use a vector type with
3138 the same size as the output vector type. */
3139 if (!vectype)
3140 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3141 if (vec_stmt)
3142 gcc_assert (vectype);
3143 if (!vectype)
3145 if (dump_enabled_p ())
3146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3147 "no vectype for scalar type ");
3148 return false;
3151 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3152 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3153 if (nunits_out != nunits_in)
3154 return false;
3156 op1 = gimple_assign_rhs2 (stmt);
3157 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3158 &def, &dt[1], &op1_vectype))
3160 if (dump_enabled_p ())
3161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3162 "use not simple.");
3163 return false;
3166 if (loop_vinfo)
3167 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3168 else
3169 vf = 1;
3171 /* Multiple types in SLP are handled by creating the appropriate number of
3172 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3173 case of SLP. */
3174 if (slp_node || PURE_SLP_STMT (stmt_info))
3175 ncopies = 1;
3176 else
3177 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3179 gcc_assert (ncopies >= 1);
3181 /* Determine whether the shift amount is a vector, or scalar. If the
3182 shift/rotate amount is a vector, use the vector/vector shift optabs. */
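  /* E.g. for a_1 = b_2 << c_3 with c_3 defined inside the loop, the
     vector/vector optab must be used; for a_1 = b_2 << 3, or a shift by a
     loop invariant, the shift amount can stay a single scalar operand
     (names here are illustrative).  */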
3184 if (dt[1] == vect_internal_def && !slp_node)
3185 scalar_shift_arg = false;
3186 else if (dt[1] == vect_constant_def
3187 || dt[1] == vect_external_def
3188 || dt[1] == vect_internal_def)
3190 /* In SLP, we need to check whether the shift count is the same for
3191 all the statements; in loops, if it is a constant or invariant, it
3192 is always a scalar shift. */
3193 if (slp_node)
3195 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3196 gimple slpstmt;
3198 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3199 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3200 scalar_shift_arg = false;
3203 else
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3207 "operand mode requires invariant argument.");
3208 return false;
3211 /* Vector shifted by vector. */
3212 if (!scalar_shift_arg)
3214 optab = optab_for_tree_code (code, vectype, optab_vector);
3215 if (dump_enabled_p ())
3216 dump_printf_loc (MSG_NOTE, vect_location,
3217 "vector/vector shift/rotate found.");
3219 if (!op1_vectype)
3220 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3221 if (op1_vectype == NULL_TREE
3222 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3224 if (dump_enabled_p ())
3225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3226 "unusable type for last operand in"
3227 " vector/vector shift/rotate.");
3228 return false;
3231 /* See if the machine has a vector shifted by scalar insn and if not
3232 then see if it has a vector shifted by vector insn. */
3233 else
3235 optab = optab_for_tree_code (code, vectype, optab_scalar);
3236 if (optab
3237 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3239 if (dump_enabled_p ())
3240 dump_printf_loc (MSG_NOTE, vect_location,
3241 "vector/scalar shift/rotate found.");
3243 else
3245 optab = optab_for_tree_code (code, vectype, optab_vector);
3246 if (optab
3247 && (optab_handler (optab, TYPE_MODE (vectype))
3248 != CODE_FOR_nothing))
3250 scalar_shift_arg = false;
3252 if (dump_enabled_p ())
3253 dump_printf_loc (MSG_NOTE, vect_location,
3254 "vector/vector shift/rotate found.");
3256 /* Unlike the other binary operators, shifts/rotates have
3257 an rhs of type int rather than of the same type as the lhs,
3258 so make sure the scalar operand has the right type when we
3259 are dealing with vectors of long long/long/short/char. */
3260 if (dt[1] == vect_constant_def)
3261 op1 = fold_convert (TREE_TYPE (vectype), op1);
3262 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3263 TREE_TYPE (op1)))
3265 if (slp_node
3266 && TYPE_MODE (TREE_TYPE (vectype))
3267 != TYPE_MODE (TREE_TYPE (op1)))
3269 if (dump_enabled_p ())
3270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3271 "unusable type for last operand in"
3272 " vector/vector shift/rotate.");
3273 return false;
3275 if (vec_stmt && !slp_node)
3277 op1 = fold_convert (TREE_TYPE (vectype), op1);
3278 op1 = vect_init_vector (stmt, op1,
3279 TREE_TYPE (vectype), NULL);
3286 /* Supportable by target? */
3287 if (!optab)
3289 if (dump_enabled_p ())
3290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3291 "no optab.");
3292 return false;
3294 vec_mode = TYPE_MODE (vectype);
3295 icode = (int) optab_handler (optab, vec_mode);
3296 if (icode == CODE_FOR_nothing)
3298 if (dump_enabled_p ())
3299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3300 "op not supported by target.");
3301 /* Check only during analysis. */
3302 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3303 || (vf < vect_min_worthwhile_factor (code)
3304 && !vec_stmt))
3305 return false;
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3310 /* Worthwhile without SIMD support? Check only during analysis. */
3311 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3312 && vf < vect_min_worthwhile_factor (code)
3313 && !vec_stmt)
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3317 "not worthwhile without SIMD support.");
3318 return false;
3321 if (!vec_stmt) /* transformation not required. */
3323 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3326 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3327 return true;
3330 /** Transform. **/
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_NOTE, vect_location,
3334 "transform binary/unary operation.");
3336 /* Handle def. */
3337 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3339 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3340 created in the previous stages of the recursion, so no allocation is
3341 needed, except for the case of shift with scalar shift argument. In that
3342 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3343 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3344 In case of loop-based vectorization we allocate VECs of size 1. We
3345 allocate VEC_OPRNDS1 only in case of binary operation. */
3346 if (!slp_node)
3348 vec_oprnds0.create (1);
3349 vec_oprnds1.create (1);
3351 else if (scalar_shift_arg)
3352 vec_oprnds1.create (slp_node->vec_stmts_size);
3354 prev_stmt_info = NULL;
3355 for (j = 0; j < ncopies; j++)
3357 /* Handle uses. */
3358 if (j == 0)
3360 if (scalar_shift_arg)
3362 /* Vector shl and shr insn patterns can be defined with scalar
3363 operand 2 (shift operand). In this case, use constant or loop
3364 invariant op1 directly, without extending it to vector mode
3365 first. */
3366 optab_op2_mode = insn_data[icode].operand[2].mode;
3367 if (!VECTOR_MODE_P (optab_op2_mode))
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_NOTE, vect_location,
3371 "operand 1 using scalar mode.");
3372 vec_oprnd1 = op1;
3373 vec_oprnds1.quick_push (vec_oprnd1);
3374 if (slp_node)
3376 /* Store vec_oprnd1 for every vector stmt to be created
3377 for SLP_NODE. We check during the analysis that all
3378 the shift arguments are the same.
3379 TODO: Allow different constants for different vector
3380 stmts generated for an SLP instance. */
3381 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3382 vec_oprnds1.quick_push (vec_oprnd1);
3387 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3388 (a special case for certain kinds of vector shifts); otherwise,
3389 operand 1 should be of a vector type (the usual case). */
3390 if (vec_oprnd1)
3391 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3392 slp_node, -1);
3393 else
3394 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3395 slp_node, -1);
3397 else
3398 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3400 /* Arguments are ready. Create the new vector stmt. */
3401 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3403 vop1 = vec_oprnds1[i];
3404 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3405 new_temp = make_ssa_name (vec_dest, new_stmt);
3406 gimple_assign_set_lhs (new_stmt, new_temp);
3407 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3408 if (slp_node)
3409 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3412 if (slp_node)
3413 continue;
3415 if (j == 0)
3416 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3417 else
3418 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3419 prev_stmt_info = vinfo_for_stmt (new_stmt);
3422 vec_oprnds0.release ();
3423 vec_oprnds1.release ();
3425 return true;
3429 static tree permute_vec_elements (tree, tree, tree, gimple,
3430 gimple_stmt_iterator *);
3433 /* Function vectorizable_operation.
3435 Check if STMT performs a binary, unary or ternary operation that can
3436 be vectorized.
3437 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3438 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3439 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3441 static bool
3442 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3443 gimple *vec_stmt, slp_tree slp_node)
3445 tree vec_dest;
3446 tree scalar_dest;
3447 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3448 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3449 tree vectype;
3450 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3451 enum tree_code code;
3452 enum machine_mode vec_mode;
3453 tree new_temp;
3454 int op_type;
3455 optab optab;
3456 int icode;
3457 tree def;
3458 gimple def_stmt;
3459 enum vect_def_type dt[3]
3460 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3461 gimple new_stmt = NULL;
3462 stmt_vec_info prev_stmt_info;
3463 int nunits_in;
3464 int nunits_out;
3465 tree vectype_out;
3466 int ncopies;
3467 int j, i;
3468 vec<tree> vec_oprnds0 = vNULL;
3469 vec<tree> vec_oprnds1 = vNULL;
3470 vec<tree> vec_oprnds2 = vNULL;
3471 tree vop0, vop1, vop2;
3472 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3473 int vf;
3475 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3476 return false;
3478 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3479 return false;
3481 /* Is STMT a vectorizable binary/unary operation? */
3482 if (!is_gimple_assign (stmt))
3483 return false;
3485 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3486 return false;
3488 code = gimple_assign_rhs_code (stmt);
3490 /* For pointer addition, we should use the normal plus for
3491 the vector addition. */
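  /* E.g. a pointer increment p_1 = p_0 + 16 (a POINTER_PLUS_EXPR in GIMPLE,
     illustrative names) is vectorized with an ordinary PLUS_EXPR on the
     vector operands.  */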
3492 if (code == POINTER_PLUS_EXPR)
3493 code = PLUS_EXPR;
3495 /* Support only unary or binary operations. */
3496 op_type = TREE_CODE_LENGTH (code);
3497 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3499 if (dump_enabled_p ())
3500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3501 "num. args = %d (not unary/binary/ternary op).",
3502 op_type);
3503 return false;
3506 scalar_dest = gimple_assign_lhs (stmt);
3507 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3509 /* Most operations cannot handle bit-precision types without extra
3510 truncations. */
3511 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3512 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3513 /* Exception are bitwise binary operations. */
3514 && code != BIT_IOR_EXPR
3515 && code != BIT_XOR_EXPR
3516 && code != BIT_AND_EXPR)
3518 if (dump_enabled_p ())
3519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3520 "bit-precision arithmetic not supported.");
3521 return false;
3524 op0 = gimple_assign_rhs1 (stmt);
3525 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3526 &def_stmt, &def, &dt[0], &vectype))
3528 if (dump_enabled_p ())
3529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3530 "use not simple.");
3531 return false;
3533 /* If op0 is an external or constant def, use a vector type with
3534 the same size as the output vector type. */
3535 if (!vectype)
3536 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3537 if (vec_stmt)
3538 gcc_assert (vectype);
3539 if (!vectype)
3541 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3544 "no vectype for scalar type ");
3545 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3546 TREE_TYPE (op0));
3549 return false;
3552 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3553 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3554 if (nunits_out != nunits_in)
3555 return false;
3557 if (op_type == binary_op || op_type == ternary_op)
3559 op1 = gimple_assign_rhs2 (stmt);
3560 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3561 &def, &dt[1]))
3563 if (dump_enabled_p ())
3564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3565 "use not simple.");
3566 return false;
3569 if (op_type == ternary_op)
3571 op2 = gimple_assign_rhs3 (stmt);
3572 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3573 &def, &dt[2]))
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3577 "use not simple.");
3578 return false;
3582 if (loop_vinfo)
3583 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3584 else
3585 vf = 1;
3587 /* Multiple types in SLP are handled by creating the appropriate number of
3588 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3589 case of SLP. */
3590 if (slp_node || PURE_SLP_STMT (stmt_info))
3591 ncopies = 1;
3592 else
3593 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3595 gcc_assert (ncopies >= 1);
3597 /* Shifts are handled in vectorizable_shift (). */
3598 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3599 || code == RROTATE_EXPR)
3600 return false;
3602 /* Supportable by target? */
3604 vec_mode = TYPE_MODE (vectype);
3605 if (code == MULT_HIGHPART_EXPR)
3607 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3608 icode = LAST_INSN_CODE;
3609 else
3610 icode = CODE_FOR_nothing;
3612 else
3614 optab = optab_for_tree_code (code, vectype, optab_default);
3615 if (!optab)
3617 if (dump_enabled_p ())
3618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3619 "no optab.");
3620 return false;
3622 icode = (int) optab_handler (optab, vec_mode);
3625 if (icode == CODE_FOR_nothing)
3627 if (dump_enabled_p ())
3628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3629 "op not supported by target.");
3630 /* Check only during analysis. */
3631 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3632 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3633 return false;
3634 if (dump_enabled_p ())
3635 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3638 /* Worthwhile without SIMD support? Check only during analysis. */
3639 if (!VECTOR_MODE_P (vec_mode)
3640 && !vec_stmt
3641 && vf < vect_min_worthwhile_factor (code))
3643 if (dump_enabled_p ())
3644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3645 "not worthwhile without SIMD support.");
3646 return false;
3649 if (!vec_stmt) /* transformation not required. */
3651 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3652 if (dump_enabled_p ())
3653 dump_printf_loc (MSG_NOTE, vect_location,
3654 "=== vectorizable_operation ===");
3655 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3656 return true;
3659 /** Transform. **/
3661 if (dump_enabled_p ())
3662 dump_printf_loc (MSG_NOTE, vect_location,
3663 "transform binary/unary operation.");
3665 /* Handle def. */
3666 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3668 /* In case the vectorization factor (VF) is bigger than the number
3669 of elements that we can fit in a vectype (nunits), we have to generate
3670 more than one vector stmt, i.e., we need to "unroll" the
3671 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3672 from one copy of the vector stmt to the next, in the field
3673 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3674 stages to find the correct vector defs to be used when vectorizing
3675 stmts that use the defs of the current stmt. The example below
3676 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3677 we need to create 4 vectorized stmts):
3679 before vectorization:
3680 RELATED_STMT VEC_STMT
3681 S1: x = memref - -
3682 S2: z = x + 1 - -
3684 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3685 there):
3686 RELATED_STMT VEC_STMT
3687 VS1_0: vx0 = memref0 VS1_1 -
3688 VS1_1: vx1 = memref1 VS1_2 -
3689 VS1_2: vx2 = memref2 VS1_3 -
3690 VS1_3: vx3 = memref3 - -
3691 S1: x = load - VS1_0
3692 S2: z = x + 1 - -
3694 step 2: vectorize stmt S2 (done here):
3695 To vectorize stmt S2 we first need to find the relevant vector
3696 def for the first operand 'x'. This is, as usual, obtained from
3697 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3698 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3699 relevant vector def 'vx0'. Having found 'vx0' we can generate
3700 the vector stmt VS2_0, and as usual, record it in the
3701 STMT_VINFO_VEC_STMT of stmt S2.
3702 When creating the second copy (VS2_1), we obtain the relevant vector
3703 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3704 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3705 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3706 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3707 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3708 chain of stmts and pointers:
3709 RELATED_STMT VEC_STMT
3710 VS1_0: vx0 = memref0 VS1_1 -
3711 VS1_1: vx1 = memref1 VS1_2 -
3712 VS1_2: vx2 = memref2 VS1_3 -
3713 VS1_3: vx3 = memref3 - -
3714 S1: x = load - VS1_0
3715 VS2_0: vz0 = vx0 + v1 VS2_1 -
3716 VS2_1: vz1 = vx1 + v1 VS2_2 -
3717 VS2_2: vz2 = vx2 + v1 VS2_3 -
3718 VS2_3: vz3 = vx3 + v1 - -
3719 S2: z = x + 1 - VS2_0 */
3721 prev_stmt_info = NULL;
3722 for (j = 0; j < ncopies; j++)
3724 /* Handle uses. */
3725 if (j == 0)
3727 if (op_type == binary_op || op_type == ternary_op)
3728 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3729 slp_node, -1);
3730 else
3731 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3732 slp_node, -1);
3733 if (op_type == ternary_op)
3735 vec_oprnds2.create (1);
3736 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3737 stmt,
3738 NULL));
3741 else
3743 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3744 if (op_type == ternary_op)
3746 tree vec_oprnd = vec_oprnds2.pop ();
3747 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3748 vec_oprnd));
3752 /* Arguments are ready. Create the new vector stmt. */
3753 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3755 vop1 = ((op_type == binary_op || op_type == ternary_op)
3756 ? vec_oprnds1[i] : NULL_TREE);
3757 vop2 = ((op_type == ternary_op)
3758 ? vec_oprnds2[i] : NULL_TREE);
3759 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3760 vop0, vop1, vop2);
3761 new_temp = make_ssa_name (vec_dest, new_stmt);
3762 gimple_assign_set_lhs (new_stmt, new_temp);
3763 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3764 if (slp_node)
3765 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3768 if (slp_node)
3769 continue;
3771 if (j == 0)
3772 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3773 else
3774 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3775 prev_stmt_info = vinfo_for_stmt (new_stmt);
3778 vec_oprnds0.release ();
3779 vec_oprnds1.release ();
3780 vec_oprnds2.release ();
3782 return true;
3786 /* Function vectorizable_store.
3788 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3789 can be vectorized.
3790 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3791 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3792 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3794 static bool
3795 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3796 slp_tree slp_node)
3798 tree scalar_dest;
3799 tree data_ref;
3800 tree op;
3801 tree vec_oprnd = NULL_TREE;
3802 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3803 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3804 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3805 tree elem_type;
3806 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3807 struct loop *loop = NULL;
3808 enum machine_mode vec_mode;
3809 tree dummy;
3810 enum dr_alignment_support alignment_support_scheme;
3811 tree def;
3812 gimple def_stmt;
3813 enum vect_def_type dt;
3814 stmt_vec_info prev_stmt_info = NULL;
3815 tree dataref_ptr = NULL_TREE;
3816 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3817 int ncopies;
3818 int j;
3819 gimple next_stmt, first_stmt = NULL;
3820 bool grouped_store = false;
3821 bool store_lanes_p = false;
3822 unsigned int group_size, i;
3823 vec<tree> dr_chain = vNULL;
3824 vec<tree> oprnds = vNULL;
3825 vec<tree> result_chain = vNULL;
3826 bool inv_p;
3827 vec<tree> vec_oprnds = vNULL;
3828 bool slp = (slp_node != NULL);
3829 unsigned int vec_num;
3830 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3831 tree aggr_type;
3833 if (loop_vinfo)
3834 loop = LOOP_VINFO_LOOP (loop_vinfo);
3836 /* Multiple types in SLP are handled by creating the appropriate number of
3837 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3838 case of SLP. */
3839 if (slp || PURE_SLP_STMT (stmt_info))
3840 ncopies = 1;
3841 else
3842 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3844 gcc_assert (ncopies >= 1);
3846 /* FORNOW. This restriction should be relaxed. */
3847 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3849 if (dump_enabled_p ())
3850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3851 "multiple types in nested loop.");
3852 return false;
3855 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3856 return false;
3858 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3859 return false;
3861 /* Is vectorizable store? */
3863 if (!is_gimple_assign (stmt))
3864 return false;
3866 scalar_dest = gimple_assign_lhs (stmt);
3867 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3868 && is_pattern_stmt_p (stmt_info))
3869 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3870 if (TREE_CODE (scalar_dest) != ARRAY_REF
3871 && TREE_CODE (scalar_dest) != INDIRECT_REF
3872 && TREE_CODE (scalar_dest) != COMPONENT_REF
3873 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3874 && TREE_CODE (scalar_dest) != REALPART_EXPR
3875 && TREE_CODE (scalar_dest) != MEM_REF)
3876 return false;
3878 gcc_assert (gimple_assign_single_p (stmt));
3879 op = gimple_assign_rhs1 (stmt);
3880 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3881 &def, &dt))
3883 if (dump_enabled_p ())
3884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3885 "use not simple.");
3886 return false;
3889 elem_type = TREE_TYPE (vectype);
3890 vec_mode = TYPE_MODE (vectype);
3892 /* FORNOW. In some cases can vectorize even if data-type not supported
3893 (e.g. - array initialization with 0). */
3894 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3895 return false;
3897 if (!STMT_VINFO_DATA_REF (stmt_info))
3898 return false;
3900 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3901 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3902 size_zero_node) < 0)
3904 if (dump_enabled_p ())
3905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3906 "negative step for store.");
3907 return false;
3910 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3912 grouped_store = true;
3913 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3914 if (!slp && !PURE_SLP_STMT (stmt_info))
3916 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3917 if (vect_store_lanes_supported (vectype, group_size))
3918 store_lanes_p = true;
3919 else if (!vect_grouped_store_supported (vectype, group_size))
3920 return false;
3923 if (first_stmt == stmt)
3925 /* STMT is the leader of the group. Check the operands of all the
3926 stmts of the group. */
3927 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3928 while (next_stmt)
3930 gcc_assert (gimple_assign_single_p (next_stmt));
3931 op = gimple_assign_rhs1 (next_stmt);
3932 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3933 &def_stmt, &def, &dt))
3935 if (dump_enabled_p ())
3936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3937 "use not simple.");
3938 return false;
3940 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3945 if (!vec_stmt) /* transformation not required. */
3947 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3948 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3949 NULL, NULL, NULL);
3950 return true;
3953 /** Transform. **/
3955 if (grouped_store)
3957 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3958 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3960 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3962 /* FORNOW */
3963 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3965 /* We vectorize all the stmts of the interleaving group when we
3966 reach the last stmt in the group. */
3967 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3968 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3969 && !slp)
3971 *vec_stmt = NULL;
3972 return true;
3975 if (slp)
3977 grouped_store = false;
3978 /* VEC_NUM is the number of vect stmts to be created for this
3979 group. */
3980 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3981 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3982 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3983 op = gimple_assign_rhs1 (first_stmt);
3985 else
3986 /* VEC_NUM is the number of vect stmts to be created for this
3987 group. */
3988 vec_num = group_size;
3990 else
3992 first_stmt = stmt;
3993 first_dr = dr;
3994 group_size = vec_num = 1;
3997 if (dump_enabled_p ())
3998 dump_printf_loc (MSG_NOTE, vect_location,
3999 "transform store. ncopies = %d", ncopies);
4001 dr_chain.create (group_size);
4002 oprnds.create (group_size);
4004 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4005 gcc_assert (alignment_support_scheme);
4006 /* Targets with store-lane instructions must not require explicit
4007 realignment. */
4008 gcc_assert (!store_lanes_p
4009 || alignment_support_scheme == dr_aligned
4010 || alignment_support_scheme == dr_unaligned_supported);
4012 if (store_lanes_p)
4013 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4014 else
4015 aggr_type = vectype;
4017 /* In case the vectorization factor (VF) is bigger than the number
4018 of elements that we can fit in a vectype (nunits), we have to generate
4019 more than one vector stmt - i.e. - we need to "unroll" the
4020 vector stmt by a factor VF/nunits. For more details see documentation in
4021 vect_get_vec_def_for_copy_stmt. */
4023 /* In case of interleaving (non-unit grouped access):
4025 S1: &base + 2 = x2
4026 S2: &base = x0
4027 S3: &base + 1 = x1
4028 S4: &base + 3 = x3
4030 We create vectorized stores starting from the base address (the access of the
4031 first stmt in the chain, S2 in the above example) when the last store stmt
4032 of the chain (S4) is reached:
4034 VS1: &base = vx2
4035 VS2: &base + vec_size*1 = vx0
4036 VS3: &base + vec_size*2 = vx1
4037 VS4: &base + vec_size*3 = vx3
4039 Then permutation statements are generated:
4041 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4042 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4045 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4046 (the order of the data-refs in the output of vect_permute_store_chain
4047 corresponds to the order of scalar stmts in the interleaving chain - see
4048 the documentation of vect_permute_store_chain()).
4050 In case of both multiple types and interleaving, the above vector stores and
4051 permutation stmts are created for every copy. The result vector stmts are
4052 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4053 STMT_VINFO_RELATED_STMT for the next copies.
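      As a purely illustrative example, an interleaved store group with
      group_size == 4 can come from a scalar loop such as

        for (i = 0; i < N; i++)
          {
            out[4 * i]     = x0;
            out[4 * i + 1] = x1;
            out[4 * i + 2] = x2;
            out[4 * i + 3] = x3;
          }

      vect_permute_store_chain() then interleaves the four vector defs so
      that consecutive memory locations receive x0, x1, x2 and x3 in order.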
4056 prev_stmt_info = NULL;
4057 for (j = 0; j < ncopies; j++)
4059 gimple new_stmt;
4060 gimple ptr_incr;
4062 if (j == 0)
4064 if (slp)
4066 /* Get vectorized arguments for SLP_NODE. */
4067 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4068 NULL, slp_node, -1);
4070 vec_oprnd = vec_oprnds[0];
4072 else
4074 /* For interleaved stores we collect vectorized defs for all the
4075 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4076 used as an input to vect_permute_store_chain(), and OPRNDS as
4077 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4079 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4080 OPRNDS are of size 1. */
4081 next_stmt = first_stmt;
4082 for (i = 0; i < group_size; i++)
4084 /* Since gaps are not supported for interleaved stores,
4085 GROUP_SIZE is the exact number of stmts in the chain.
4086 Therefore, NEXT_STMT cannot be NULL. In case that
4087 there is no interleaving, GROUP_SIZE is 1, and only one
4088 iteration of the loop will be executed. */
4089 gcc_assert (next_stmt
4090 && gimple_assign_single_p (next_stmt));
4091 op = gimple_assign_rhs1 (next_stmt);
4093 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4094 NULL);
4095 dr_chain.quick_push (vec_oprnd);
4096 oprnds.quick_push (vec_oprnd);
4097 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4101 /* We should have caught mismatched types earlier. */
4102 gcc_assert (useless_type_conversion_p (vectype,
4103 TREE_TYPE (vec_oprnd)));
4104 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4105 NULL_TREE, &dummy, gsi,
4106 &ptr_incr, false, &inv_p);
4107 gcc_assert (bb_vinfo || !inv_p);
4109 else
4111 /* For interleaved stores we created vectorized defs for all the
4112 defs stored in OPRNDS in the previous iteration (previous copy).
4113 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4114 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4115 next copy.
4116 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4117 OPRNDS are of size 1. */
4118 for (i = 0; i < group_size; i++)
4120 op = oprnds[i];
4121 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4122 &def, &dt);
4123 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4124 dr_chain[i] = vec_oprnd;
4125 oprnds[i] = vec_oprnd;
4127 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4128 TYPE_SIZE_UNIT (aggr_type));
4131 if (store_lanes_p)
4133 tree vec_array;
4135 /* Combine all the vectors into an array. */
4136 vec_array = create_vector_array (vectype, vec_num);
4137 for (i = 0; i < vec_num; i++)
4139 vec_oprnd = dr_chain[i];
4140 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4143 /* Emit:
4144 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4145 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4146 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4147 gimple_call_set_lhs (new_stmt, data_ref);
4148 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4150 else
4152 new_stmt = NULL;
4153 if (grouped_store)
4155 result_chain.create (group_size);
4156 /* Permute. */
4157 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4158 &result_chain);
4161 next_stmt = first_stmt;
4162 for (i = 0; i < vec_num; i++)
4164 unsigned align, misalign;
4166 if (i > 0)
4167 /* Bump the vector pointer. */
4168 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4169 stmt, NULL_TREE);
4171 if (slp)
4172 vec_oprnd = vec_oprnds[i];
4173 else if (grouped_store)
4174 /* For grouped stores vectorized defs are interleaved in
4175 vect_permute_store_chain(). */
4176 vec_oprnd = result_chain[i];
4178 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4179 build_int_cst (reference_alias_ptr_type
4180 (DR_REF (first_dr)), 0));
4181 align = TYPE_ALIGN_UNIT (vectype);
4182 if (aligned_access_p (first_dr))
4183 misalign = 0;
4184 else if (DR_MISALIGNMENT (first_dr) == -1)
4186 TREE_TYPE (data_ref)
4187 = build_aligned_type (TREE_TYPE (data_ref),
4188 TYPE_ALIGN (elem_type));
4189 align = TYPE_ALIGN_UNIT (elem_type);
4190 misalign = 0;
4192 else
4194 TREE_TYPE (data_ref)
4195 = build_aligned_type (TREE_TYPE (data_ref),
4196 TYPE_ALIGN (elem_type));
4197 misalign = DR_MISALIGNMENT (first_dr);
4199 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4200 misalign);
4202 /* Arguments are ready. Create the new vector stmt. */
4203 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4206 if (slp)
4207 continue;
4209 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4210 if (!next_stmt)
4211 break;
4214 if (!slp)
4216 if (j == 0)
4217 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4218 else
4219 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4220 prev_stmt_info = vinfo_for_stmt (new_stmt);
4224 dr_chain.release ();
4225 oprnds.release ();
4226 result_chain.release ();
4227 vec_oprnds.release ();
4229 return true;
4232 /* Given a vector type VECTYPE and permutation SEL returns
4233 the VECTOR_CST mask that implements the permutation of the
4234 vector elements. If that is impossible to do, returns NULL. */
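/* For instance (illustrative only), for a four-element vector type and
   SEL == { 3, 2, 1, 0 } this builds the VECTOR_CST { 3, 2, 1, 0 } of the
   matching integer mask type, which VEC_PERM_EXPR interprets as "take
   element 3 of the first input, then 2, then 1, then 0", i.e. a reversal;
   this is exactly the mask that perm_mask_for_reverse below asks for.  */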
4236 tree
4237 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4239 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4240 int i, nunits;
4242 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4244 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4245 return NULL;
4247 mask_elt_type = lang_hooks.types.type_for_mode
4248 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4249 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4251 mask_elts = XALLOCAVEC (tree, nunits);
4252 for (i = nunits - 1; i >= 0; i--)
4253 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4254 mask_vec = build_vector (mask_type, mask_elts);
4256 return mask_vec;
4259 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4260 reversal of the vector elements. If that is impossible to do,
4261 returns NULL. */
4263 static tree
4264 perm_mask_for_reverse (tree vectype)
4266 int i, nunits;
4267 unsigned char *sel;
4269 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4270 sel = XALLOCAVEC (unsigned char, nunits);
4272 for (i = 0; i < nunits; ++i)
4273 sel[i] = nunits - 1 - i;
4275 return vect_gen_perm_mask (vectype, sel);
4278 /* Given vector variables X and Y that were generated for the scalar
4279 STMT, generate instructions to permute the vector elements of X and Y
4280 using permutation mask MASK_VEC, insert them at *GSI and return the
4281 permuted vector variable. */
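/* Schematically, the statement emitted here has the form

     perm_dest_N = VEC_PERM_EXPR <x, y, mask_vec>;

   where perm_dest_N is a fresh SSA name with the vector type of X.  */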
4283 static tree
4284 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4285 gimple_stmt_iterator *gsi)
4287 tree vectype = TREE_TYPE (x);
4288 tree perm_dest, data_ref;
4289 gimple perm_stmt;
4291 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4292 data_ref = make_ssa_name (perm_dest, NULL);
4294 /* Generate the permute statement. */
4295 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4296 x, y, mask_vec);
4297 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4299 return data_ref;
4302 /* vectorizable_load.
4304 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4305 can be vectorized.
4306 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4307 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4308 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4310 static bool
4311 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4312 slp_tree slp_node, slp_instance slp_node_instance)
4314 tree scalar_dest;
4315 tree vec_dest = NULL;
4316 tree data_ref = NULL;
4317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4318 stmt_vec_info prev_stmt_info;
4319 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4320 struct loop *loop = NULL;
4321 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4322 bool nested_in_vect_loop = false;
4323 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4324 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4325 tree elem_type;
4326 tree new_temp;
4327 enum machine_mode mode;
4328 gimple new_stmt = NULL;
4329 tree dummy;
4330 enum dr_alignment_support alignment_support_scheme;
4331 tree dataref_ptr = NULL_TREE;
4332 gimple ptr_incr;
4333 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4334 int ncopies;
4335 int i, j, group_size;
4336 tree msq = NULL_TREE, lsq;
4337 tree offset = NULL_TREE;
4338 tree realignment_token = NULL_TREE;
4339 gimple phi = NULL;
4340 vec<tree> dr_chain = vNULL;
4341 bool grouped_load = false;
4342 bool load_lanes_p = false;
4343 gimple first_stmt;
4344 bool inv_p;
4345 bool negative = false;
4346 bool compute_in_loop = false;
4347 struct loop *at_loop;
4348 int vec_num;
4349 bool slp = (slp_node != NULL);
4350 bool slp_perm = false;
4351 enum tree_code code;
4352 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4353 int vf;
4354 tree aggr_type;
4355 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4356 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4357 tree stride_base, stride_step;
4358 int gather_scale = 1;
4359 enum vect_def_type gather_dt = vect_unknown_def_type;
4361 if (loop_vinfo)
4363 loop = LOOP_VINFO_LOOP (loop_vinfo);
4364 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4365 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4367 else
4368 vf = 1;
4370 /* Multiple types in SLP are handled by creating the appropriate number of
4371 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4372 case of SLP. */
4373 if (slp || PURE_SLP_STMT (stmt_info))
4374 ncopies = 1;
4375 else
4376 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4378 gcc_assert (ncopies >= 1);
4380 /* FORNOW. This restriction should be relaxed. */
4381 if (nested_in_vect_loop && ncopies > 1)
4383 if (dump_enabled_p ())
4384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4385 "multiple types in nested loop.");
4386 return false;
4389 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4390 return false;
4392 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4393 return false;
4395 /* Is vectorizable load? */
4396 if (!is_gimple_assign (stmt))
4397 return false;
4399 scalar_dest = gimple_assign_lhs (stmt);
4400 if (TREE_CODE (scalar_dest) != SSA_NAME)
4401 return false;
4403 code = gimple_assign_rhs_code (stmt);
4404 if (code != ARRAY_REF
4405 && code != INDIRECT_REF
4406 && code != COMPONENT_REF
4407 && code != IMAGPART_EXPR
4408 && code != REALPART_EXPR
4409 && code != MEM_REF
4410 && TREE_CODE_CLASS (code) != tcc_declaration)
4411 return false;
4413 if (!STMT_VINFO_DATA_REF (stmt_info))
4414 return false;
4416 elem_type = TREE_TYPE (vectype);
4417 mode = TYPE_MODE (vectype);
4419 /* FORNOW. In some cases can vectorize even if data-type not supported
4420 (e.g. - data copies). */
4421 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4423 if (dump_enabled_p ())
4424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4425 "Aligned load, but unsupported type.");
4426 return false;
4429 /* Check if the load is a part of an interleaving chain. */
4430 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4432 grouped_load = true;
4433 /* FORNOW */
4434 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4436 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4437 if (!slp && !PURE_SLP_STMT (stmt_info))
4439 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4440 if (vect_load_lanes_supported (vectype, group_size))
4441 load_lanes_p = true;
4442 else if (!vect_grouped_load_supported (vectype, group_size))
4443 return false;
4448 if (STMT_VINFO_GATHER_P (stmt_info))
4450 gimple def_stmt;
4451 tree def;
4452 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4453 &gather_off, &gather_scale);
4454 gcc_assert (gather_decl);
4455 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4456 &def_stmt, &def, &gather_dt,
4457 &gather_off_vectype))
4459 if (dump_enabled_p ())
4460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4461 "gather index use not simple.");
4462 return false;
4465 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4467 if (!vect_check_strided_load (stmt, loop_vinfo,
4468 &stride_base, &stride_step))
4469 return false;
4471 else
4473 negative = tree_int_cst_compare (nested_in_vect_loop
4474 ? STMT_VINFO_DR_STEP (stmt_info)
4475 : DR_STEP (dr),
4476 size_zero_node) < 0;
4477 if (negative && ncopies > 1)
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "multiple types with negative step.");
4482 return false;
4485 if (negative)
4487 gcc_assert (!grouped_load);
4488 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4489 if (alignment_support_scheme != dr_aligned
4490 && alignment_support_scheme != dr_unaligned_supported)
4492 if (dump_enabled_p ())
4493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4494 "negative step but alignment required.");
4495 return false;
4497 if (!perm_mask_for_reverse (vectype))
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4501 "negative step and reversing not supported.");
4502 return false;
4507 if (!vec_stmt) /* transformation not required. */
4509 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4510 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4511 return true;
4514 if (dump_enabled_p ())
4515 dump_printf_loc (MSG_NOTE, vect_location,
4516 "transform load. ncopies = %d", ncopies);
4518 /** Transform. **/
4520 if (STMT_VINFO_GATHER_P (stmt_info))
4522 tree vec_oprnd0 = NULL_TREE, op;
4523 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4524 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4525 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4526 edge pe = loop_preheader_edge (loop);
4527 gimple_seq seq;
4528 basic_block new_bb;
4529 enum { NARROW, NONE, WIDEN } modifier;
4530 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4532 if (nunits == gather_off_nunits)
4533 modifier = NONE;
4534 else if (nunits == gather_off_nunits / 2)
4536 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4537 modifier = WIDEN;
4539 for (i = 0; i < gather_off_nunits; ++i)
4540 sel[i] = i | nunits;
4542 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4543 gcc_assert (perm_mask != NULL_TREE);
4545 else if (nunits == gather_off_nunits * 2)
4547 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4548 modifier = NARROW;
4550 for (i = 0; i < nunits; ++i)
4551 sel[i] = i < gather_off_nunits
4552 ? i : i + nunits - gather_off_nunits;
4554 perm_mask = vect_gen_perm_mask (vectype, sel);
4555 gcc_assert (perm_mask != NULL_TREE);
4556 ncopies *= 2;
4558 else
4559 gcc_unreachable ();
4561 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4562 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4563 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4564 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4565 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4566 scaletype = TREE_VALUE (arglist);
4567 gcc_checking_assert (types_compatible_p (srctype, rettype)
4568 && types_compatible_p (srctype, masktype));
4570 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4572 ptr = fold_convert (ptrtype, gather_base);
4573 if (!is_gimple_min_invariant (ptr))
4575 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4576 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4577 gcc_assert (!new_bb);
4580 /* Currently we support only unconditional gather loads,
4581 so mask should be all ones. */
4582 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4583 mask = build_int_cst (TREE_TYPE (masktype), -1);
4584 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4586 REAL_VALUE_TYPE r;
4587 long tmp[6];
4588 for (j = 0; j < 6; ++j)
4589 tmp[j] = -1;
4590 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4591 mask = build_real (TREE_TYPE (masktype), r);
4593 else
4594 gcc_unreachable ();
4595 mask = build_vector_from_val (masktype, mask);
4596 mask = vect_init_vector (stmt, mask, masktype, NULL);
4598 scale = build_int_cst (scaletype, gather_scale);
4600 prev_stmt_info = NULL;
4601 for (j = 0; j < ncopies; ++j)
4603 if (modifier == WIDEN && (j & 1))
4604 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4605 perm_mask, stmt, gsi);
4606 else if (j == 0)
4607 op = vec_oprnd0
4608 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4609 else
4610 op = vec_oprnd0
4611 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4613 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4615 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4616 == TYPE_VECTOR_SUBPARTS (idxtype));
4617 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4618 var = make_ssa_name (var, NULL);
4619 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4620 new_stmt
4621 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4622 op, NULL_TREE);
4623 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4624 op = var;
4627 new_stmt
4628 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4630 if (!useless_type_conversion_p (vectype, rettype))
4632 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4633 == TYPE_VECTOR_SUBPARTS (rettype));
4634 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4635 op = make_ssa_name (var, new_stmt);
4636 gimple_call_set_lhs (new_stmt, op);
4637 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4638 var = make_ssa_name (vec_dest, NULL);
4639 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4640 new_stmt
4641 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4642 NULL_TREE);
4644 else
4646 var = make_ssa_name (vec_dest, new_stmt);
4647 gimple_call_set_lhs (new_stmt, var);
4650 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4652 if (modifier == NARROW)
4654 if ((j & 1) == 0)
4656 prev_res = var;
4657 continue;
4659 var = permute_vec_elements (prev_res, var,
4660 perm_mask, stmt, gsi);
4661 new_stmt = SSA_NAME_DEF_STMT (var);
4664 if (prev_stmt_info == NULL)
4665 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4666 else
4667 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4668 prev_stmt_info = vinfo_for_stmt (new_stmt);
4670 return true;
4672 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4674 gimple_stmt_iterator incr_gsi;
4675 bool insert_after;
4676 gimple incr;
4677 tree offvar;
4678 tree ref = DR_REF (dr);
4679 tree ivstep;
4680 tree running_off;
4681 vec<constructor_elt, va_gc> *v = NULL;
4682 gimple_seq stmts = NULL;
4684 gcc_assert (stride_base && stride_step);
4686 /* For a load with loop-invariant (but other than power-of-2)
4687 stride (i.e. not a grouped access) like so:
4689 for (i = 0; i < n; i += stride)
4690 ... = array[i];
4692 we generate a new induction variable and new accesses to
4693 form a new vector (or vectors, depending on ncopies):
4695 for (j = 0; ; j += VF*stride)
4696 tmp1 = array[j];
4697 tmp2 = array[j + stride];
4699 vectemp = {tmp1, tmp2, ...}
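      As a purely illustrative instance, with a (hypothetical) constant
      stride of 3 and nunits == 4 each copy loads

        tmp1 = array[j];
        tmp2 = array[j + 3];
        tmp3 = array[j + 6];
        tmp4 = array[j + 9];

      and the induction variable j is then advanced by VF * 3.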
4702 ivstep = stride_step;
4703 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4704 build_int_cst (TREE_TYPE (ivstep), vf));
4706 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4708 create_iv (stride_base, ivstep, NULL,
4709 loop, &incr_gsi, insert_after,
4710 &offvar, NULL);
4711 incr = gsi_stmt (incr_gsi);
4712 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4714 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4715 if (stmts)
4716 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4718 prev_stmt_info = NULL;
4719 running_off = offvar;
4720 for (j = 0; j < ncopies; j++)
4722 tree vec_inv;
4724 vec_alloc (v, nunits);
4725 for (i = 0; i < nunits; i++)
4727 tree newref, newoff;
4728 gimple incr;
4729 if (TREE_CODE (ref) == ARRAY_REF)
4731 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4732 unshare_expr (TREE_OPERAND (ref, 0)),
4733 running_off,
4734 NULL_TREE, NULL_TREE);
4735 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4736 TREE_TYPE (newref)))
4737 newref = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype),
4738 newref);
4740 else
4741 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4742 running_off,
4743 TREE_OPERAND (ref, 1));
4745 newref = force_gimple_operand_gsi (gsi, newref, true,
4746 NULL_TREE, true,
4747 GSI_SAME_STMT);
4748 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4749 newoff = copy_ssa_name (running_off, NULL);
4750 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4751 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4752 running_off, stride_step);
4753 else
4754 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4755 running_off, stride_step);
4756 vect_finish_stmt_generation (stmt, incr, gsi);
4758 running_off = newoff;
4761 vec_inv = build_constructor (vectype, v);
4762 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4763 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4765 if (j == 0)
4766 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4767 else
4768 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4769 prev_stmt_info = vinfo_for_stmt (new_stmt);
4771 return true;
4774 if (grouped_load)
4776 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4777 if (slp
4778 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
4779 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4780 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4782 /* Check if the chain of loads is already vectorized. */
4783 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4785 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4786 return true;
4788 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4789 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4791 /* VEC_NUM is the number of vect stmts to be created for this group. */
4792 if (slp)
4794 grouped_load = false;
4795 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4796 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
4797 slp_perm = true;
4799 else
4800 vec_num = group_size;
4802 else
4804 first_stmt = stmt;
4805 first_dr = dr;
4806 group_size = vec_num = 1;
4809 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4810 gcc_assert (alignment_support_scheme);
4811 /* Targets with load-lane instructions must not require explicit
4812 realignment. */
4813 gcc_assert (!load_lanes_p
4814 || alignment_support_scheme == dr_aligned
4815 || alignment_support_scheme == dr_unaligned_supported);
4817 /* In case the vectorization factor (VF) is bigger than the number
4818 of elements that we can fit in a vectype (nunits), we have to generate
4819 more than one vector stmt - i.e. - we need to "unroll" the
4820 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4821 from one copy of the vector stmt to the next, in the field
4822 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4823 stages to find the correct vector defs to be used when vectorizing
4824 stmts that use the defs of the current stmt. The example below
4825 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4826 need to create 4 vectorized stmts):
4828 before vectorization:
4829 RELATED_STMT VEC_STMT
4830 S1: x = memref - -
4831 S2: z = x + 1 - -
4833 step 1: vectorize stmt S1:
4834 We first create the vector stmt VS1_0, and, as usual, record a
4835 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4836 Next, we create the vector stmt VS1_1, and record a pointer to
4837 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4838 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4839 stmts and pointers:
4840 RELATED_STMT VEC_STMT
4841 VS1_0: vx0 = memref0 VS1_1 -
4842 VS1_1: vx1 = memref1 VS1_2 -
4843 VS1_2: vx2 = memref2 VS1_3 -
4844 VS1_3: vx3 = memref3 - -
4845 S1: x = load - VS1_0
4846 S2: z = x + 1 - -
4848 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4849 information we recorded in RELATED_STMT field is used to vectorize
4850 stmt S2. */
4852 /* In case of interleaving (non-unit grouped access):
4854 S1: x2 = &base + 2
4855 S2: x0 = &base
4856 S3: x1 = &base + 1
4857 S4: x3 = &base + 3
4859 Vectorized loads are created in the order of memory accesses
4860 starting from the access of the first stmt of the chain:
4862 VS1: vx0 = &base
4863 VS2: vx1 = &base + vec_size*1
4864 VS3: vx2 = &base + vec_size*2
4865 VS4: vx3 = &base + vec_size*3
4867 Then permutation statements are generated:
4869 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4870 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4873 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4874 (the order of the data-refs in the output of vect_permute_load_chain
4875 corresponds to the order of scalar stmts in the interleaving chain - see
4876 the documentation of vect_permute_load_chain()).
4877 The generation of permutation stmts and recording them in
4878 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4880 In case of both multiple types and interleaving, the vector loads and
4881 permutation stmts above are created for every copy. The result vector
4882 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4883 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
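   /* As an illustration only (not tied to any particular testcase), a
      grouped load with group_size == 2 typically comes from a scalar loop
      such as

         for (i = 0; i < N; i++)
           {
             re = a[2 * i];
             im = a[2 * i + 1];
             ...
           }

      and the permutation stmts de-interleave the loaded vectors into one
      vector of 're' values and one vector of 'im' values.  */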
4885 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4886 on a target that supports unaligned accesses (dr_unaligned_supported)
4887 we generate the following code:
4888 p = initial_addr;
4889 indx = 0;
4890 loop {
4891 p = p + indx * vectype_size;
4892 vec_dest = *(p);
4893 indx = indx + 1;
4896 Otherwise, the data reference is potentially unaligned on a target that
4897 does not support unaligned accesses (dr_explicit_realign_optimized) -
4898 then generate the following code, in which the data in each iteration is
4899 obtained by two vector loads, one from the previous iteration, and one
4900 from the current iteration:
4901 p1 = initial_addr;
4902 msq_init = *(floor(p1))
4903 p2 = initial_addr + VS - 1;
4904 realignment_token = call target_builtin;
4905 indx = 0;
4906 loop {
4907 p2 = p2 + indx * vectype_size
4908 lsq = *(floor(p2))
4909 vec_dest = realign_load (msq, lsq, realignment_token)
4910 indx = indx + 1;
4911 msq = lsq;
4912 } */
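   /* The floor(p) operation in the pseudo-code above is only shorthand: in
      the code generated below it is implemented by masking the address down
      to the alignment of the vector type, roughly

        p_floor = p & -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype);

      (see the BIT_AND_EXPRs built for dr_explicit_realign and
      dr_explicit_realign_optimized further down).  */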
4914 /* If the misalignment remains the same throughout the execution of the
4915 loop, we can create the init_addr and permutation mask at the loop
4916 preheader. Otherwise, it needs to be created inside the loop.
4917 This can only occur when vectorizing memory accesses in the inner-loop
4918 nested within an outer-loop that is being vectorized. */
4920 if (nested_in_vect_loop
4921 && (TREE_INT_CST_LOW (DR_STEP (dr))
4922 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4924 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4925 compute_in_loop = true;
4928 if ((alignment_support_scheme == dr_explicit_realign_optimized
4929 || alignment_support_scheme == dr_explicit_realign)
4930 && !compute_in_loop
4931 && !integer_zerop (DR_STEP (dr)))
4933 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4934 alignment_support_scheme, NULL_TREE,
4935 &at_loop);
4936 if (alignment_support_scheme == dr_explicit_realign_optimized)
4938 phi = SSA_NAME_DEF_STMT (msq);
4939 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4942 else
4943 at_loop = loop;
4945 if (negative)
4946 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4948 if (load_lanes_p)
4949 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4950 else
4951 aggr_type = vectype;
4953 prev_stmt_info = NULL;
4954 for (j = 0; j < ncopies; j++)
4956 /* 1. Create the vector or array pointer update chain. */
4957 if (j == 0)
4958 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4959 offset, &dummy, gsi,
4960 &ptr_incr, false, &inv_p);
4961 else
4962 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4963 TYPE_SIZE_UNIT (aggr_type));
4965 if (grouped_load || slp_perm)
4966 dr_chain.create (vec_num);
4968 if (load_lanes_p)
4970 tree vec_array;
4972 vec_array = create_vector_array (vectype, vec_num);
4974 /* Emit:
4975 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4976 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4977 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4978 gimple_call_set_lhs (new_stmt, vec_array);
4979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4981 /* Extract each vector into an SSA_NAME. */
4982 for (i = 0; i < vec_num; i++)
4984 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4985 vec_array, i);
4986 dr_chain.quick_push (new_temp);
4989 /* Record the mapping between SSA_NAMEs and statements. */
4990 vect_record_grouped_load_vectors (stmt, dr_chain);
4992 /* Handle invariant-load. */
4993 else if (inv_p && !bb_vinfo)
4995 gimple_stmt_iterator gsi2 = *gsi;
4996 gcc_assert (!grouped_load && !slp_perm);
4997 gsi_next (&gsi2);
4998 new_temp = vect_init_vector (stmt, scalar_dest,
4999 vectype, &gsi2);
5000 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5001 /* Store vector loads in the corresponding SLP_NODE. */
5002 if (slp)
5003 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5005 else
5007 for (i = 0; i < vec_num; i++)
5009 if (i > 0)
5010 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5011 stmt, NULL_TREE);
5013 /* 2. Create the vector-load in the loop. */
5014 switch (alignment_support_scheme)
5016 case dr_aligned:
5017 case dr_unaligned_supported:
5019 unsigned int align, misalign;
5021 data_ref
5022 = build2 (MEM_REF, vectype, dataref_ptr,
5023 build_int_cst (reference_alias_ptr_type
5024 (DR_REF (first_dr)), 0));
5025 align = TYPE_ALIGN_UNIT (vectype);
5026 if (alignment_support_scheme == dr_aligned)
5028 gcc_assert (aligned_access_p (first_dr));
5029 misalign = 0;
5031 else if (DR_MISALIGNMENT (first_dr) == -1)
5033 TREE_TYPE (data_ref)
5034 = build_aligned_type (TREE_TYPE (data_ref),
5035 TYPE_ALIGN (elem_type));
5036 align = TYPE_ALIGN_UNIT (elem_type);
5037 misalign = 0;
5039 else
5041 TREE_TYPE (data_ref)
5042 = build_aligned_type (TREE_TYPE (data_ref),
5043 TYPE_ALIGN (elem_type));
5044 misalign = DR_MISALIGNMENT (first_dr);
5046 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5047 align, misalign);
5048 break;
5050 case dr_explicit_realign:
5052 tree ptr, bump;
5053 tree vs_minus_1;
5055 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5057 if (compute_in_loop)
5058 msq = vect_setup_realignment (first_stmt, gsi,
5059 &realignment_token,
5060 dr_explicit_realign,
5061 dataref_ptr, NULL);
5063 ptr = copy_ssa_name (dataref_ptr, NULL);
5064 new_stmt = gimple_build_assign_with_ops
5065 (BIT_AND_EXPR, ptr, dataref_ptr,
5066 build_int_cst
5067 (TREE_TYPE (dataref_ptr),
5068 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5069 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5070 data_ref
5071 = build2 (MEM_REF, vectype, ptr,
5072 build_int_cst (reference_alias_ptr_type
5073 (DR_REF (first_dr)), 0));
5074 vec_dest = vect_create_destination_var (scalar_dest,
5075 vectype);
5076 new_stmt = gimple_build_assign (vec_dest, data_ref);
5077 new_temp = make_ssa_name (vec_dest, new_stmt);
5078 gimple_assign_set_lhs (new_stmt, new_temp);
5079 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5080 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5082 msq = new_temp;
5084 bump = size_binop (MULT_EXPR, vs_minus_1,
5085 TYPE_SIZE_UNIT (elem_type));
5086 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5087 new_stmt = gimple_build_assign_with_ops
5088 (BIT_AND_EXPR, NULL_TREE, ptr,
5089 build_int_cst
5090 (TREE_TYPE (ptr),
5091 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5092 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5093 gimple_assign_set_lhs (new_stmt, ptr);
5094 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5095 data_ref
5096 = build2 (MEM_REF, vectype, ptr,
5097 build_int_cst (reference_alias_ptr_type
5098 (DR_REF (first_dr)), 0));
5099 break;
5101 case dr_explicit_realign_optimized:
5102 new_temp = copy_ssa_name (dataref_ptr, NULL);
5103 new_stmt = gimple_build_assign_with_ops
5104 (BIT_AND_EXPR, new_temp, dataref_ptr,
5105 build_int_cst
5106 (TREE_TYPE (dataref_ptr),
5107 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5108 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5109 data_ref
5110 = build2 (MEM_REF, vectype, new_temp,
5111 build_int_cst (reference_alias_ptr_type
5112 (DR_REF (first_dr)), 0));
5113 break;
5114 default:
5115 gcc_unreachable ();
5117 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5118 new_stmt = gimple_build_assign (vec_dest, data_ref);
5119 new_temp = make_ssa_name (vec_dest, new_stmt);
5120 gimple_assign_set_lhs (new_stmt, new_temp);
5121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5123 /* 3. Handle explicit realignment if necessary/supported.
5124 Create in loop:
5125 vec_dest = realign_load (msq, lsq, realignment_token) */
5126 if (alignment_support_scheme == dr_explicit_realign_optimized
5127 || alignment_support_scheme == dr_explicit_realign)
5129 lsq = gimple_assign_lhs (new_stmt);
5130 if (!realignment_token)
5131 realignment_token = dataref_ptr;
5132 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5133 new_stmt
5134 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5135 vec_dest, msq, lsq,
5136 realignment_token);
5137 new_temp = make_ssa_name (vec_dest, new_stmt);
5138 gimple_assign_set_lhs (new_stmt, new_temp);
5139 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5141 if (alignment_support_scheme == dr_explicit_realign_optimized)
5143 gcc_assert (phi);
5144 if (i == vec_num - 1 && j == ncopies - 1)
5145 add_phi_arg (phi, lsq,
5146 loop_latch_edge (containing_loop),
5147 UNKNOWN_LOCATION);
5148 msq = lsq;
5152 if (negative)
5154 tree perm_mask = perm_mask_for_reverse (vectype);
5155 new_temp = permute_vec_elements (new_temp, new_temp,
5156 perm_mask, stmt, gsi);
5157 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5160 /* Collect vector loads and later create their permutation in
5161 vect_transform_grouped_load (). */
5162 if (grouped_load || slp_perm)
5163 dr_chain.quick_push (new_temp);
5165 /* Store vector loads in the corresponding SLP_NODE. */
5166 if (slp && !slp_perm)
5167 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5171 if (slp && !slp_perm)
5172 continue;
5174 if (slp_perm)
5176 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5177 slp_node_instance, false))
5179 dr_chain.release ();
5180 return false;
5183 else
5185 if (grouped_load)
5187 if (!load_lanes_p)
5188 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5189 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5191 else
5193 if (j == 0)
5194 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5195 else
5196 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5197 prev_stmt_info = vinfo_for_stmt (new_stmt);
5200 dr_chain.release ();
5203 return true;
5206 /* Function vect_is_simple_cond.
5208 Input:
5209 LOOP - the loop that is being vectorized.
5210 COND - Condition that is checked for simple use.
5212 Output:
5213 *COMP_VECTYPE - the vector type for the comparison.
5215 Returns whether a COND can be vectorized. Checks whether
5216 condition operands are supportable using vect_is_simple_use. */
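/* For example (illustrative only), a condition such as

     a_1 < b_2

   where a_1 and b_2 are SSA names (or invariant constants) with simple
   uses is considered vectorizable; *COMP_VECTYPE is then taken from
   whichever operand supplied a vector type.  */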
5218 static bool
5219 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5220 bb_vec_info bb_vinfo, tree *comp_vectype)
5222 tree lhs, rhs;
5223 tree def;
5224 enum vect_def_type dt;
5225 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5227 if (!COMPARISON_CLASS_P (cond))
5228 return false;
5230 lhs = TREE_OPERAND (cond, 0);
5231 rhs = TREE_OPERAND (cond, 1);
5233 if (TREE_CODE (lhs) == SSA_NAME)
5235 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5236 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5237 &lhs_def_stmt, &def, &dt, &vectype1))
5238 return false;
5240 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5241 && TREE_CODE (lhs) != FIXED_CST)
5242 return false;
5244 if (TREE_CODE (rhs) == SSA_NAME)
5246 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5247 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5248 &rhs_def_stmt, &def, &dt, &vectype2))
5249 return false;
5251 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5252 && TREE_CODE (rhs) != FIXED_CST)
5253 return false;
5255 *comp_vectype = vectype1 ? vectype1 : vectype2;
5256 return true;
5259 /* vectorizable_condition.
5261 Check if STMT is a conditional modify expression that can be vectorized.
5262 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5263 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5264 at GSI.
5266 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5267 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5268 the else clause if it is 2).
5270 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
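/* As a (hypothetical) illustration of the transformation done here, the
   scalar statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   becomes, for each copy,

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vector defs of the corresponding
   scalar operands.  */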
5272 bool
5273 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5274 gimple *vec_stmt, tree reduc_def, int reduc_index,
5275 slp_tree slp_node)
5277 tree scalar_dest = NULL_TREE;
5278 tree vec_dest = NULL_TREE;
5279 tree cond_expr, then_clause, else_clause;
5280 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5281 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5282 tree comp_vectype = NULL_TREE;
5283 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5284 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5285 tree vec_compare, vec_cond_expr;
5286 tree new_temp;
5287 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5288 tree def;
5289 enum vect_def_type dt, dts[4];
5290 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5291 int ncopies;
5292 enum tree_code code;
5293 stmt_vec_info prev_stmt_info = NULL;
5294 int i, j;
5295 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5296 vec<tree> vec_oprnds0 = vNULL;
5297 vec<tree> vec_oprnds1 = vNULL;
5298 vec<tree> vec_oprnds2 = vNULL;
5299 vec<tree> vec_oprnds3 = vNULL;
5300 tree vec_cmp_type = vectype;
5302 if (slp_node || PURE_SLP_STMT (stmt_info))
5303 ncopies = 1;
5304 else
5305 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5307 gcc_assert (ncopies >= 1);
5308 if (reduc_index && ncopies > 1)
5309 return false; /* FORNOW */
5311 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5312 return false;
5314 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5315 return false;
5317 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5318 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5319 && reduc_def))
5320 return false;
5322 /* FORNOW: not yet supported. */
5323 if (STMT_VINFO_LIVE_P (stmt_info))
5325 if (dump_enabled_p ())
5326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5327 "value used after loop.");
5328 return false;
5331 /* Is vectorizable conditional operation? */
5332 if (!is_gimple_assign (stmt))
5333 return false;
5335 code = gimple_assign_rhs_code (stmt);
5337 if (code != COND_EXPR)
5338 return false;
5340 cond_expr = gimple_assign_rhs1 (stmt);
5341 then_clause = gimple_assign_rhs2 (stmt);
5342 else_clause = gimple_assign_rhs3 (stmt);
5344 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5345 &comp_vectype)
5346 || !comp_vectype)
5347 return false;
5349 if (TREE_CODE (then_clause) == SSA_NAME)
5351 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5352 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5353 &then_def_stmt, &def, &dt))
5354 return false;
5356 else if (TREE_CODE (then_clause) != INTEGER_CST
5357 && TREE_CODE (then_clause) != REAL_CST
5358 && TREE_CODE (then_clause) != FIXED_CST)
5359 return false;
5361 if (TREE_CODE (else_clause) == SSA_NAME)
5363 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5364 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5365 &else_def_stmt, &def, &dt))
5366 return false;
5368 else if (TREE_CODE (else_clause) != INTEGER_CST
5369 && TREE_CODE (else_clause) != REAL_CST
5370 && TREE_CODE (else_clause) != FIXED_CST)
5371 return false;
5373 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
5375 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5376 tree cmp_type = build_nonstandard_integer_type (prec, 1);
5377 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5378 if (vec_cmp_type == NULL_TREE)
5379 return false;
5382 if (!vec_stmt)
5384 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5385 return expand_vec_cond_expr_p (vectype, comp_vectype);
5388 /* Transform. */
5390 if (!slp_node)
5392 vec_oprnds0.create (1);
5393 vec_oprnds1.create (1);
5394 vec_oprnds2.create (1);
5395 vec_oprnds3.create (1);
5398 /* Handle def. */
5399 scalar_dest = gimple_assign_lhs (stmt);
5400 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5402 /* Handle cond expr. */
5403 for (j = 0; j < ncopies; j++)
5405 gimple new_stmt = NULL;
5406 if (j == 0)
5408 if (slp_node)
5410 vec<tree> ops;
5411 ops.create (4);
5412 vec<slp_void_p> vec_defs;
5414 vec_defs.create (4);
5415 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5416 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5417 ops.safe_push (then_clause);
5418 ops.safe_push (else_clause);
5419 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5420 vec_oprnds3 = *((vec<tree> *) vec_defs.pop ());
5421 vec_oprnds2 = *((vec<tree> *) vec_defs.pop ());
5422 vec_oprnds1 = *((vec<tree> *) vec_defs.pop ());
5423 vec_oprnds0 = *((vec<tree> *) vec_defs.pop ());
5425 ops.release ();
5426 vec_defs.release ();
5428 else
5430 gimple gtemp;
5431 vec_cond_lhs =
5432 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5433 stmt, NULL);
5434 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5435 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5437 vec_cond_rhs =
5438 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5439 stmt, NULL);
5440 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5441 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5442 if (reduc_index == 1)
5443 vec_then_clause = reduc_def;
5444 else
5446 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5447 stmt, NULL);
5448 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5449 NULL, &gtemp, &def, &dts[2]);
5451 if (reduc_index == 2)
5452 vec_else_clause = reduc_def;
5453 else
5455 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5456 stmt, NULL);
5457 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5458 NULL, &gtemp, &def, &dts[3]);
5462 else
5464 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5465 vec_oprnds0.pop ());
5466 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5467 vec_oprnds1.pop ());
5468 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5469 vec_oprnds2.pop ());
5470 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5471 vec_oprnds3.pop ());
5474 if (!slp_node)
5476 vec_oprnds0.quick_push (vec_cond_lhs);
5477 vec_oprnds1.quick_push (vec_cond_rhs);
5478 vec_oprnds2.quick_push (vec_then_clause);
5479 vec_oprnds3.quick_push (vec_else_clause);
5482 /* Arguments are ready. Create the new vector stmt. */
5483 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5485 vec_cond_rhs = vec_oprnds1[i];
5486 vec_then_clause = vec_oprnds2[i];
5487 vec_else_clause = vec_oprnds3[i];
5489 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5490 vec_cond_lhs, vec_cond_rhs);
5491 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5492 vec_compare, vec_then_clause, vec_else_clause);
5494 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5495 new_temp = make_ssa_name (vec_dest, new_stmt);
5496 gimple_assign_set_lhs (new_stmt, new_temp);
5497 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5498 if (slp_node)
5499 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5502 if (slp_node)
5503 continue;
5505 if (j == 0)
5506 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5507 else
5508 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5510 prev_stmt_info = vinfo_for_stmt (new_stmt);
5513 vec_oprnds0.release ();
5514 vec_oprnds1.release ();
5515 vec_oprnds2.release ();
5516 vec_oprnds3.release ();
5518 return true;
5522 /* Make sure the statement is vectorizable. */
5524 bool
5525 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5527 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5528 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5529 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5530 bool ok;
5531 tree scalar_type, vectype;
5532 gimple pattern_stmt;
5533 gimple_seq pattern_def_seq;
5535 if (dump_enabled_p ())
5537 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5538 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5541 if (gimple_has_volatile_ops (stmt))
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5545 "not vectorized: stmt has volatile operands");
5547 return false;
5550 /* Skip stmts that do not need to be vectorized. In loops this is expected
5551 to include:
5552 - the COND_EXPR which is the loop exit condition
5553 - any LABEL_EXPRs in the loop
5554 - computations that are used only for array indexing or loop control.
5555 In basic blocks we only analyze statements that are a part of some SLP
5556 instance; therefore, all the statements are relevant.
5558 The pattern statement needs to be analyzed instead of the original statement
5559 if the original statement is not relevant. Otherwise, we analyze both
5560 statements. In basic blocks we are called from some SLP instance
5561 traversal; in that case we do not analyze the pattern stmts separately,
5562 since they will already be part of the SLP instance. */
5564 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5565 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5566 && !STMT_VINFO_LIVE_P (stmt_info))
5568 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5569 && pattern_stmt
5570 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5571 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5573 /* Analyze PATTERN_STMT instead of the original stmt. */
5574 stmt = pattern_stmt;
5575 stmt_info = vinfo_for_stmt (pattern_stmt);
5576 if (dump_enabled_p ())
5578 dump_printf_loc (MSG_NOTE, vect_location,
5579 "==> examining pattern statement: ");
5580 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5583 else
5585 if (dump_enabled_p ())
5586 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5588 return true;
5591 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5592 && node == NULL
5593 && pattern_stmt
5594 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5595 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5597 /* Analyze PATTERN_STMT too. */
5598 if (dump_enabled_p ())
5600 dump_printf_loc (MSG_NOTE, vect_location,
5601 "==> examining pattern statement: ");
5602 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5605 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5606 return false;
5609 if (is_pattern_stmt_p (stmt_info)
5610 && node == NULL
5611 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5613 gimple_stmt_iterator si;
5615 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5617 gimple pattern_def_stmt = gsi_stmt (si);
5618 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5619 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5621 /* Analyze def stmt of STMT if it's a pattern stmt. */
5622 if (dump_enabled_p ())
5624 dump_printf_loc (MSG_NOTE, vect_location,
5625 "==> examining pattern def statement: ");
5626 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5629 if (!vect_analyze_stmt (pattern_def_stmt,
5630 need_to_vectorize, node))
5631 return false;
5636 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5638 case vect_internal_def:
5639 break;
5641 case vect_reduction_def:
5642 case vect_nested_cycle:
5643 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5644 || relevance == vect_used_in_outer_by_reduction
5645 || relevance == vect_unused_in_scope));
5646 break;
5648 case vect_induction_def:
5649 case vect_constant_def:
5650 case vect_external_def:
5651 case vect_unknown_def_type:
5652 default:
5653 gcc_unreachable ();
5656 if (bb_vinfo)
5658 gcc_assert (PURE_SLP_STMT (stmt_info));
5660 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5661 if (dump_enabled_p ())
5663 dump_printf_loc (MSG_NOTE, vect_location,
5664 "get vectype for scalar type: ");
5665 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5668 vectype = get_vectype_for_scalar_type (scalar_type);
5669 if (!vectype)
5671 if (dump_enabled_p ())
5673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5674 "not SLPed: unsupported data-type ");
5675 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5676 scalar_type);
5678 return false;
5681 if (dump_enabled_p ())
5683 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5684 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5687 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5690 if (STMT_VINFO_RELEVANT_P (stmt_info))
5692 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5693 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5694 *need_to_vectorize = true;
5697 ok = true;
5698 if (!bb_vinfo
5699 && (STMT_VINFO_RELEVANT_P (stmt_info)
5700 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5701 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5702 || vectorizable_shift (stmt, NULL, NULL, NULL)
5703 || vectorizable_operation (stmt, NULL, NULL, NULL)
5704 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5705 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5706 || vectorizable_call (stmt, NULL, NULL, NULL)
5707 || vectorizable_store (stmt, NULL, NULL, NULL)
5708 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5709 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5710 else
5712 if (bb_vinfo)
5713 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5714 || vectorizable_shift (stmt, NULL, NULL, node)
5715 || vectorizable_operation (stmt, NULL, NULL, node)
5716 || vectorizable_assignment (stmt, NULL, NULL, node)
5717 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5718 || vectorizable_call (stmt, NULL, NULL, node)
5719 || vectorizable_store (stmt, NULL, NULL, node)
5720 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5723 if (!ok)
5725 if (dump_enabled_p ())
5727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5728 "not vectorized: relevant stmt not ");
5729 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5730 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5733 return false;
5736 if (bb_vinfo)
5737 return true;
5739 /* Stmts that are (also) "live" (i.e., used outside the loop)
5740 need extra handling, except for vectorizable reductions. */
5741 if (STMT_VINFO_LIVE_P (stmt_info)
5742 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5743 ok = vectorizable_live_operation (stmt, NULL, NULL);
5745 if (!ok)
5747 if (dump_enabled_p ())
5749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5750 "not vectorized: live stmt not ");
5751 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5752 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5755 return false;
5758 return true;
5762 /* Function vect_transform_stmt.
5764 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5766 bool
5767 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5768 bool *grouped_store, slp_tree slp_node,
5769 slp_instance slp_node_instance)
5771 bool is_store = false;
5772 gimple vec_stmt = NULL;
5773 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5774 bool done;
5776 switch (STMT_VINFO_TYPE (stmt_info))
5778 case type_demotion_vec_info_type:
5779 case type_promotion_vec_info_type:
5780 case type_conversion_vec_info_type:
5781 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5782 gcc_assert (done);
5783 break;
5785 case induc_vec_info_type:
5786 gcc_assert (!slp_node);
5787 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5788 gcc_assert (done);
5789 break;
5791 case shift_vec_info_type:
5792 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5793 gcc_assert (done);
5794 break;
5796 case op_vec_info_type:
5797 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5798 gcc_assert (done);
5799 break;
5801 case assignment_vec_info_type:
5802 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5803 gcc_assert (done);
5804 break;
5806 case load_vec_info_type:
5807 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5808 slp_node_instance);
5809 gcc_assert (done);
5810 break;
5812 case store_vec_info_type:
5813 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5814 gcc_assert (done);
5815 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5817 /* In case of interleaving, the whole chain is vectorized when the
5818 last store in the chain is reached. Store stmts before the last
5819 one are skipped, and their vec_stmt_info shouldn't be freed
5820 meanwhile. */
5821 *grouped_store = true;
5822 if (STMT_VINFO_VEC_STMT (stmt_info))
5823 is_store = true;
5825 else
5826 is_store = true;
5827 break;
5829 case condition_vec_info_type:
5830 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5831 gcc_assert (done);
5832 break;
5834 case call_vec_info_type:
5835 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5836 stmt = gsi_stmt (*gsi);
5837 break;
5839 case reduc_vec_info_type:
5840 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5841 gcc_assert (done);
5842 break;
5844 default:
5845 if (!STMT_VINFO_LIVE_P (stmt_info))
5847 if (dump_enabled_p ())
5848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5849 "stmt not supported.");
5850 gcc_unreachable ();
5854 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5855 is being vectorized, but outside the immediately enclosing loop. */
5856 if (vec_stmt
5857 && STMT_VINFO_LOOP_VINFO (stmt_info)
5858 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5859 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5860 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5861 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5862 || STMT_VINFO_RELEVANT (stmt_info) ==
5863 vect_used_in_outer_by_reduction))
5865 struct loop *innerloop = LOOP_VINFO_LOOP (
5866 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5867 imm_use_iterator imm_iter;
5868 use_operand_p use_p;
5869 tree scalar_dest;
5870 gimple exit_phi;
5872 if (dump_enabled_p ())
5873 dump_printf_loc (MSG_NOTE, vect_location,
5874 "Record the vdef for outer-loop vectorization.");
5876 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5877 (to be used when vectorizing outer-loop stmts that use the DEF of
5878 STMT). */
5879 if (gimple_code (stmt) == GIMPLE_PHI)
5880 scalar_dest = PHI_RESULT (stmt);
5881 else
5882 scalar_dest = gimple_assign_lhs (stmt);
5884 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5886 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5888 exit_phi = USE_STMT (use_p);
5889 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5894 /* Handle stmts whose DEF is used outside the loop-nest that is
5895 being vectorized. */
5896 if (STMT_VINFO_LIVE_P (stmt_info)
5897 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5899 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5900 gcc_assert (done);
5903 if (vec_stmt)
5904 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5906 return is_store;
5910 /* Remove a group of stores (for SLP or interleaving), free their
5911 stmt_vec_info. */
5913 void
5914 vect_remove_stores (gimple first_stmt)
5916 gimple next = first_stmt;
5917 gimple tmp;
5918 gimple_stmt_iterator next_si;
5920 while (next)
5922 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5924 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5925 if (is_pattern_stmt_p (stmt_info))
5926 next = STMT_VINFO_RELATED_STMT (stmt_info);
5927 /* Free the attached stmt_vec_info and remove the stmt. */
5928 next_si = gsi_for_stmt (next);
5929 unlink_stmt_vdef (next);
5930 gsi_remove (&next_si, true);
5931 release_defs (next);
5932 free_stmt_vec_info (next);
5933 next = tmp;
5938 /* Function new_stmt_vec_info.
5940 Create and initialize a new stmt_vec_info struct for STMT. */
5942 stmt_vec_info
5943 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5944 bb_vec_info bb_vinfo)
5946 stmt_vec_info res;
5947 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5949 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5950 STMT_VINFO_STMT (res) = stmt;
5951 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5952 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5953 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5954 STMT_VINFO_LIVE_P (res) = false;
5955 STMT_VINFO_VECTYPE (res) = NULL;
5956 STMT_VINFO_VEC_STMT (res) = NULL;
5957 STMT_VINFO_VECTORIZABLE (res) = true;
5958 STMT_VINFO_IN_PATTERN_P (res) = false;
5959 STMT_VINFO_RELATED_STMT (res) = NULL;
5960 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5961 STMT_VINFO_DATA_REF (res) = NULL;
5963 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5964 STMT_VINFO_DR_OFFSET (res) = NULL;
5965 STMT_VINFO_DR_INIT (res) = NULL;
5966 STMT_VINFO_DR_STEP (res) = NULL;
5967 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5969 if (gimple_code (stmt) == GIMPLE_PHI
5970 && is_loop_header_bb_p (gimple_bb (stmt)))
5971 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5972 else
5973 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5975 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
5976 STMT_SLP_TYPE (res) = loop_vect;
5977 GROUP_FIRST_ELEMENT (res) = NULL;
5978 GROUP_NEXT_ELEMENT (res) = NULL;
5979 GROUP_SIZE (res) = 0;
5980 GROUP_STORE_COUNT (res) = 0;
5981 GROUP_GAP (res) = 0;
5982 GROUP_SAME_DR_STMT (res) = NULL;
5983 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5985 return res;
5989 /* Create a hash table for stmt_vec_info. */
5991 void
5992 init_stmt_vec_info_vec (void)
5994 gcc_assert (!stmt_vec_info_vec.exists ());
5995 stmt_vec_info_vec.create (50);
5999 /* Free hash table for stmt_vec_info. */
6001 void
6002 free_stmt_vec_info_vec (void)
6004 gcc_assert (stmt_vec_info_vec.exists ());
6005 stmt_vec_info_vec.release ();
6009 /* Free stmt vectorization related info. */
6011 void
6012 free_stmt_vec_info (gimple stmt)
6014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6016 if (!stmt_info)
6017 return;
6019 /* Check if this statement has a related "pattern stmt"
6020 (introduced by the vectorizer during the pattern recognition
6021 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6022 too. */
6023 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6025 stmt_vec_info patt_info
6026 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6027 if (patt_info)
6029 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6030 if (seq)
6032 gimple_stmt_iterator si;
6033 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6034 free_stmt_vec_info (gsi_stmt (si));
6036 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6040 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6041 set_vinfo_for_stmt (stmt, NULL);
6042 free (stmt_info);
6046 /* Function get_vectype_for_scalar_type_and_size.
6048 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6049 by the target. */
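/* A worked example of the mapping below (illustrative assumptions only):
   with a 4-byte 'int' element and SIZE == 16 the routine builds a vector
   type with 16 / 4 == 4 units (e.g. V4SImode on targets that provide it);
   with SIZE == 0 the target's preferred SIMD mode is used instead.  */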
6051 static tree
6052 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6054 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6055 enum machine_mode simd_mode;
6056 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6057 int nunits;
6058 tree vectype;
6060 if (nbytes == 0)
6061 return NULL_TREE;
6063 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6064 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6065 return NULL_TREE;
6067 /* For vector types of elements whose mode precision doesn't
6068 match their type's precision we use an element type of mode
6069 precision. The vectorization routines will have to make sure
6070 they support the proper result truncation/extension.
6071 We also make sure to build vector types with INTEGER_TYPE
6072 component type only. */
6073 if (INTEGRAL_TYPE_P (scalar_type)
6074 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6075 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6076 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6077 TYPE_UNSIGNED (scalar_type));
6079 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6080 When the component mode passes the above test simply use a type
6081 corresponding to that mode. The theory is that any use that
6082 would cause problems with this will disable vectorization anyway. */
6083 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6084 && !INTEGRAL_TYPE_P (scalar_type)
6085 && !POINTER_TYPE_P (scalar_type))
6086 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6088 /* We can't build a vector type of elements with alignment bigger than
6089 their size. */
6090 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6091 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6093 /* If we fell back to using the mode, fail if there was
6094 no scalar type for it. */
6095 if (scalar_type == NULL_TREE)
6096 return NULL_TREE;
6098 /* If no size was supplied, use the mode the target prefers. Otherwise
6099 look up a vector mode of the specified size. */
6100 if (size == 0)
6101 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6102 else
6103 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6104 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6105 if (nunits <= 1)
6106 return NULL_TREE;
6108 vectype = build_vector_type (scalar_type, nunits);
6109 if (dump_enabled_p ())
6111 dump_printf_loc (MSG_NOTE, vect_location,
6112 "get vectype with %d units of type ", nunits);
6113 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6116 if (!vectype)
6117 return NULL_TREE;
6119 if (dump_enabled_p ())
6121 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6122 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
6125 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6126 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "mode not supported by target.");
6131 return NULL_TREE;
6134 return vectype;
6137 unsigned int current_vector_size;
6139 /* Function get_vectype_for_scalar_type.
6141 Returns the vector type corresponding to SCALAR_TYPE as supported
6142 by the target. */
6144 tree
6145 get_vectype_for_scalar_type (tree scalar_type)
6147 tree vectype;
6148 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6149 current_vector_size);
6150 if (vectype
6151 && current_vector_size == 0)
6152 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6153 return vectype;
6156 /* Function get_same_sized_vectype
6158 Returns a vector type for SCALAR_TYPE with the same size as
6159 VECTOR_TYPE, if supported by the target. */
6161 tree
6162 get_same_sized_vectype (tree scalar_type, tree vector_type)
6164 return get_vectype_for_scalar_type_and_size
6165 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6168 /* Function vect_is_simple_use.
6170 Input:
6171 LOOP_VINFO - the vect info of the loop that is being vectorized.
6172 BB_VINFO - the vect info of the basic block that is being vectorized.
6173 OPERAND - operand of STMT in the loop or bb.
6174 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6176 Returns whether a stmt with OPERAND can be vectorized.
6177 For loops, supportable operands are constants, loop invariants, and operands
6178 that are defined by the current iteration of the loop. Unsupportable
6179 operands are those that are defined by a previous iteration of the loop (as
6180 is the case in reduction/induction computations).
6181 For basic blocks, supportable operands are constants and bb invariants.
6182 For now, operands defined outside the basic block are not supported. */
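/* A minimal scalar sketch of the classification above (illustrative only):

     int dot (int *a, int n, int c)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         s = s + a[i] * c;
       return s;
     }

   C is a loop invariant and A[I] is defined by the current iteration, so
   both are simple uses; the use of S on the right-hand side is defined by
   the previous iteration (the reduction case mentioned above).  */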
6184 bool
6185 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6186 bb_vec_info bb_vinfo, gimple *def_stmt,
6187 tree *def, enum vect_def_type *dt)
6189 basic_block bb;
6190 stmt_vec_info stmt_vinfo;
6191 struct loop *loop = NULL;
6193 if (loop_vinfo)
6194 loop = LOOP_VINFO_LOOP (loop_vinfo);
6196 *def_stmt = NULL;
6197 *def = NULL_TREE;
6199 if (dump_enabled_p ())
6201 dump_printf_loc (MSG_NOTE, vect_location,
6202 "vect_is_simple_use: operand ");
6203 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6206 if (CONSTANT_CLASS_P (operand))
6208 *dt = vect_constant_def;
6209 return true;
6212 if (is_gimple_min_invariant (operand))
6214 *def = operand;
6215 *dt = vect_external_def;
6216 return true;
6219 if (TREE_CODE (operand) == PAREN_EXPR)
6221 if (dump_enabled_p ())
6222 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6223 operand = TREE_OPERAND (operand, 0);
6226 if (TREE_CODE (operand) != SSA_NAME)
6228 if (dump_enabled_p ())
6229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6230 "not ssa-name.");
6231 return false;
6234 *def_stmt = SSA_NAME_DEF_STMT (operand);
6235 if (*def_stmt == NULL)
6237 if (dump_enabled_p ())
6238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6239 "no def_stmt.");
6240 return false;
6243 if (dump_enabled_p ())
6245 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6246 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6249 /* An empty stmt is expected only in the case of a function argument
6250 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
6251 if (gimple_nop_p (*def_stmt))
6253 *def = operand;
6254 *dt = vect_external_def;
6255 return true;
6258 bb = gimple_bb (*def_stmt);
6260 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6261 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6262 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6263 *dt = vect_external_def;
6264 else
6266 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6267 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6270 if (*dt == vect_unknown_def_type
6271 || (stmt
6272 && *dt == vect_double_reduction_def
6273 && gimple_code (stmt) != GIMPLE_PHI))
6275 if (dump_enabled_p ())
6276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6277 "Unsupported pattern.");
6278 return false;
6281 if (dump_enabled_p ())
6282 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6284 switch (gimple_code (*def_stmt))
6286 case GIMPLE_PHI:
6287 *def = gimple_phi_result (*def_stmt);
6288 break;
6290 case GIMPLE_ASSIGN:
6291 *def = gimple_assign_lhs (*def_stmt);
6292 break;
6294 case GIMPLE_CALL:
6295 *def = gimple_call_lhs (*def_stmt);
6296 if (*def != NULL)
6297 break;
6298 /* FALLTHRU */
6299 default:
6300 if (dump_enabled_p ())
6301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6302 "unsupported defining stmt: ");
6303 return false;
6306 return true;
6309 /* Function vect_is_simple_use_1.
6311 Same as vect_is_simple_use but also determines the vector operand
6312 type of OPERAND and stores it to *VECTYPE. If the definition of
6313 OPERAND is vect_uninitialized_def, vect_constant_def or
6314 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6315 is responsible for computing the best suited vector type for the
6316 scalar operand. */
6318 bool
6319 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6320 bb_vec_info bb_vinfo, gimple *def_stmt,
6321 tree *def, enum vect_def_type *dt, tree *vectype)
6323 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6324 def, dt))
6325 return false;
6327 /* Now get a vector type if the def is internal, otherwise supply
6328 NULL_TREE and leave it up to the caller to figure out a proper
6329 type for the use stmt. */
6330 if (*dt == vect_internal_def
6331 || *dt == vect_induction_def
6332 || *dt == vect_reduction_def
6333 || *dt == vect_double_reduction_def
6334 || *dt == vect_nested_cycle)
6336 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6338 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6339 && !STMT_VINFO_RELEVANT (stmt_info)
6340 && !STMT_VINFO_LIVE_P (stmt_info))
6341 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6343 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6344 gcc_assert (*vectype != NULL_TREE);
6346 else if (*dt == vect_uninitialized_def
6347 || *dt == vect_constant_def
6348 || *dt == vect_external_def)
6349 *vectype = NULL_TREE;
6350 else
6351 gcc_unreachable ();
6353 return true;
6357 /* Function supportable_widening_operation
6359 Check whether an operation represented by the code CODE is a
6360 widening operation that is supported by the target platform in
6361 vector form (i.e., when operating on arguments of type VECTYPE_IN
6362 producing a result of type VECTYPE_OUT).
6364 Widening operations we currently support are NOP (CONVERT), FLOAT,
6365 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these are supported
6366 by the target platform either directly (via vector tree-codes), or via
6367 target builtins.
6369 Output:
6370 - CODE1 and CODE2 are codes of vector operations to be used when
6371 vectorizing the operation, if available.
6372 - MULTI_STEP_CVT determines the number of required intermediate steps in
6373 case of multi-step conversion (like char->short->int - in that case
6374 MULTI_STEP_CVT will be 1).
6375 - INTERM_TYPES contains the intermediate type required to perform the
6376 widening operation (short in the above example). */
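/* A scalar sketch of the cases handled here (illustrative only):

     short a, b;
     char c;
     int p = (int) a * (int) b;    (widening multiply, WIDEN_MULT)
     int w = c;                    (conversion; char -> int may be done as
                                    the multi-step char -> short -> int, in
                                    which case MULTI_STEP_CVT is 1 and
                                    INTERM_TYPES contains the short type)

   Each step must be supported by the target's widening/unpacking optabs,
   which is what the checks below verify.  */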
6378 bool
6379 supportable_widening_operation (enum tree_code code, gimple stmt,
6380 tree vectype_out, tree vectype_in,
6381 enum tree_code *code1, enum tree_code *code2,
6382 int *multi_step_cvt,
6383 vec<tree> *interm_types)
6385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6386 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6387 struct loop *vect_loop = NULL;
6388 enum machine_mode vec_mode;
6389 enum insn_code icode1, icode2;
6390 optab optab1, optab2;
6391 tree vectype = vectype_in;
6392 tree wide_vectype = vectype_out;
6393 enum tree_code c1, c2;
6394 int i;
6395 tree prev_type, intermediate_type;
6396 enum machine_mode intermediate_mode, prev_mode;
6397 optab optab3, optab4;
6399 *multi_step_cvt = 0;
6400 if (loop_info)
6401 vect_loop = LOOP_VINFO_LOOP (loop_info);
6403 switch (code)
6405 case WIDEN_MULT_EXPR:
6406 /* The result of a vectorized widening operation usually requires
6407 two vectors (because the widened results do not fit into one vector).
6408 The generated vector results would normally be expected to appear
6409 in the same order as in the original scalar computation,
6410 i.e. if 8 results are generated in each vector iteration, they are
6411 to be organized as follows:
6412 vect1: [res1,res2,res3,res4],
6413 vect2: [res5,res6,res7,res8].
6415 However, in the special case that the result of the widening
6416 operation is used in a reduction computation only, the order doesn't
6417 matter (because when vectorizing a reduction we change the order of
6418 the computation). Some targets can take advantage of this and
6419 generate more efficient code. For example, targets like Altivec,
6420 that support widen_mult using a sequence of {mult_even,mult_odd}
6421 generate the following vectors:
6422 vect1: [res1,res3,res5,res7],
6423 vect2: [res2,res4,res6,res8].
6425 When vectorizing outer-loops, we execute the inner-loop sequentially
6426 (each vectorized inner-loop iteration contributes to VF outer-loop
6427 iterations in parallel). We therefore don't allow changing the
6428 order of the computation in the inner-loop during outer-loop
6429 vectorization. */
6430 /* TODO: Another case in which order doesn't *really* matter is when we
6431 widen and then contract again, e.g. (short)((int)x * y >> 8).
6432 Normally, pack_trunc performs an even/odd permute, whereas the
6433 repack from an even/odd expansion would be an interleave, which
6434 would be significantly simpler for e.g. AVX2. */
6435 /* In any case, in order to avoid duplicating the code below, recurse
6436 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6437 are properly set up for the caller. If we fail, we'll continue with
6438 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6439 if (vect_loop
6440 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6441 && !nested_in_vect_loop_p (vect_loop, stmt)
6442 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6443 stmt, vectype_out, vectype_in,
6444 code1, code2, multi_step_cvt,
6445 interm_types))
6446 return true;
6447 c1 = VEC_WIDEN_MULT_LO_EXPR;
6448 c2 = VEC_WIDEN_MULT_HI_EXPR;
6449 break;
6451 case VEC_WIDEN_MULT_EVEN_EXPR:
6452 /* Support the recursion induced just above. */
6453 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6454 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6455 break;
6457 case WIDEN_LSHIFT_EXPR:
6458 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6459 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6460 break;
6462 CASE_CONVERT:
6463 c1 = VEC_UNPACK_LO_EXPR;
6464 c2 = VEC_UNPACK_HI_EXPR;
6465 break;
6467 case FLOAT_EXPR:
6468 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6469 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6470 break;
6472 case FIX_TRUNC_EXPR:
6473 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6474 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6475 computing the operation. */
6476 return false;
6478 default:
6479 gcc_unreachable ();
6482 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6484 enum tree_code ctmp = c1;
6485 c1 = c2;
6486 c2 = ctmp;
6489 if (code == FIX_TRUNC_EXPR)
6491 /* The signedness is determined from the output operand. */
6492 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6493 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6495 else
6497 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6498 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6501 if (!optab1 || !optab2)
6502 return false;
6504 vec_mode = TYPE_MODE (vectype);
6505 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6506 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6507 return false;
6509 *code1 = c1;
6510 *code2 = c2;
6512 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6513 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6514 return true;
6516 /* Check if it's a multi-step conversion that can be done using intermediate
6517 types. */
6519 prev_type = vectype;
6520 prev_mode = vec_mode;
6522 if (!CONVERT_EXPR_CODE_P (code))
6523 return false;
6525 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6526 intermediate steps in the promotion sequence. We try
6527 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6528 not. */
6529 interm_types->create (MAX_INTERM_CVT_STEPS);
6530 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6532 intermediate_mode = insn_data[icode1].operand[0].mode;
6533 intermediate_type
6534 = lang_hooks.types.type_for_mode (intermediate_mode,
6535 TYPE_UNSIGNED (prev_type));
6536 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6537 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6539 if (!optab3 || !optab4
6540 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6541 || insn_data[icode1].operand[0].mode != intermediate_mode
6542 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6543 || insn_data[icode2].operand[0].mode != intermediate_mode
6544 || ((icode1 = optab_handler (optab3, intermediate_mode))
6545 == CODE_FOR_nothing)
6546 || ((icode2 = optab_handler (optab4, intermediate_mode))
6547 == CODE_FOR_nothing))
6548 break;
6550 interm_types->quick_push (intermediate_type);
6551 (*multi_step_cvt)++;
6553 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6554 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6555 return true;
6557 prev_type = intermediate_type;
6558 prev_mode = intermediate_mode;
6561 interm_types->release ();
6562 return false;
6566 /* Function supportable_narrowing_operation
6568 Check whether an operation represented by the code CODE is a
6569 narrowing operation that is supported by the target platform in
6570 vector form (i.e., when operating on arguments of type VECTYPE_IN
6571 and producing a result of type VECTYPE_OUT).
6573 Narrowing operations we currently support are NOP (CONVERT) and
6574 FIX_TRUNC. This function checks if these operations are supported by
6575 the target platform directly via vector tree-codes.
6577 Output:
6578 - CODE1 is the code of a vector operation to be used when
6579 vectorizing the operation, if available.
6580 - MULTI_STEP_CVT determines the number of required intermediate steps in
6581 case of multi-step conversion (like int->short->char - in that case
6582 MULTI_STEP_CVT will be 1).
6583 - INTERM_TYPES contains the intermediate type required to perform the
6584 narrowing operation (short in the above example). */
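/* A scalar sketch of a multi-step narrowing (illustrative only):

     int i;
     char c = (char) i;

   int -> char may be performed as the multi-step int -> short -> char, in
   which case MULTI_STEP_CVT is 1 and INTERM_TYPES contains the short type;
   each step must be supported by the target's vector pack optabs.  */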
6586 bool
6587 supportable_narrowing_operation (enum tree_code code,
6588 tree vectype_out, tree vectype_in,
6589 enum tree_code *code1, int *multi_step_cvt,
6590 vec<tree> *interm_types)
6592 enum machine_mode vec_mode;
6593 enum insn_code icode1;
6594 optab optab1, interm_optab;
6595 tree vectype = vectype_in;
6596 tree narrow_vectype = vectype_out;
6597 enum tree_code c1;
6598 tree intermediate_type;
6599 enum machine_mode intermediate_mode, prev_mode;
6600 int i;
6601 bool uns;
6603 *multi_step_cvt = 0;
6604 switch (code)
6606 CASE_CONVERT:
6607 c1 = VEC_PACK_TRUNC_EXPR;
6608 break;
6610 case FIX_TRUNC_EXPR:
6611 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6612 break;
6614 case FLOAT_EXPR:
6615 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6616 tree code and optabs used for computing the operation. */
6617 return false;
6619 default:
6620 gcc_unreachable ();
6623 if (code == FIX_TRUNC_EXPR)
6624 /* The signedness is determined from the output operand. */
6625 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6626 else
6627 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6629 if (!optab1)
6630 return false;
6632 vec_mode = TYPE_MODE (vectype);
6633 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6634 return false;
6636 *code1 = c1;
6638 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6639 return true;
6641 /* Check if it's a multi-step conversion that can be done using intermediate
6642 types. */
6643 prev_mode = vec_mode;
6644 if (code == FIX_TRUNC_EXPR)
6645 uns = TYPE_UNSIGNED (vectype_out);
6646 else
6647 uns = TYPE_UNSIGNED (vectype);
6649 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6650 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6651 costly than signed. */
6652 if (code == FIX_TRUNC_EXPR && uns)
6654 enum insn_code icode2;
6656 intermediate_type
6657 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6658 interm_optab
6659 = optab_for_tree_code (c1, intermediate_type, optab_default);
6660 if (interm_optab != unknown_optab
6661 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6662 && insn_data[icode1].operand[0].mode
6663 == insn_data[icode2].operand[0].mode)
6665 uns = false;
6666 optab1 = interm_optab;
6667 icode1 = icode2;
6671 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6672 intermediate steps in the narrowing sequence. We try
6673 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6674 interm_types->create (MAX_INTERM_CVT_STEPS);
6675 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6677 intermediate_mode = insn_data[icode1].operand[0].mode;
6678 intermediate_type
6679 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6680 interm_optab
6681 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6682 optab_default);
6683 if (!interm_optab
6684 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6685 || insn_data[icode1].operand[0].mode != intermediate_mode
6686 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6687 == CODE_FOR_nothing))
6688 break;
6690 interm_types->quick_push (intermediate_type);
6691 (*multi_step_cvt)++;
6693 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6694 return true;
6696 prev_mode = intermediate_mode;
6697 optab1 = interm_optab;
6700 interm_types->release ();
6701 return false;