[official-gcc.git] / gcc / tree-vect-stmts.c
blob ab4a26c2b28e3586ae5d2c5042896fd8b5d730e0
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "optabs.h"
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
39 #include "dumpfile.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
49 return STMT_VINFO_VECTYPE (stmt_info);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
57 gimple stmt = STMT_VINFO_STMT (stmt_info);
58 basic_block bb = gimple_bb (stmt);
59 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60 struct loop* loop;
62 if (!loop_vinfo)
63 return false;
65 loop = LOOP_VINFO_LOOP (loop_vinfo);
67 return (bb->loop_father == loop->inner);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
76 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 int misalign, enum vect_cost_model_location where)
79 if (body_cost_vec)
81 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
82 add_stmt_info_to_vec (body_cost_vec, count, kind,
83 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
84 misalign);
85 return (unsigned)
86 (builtin_vectorization_cost (kind, vectype, misalign) * count);
89 else
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93 void *target_cost_data;
95 if (loop_vinfo)
96 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97 else
98 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
100 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
101 misalign, where);
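/* Illustrative usage sketch (mirroring the callers further below): during
   analysis a caller typically passes a cost vector, e.g.

       record_stmt_cost (body_cost_vec, 1, vector_stmt, stmt_info, 0,
                         vect_body);

   which appends one entry via add_stmt_info_to_vec and returns
   builtin_vectorization_cost (vector_stmt, vectype, 0) as a preliminary
   estimate of the cost; passing NULL for the vector instead hands the cost
   straight to add_stmt_cost on the loop's (or basic block's) target cost
   data.  */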
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
184 enum vect_relevant relevant, bool live_p,
185 bool used_in_pattern)
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190 gimple pattern_stmt;
192 if (vect_print_dump_info (REPORT_DETAILS))
193 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
195 /* If this stmt is an original stmt in a pattern, we might need to mark its
196 related pattern stmt instead of the original stmt. However, such stmts
 197 may have their own uses that are not in any pattern; in such cases the
198 stmt itself should be marked. */
199 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
201 bool found = false;
202 if (!used_in_pattern)
204 imm_use_iterator imm_iter;
205 use_operand_p use_p;
206 gimple use_stmt;
207 tree lhs;
208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
209 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
211 if (is_gimple_assign (stmt))
212 lhs = gimple_assign_lhs (stmt);
213 else
214 lhs = gimple_call_lhs (stmt);
 216 /* This is a use outside the pattern; if LHS has other uses that are
217 pattern uses, we should mark the stmt itself, and not the pattern
218 stmt. */
219 if (TREE_CODE (lhs) == SSA_NAME)
220 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
222 if (is_gimple_debug (USE_STMT (use_p)))
223 continue;
224 use_stmt = USE_STMT (use_p);
226 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
227 continue;
229 if (vinfo_for_stmt (use_stmt)
230 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
232 found = true;
233 break;
238 if (!found)
240 /* This is the last stmt in a sequence that was detected as a
241 pattern that can potentially be vectorized. Don't mark the stmt
242 as relevant/live because it's not going to be vectorized.
243 Instead mark the pattern-stmt that replaces it. */
245 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
247 if (vect_print_dump_info (REPORT_DETAILS))
248 fprintf (vect_dump, "last stmt in pattern. don't mark"
249 " relevant/live.");
250 stmt_info = vinfo_for_stmt (pattern_stmt);
251 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
252 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
253 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
254 stmt = pattern_stmt;
258 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
259 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
260 STMT_VINFO_RELEVANT (stmt_info) = relevant;
262 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
263 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
265 if (vect_print_dump_info (REPORT_DETAILS))
266 fprintf (vect_dump, "already marked relevant/live.");
267 return;
270 VEC_safe_push (gimple, heap, *worklist, stmt);
274 /* Function vect_stmt_relevant_p.
276 Return true if STMT in loop that is represented by LOOP_VINFO is
277 "relevant for vectorization".
279 A stmt is considered "relevant for vectorization" if:
280 - it has uses outside the loop.
281 - it has vdefs (it alters memory).
 282 - it is a control stmt in the loop (except for the exit condition).
284 CHECKME: what other side effects would the vectorizer allow? */
286 static bool
287 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
288 enum vect_relevant *relevant, bool *live_p)
290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
291 ssa_op_iter op_iter;
292 imm_use_iterator imm_iter;
293 use_operand_p use_p;
294 def_operand_p def_p;
296 *relevant = vect_unused_in_scope;
297 *live_p = false;
299 /* cond stmt other than loop exit cond. */
300 if (is_ctrl_stmt (stmt)
301 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
302 != loop_exit_ctrl_vec_info_type)
303 *relevant = vect_used_in_scope;
305 /* changing memory. */
306 if (gimple_code (stmt) != GIMPLE_PHI)
307 if (gimple_vdef (stmt))
309 if (vect_print_dump_info (REPORT_DETAILS))
310 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
311 *relevant = vect_used_in_scope;
314 /* uses outside the loop. */
315 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
317 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
319 basic_block bb = gimple_bb (USE_STMT (use_p));
320 if (!flow_bb_inside_loop_p (loop, bb))
322 if (vect_print_dump_info (REPORT_DETAILS))
323 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
325 if (is_gimple_debug (USE_STMT (use_p)))
326 continue;
328 /* We expect all such uses to be in the loop exit phis
 329 (because of loop-closed SSA form). */
330 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
331 gcc_assert (bb == single_exit (loop)->dest);
333 *live_p = true;
338 return (*live_p || *relevant);
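/* Illustrative example (a sketch, not a real testcase): in

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;
           sum = sum + b[i];
         }
       ... = sum;

   the store to a[i] is relevant because it has a vdef, and the statement
   computing SUM has *live_p set because SUM is used after the loop; the
   increment of the induction variable I is caught by neither test and is
   only marked later, if at all, while processing uses.  */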
342 /* Function exist_non_indexing_operands_for_use_p
344 USE is one of the uses attached to STMT. Check if USE is
345 used in STMT for anything other than indexing an array. */
347 static bool
348 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
350 tree operand;
351 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
353 /* USE corresponds to some operand in STMT. If there is no data
354 reference in STMT, then any operand that corresponds to USE
355 is not indexing an array. */
356 if (!STMT_VINFO_DATA_REF (stmt_info))
357 return true;
 359 /* STMT has a data_ref. FORNOW this means that it's of one of
360 the following forms:
361 -1- ARRAY_REF = var
362 -2- var = ARRAY_REF
363 (This should have been verified in analyze_data_refs).
365 'var' in the second case corresponds to a def, not a use,
366 so USE cannot correspond to any operands that are not used
367 for array indexing.
369 Therefore, all we need to check is if STMT falls into the
370 first case, and whether var corresponds to USE. */
372 if (!gimple_assign_copy_p (stmt))
373 return false;
374 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
375 return false;
376 operand = gimple_assign_rhs1 (stmt);
377 if (TREE_CODE (operand) != SSA_NAME)
378 return false;
380 if (operand == use)
381 return true;
383 return false;
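/* Illustrative example (hypothetical statements): for a store
   "a[i_2] = x_1" the use X_1 is the copied rhs, so the function returns
   true for it, whereas the use I_2 appears only in the array index and
   false is returned; for a load "x_3 = a[i_2]" the lhs is an SSA_NAME,
   so false is returned for every use.  */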
388 Function process_use.
390 Inputs:
391 - a USE in STMT in a loop represented by LOOP_VINFO
392 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
393 that defined USE. This is done by calling mark_relevant and passing it
394 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
395 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
396 be performed.
398 Outputs:
399 Generally, LIVE_P and RELEVANT are used to define the liveness and
400 relevance info of the DEF_STMT of this USE:
401 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
402 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
403 Exceptions:
404 - case 1: If USE is used only for address computations (e.g. array indexing),
405 which does not need to be directly vectorized, then the liveness/relevance
406 of the respective DEF_STMT is left unchanged.
407 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 408 skip DEF_STMT because it had already been processed.
409 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
410 be modified accordingly.
412 Return true if everything is as expected. Return false otherwise. */
414 static bool
415 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
416 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
417 bool force)
419 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
420 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
421 stmt_vec_info dstmt_vinfo;
422 basic_block bb, def_bb;
423 tree def;
424 gimple def_stmt;
425 enum vect_def_type dt;
427 /* case 1: we are only interested in uses that need to be vectorized. Uses
428 that are used for address computation are not considered relevant. */
429 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
430 return true;
432 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
435 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
436 return false;
439 if (!def_stmt || gimple_nop_p (def_stmt))
440 return true;
442 def_bb = gimple_bb (def_stmt);
443 if (!flow_bb_inside_loop_p (loop, def_bb))
445 if (vect_print_dump_info (REPORT_DETAILS))
446 fprintf (vect_dump, "def_stmt is out of loop.");
447 return true;
450 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
451 DEF_STMT must have already been processed, because this should be the
452 only way that STMT, which is a reduction-phi, was put in the worklist,
453 as there should be no other uses for DEF_STMT in the loop. So we just
454 check that everything is as expected, and we are done. */
455 dstmt_vinfo = vinfo_for_stmt (def_stmt);
456 bb = gimple_bb (stmt);
457 if (gimple_code (stmt) == GIMPLE_PHI
458 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
459 && gimple_code (def_stmt) != GIMPLE_PHI
460 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
461 && bb->loop_father == def_bb->loop_father)
463 if (vect_print_dump_info (REPORT_DETAILS))
464 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
465 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
466 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
467 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
468 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
469 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
470 return true;
473 /* case 3a: outer-loop stmt defining an inner-loop stmt:
474 outer-loop-header-bb:
475 d = def_stmt
476 inner-loop:
477 stmt # use (d)
478 outer-loop-tail-bb:
479 ... */
480 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
482 if (vect_print_dump_info (REPORT_DETAILS))
483 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
485 switch (relevant)
487 case vect_unused_in_scope:
488 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
489 vect_used_in_scope : vect_unused_in_scope;
490 break;
492 case vect_used_in_outer_by_reduction:
493 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
494 relevant = vect_used_by_reduction;
495 break;
497 case vect_used_in_outer:
498 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
499 relevant = vect_used_in_scope;
500 break;
502 case vect_used_in_scope:
503 break;
505 default:
506 gcc_unreachable ();
510 /* case 3b: inner-loop stmt defining an outer-loop stmt:
511 outer-loop-header-bb:
513 inner-loop:
514 d = def_stmt
515 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
516 stmt # use (d) */
517 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
519 if (vect_print_dump_info (REPORT_DETAILS))
520 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
522 switch (relevant)
524 case vect_unused_in_scope:
525 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
526 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
527 vect_used_in_outer_by_reduction : vect_unused_in_scope;
528 break;
530 case vect_used_by_reduction:
531 relevant = vect_used_in_outer_by_reduction;
532 break;
534 case vect_used_in_scope:
535 relevant = vect_used_in_outer;
536 break;
538 default:
539 gcc_unreachable ();
543 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
544 is_pattern_stmt_p (stmt_vinfo));
545 return true;
549 /* Function vect_mark_stmts_to_be_vectorized.
551 Not all stmts in the loop need to be vectorized. For example:
553 for i...
554 for j...
555 1. T0 = i + j
556 2. T1 = a[T0]
558 3. j = j + 1
 560 Stmts 1 and 3 do not need to be vectorized, because loop control and
561 addressing of vectorized data-refs are handled differently.
563 This pass detects such stmts. */
565 bool
566 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
568 VEC(gimple,heap) *worklist;
569 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
570 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
571 unsigned int nbbs = loop->num_nodes;
572 gimple_stmt_iterator si;
573 gimple stmt;
574 unsigned int i;
575 stmt_vec_info stmt_vinfo;
576 basic_block bb;
577 gimple phi;
578 bool live_p;
579 enum vect_relevant relevant, tmp_relevant;
580 enum vect_def_type def_type;
582 if (vect_print_dump_info (REPORT_DETAILS))
583 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
585 worklist = VEC_alloc (gimple, heap, 64);
587 /* 1. Init worklist. */
588 for (i = 0; i < nbbs; i++)
590 bb = bbs[i];
591 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
593 phi = gsi_stmt (si);
594 if (vect_print_dump_info (REPORT_DETAILS))
596 fprintf (vect_dump, "init: phi relevant? ");
597 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
600 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
601 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
603 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
605 stmt = gsi_stmt (si);
606 if (vect_print_dump_info (REPORT_DETAILS))
608 fprintf (vect_dump, "init: stmt relevant? ");
609 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
612 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
613 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
617 /* 2. Process_worklist */
618 while (VEC_length (gimple, worklist) > 0)
620 use_operand_p use_p;
621 ssa_op_iter iter;
623 stmt = VEC_pop (gimple, worklist);
624 if (vect_print_dump_info (REPORT_DETAILS))
626 fprintf (vect_dump, "worklist: examine stmt: ");
627 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
630 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
631 (DEF_STMT) as relevant/irrelevant and live/dead according to the
632 liveness and relevance properties of STMT. */
633 stmt_vinfo = vinfo_for_stmt (stmt);
634 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
635 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
637 /* Generally, the liveness and relevance properties of STMT are
638 propagated as is to the DEF_STMTs of its USEs:
639 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
640 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
642 One exception is when STMT has been identified as defining a reduction
643 variable; in this case we set the liveness/relevance as follows:
644 live_p = false
645 relevant = vect_used_by_reduction
646 This is because we distinguish between two kinds of relevant stmts -
647 those that are used by a reduction computation, and those that are
648 (also) used by a regular computation. This allows us later on to
649 identify stmts that are used solely by a reduction, and therefore the
650 order of the results that they produce does not have to be kept. */
652 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
653 tmp_relevant = relevant;
654 switch (def_type)
656 case vect_reduction_def:
657 switch (tmp_relevant)
659 case vect_unused_in_scope:
660 relevant = vect_used_by_reduction;
661 break;
663 case vect_used_by_reduction:
664 if (gimple_code (stmt) == GIMPLE_PHI)
665 break;
666 /* fall through */
668 default:
669 if (vect_print_dump_info (REPORT_DETAILS))
670 fprintf (vect_dump, "unsupported use of reduction.");
672 VEC_free (gimple, heap, worklist);
673 return false;
676 live_p = false;
677 break;
679 case vect_nested_cycle:
680 if (tmp_relevant != vect_unused_in_scope
681 && tmp_relevant != vect_used_in_outer_by_reduction
682 && tmp_relevant != vect_used_in_outer)
684 if (vect_print_dump_info (REPORT_DETAILS))
685 fprintf (vect_dump, "unsupported use of nested cycle.");
687 VEC_free (gimple, heap, worklist);
688 return false;
691 live_p = false;
692 break;
694 case vect_double_reduction_def:
695 if (tmp_relevant != vect_unused_in_scope
696 && tmp_relevant != vect_used_by_reduction)
698 if (vect_print_dump_info (REPORT_DETAILS))
699 fprintf (vect_dump, "unsupported use of double reduction.");
701 VEC_free (gimple, heap, worklist);
702 return false;
705 live_p = false;
706 break;
708 default:
709 break;
712 if (is_pattern_stmt_p (stmt_vinfo))
714 /* Pattern statements are not inserted into the code, so
715 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
716 have to scan the RHS or function arguments instead. */
717 if (is_gimple_assign (stmt))
719 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
720 tree op = gimple_assign_rhs1 (stmt);
722 i = 1;
723 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
725 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
726 live_p, relevant, &worklist, false)
727 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
728 live_p, relevant, &worklist, false))
730 VEC_free (gimple, heap, worklist);
731 return false;
733 i = 2;
735 for (; i < gimple_num_ops (stmt); i++)
737 op = gimple_op (stmt, i);
738 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
739 &worklist, false))
741 VEC_free (gimple, heap, worklist);
742 return false;
746 else if (is_gimple_call (stmt))
748 for (i = 0; i < gimple_call_num_args (stmt); i++)
750 tree arg = gimple_call_arg (stmt, i);
751 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
752 &worklist, false))
754 VEC_free (gimple, heap, worklist);
755 return false;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
765 &worklist, false))
767 VEC_free (gimple, heap, worklist);
768 return false;
772 if (STMT_VINFO_GATHER_P (stmt_vinfo))
774 tree off;
775 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
776 gcc_assert (decl);
777 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
778 &worklist, true))
780 VEC_free (gimple, heap, worklist);
781 return false;
784 } /* while worklist */
786 VEC_free (gimple, heap, worklist);
787 return true;
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 void
798 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
799 enum vect_def_type *dt,
800 stmt_vector_for_cost *prologue_cost_vec,
801 stmt_vector_for_cost *body_cost_vec)
803 int i;
804 int inside_cost = 0, prologue_cost = 0;
806 /* The SLP costs were already calculated during SLP tree build. */
807 if (PURE_SLP_STMT (stmt_info))
808 return;
 810 /* FORNOW: Assuming maximum 2 args per stmt. */
811 for (i = 0; i < 2; i++)
812 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
813 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
814 stmt_info, 0, vect_prologue);
816 /* Pass the inside-of-loop statements to the target-specific cost model. */
817 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
818 stmt_info, 0, vect_body);
820 if (vect_print_dump_info (REPORT_COST))
821 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
822 "prologue_cost = %d .", inside_cost, prologue_cost);
826 /* Model cost for type demotion and promotion operations. PWR is normally
827 zero for single-step promotions and demotions. It will be one if
828 two-step promotion/demotion is required, and so on. Each additional
829 step doubles the number of instructions required. */
831 static void
832 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
833 enum vect_def_type *dt, int pwr)
835 int i, tmp;
836 int inside_cost = 0, prologue_cost = 0;
837 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
838 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
839 void *target_cost_data;
841 /* The SLP costs were already calculated during SLP tree build. */
842 if (PURE_SLP_STMT (stmt_info))
843 return;
845 if (loop_vinfo)
846 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
847 else
848 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
850 for (i = 0; i < pwr + 1; i++)
852 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
853 (i + 1) : i;
854 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
855 vec_promote_demote, stmt_info, 0,
856 vect_body);
 859 /* FORNOW: Assuming maximum 2 args per stmt. */
860 for (i = 0; i < 2; i++)
861 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
862 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
863 stmt_info, 0, vect_prologue);
865 if (vect_print_dump_info (REPORT_COST))
866 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
867 "prologue_cost = %d .", inside_cost, prologue_cost);
870 /* Function vect_cost_group_size
872 For grouped load or store, return the group_size only if it is the first
873 load or store of a group, else return 1. This ensures that group size is
874 only returned once per group. */
876 static int
877 vect_cost_group_size (stmt_vec_info stmt_info)
879 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
881 if (first_stmt == STMT_VINFO_STMT (stmt_info))
882 return GROUP_SIZE (stmt_info);
884 return 1;
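/* For example, in a group of four interleaved stores the first statement
   of the group reports a group size of 4 and the remaining three report 1,
   so the interleaving overhead is charged only once per group.  */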
888 /* Function vect_model_store_cost
890 Models cost for stores. In the case of grouped accesses, one access
891 has the overhead of the grouped access attributed to it. */
893 void
894 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
895 bool store_lanes_p, enum vect_def_type dt,
896 slp_tree slp_node,
897 stmt_vector_for_cost *prologue_cost_vec,
898 stmt_vector_for_cost *body_cost_vec)
900 int group_size;
901 unsigned int inside_cost = 0, prologue_cost = 0;
902 struct data_reference *first_dr;
903 gimple first_stmt;
905 /* The SLP costs were already calculated during SLP tree build. */
906 if (PURE_SLP_STMT (stmt_info))
907 return;
909 if (dt == vect_constant_def || dt == vect_external_def)
910 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
911 stmt_info, 0, vect_prologue);
913 /* Grouped access? */
914 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
916 if (slp_node)
918 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
919 group_size = 1;
921 else
923 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
924 group_size = vect_cost_group_size (stmt_info);
927 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
929 /* Not a grouped access. */
930 else
932 group_size = 1;
933 first_dr = STMT_VINFO_DATA_REF (stmt_info);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (!store_lanes_p && group_size > 1)
942 /* Uses a high and low interleave operation for each needed permute. */
944 int nstmts = ncopies * exact_log2 (group_size) * group_size;
945 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
946 stmt_info, 0, vect_body);
948 if (vect_print_dump_info (REPORT_COST))
949 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
950 group_size);
953 /* Costs of the stores. */
954 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
956 if (vect_print_dump_info (REPORT_COST))
957 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
958 "prologue_cost = %d .", inside_cost, prologue_cost);
962 /* Calculate cost of DR's memory access. */
963 void
964 vect_get_store_cost (struct data_reference *dr, int ncopies,
965 unsigned int *inside_cost,
966 stmt_vector_for_cost *body_cost_vec)
968 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
969 gimple stmt = DR_STMT (dr);
970 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
972 switch (alignment_support_scheme)
974 case dr_aligned:
976 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
977 vector_store, stmt_info, 0,
978 vect_body);
980 if (vect_print_dump_info (REPORT_COST))
981 fprintf (vect_dump, "vect_model_store_cost: aligned.");
983 break;
986 case dr_unaligned_supported:
988 /* Here, we assign an additional cost for the unaligned store. */
989 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
990 unaligned_store, stmt_info,
991 DR_MISALIGNMENT (dr), vect_body);
993 if (vect_print_dump_info (REPORT_COST))
994 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
995 "hardware.");
997 break;
1000 case dr_unaligned_unsupported:
1002 *inside_cost = VECT_MAX_COST;
1004 if (vect_print_dump_info (REPORT_COST))
1005 fprintf (vect_dump, "vect_model_store_cost: unsupported access.");
1007 break;
1010 default:
1011 gcc_unreachable ();
1016 /* Function vect_model_load_cost
1018 Models cost for loads. In the case of grouped accesses, the last access
1019 has the overhead of the grouped access attributed to it. Since unaligned
1020 accesses are supported for loads, we also account for the costs of the
1021 access scheme chosen. */
1023 void
1024 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1025 bool load_lanes_p, slp_tree slp_node,
1026 stmt_vector_for_cost *prologue_cost_vec,
1027 stmt_vector_for_cost *body_cost_vec)
1029 int group_size;
1030 gimple first_stmt;
1031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1032 unsigned int inside_cost = 0, prologue_cost = 0;
1034 /* The SLP costs were already calculated during SLP tree build. */
1035 if (PURE_SLP_STMT (stmt_info))
1036 return;
1038 /* Grouped accesses? */
1039 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1040 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1042 group_size = vect_cost_group_size (stmt_info);
1043 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1045 /* Not a grouped access. */
1046 else
1048 group_size = 1;
1049 first_dr = dr;
1052 /* We assume that the cost of a single load-lanes instruction is
1053 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1054 access is instead being provided by a load-and-permute operation,
1055 include the cost of the permutes. */
1056 if (!load_lanes_p && group_size > 1)
 1058 /* Uses even and odd extract operations for each needed permute. */
1059 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1060 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1061 stmt_info, 0, vect_body);
1063 if (vect_print_dump_info (REPORT_COST))
1064 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1065 group_size);
1068 /* The loads themselves. */
1069 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1071 /* N scalar loads plus gathering them into a vector. */
1072 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1073 inside_cost += record_stmt_cost (body_cost_vec,
1074 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1075 scalar_load, stmt_info, 0, vect_body);
1076 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1077 stmt_info, 0, vect_body);
1079 else
1080 vect_get_load_cost (first_dr, ncopies,
1081 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1082 || group_size > 1 || slp_node),
1083 &inside_cost, &prologue_cost,
1084 prologue_cost_vec, body_cost_vec, true);
1086 if (vect_print_dump_info (REPORT_COST))
1087 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1088 "prologue_cost = %d .", inside_cost, prologue_cost);
1092 /* Calculate cost of DR's memory access. */
1093 void
1094 vect_get_load_cost (struct data_reference *dr, int ncopies,
1095 bool add_realign_cost, unsigned int *inside_cost,
1096 unsigned int *prologue_cost,
1097 stmt_vector_for_cost *prologue_cost_vec,
1098 stmt_vector_for_cost *body_cost_vec,
1099 bool record_prologue_costs)
1101 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1102 gimple stmt = DR_STMT (dr);
1103 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1105 switch (alignment_support_scheme)
1107 case dr_aligned:
1109 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1110 stmt_info, 0, vect_body);
1112 if (vect_print_dump_info (REPORT_COST))
1113 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1115 break;
1117 case dr_unaligned_supported:
1119 /* Here, we assign an additional cost for the unaligned load. */
1120 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1121 unaligned_load, stmt_info,
1122 DR_MISALIGNMENT (dr), vect_body);
1124 if (vect_print_dump_info (REPORT_COST))
1125 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1126 "hardware.");
1128 break;
1130 case dr_explicit_realign:
1132 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1133 vector_load, stmt_info, 0, vect_body);
1134 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1135 vec_perm, stmt_info, 0, vect_body);
1137 /* FIXME: If the misalignment remains fixed across the iterations of
1138 the containing loop, the following cost should be added to the
1139 prologue costs. */
1140 if (targetm.vectorize.builtin_mask_for_load)
1141 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1142 stmt_info, 0, vect_body);
1144 if (vect_print_dump_info (REPORT_COST))
1145 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1147 break;
1149 case dr_explicit_realign_optimized:
1151 if (vect_print_dump_info (REPORT_COST))
1152 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1153 "pipelined.");
1155 /* Unaligned software pipeline has a load of an address, an initial
1156 load, and possibly a mask operation to "prime" the loop. However,
1157 if this is an access in a group of loads, which provide grouped
1158 access, then the above cost should only be considered for one
1159 access in the group. Inside the loop, there is a load op
1160 and a realignment op. */
1162 if (add_realign_cost && record_prologue_costs)
1164 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1165 vector_stmt, stmt_info,
1166 0, vect_prologue);
1167 if (targetm.vectorize.builtin_mask_for_load)
1168 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1169 vector_stmt, stmt_info,
1170 0, vect_prologue);
1173 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1174 stmt_info, 0, vect_body);
1175 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1176 stmt_info, 0, vect_body);
1178 if (vect_print_dump_info (REPORT_COST))
1179 fprintf (vect_dump,
1180 "vect_model_load_cost: explicit realign optimized");
1182 break;
1185 case dr_unaligned_unsupported:
1187 *inside_cost = VECT_MAX_COST;
1189 if (vect_print_dump_info (REPORT_COST))
1190 fprintf (vect_dump, "vect_model_load_cost: unsupported access.");
1192 break;
1195 default:
1196 gcc_unreachable ();
1200 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1201 the loop preheader for the vectorized stmt STMT. */
1203 static void
1204 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1206 if (gsi)
1207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1208 else
1210 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1213 if (loop_vinfo)
1215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 basic_block new_bb;
1217 edge pe;
1219 if (nested_in_vect_loop_p (loop, stmt))
1220 loop = loop->inner;
1222 pe = loop_preheader_edge (loop);
1223 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1224 gcc_assert (!new_bb);
1226 else
1228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1229 basic_block bb;
1230 gimple_stmt_iterator gsi_bb_start;
1232 gcc_assert (bb_vinfo);
1233 bb = BB_VINFO_BB (bb_vinfo);
1234 gsi_bb_start = gsi_after_labels (bb);
1235 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1239 if (vect_print_dump_info (REPORT_DETAILS))
1241 fprintf (vect_dump, "created new init_stmt: ");
1242 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1246 /* Function vect_init_vector.
1248 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1249 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1250 vector type, a vector with all elements equal to VAL is created first.
1251 Place the initialization at BSI if it is not NULL. Otherwise, place the
1252 initialization at the loop preheader.
1253 Return the DEF of INIT_STMT.
1254 It will be used in the vectorization of STMT. */
1256 tree
1257 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1259 tree new_var;
1260 gimple init_stmt;
1261 tree vec_oprnd;
1262 tree new_temp;
1264 if (TREE_CODE (type) == VECTOR_TYPE
1265 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1267 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1269 if (CONSTANT_CLASS_P (val))
1270 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1271 else
1273 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1274 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1275 new_temp, val,
1276 NULL_TREE);
1277 vect_init_vector_1 (stmt, init_stmt, gsi);
1278 val = new_temp;
1281 val = build_vector_from_val (type, val);
1284 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1285 init_stmt = gimple_build_assign (new_var, val);
1286 new_temp = make_ssa_name (new_var, init_stmt);
1287 gimple_assign_set_lhs (init_stmt, new_temp);
1288 vect_init_vector_1 (stmt, init_stmt, gsi);
1289 vec_oprnd = gimple_assign_lhs (init_stmt);
1290 return vec_oprnd;
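/* Illustrative example (hypothetical names): for VAL = 3 and a V4SI TYPE
   the code above emits, roughly,

       cst_1 = { 3, 3, 3, 3 };

   in the loop preheader (or at GSI when one is given) and returns the new
   SSA name, converting VAL to the vector element type first if needed.  */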
1294 /* Function vect_get_vec_def_for_operand.
1296 OP is an operand in STMT. This function returns a (vector) def that will be
1297 used in the vectorized stmt for STMT.
1299 In the case that OP is an SSA_NAME which is defined in the loop, then
1300 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1302 In case OP is an invariant or constant, a new stmt that creates a vector def
1303 needs to be introduced. */
1305 tree
1306 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1308 tree vec_oprnd;
1309 gimple vec_stmt;
1310 gimple def_stmt;
1311 stmt_vec_info def_stmt_info = NULL;
1312 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1313 unsigned int nunits;
1314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1315 tree def;
1316 enum vect_def_type dt;
1317 bool is_simple_use;
1318 tree vector_type;
1320 if (vect_print_dump_info (REPORT_DETAILS))
1322 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1323 print_generic_expr (vect_dump, op, TDF_SLIM);
1326 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1327 &def_stmt, &def, &dt);
1328 gcc_assert (is_simple_use);
1329 if (vect_print_dump_info (REPORT_DETAILS))
1331 if (def)
1333 fprintf (vect_dump, "def = ");
1334 print_generic_expr (vect_dump, def, TDF_SLIM);
1336 if (def_stmt)
1338 fprintf (vect_dump, " def_stmt = ");
1339 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1343 switch (dt)
1345 /* Case 1: operand is a constant. */
1346 case vect_constant_def:
1348 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1349 gcc_assert (vector_type);
1350 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1352 if (scalar_def)
1353 *scalar_def = op;
1355 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1356 if (vect_print_dump_info (REPORT_DETAILS))
1357 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1359 return vect_init_vector (stmt, op, vector_type, NULL);
1362 /* Case 2: operand is defined outside the loop - loop invariant. */
1363 case vect_external_def:
1365 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1366 gcc_assert (vector_type);
1368 if (scalar_def)
1369 *scalar_def = def;
1371 /* Create 'vec_inv = {inv,inv,..,inv}' */
1372 if (vect_print_dump_info (REPORT_DETAILS))
1373 fprintf (vect_dump, "Create vector_inv.");
1375 return vect_init_vector (stmt, def, vector_type, NULL);
1378 /* Case 3: operand is defined inside the loop. */
1379 case vect_internal_def:
1381 if (scalar_def)
1382 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1384 /* Get the def from the vectorized stmt. */
1385 def_stmt_info = vinfo_for_stmt (def_stmt);
1387 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1388 /* Get vectorized pattern statement. */
1389 if (!vec_stmt
1390 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1391 && !STMT_VINFO_RELEVANT (def_stmt_info))
1392 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1393 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1394 gcc_assert (vec_stmt);
1395 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1396 vec_oprnd = PHI_RESULT (vec_stmt);
1397 else if (is_gimple_call (vec_stmt))
1398 vec_oprnd = gimple_call_lhs (vec_stmt);
1399 else
1400 vec_oprnd = gimple_assign_lhs (vec_stmt);
1401 return vec_oprnd;
1404 /* Case 4: operand is defined by a loop header phi - reduction */
1405 case vect_reduction_def:
1406 case vect_double_reduction_def:
1407 case vect_nested_cycle:
1409 struct loop *loop;
1411 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1412 loop = (gimple_bb (def_stmt))->loop_father;
1414 /* Get the def before the loop */
1415 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1416 return get_initial_def_for_reduction (stmt, op, scalar_def);
1419 /* Case 5: operand is defined by loop-header phi - induction. */
1420 case vect_induction_def:
1422 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1424 /* Get the def from the vectorized stmt. */
1425 def_stmt_info = vinfo_for_stmt (def_stmt);
1426 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1427 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1428 vec_oprnd = PHI_RESULT (vec_stmt);
1429 else
1430 vec_oprnd = gimple_get_lhs (vec_stmt);
1431 return vec_oprnd;
1434 default:
1435 gcc_unreachable ();
1440 /* Function vect_get_vec_def_for_stmt_copy
1442 Return a vector-def for an operand. This function is used when the
1443 vectorized stmt to be created (by the caller to this function) is a "copy"
1444 created in case the vectorized result cannot fit in one vector, and several
1445 copies of the vector-stmt are required. In this case the vector-def is
1446 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1447 of the stmt that defines VEC_OPRND.
1448 DT is the type of the vector def VEC_OPRND.
1450 Context:
1451 In case the vectorization factor (VF) is bigger than the number
1452 of elements that can fit in a vectype (nunits), we have to generate
1453 more than one vector stmt to vectorize the scalar stmt. This situation
1454 arises when there are multiple data-types operated upon in the loop; the
1455 smallest data-type determines the VF, and as a result, when vectorizing
1456 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1457 vector stmt (each computing a vector of 'nunits' results, and together
1458 computing 'VF' results in each iteration). This function is called when
1459 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1460 which VF=16 and nunits=4, so the number of copies required is 4):
1462 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1464 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1465 VS1.1: vx.1 = memref1 VS1.2
1466 VS1.2: vx.2 = memref2 VS1.3
1467 VS1.3: vx.3 = memref3
1469 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1470 VSnew.1: vz1 = vx.1 + ... VSnew.2
1471 VSnew.2: vz2 = vx.2 + ... VSnew.3
1472 VSnew.3: vz3 = vx.3 + ...
1474 The vectorization of S1 is explained in vectorizable_load.
1475 The vectorization of S2:
1476 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1477 the function 'vect_get_vec_def_for_operand' is called to
1478 get the relevant vector-def for each operand of S2. For operand x it
1479 returns the vector-def 'vx.0'.
1481 To create the remaining copies of the vector-stmt (VSnew.j), this
1482 function is called to get the relevant vector-def for each operand. It is
1483 obtained from the respective VS1.j stmt, which is recorded in the
1484 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1486 For example, to obtain the vector-def 'vx.1' in order to create the
1487 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1488 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1489 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1490 and return its def ('vx.1').
1491 Overall, to create the above sequence this function will be called 3 times:
1492 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1493 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1494 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1496 tree
1497 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1499 gimple vec_stmt_for_operand;
1500 stmt_vec_info def_stmt_info;
1502 /* Do nothing; can reuse same def. */
1503 if (dt == vect_external_def || dt == vect_constant_def )
1504 return vec_oprnd;
1506 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1507 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1508 gcc_assert (def_stmt_info);
1509 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1510 gcc_assert (vec_stmt_for_operand);
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1512 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1513 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1514 else
1515 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1516 return vec_oprnd;
1520 /* Get vectorized definitions for the operands to create a copy of an original
1521 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1523 static void
1524 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1525 VEC(tree,heap) **vec_oprnds0,
1526 VEC(tree,heap) **vec_oprnds1)
1528 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1530 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1531 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1533 if (vec_oprnds1 && *vec_oprnds1)
1535 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1536 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1537 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1542 /* Get vectorized definitions for OP0 and OP1.
1543 REDUC_INDEX is the index of reduction operand in case of reduction,
1544 and -1 otherwise. */
1546 void
1547 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1548 VEC (tree, heap) **vec_oprnds0,
1549 VEC (tree, heap) **vec_oprnds1,
1550 slp_tree slp_node, int reduc_index)
1552 if (slp_node)
1554 int nops = (op1 == NULL_TREE) ? 1 : 2;
1555 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1556 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1558 VEC_quick_push (tree, ops, op0);
1559 if (op1)
1560 VEC_quick_push (tree, ops, op1);
1562 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1564 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1565 if (op1)
1566 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1568 VEC_free (tree, heap, ops);
1569 VEC_free (slp_void_p, heap, vec_defs);
1571 else
1573 tree vec_oprnd;
1575 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1576 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1577 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1579 if (op1)
1581 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1582 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1583 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1589 /* Function vect_finish_stmt_generation.
1591 Insert a new stmt. */
1593 void
1594 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1595 gimple_stmt_iterator *gsi)
1597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1598 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1599 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1601 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1603 if (!gsi_end_p (*gsi)
1604 && gimple_has_mem_ops (vec_stmt))
1606 gimple at_stmt = gsi_stmt (*gsi);
1607 tree vuse = gimple_vuse (at_stmt);
1608 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1610 tree vdef = gimple_vdef (at_stmt);
1611 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1612 /* If we have an SSA vuse and insert a store, update virtual
1613 SSA form to avoid triggering the renamer. Do so only
1614 if we can easily see all uses - which is what almost always
1615 happens with the way vectorized stmts are inserted. */
1616 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1617 && ((is_gimple_assign (vec_stmt)
1618 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1619 || (is_gimple_call (vec_stmt)
1620 && !(gimple_call_flags (vec_stmt)
1621 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1623 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1624 gimple_set_vdef (vec_stmt, new_vdef);
1625 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1629 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1631 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1632 bb_vinfo));
1634 if (vect_print_dump_info (REPORT_DETAILS))
1636 fprintf (vect_dump, "add new stmt: ");
1637 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1640 gimple_set_location (vec_stmt, gimple_location (stmt));
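/* For example, inserting a vector store VS before the scalar store S it
   replaces (illustrative SSA names):

       before:   # .MEM_2 = VDEF <.MEM_1>   S:  a[i] = x;
       after:    # .MEM_3 = VDEF <.MEM_1>   VS: vect_array = vect_x;
                 # .MEM_2 = VDEF <.MEM_3>   S:  a[i] = x;

   the new store takes over the old VUSE, gets a fresh VDEF, and the scalar
   store's VUSE is rewired to it, keeping virtual SSA form valid without
   running the renamer.  */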
1643 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1644 a function declaration if the target has a vectorized version
1645 of the function, or NULL_TREE if the function cannot be vectorized. */
1647 tree
1648 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1650 tree fndecl = gimple_call_fndecl (call);
1652 /* We only handle functions that do not read or clobber memory -- i.e.
1653 const or novops ones. */
1654 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1655 return NULL_TREE;
1657 if (!fndecl
1658 || TREE_CODE (fndecl) != FUNCTION_DECL
1659 || !DECL_BUILT_IN (fndecl))
1660 return NULL_TREE;
1662 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1663 vectype_in);
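/* For instance, a target that provides a vector square root may map a call
   to the sqrt builtin with V2DF input and output vector types to the
   declaration of its V2DF sqrt builtin through this hook; a target without
   such a builtin returns NULL_TREE and the call is left unvectorized.
   (Illustrative only; the mapping is entirely target specific.)  */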
1666 /* Function vectorizable_call.
1668 Check if STMT performs a function call that can be vectorized.
1669 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1670 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1671 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1673 static bool
1674 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1675 slp_tree slp_node)
1677 tree vec_dest;
1678 tree scalar_dest;
1679 tree op, type;
1680 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1682 tree vectype_out, vectype_in;
1683 int nunits_in;
1684 int nunits_out;
1685 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1686 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1687 tree fndecl, new_temp, def, rhs_type;
1688 gimple def_stmt;
1689 enum vect_def_type dt[3]
1690 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1691 gimple new_stmt = NULL;
1692 int ncopies, j;
1693 VEC(tree, heap) *vargs = NULL;
1694 enum { NARROW, NONE, WIDEN } modifier;
1695 size_t i, nargs;
1696 tree lhs;
1698 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1699 return false;
1701 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1702 return false;
1704 /* Is STMT a vectorizable call? */
1705 if (!is_gimple_call (stmt))
1706 return false;
1708 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1709 return false;
1711 if (stmt_can_throw_internal (stmt))
1712 return false;
1714 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1716 /* Process function arguments. */
1717 rhs_type = NULL_TREE;
1718 vectype_in = NULL_TREE;
1719 nargs = gimple_call_num_args (stmt);
 1721 /* Bail out if the function has more than three arguments; we do not have
1722 interesting builtin functions to vectorize with more than two arguments
1723 except for fma. No arguments is also not good. */
1724 if (nargs == 0 || nargs > 3)
1725 return false;
1727 for (i = 0; i < nargs; i++)
1729 tree opvectype;
1731 op = gimple_call_arg (stmt, i);
1733 /* We can only handle calls with arguments of the same type. */
1734 if (rhs_type
1735 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1737 if (vect_print_dump_info (REPORT_DETAILS))
1738 fprintf (vect_dump, "argument types differ.");
1739 return false;
1741 if (!rhs_type)
1742 rhs_type = TREE_TYPE (op);
1744 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1745 &def_stmt, &def, &dt[i], &opvectype))
1747 if (vect_print_dump_info (REPORT_DETAILS))
1748 fprintf (vect_dump, "use not simple.");
1749 return false;
1752 if (!vectype_in)
1753 vectype_in = opvectype;
1754 else if (opvectype
1755 && opvectype != vectype_in)
1757 if (vect_print_dump_info (REPORT_DETAILS))
1758 fprintf (vect_dump, "argument vector types differ.");
1759 return false;
1762 /* If all arguments are external or constant defs use a vector type with
1763 the same size as the output vector type. */
1764 if (!vectype_in)
1765 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1766 if (vec_stmt)
1767 gcc_assert (vectype_in);
1768 if (!vectype_in)
1770 if (vect_print_dump_info (REPORT_DETAILS))
1772 fprintf (vect_dump, "no vectype for scalar type ");
1773 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1776 return false;
1779 /* FORNOW */
1780 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1781 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1782 if (nunits_in == nunits_out / 2)
1783 modifier = NARROW;
1784 else if (nunits_out == nunits_in)
1785 modifier = NONE;
1786 else if (nunits_out == nunits_in / 2)
1787 modifier = WIDEN;
1788 else
1789 return false;
1791 /* For now, we only vectorize functions if a target specific builtin
1792 is available. TODO -- in some cases, it might be profitable to
1793 insert the calls for pieces of the vector, in order to be able
1794 to vectorize other operations in the loop. */
1795 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1796 if (fndecl == NULL_TREE)
1798 if (vect_print_dump_info (REPORT_DETAILS))
1799 fprintf (vect_dump, "function is not vectorizable.");
1801 return false;
1804 gcc_assert (!gimple_vuse (stmt));
1806 if (slp_node || PURE_SLP_STMT (stmt_info))
1807 ncopies = 1;
1808 else if (modifier == NARROW)
1809 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1810 else
1811 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1813 /* Sanity check: make sure that at least one copy of the vectorized stmt
1814 needs to be generated. */
1815 gcc_assert (ncopies >= 1);
1817 if (!vec_stmt) /* transformation not required. */
1819 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1820 if (vect_print_dump_info (REPORT_DETAILS))
1821 fprintf (vect_dump, "=== vectorizable_call ===");
1822 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1823 return true;
1826 /** Transform. **/
1828 if (vect_print_dump_info (REPORT_DETAILS))
1829 fprintf (vect_dump, "transform call.");
1831 /* Handle def. */
1832 scalar_dest = gimple_call_lhs (stmt);
1833 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1835 prev_stmt_info = NULL;
1836 switch (modifier)
1838 case NONE:
1839 for (j = 0; j < ncopies; ++j)
1841 /* Build argument list for the vectorized call. */
1842 if (j == 0)
1843 vargs = VEC_alloc (tree, heap, nargs);
1844 else
1845 VEC_truncate (tree, vargs, 0);
1847 if (slp_node)
1849 VEC (slp_void_p, heap) *vec_defs
1850 = VEC_alloc (slp_void_p, heap, nargs);
1851 VEC (tree, heap) *vec_oprnds0;
1853 for (i = 0; i < nargs; i++)
1854 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1855 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1856 vec_oprnds0
1857 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1859 /* Arguments are ready. Create the new vector stmt. */
1860 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1862 size_t k;
1863 for (k = 0; k < nargs; k++)
1865 VEC (tree, heap) *vec_oprndsk
1866 = (VEC (tree, heap) *)
1867 VEC_index (slp_void_p, vec_defs, k);
1868 VEC_replace (tree, vargs, k,
1869 VEC_index (tree, vec_oprndsk, i));
1871 new_stmt = gimple_build_call_vec (fndecl, vargs);
1872 new_temp = make_ssa_name (vec_dest, new_stmt);
1873 gimple_call_set_lhs (new_stmt, new_temp);
1874 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1875 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1876 new_stmt);
1879 for (i = 0; i < nargs; i++)
1881 VEC (tree, heap) *vec_oprndsi
1882 = (VEC (tree, heap) *)
1883 VEC_index (slp_void_p, vec_defs, i);
1884 VEC_free (tree, heap, vec_oprndsi);
1886 VEC_free (slp_void_p, heap, vec_defs);
1887 continue;
1890 for (i = 0; i < nargs; i++)
1892 op = gimple_call_arg (stmt, i);
1893 if (j == 0)
1894 vec_oprnd0
1895 = vect_get_vec_def_for_operand (op, stmt, NULL);
1896 else
1898 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1899 vec_oprnd0
1900 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1903 VEC_quick_push (tree, vargs, vec_oprnd0);
1906 new_stmt = gimple_build_call_vec (fndecl, vargs);
1907 new_temp = make_ssa_name (vec_dest, new_stmt);
1908 gimple_call_set_lhs (new_stmt, new_temp);
1909 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1911 if (j == 0)
1912 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1913 else
1914 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1916 prev_stmt_info = vinfo_for_stmt (new_stmt);
1919 break;
1921 case NARROW:
1922 for (j = 0; j < ncopies; ++j)
1924 /* Build argument list for the vectorized call. */
1925 if (j == 0)
1926 vargs = VEC_alloc (tree, heap, nargs * 2);
1927 else
1928 VEC_truncate (tree, vargs, 0);
1930 if (slp_node)
1932 VEC (slp_void_p, heap) *vec_defs
1933 = VEC_alloc (slp_void_p, heap, nargs);
1934 VEC (tree, heap) *vec_oprnds0;
1936 for (i = 0; i < nargs; i++)
1937 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1938 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1939 vec_oprnds0
1940 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1942 /* Arguments are ready. Create the new vector stmt. */
1943 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1944 i += 2)
1946 size_t k;
1947 VEC_truncate (tree, vargs, 0);
1948 for (k = 0; k < nargs; k++)
1950 VEC (tree, heap) *vec_oprndsk
1951 = (VEC (tree, heap) *)
1952 VEC_index (slp_void_p, vec_defs, k);
1953 VEC_quick_push (tree, vargs,
1954 VEC_index (tree, vec_oprndsk, i));
1955 VEC_quick_push (tree, vargs,
1956 VEC_index (tree, vec_oprndsk, i + 1));
1958 new_stmt = gimple_build_call_vec (fndecl, vargs);
1959 new_temp = make_ssa_name (vec_dest, new_stmt);
1960 gimple_call_set_lhs (new_stmt, new_temp);
1961 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1962 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1963 new_stmt);
1966 for (i = 0; i < nargs; i++)
1968 VEC (tree, heap) *vec_oprndsi
1969 = (VEC (tree, heap) *)
1970 VEC_index (slp_void_p, vec_defs, i);
1971 VEC_free (tree, heap, vec_oprndsi);
1973 VEC_free (slp_void_p, heap, vec_defs);
1974 continue;
1977 for (i = 0; i < nargs; i++)
1979 op = gimple_call_arg (stmt, i);
1980 if (j == 0)
1982 vec_oprnd0
1983 = vect_get_vec_def_for_operand (op, stmt, NULL);
1984 vec_oprnd1
1985 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1987 else
1989 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1990 vec_oprnd0
1991 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1992 vec_oprnd1
1993 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1996 VEC_quick_push (tree, vargs, vec_oprnd0);
1997 VEC_quick_push (tree, vargs, vec_oprnd1);
2000 new_stmt = gimple_build_call_vec (fndecl, vargs);
2001 new_temp = make_ssa_name (vec_dest, new_stmt);
2002 gimple_call_set_lhs (new_stmt, new_temp);
2003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2005 if (j == 0)
2006 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2007 else
2008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2010 prev_stmt_info = vinfo_for_stmt (new_stmt);
2013 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2015 break;
2017 case WIDEN:
2018 /* No current target implements this case. */
2019 return false;
2022 VEC_free (tree, heap, vargs);
2024 /* Update the exception handling table with the vector stmt if necessary. */
2025 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2026 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2028 /* The call in STMT might prevent it from being removed by dce.
2029 However, we cannot remove it here, because of the way the ssa
2030 name it defines is mapped to the new definition. So just replace
2031 the rhs of the statement with something harmless. */
2033 if (slp_node)
2034 return true;
2036 type = TREE_TYPE (scalar_dest);
2037 if (is_pattern_stmt_p (stmt_info))
2038 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2039 else
2040 lhs = gimple_call_lhs (stmt);
2041 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2042 set_vinfo_for_stmt (new_stmt, stmt_info);
2043 set_vinfo_for_stmt (stmt, NULL);
2044 STMT_VINFO_STMT (stmt_info) = new_stmt;
2045 gsi_replace (gsi, new_stmt, false);
2046 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2048 return true;
2052 /* Function vect_gen_widened_results_half
2054 Create a vector stmt whose code is CODE, whose number of operands is
2055 given by OP_TYPE, and whose result variable is VEC_DEST; its arguments
2056 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2057 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2058 needs to be created (DECL is a function-decl of a target-builtin).
2059 STMT is the original scalar stmt that we are vectorizing. */
2061 static gimple
2062 vect_gen_widened_results_half (enum tree_code code,
2063 tree decl,
2064 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2065 tree vec_dest, gimple_stmt_iterator *gsi,
2066 gimple stmt)
2068 gimple new_stmt;
2069 tree new_temp;
2071 /* Generate half of the widened result: */
2072 if (code == CALL_EXPR)
2074 /* Target specific support */
2075 if (op_type == binary_op)
2076 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2077 else
2078 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2079 new_temp = make_ssa_name (vec_dest, new_stmt);
2080 gimple_call_set_lhs (new_stmt, new_temp);
2082 else
2084 /* Generic support */
2085 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2086 if (op_type != binary_op)
2087 vec_oprnd1 = NULL;
2088 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2089 vec_oprnd1);
2090 new_temp = make_ssa_name (vec_dest, new_stmt);
2091 gimple_assign_set_lhs (new_stmt, new_temp);
2093 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2095 return new_stmt;
2099 /* Get vectorized definitions for loop-based vectorization. For the first
2100 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2101 the scalar operand), and for the rest we get a copy with
2102 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2103 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2104 The vectors are collected into VEC_OPRNDS. */
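/* For example, a call with MULTI_STEP_CVT == 1 pushes four vector defs
into VEC_OPRNDS: the first one comes from the scalar OPRND (or is a
copy of the previous definition if OPRND is already a vector), and
each following def is a copy of the one before it.  */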
2106 static void
2107 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2108 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2110 tree vec_oprnd;
2112 /* Get first vector operand. */
2113 /* All the vector operands except the very first one (which comes from the scalar OPRND)
2114 are stmt copies. */
2115 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2116 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2117 else
2118 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2120 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2122 /* Get second vector operand. */
2123 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2124 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2126 *oprnd = vec_oprnd;
2128 /* For conversion in multiple steps, continue to get operands
2129 recursively. */
2130 if (multi_step_cvt)
2131 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2135 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2136 For multi-step conversions store the resulting vectors and call the function
2137 recursively. */
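/* For example, assuming 128-bit vectors, a two-step demotion from int
to char (V4SI -> V8HI -> V16QI, MULTI_STEP_CVT == 1) first packs
pairs of V4SI operands into V8HI vectors with VEC_PACK_TRUNC_EXPR,
and the recursive call then packs pairs of those V8HI vectors into
the final V16QI results; each level halves the number of vectors.  */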
2139 static void
2140 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2141 int multi_step_cvt, gimple stmt,
2142 VEC (tree, heap) *vec_dsts,
2143 gimple_stmt_iterator *gsi,
2144 slp_tree slp_node, enum tree_code code,
2145 stmt_vec_info *prev_stmt_info)
2147 unsigned int i;
2148 tree vop0, vop1, new_tmp, vec_dest;
2149 gimple new_stmt;
2150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2152 vec_dest = VEC_pop (tree, vec_dsts);
2154 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2156 /* Create demotion operation. */
2157 vop0 = VEC_index (tree, *vec_oprnds, i);
2158 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2159 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2160 new_tmp = make_ssa_name (vec_dest, new_stmt);
2161 gimple_assign_set_lhs (new_stmt, new_tmp);
2162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2164 if (multi_step_cvt)
2165 /* Store the resulting vector for next recursive call. */
2166 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2167 else
2169 /* This is the last step of the conversion sequence. Store the
2170 vectors in SLP_NODE or in the vector info of the scalar statement
2171 (or in the STMT_VINFO_RELATED_STMT chain). */
2172 if (slp_node)
2173 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2174 else
2176 if (!*prev_stmt_info)
2177 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2178 else
2179 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2181 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2186 /* For multi-step demotion operations we first generate demotion operations
2187 from the source type to the intermediate types, and then combine the
2188 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2189 type. */
2190 if (multi_step_cvt)
2192 /* At each level of recursion we have half of the operands we had at the
2193 previous level. */
2194 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2195 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2196 stmt, vec_dsts, gsi, slp_node,
2197 VEC_PACK_TRUNC_EXPR,
2198 prev_stmt_info);
2201 VEC_quick_push (tree, vec_dsts, vec_dest);
2205 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2206 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2207 the resulting vectors and call the function recursively. */
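/* For example, widening V8HI operands to V4SI results produces, for each
input vector, one stmt using CODE1/DECL1 and one using CODE2/DECL2
(typically the lo/hi halves, e.g. VEC_UNPACK_LO_EXPR and
VEC_UNPACK_HI_EXPR), so on return VEC_OPRNDS0 holds twice as many
vectors as it did on entry.  */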
2209 static void
2210 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2211 VEC (tree, heap) **vec_oprnds1,
2212 gimple stmt, tree vec_dest,
2213 gimple_stmt_iterator *gsi,
2214 enum tree_code code1,
2215 enum tree_code code2, tree decl1,
2216 tree decl2, int op_type)
2218 int i;
2219 tree vop0, vop1, new_tmp1, new_tmp2;
2220 gimple new_stmt1, new_stmt2;
2221 VEC (tree, heap) *vec_tmp = NULL;
2223 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2224 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2226 if (op_type == binary_op)
2227 vop1 = VEC_index (tree, *vec_oprnds1, i);
2228 else
2229 vop1 = NULL_TREE;
2231 /* Generate the two halves of the promotion operation. */
2232 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2233 op_type, vec_dest, gsi, stmt);
2234 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2235 op_type, vec_dest, gsi, stmt);
2236 if (is_gimple_call (new_stmt1))
2238 new_tmp1 = gimple_call_lhs (new_stmt1);
2239 new_tmp2 = gimple_call_lhs (new_stmt2);
2241 else
2243 new_tmp1 = gimple_assign_lhs (new_stmt1);
2244 new_tmp2 = gimple_assign_lhs (new_stmt2);
2247 /* Store the results for the next step. */
2248 VEC_quick_push (tree, vec_tmp, new_tmp1);
2249 VEC_quick_push (tree, vec_tmp, new_tmp2);
2252 VEC_free (tree, heap, *vec_oprnds0);
2253 *vec_oprnds0 = vec_tmp;
2257 /* Check if STMT performs a conversion operation that can be vectorized.
2258 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2259 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2260 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2262 static bool
2263 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2264 gimple *vec_stmt, slp_tree slp_node)
2266 tree vec_dest;
2267 tree scalar_dest;
2268 tree op0, op1 = NULL_TREE;
2269 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2270 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2271 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2272 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2273 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2274 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2275 tree new_temp;
2276 tree def;
2277 gimple def_stmt;
2278 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2279 gimple new_stmt = NULL;
2280 stmt_vec_info prev_stmt_info;
2281 int nunits_in;
2282 int nunits_out;
2283 tree vectype_out, vectype_in;
2284 int ncopies, i, j;
2285 tree lhs_type, rhs_type;
2286 enum { NARROW, NONE, WIDEN } modifier;
2287 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2288 tree vop0;
2289 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2290 int multi_step_cvt = 0;
2291 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2292 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2293 int op_type;
2294 enum machine_mode rhs_mode;
2295 unsigned short fltsz;
2297 /* Is STMT a vectorizable conversion? */
2299 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2300 return false;
2302 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2303 return false;
2305 if (!is_gimple_assign (stmt))
2306 return false;
2308 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2309 return false;
2311 code = gimple_assign_rhs_code (stmt);
2312 if (!CONVERT_EXPR_CODE_P (code)
2313 && code != FIX_TRUNC_EXPR
2314 && code != FLOAT_EXPR
2315 && code != WIDEN_MULT_EXPR
2316 && code != WIDEN_LSHIFT_EXPR)
2317 return false;
2319 op_type = TREE_CODE_LENGTH (code);
2321 /* Check types of lhs and rhs. */
2322 scalar_dest = gimple_assign_lhs (stmt);
2323 lhs_type = TREE_TYPE (scalar_dest);
2324 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2326 op0 = gimple_assign_rhs1 (stmt);
2327 rhs_type = TREE_TYPE (op0);
2329 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2330 && !((INTEGRAL_TYPE_P (lhs_type)
2331 && INTEGRAL_TYPE_P (rhs_type))
2332 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2333 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2334 return false;
2336 if ((INTEGRAL_TYPE_P (lhs_type)
2337 && (TYPE_PRECISION (lhs_type)
2338 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2339 || (INTEGRAL_TYPE_P (rhs_type)
2340 && (TYPE_PRECISION (rhs_type)
2341 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2343 if (vect_print_dump_info (REPORT_DETAILS))
2344 fprintf (vect_dump,
2345 "type conversion to/from bit-precision unsupported.");
2346 return false;
2349 /* Check the operands of the operation. */
2350 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2351 &def_stmt, &def, &dt[0], &vectype_in))
2353 if (vect_print_dump_info (REPORT_DETAILS))
2354 fprintf (vect_dump, "use not simple.");
2355 return false;
2357 if (op_type == binary_op)
2359 bool ok;
2361 op1 = gimple_assign_rhs2 (stmt);
2362 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2363 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2364 OP1. */
2365 if (CONSTANT_CLASS_P (op0))
2366 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2367 &def_stmt, &def, &dt[1], &vectype_in);
2368 else
2369 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2370 &def, &dt[1]);
2372 if (!ok)
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 fprintf (vect_dump, "use not simple.");
2376 return false;
2380 /* If op0 is an external or constant def, use a vector type of
2381 the same size as the output vector type. */
2382 if (!vectype_in)
2383 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2384 if (vec_stmt)
2385 gcc_assert (vectype_in);
2386 if (!vectype_in)
2388 if (vect_print_dump_info (REPORT_DETAILS))
2390 fprintf (vect_dump, "no vectype for scalar type ");
2391 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2394 return false;
2397 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2398 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2399 if (nunits_in < nunits_out)
2400 modifier = NARROW;
2401 else if (nunits_out == nunits_in)
2402 modifier = NONE;
2403 else
2404 modifier = WIDEN;
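/* For example, assuming 128-bit vectors, converting int to short has
vectype_in == V4SI and vectype_out == V8HI, so nunits_in (4) is less
than nunits_out (8) and the conversion is a NARROW; the opposite
direction, short to int, is a WIDEN.  */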
2406 /* Multiple types in SLP are handled by creating the appropriate number of
2407 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2408 case of SLP. */
2409 if (slp_node || PURE_SLP_STMT (stmt_info))
2410 ncopies = 1;
2411 else if (modifier == NARROW)
2412 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2413 else
2414 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2416 /* Sanity check: make sure that at least one copy of the vectorized stmt
2417 needs to be generated. */
2418 gcc_assert (ncopies >= 1);
2420 /* Supportable by target? */
2421 switch (modifier)
2423 case NONE:
2424 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2425 return false;
2426 if (supportable_convert_operation (code, vectype_out, vectype_in,
2427 &decl1, &code1))
2428 break;
2429 /* FALLTHRU */
2430 unsupported:
2431 if (vect_print_dump_info (REPORT_DETAILS))
2432 fprintf (vect_dump, "conversion not supported by target.");
2433 return false;
2435 case WIDEN:
2436 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2437 &code1, &code2, &multi_step_cvt,
2438 &interm_types))
2440 /* Binary widening operation can only be supported directly by the
2441 architecture. */
2442 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2443 break;
2446 if (code != FLOAT_EXPR
2447 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2448 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2449 goto unsupported;
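/* Here the integer source is narrower than the float result
(e.g. short -> double). Try to split the conversion into a widening
step to an intermediate integer type followed by the actual
int-to-float conversion: walk the wider integer modes up to the size
of the float type and check that both steps are supported.  */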
2451 rhs_mode = TYPE_MODE (rhs_type);
2452 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2453 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2454 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2455 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2457 cvt_type
2458 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2459 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2460 if (cvt_type == NULL_TREE)
2461 goto unsupported;
2463 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2465 if (!supportable_convert_operation (code, vectype_out,
2466 cvt_type, &decl1, &codecvt1))
2467 goto unsupported;
2469 else if (!supportable_widening_operation (code, stmt, vectype_out,
2470 cvt_type, &codecvt1,
2471 &codecvt2, &multi_step_cvt,
2472 &interm_types))
2473 continue;
2474 else
2475 gcc_assert (multi_step_cvt == 0);
2477 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2478 vectype_in, &code1, &code2,
2479 &multi_step_cvt, &interm_types))
2480 break;
2483 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2484 goto unsupported;
2486 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2487 codecvt2 = ERROR_MARK;
2488 else
2490 multi_step_cvt++;
2491 VEC_safe_push (tree, heap, interm_types, cvt_type);
2492 cvt_type = NULL_TREE;
2494 break;
2496 case NARROW:
2497 gcc_assert (op_type == unary_op);
2498 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2499 &code1, &multi_step_cvt,
2500 &interm_types))
2501 break;
2503 if (code != FIX_TRUNC_EXPR
2504 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2505 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2506 goto unsupported;
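/* Here the float source is wider than the integer result
(e.g. double -> short). Try a two-step sequence: first FIX_TRUNC_EXPR
from the float type to an integer type of the same width (CVT_TYPE),
then a narrowing integer conversion from CVT_TYPE down to the
destination type.  */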
2508 rhs_mode = TYPE_MODE (rhs_type);
2509 cvt_type
2510 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2511 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2512 if (cvt_type == NULL_TREE)
2513 goto unsupported;
2514 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2515 &decl1, &codecvt1))
2516 goto unsupported;
2517 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2518 &code1, &multi_step_cvt,
2519 &interm_types))
2520 break;
2521 goto unsupported;
2523 default:
2524 gcc_unreachable ();
2527 if (!vec_stmt) /* transformation not required. */
2529 if (vect_print_dump_info (REPORT_DETAILS))
2530 fprintf (vect_dump, "=== vectorizable_conversion ===");
2531 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2533 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2534 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2536 else if (modifier == NARROW)
2538 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2539 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2541 else
2543 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2544 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2546 VEC_free (tree, heap, interm_types);
2547 return true;
2550 /** Transform. **/
2551 if (vect_print_dump_info (REPORT_DETAILS))
2552 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2554 if (op_type == binary_op)
2556 if (CONSTANT_CLASS_P (op0))
2557 op0 = fold_convert (TREE_TYPE (op1), op0);
2558 else if (CONSTANT_CLASS_P (op1))
2559 op1 = fold_convert (TREE_TYPE (op0), op1);
2562 /* In case of multi-step conversion, we first generate conversion operations
2563 to the intermediate types, and then from those types to the final one.
2564 We create vector destinations for the intermediate type (TYPES) received
2565 from supportable_*_operation, and store them in the correct order
2566 for future use in vect_create_vectorized_*_stmts (). */
2567 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2568 vec_dest = vect_create_destination_var (scalar_dest,
2569 (cvt_type && modifier == WIDEN)
2570 ? cvt_type : vectype_out);
2571 VEC_quick_push (tree, vec_dsts, vec_dest);
2573 if (multi_step_cvt)
2575 for (i = VEC_length (tree, interm_types) - 1;
2576 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2578 vec_dest = vect_create_destination_var (scalar_dest,
2579 intermediate_type);
2580 VEC_quick_push (tree, vec_dsts, vec_dest);
2584 if (cvt_type)
2585 vec_dest = vect_create_destination_var (scalar_dest,
2586 modifier == WIDEN
2587 ? vectype_out : cvt_type);
2589 if (!slp_node)
2591 if (modifier == NONE)
2592 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2593 else if (modifier == WIDEN)
2595 vec_oprnds0 = VEC_alloc (tree, heap,
2596 (multi_step_cvt
2597 ? vect_pow2 (multi_step_cvt) : 1));
2598 if (op_type == binary_op)
2599 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2601 else
2602 vec_oprnds0 = VEC_alloc (tree, heap,
2603 2 * (multi_step_cvt
2604 ? vect_pow2 (multi_step_cvt) : 1));
2606 else if (code == WIDEN_LSHIFT_EXPR)
2607 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2609 last_oprnd = op0;
2610 prev_stmt_info = NULL;
2611 switch (modifier)
2613 case NONE:
2614 for (j = 0; j < ncopies; j++)
2616 if (j == 0)
2617 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2618 -1);
2619 else
2620 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2622 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2624 /* Arguments are ready, create the new vector stmt. */
2625 if (code1 == CALL_EXPR)
2627 new_stmt = gimple_build_call (decl1, 1, vop0);
2628 new_temp = make_ssa_name (vec_dest, new_stmt);
2629 gimple_call_set_lhs (new_stmt, new_temp);
2631 else
2633 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2634 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2635 vop0, NULL);
2636 new_temp = make_ssa_name (vec_dest, new_stmt);
2637 gimple_assign_set_lhs (new_stmt, new_temp);
2640 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2641 if (slp_node)
2642 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2643 new_stmt);
2646 if (j == 0)
2647 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2648 else
2649 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2650 prev_stmt_info = vinfo_for_stmt (new_stmt);
2652 break;
2654 case WIDEN:
2655 /* In case the vectorization factor (VF) is bigger than the number
2656 of elements that we can fit in a vectype (nunits), we have to
2657 generate more than one vector stmt; i.e., we need to "unroll"
2658 the vector stmt by a factor VF/nunits. */
2659 for (j = 0; j < ncopies; j++)
2661 /* Handle uses. */
2662 if (j == 0)
2664 if (slp_node)
2666 if (code == WIDEN_LSHIFT_EXPR)
2668 unsigned int k;
2670 vec_oprnd1 = op1;
2671 /* Store vec_oprnd1 for every vector stmt to be created
2672 for SLP_NODE. We check during the analysis that all
2673 the shift arguments are the same. */
2674 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2675 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2677 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2678 slp_node, -1);
2680 else
2681 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2682 &vec_oprnds1, slp_node, -1);
2684 else
2686 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2687 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2688 if (op_type == binary_op)
2690 if (code == WIDEN_LSHIFT_EXPR)
2691 vec_oprnd1 = op1;
2692 else
2693 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2694 NULL);
2695 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2699 else
2701 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2702 VEC_truncate (tree, vec_oprnds0, 0);
2703 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2704 if (op_type == binary_op)
2706 if (code == WIDEN_LSHIFT_EXPR)
2707 vec_oprnd1 = op1;
2708 else
2709 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2710 vec_oprnd1);
2711 VEC_truncate (tree, vec_oprnds1, 0);
2712 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2716 /* Arguments are ready. Create the new vector stmts. */
2717 for (i = multi_step_cvt; i >= 0; i--)
2719 tree this_dest = VEC_index (tree, vec_dsts, i);
2720 enum tree_code c1 = code1, c2 = code2;
2721 if (i == 0 && codecvt2 != ERROR_MARK)
2723 c1 = codecvt1;
2724 c2 = codecvt2;
2726 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2727 &vec_oprnds1,
2728 stmt, this_dest, gsi,
2729 c1, c2, decl1, decl2,
2730 op_type);
2733 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2735 if (cvt_type)
2737 if (codecvt1 == CALL_EXPR)
2739 new_stmt = gimple_build_call (decl1, 1, vop0);
2740 new_temp = make_ssa_name (vec_dest, new_stmt);
2741 gimple_call_set_lhs (new_stmt, new_temp);
2743 else
2745 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2746 new_temp = make_ssa_name (vec_dest, NULL);
2747 new_stmt = gimple_build_assign_with_ops (codecvt1,
2748 new_temp,
2749 vop0, NULL);
2752 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2754 else
2755 new_stmt = SSA_NAME_DEF_STMT (vop0);
2757 if (slp_node)
2758 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2759 new_stmt);
2760 else
2762 if (!prev_stmt_info)
2763 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2764 else
2765 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2766 prev_stmt_info = vinfo_for_stmt (new_stmt);
2771 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2772 break;
2774 case NARROW:
2775 /* In case the vectorization factor (VF) is bigger than the number
2776 of elements that we can fit in a vectype (nunits), we have to
2777 generate more than one vector stmt; i.e., we need to "unroll"
2778 the vector stmt by a factor VF/nunits. */
2779 for (j = 0; j < ncopies; j++)
2781 /* Handle uses. */
2782 if (slp_node)
2783 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2784 slp_node, -1);
2785 else
2787 VEC_truncate (tree, vec_oprnds0, 0);
2788 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2789 vect_pow2 (multi_step_cvt) - 1);
2792 /* Arguments are ready. Create the new vector stmts. */
2793 if (cvt_type)
2794 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2796 if (codecvt1 == CALL_EXPR)
2798 new_stmt = gimple_build_call (decl1, 1, vop0);
2799 new_temp = make_ssa_name (vec_dest, new_stmt);
2800 gimple_call_set_lhs (new_stmt, new_temp);
2802 else
2804 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2805 new_temp = make_ssa_name (vec_dest, NULL);
2806 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2807 vop0, NULL);
2810 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2811 VEC_replace (tree, vec_oprnds0, i, new_temp);
2814 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2815 stmt, vec_dsts, gsi,
2816 slp_node, code1,
2817 &prev_stmt_info);
2820 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2821 break;
2824 VEC_free (tree, heap, vec_oprnds0);
2825 VEC_free (tree, heap, vec_oprnds1);
2826 VEC_free (tree, heap, vec_dsts);
2827 VEC_free (tree, heap, interm_types);
2829 return true;
2833 /* Function vectorizable_assignment.
2835 Check if STMT performs an assignment (copy) that can be vectorized.
2836 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2837 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2838 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2840 static bool
2841 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2842 gimple *vec_stmt, slp_tree slp_node)
2844 tree vec_dest;
2845 tree scalar_dest;
2846 tree op;
2847 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2848 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2849 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2850 tree new_temp;
2851 tree def;
2852 gimple def_stmt;
2853 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2854 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2855 int ncopies;
2856 int i, j;
2857 VEC(tree,heap) *vec_oprnds = NULL;
2858 tree vop;
2859 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2860 gimple new_stmt = NULL;
2861 stmt_vec_info prev_stmt_info = NULL;
2862 enum tree_code code;
2863 tree vectype_in;
2865 /* Multiple types in SLP are handled by creating the appropriate number of
2866 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2867 case of SLP. */
2868 if (slp_node || PURE_SLP_STMT (stmt_info))
2869 ncopies = 1;
2870 else
2871 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2873 gcc_assert (ncopies >= 1);
2875 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2876 return false;
2878 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2879 return false;
2881 /* Is vectorizable assignment? */
2882 if (!is_gimple_assign (stmt))
2883 return false;
2885 scalar_dest = gimple_assign_lhs (stmt);
2886 if (TREE_CODE (scalar_dest) != SSA_NAME)
2887 return false;
2889 code = gimple_assign_rhs_code (stmt);
2890 if (gimple_assign_single_p (stmt)
2891 || code == PAREN_EXPR
2892 || CONVERT_EXPR_CODE_P (code))
2893 op = gimple_assign_rhs1 (stmt);
2894 else
2895 return false;
2897 if (code == VIEW_CONVERT_EXPR)
2898 op = TREE_OPERAND (op, 0);
2900 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2901 &def_stmt, &def, &dt[0], &vectype_in))
2903 if (vect_print_dump_info (REPORT_DETAILS))
2904 fprintf (vect_dump, "use not simple.");
2905 return false;
2908 /* We can handle NOP_EXPR conversions that do not change the number
2909 of elements or the vector size. */
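/* For example, a cast between int and unsigned int, or a
VIEW_CONVERT_EXPR between V4SI and V4SF, is just a vector copy;
conversions that change the number of elements or the vector size
are rejected here, and widening/narrowing conversions are handled by
vectorizable_conversion instead.  */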
2910 if ((CONVERT_EXPR_CODE_P (code)
2911 || code == VIEW_CONVERT_EXPR)
2912 && (!vectype_in
2913 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2914 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2915 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2916 return false;
2918 /* We do not handle bit-precision changes. */
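/* For instance, truncating an int to a 3-bit bit-field type would need
an extra masking operation; extending an unsigned value to a wider
precision, on the other hand, leaves the bit pattern unchanged and
is allowed below.  */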
2919 if ((CONVERT_EXPR_CODE_P (code)
2920 || code == VIEW_CONVERT_EXPR)
2921 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2922 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2923 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2924 || ((TYPE_PRECISION (TREE_TYPE (op))
2925 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2926 /* But a conversion that does not change the bit-pattern is ok. */
2927 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2928 > TYPE_PRECISION (TREE_TYPE (op)))
2929 && TYPE_UNSIGNED (TREE_TYPE (op))))
2931 if (vect_print_dump_info (REPORT_DETAILS))
2932 fprintf (vect_dump, "type conversion to/from bit-precision "
2933 "unsupported.");
2934 return false;
2937 if (!vec_stmt) /* transformation not required. */
2939 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2940 if (vect_print_dump_info (REPORT_DETAILS))
2941 fprintf (vect_dump, "=== vectorizable_assignment ===");
2942 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2943 return true;
2946 /** Transform. **/
2947 if (vect_print_dump_info (REPORT_DETAILS))
2948 fprintf (vect_dump, "transform assignment.");
2950 /* Handle def. */
2951 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2953 /* Handle use. */
2954 for (j = 0; j < ncopies; j++)
2956 /* Handle uses. */
2957 if (j == 0)
2958 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2959 else
2960 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2962 /* Arguments are ready. Create the new vector stmt. */
2963 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2965 if (CONVERT_EXPR_CODE_P (code)
2966 || code == VIEW_CONVERT_EXPR)
2967 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2968 new_stmt = gimple_build_assign (vec_dest, vop);
2969 new_temp = make_ssa_name (vec_dest, new_stmt);
2970 gimple_assign_set_lhs (new_stmt, new_temp);
2971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2972 if (slp_node)
2973 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2976 if (slp_node)
2977 continue;
2979 if (j == 0)
2980 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2981 else
2982 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2984 prev_stmt_info = vinfo_for_stmt (new_stmt);
2987 VEC_free (tree, heap, vec_oprnds);
2988 return true;
2992 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2993 either as shift by a scalar or by a vector. */
2995 bool
2996 vect_supportable_shift (enum tree_code code, tree scalar_type)
2999 enum machine_mode vec_mode;
3000 optab optab;
3001 int icode;
3002 tree vectype;
3004 vectype = get_vectype_for_scalar_type (scalar_type);
3005 if (!vectype)
3006 return false;
3008 optab = optab_for_tree_code (code, vectype, optab_scalar);
3009 if (!optab
3010 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3012 optab = optab_for_tree_code (code, vectype, optab_vector);
3013 if (!optab
3014 || (optab_handler (optab, TYPE_MODE (vectype))
3015 == CODE_FOR_nothing))
3016 return false;
3019 vec_mode = TYPE_MODE (vectype);
3020 icode = (int) optab_handler (optab, vec_mode);
3021 if (icode == CODE_FOR_nothing)
3022 return false;
3024 return true;
3028 /* Function vectorizable_shift.
3030 Check if STMT performs a shift operation that can be vectorized.
3031 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3032 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3033 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3035 static bool
3036 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3037 gimple *vec_stmt, slp_tree slp_node)
3039 tree vec_dest;
3040 tree scalar_dest;
3041 tree op0, op1 = NULL;
3042 tree vec_oprnd1 = NULL_TREE;
3043 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3044 tree vectype;
3045 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3046 enum tree_code code;
3047 enum machine_mode vec_mode;
3048 tree new_temp;
3049 optab optab;
3050 int icode;
3051 enum machine_mode optab_op2_mode;
3052 tree def;
3053 gimple def_stmt;
3054 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3055 gimple new_stmt = NULL;
3056 stmt_vec_info prev_stmt_info;
3057 int nunits_in;
3058 int nunits_out;
3059 tree vectype_out;
3060 tree op1_vectype;
3061 int ncopies;
3062 int j, i;
3063 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3064 tree vop0, vop1;
3065 unsigned int k;
3066 bool scalar_shift_arg = true;
3067 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3068 int vf;
3070 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3071 return false;
3073 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3074 return false;
3076 /* Is STMT a vectorizable binary/unary operation? */
3077 if (!is_gimple_assign (stmt))
3078 return false;
3080 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3081 return false;
3083 code = gimple_assign_rhs_code (stmt);
3085 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3086 || code == RROTATE_EXPR))
3087 return false;
3089 scalar_dest = gimple_assign_lhs (stmt);
3090 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3091 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3092 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3094 if (vect_print_dump_info (REPORT_DETAILS))
3095 fprintf (vect_dump, "bit-precision shifts not supported.");
3096 return false;
3099 op0 = gimple_assign_rhs1 (stmt);
3100 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3101 &def_stmt, &def, &dt[0], &vectype))
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 fprintf (vect_dump, "use not simple.");
3105 return false;
3107 /* If op0 is an external or constant def, use a vector type with
3108 the same size as the output vector type. */
3109 if (!vectype)
3110 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3111 if (vec_stmt)
3112 gcc_assert (vectype);
3113 if (!vectype)
3115 if (vect_print_dump_info (REPORT_DETAILS))
3117 fprintf (vect_dump, "no vectype for scalar type ");
3118 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3121 return false;
3124 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3125 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3126 if (nunits_out != nunits_in)
3127 return false;
3129 op1 = gimple_assign_rhs2 (stmt);
3130 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3131 &def, &dt[1], &op1_vectype))
3133 if (vect_print_dump_info (REPORT_DETAILS))
3134 fprintf (vect_dump, "use not simple.");
3135 return false;
3138 if (loop_vinfo)
3139 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3140 else
3141 vf = 1;
3143 /* Multiple types in SLP are handled by creating the appropriate number of
3144 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3145 case of SLP. */
3146 if (slp_node || PURE_SLP_STMT (stmt_info))
3147 ncopies = 1;
3148 else
3149 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3151 gcc_assert (ncopies >= 1);
3153 /* Determine whether the shift amount is a vector or a scalar. If the
3154 shift/rotate amount is a vector, use the vector/vector shift optabs. */
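/* For example, shifting every element by the same constant or
loop-invariant amount can use the optab_scalar form (vector shifted
by a scalar); if the shift count is itself defined by a vectorized
stmt, each element may be shifted by a different amount, so the
optab_vector (vector shifted by a vector) form is required.  */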
3156 if (dt[1] == vect_internal_def && !slp_node)
3157 scalar_shift_arg = false;
3158 else if (dt[1] == vect_constant_def
3159 || dt[1] == vect_external_def
3160 || dt[1] == vect_internal_def)
3162 /* In SLP, we need to check whether the shift count is the same
3163 in all the stmts; in loops, if it is a constant or invariant,
3164 it is always a scalar shift. */
3165 if (slp_node)
3167 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3168 gimple slpstmt;
3170 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3171 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3172 scalar_shift_arg = false;
3175 else
3177 if (vect_print_dump_info (REPORT_DETAILS))
3178 fprintf (vect_dump, "operand mode requires invariant argument.");
3179 return false;
3182 /* Vector shifted by vector. */
3183 if (!scalar_shift_arg)
3185 optab = optab_for_tree_code (code, vectype, optab_vector);
3186 if (vect_print_dump_info (REPORT_DETAILS))
3187 fprintf (vect_dump, "vector/vector shift/rotate found.");
3188 if (!op1_vectype)
3189 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3190 if (op1_vectype == NULL_TREE
3191 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3193 if (vect_print_dump_info (REPORT_DETAILS))
3194 fprintf (vect_dump, "unusable type for last operand in"
3195 " vector/vector shift/rotate.");
3196 return false;
3199 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
3200 whether it has a vector-shifted-by-vector insn. */
3201 else
3203 optab = optab_for_tree_code (code, vectype, optab_scalar);
3204 if (optab
3205 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3207 if (vect_print_dump_info (REPORT_DETAILS))
3208 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3210 else
3212 optab = optab_for_tree_code (code, vectype, optab_vector);
3213 if (optab
3214 && (optab_handler (optab, TYPE_MODE (vectype))
3215 != CODE_FOR_nothing))
3217 scalar_shift_arg = false;
3219 if (vect_print_dump_info (REPORT_DETAILS))
3220 fprintf (vect_dump, "vector/vector shift/rotate found.");
3222 /* Unlike the other binary operators, shifts/rotates have
3223 an rhs of type int rather than the same type as the lhs,
3224 so make sure the scalar has the right type when we are
3225 dealing with vectors of long long/long/short/char. */
3226 if (dt[1] == vect_constant_def)
3227 op1 = fold_convert (TREE_TYPE (vectype), op1);
3228 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3229 TREE_TYPE (op1)))
3231 if (slp_node
3232 && TYPE_MODE (TREE_TYPE (vectype))
3233 != TYPE_MODE (TREE_TYPE (op1)))
3235 if (vect_print_dump_info (REPORT_DETAILS))
3236 fprintf (vect_dump, "unusable type for last operand in"
3237 " vector/vector shift/rotate.");
3238 return false;
3240 if (vec_stmt && !slp_node)
3242 op1 = fold_convert (TREE_TYPE (vectype), op1);
3243 op1 = vect_init_vector (stmt, op1,
3244 TREE_TYPE (vectype), NULL);
3251 /* Supportable by target? */
3252 if (!optab)
3254 if (vect_print_dump_info (REPORT_DETAILS))
3255 fprintf (vect_dump, "no optab.");
3256 return false;
3258 vec_mode = TYPE_MODE (vectype);
3259 icode = (int) optab_handler (optab, vec_mode);
3260 if (icode == CODE_FOR_nothing)
3262 if (vect_print_dump_info (REPORT_DETAILS))
3263 fprintf (vect_dump, "op not supported by target.");
3264 /* Check only during analysis. */
3265 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3266 || (vf < vect_min_worthwhile_factor (code)
3267 && !vec_stmt))
3268 return false;
3269 if (vect_print_dump_info (REPORT_DETAILS))
3270 fprintf (vect_dump, "proceeding using word mode.");
3273 /* Worthwhile without SIMD support? Check only during analysis. */
3274 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3275 && vf < vect_min_worthwhile_factor (code)
3276 && !vec_stmt)
3278 if (vect_print_dump_info (REPORT_DETAILS))
3279 fprintf (vect_dump, "not worthwhile without SIMD support.");
3280 return false;
3283 if (!vec_stmt) /* transformation not required. */
3285 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3286 if (vect_print_dump_info (REPORT_DETAILS))
3287 fprintf (vect_dump, "=== vectorizable_shift ===");
3288 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3289 return true;
3292 /** Transform. **/
3294 if (vect_print_dump_info (REPORT_DETAILS))
3295 fprintf (vect_dump, "transform binary/unary operation.");
3297 /* Handle def. */
3298 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3300 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3301 created in the previous stages of the recursion, so no allocation is
3302 needed, except for the case of shift with scalar shift argument. In that
3303 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3304 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3305 In case of loop-based vectorization we allocate VECs of size 1. We
3306 allocate VEC_OPRNDS1 only in case of binary operation. */
3307 if (!slp_node)
3309 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3310 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3312 else if (scalar_shift_arg)
3313 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3315 prev_stmt_info = NULL;
3316 for (j = 0; j < ncopies; j++)
3318 /* Handle uses. */
3319 if (j == 0)
3321 if (scalar_shift_arg)
3323 /* Vector shl and shr insn patterns can be defined with scalar
3324 operand 2 (shift operand). In this case, use constant or loop
3325 invariant op1 directly, without extending it to vector mode
3326 first. */
3327 optab_op2_mode = insn_data[icode].operand[2].mode;
3328 if (!VECTOR_MODE_P (optab_op2_mode))
3330 if (vect_print_dump_info (REPORT_DETAILS))
3331 fprintf (vect_dump, "operand 1 using scalar mode.");
3332 vec_oprnd1 = op1;
3333 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3334 if (slp_node)
3336 /* Store vec_oprnd1 for every vector stmt to be created
3337 for SLP_NODE. We check during the analysis that all
3338 the shift arguments are the same.
3339 TODO: Allow different constants for different vector
3340 stmts generated for an SLP instance. */
3341 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3342 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3347 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3348 (a special case for certain kind of vector shifts); otherwise,
3349 operand 1 should be of a vector type (the usual case). */
3350 if (vec_oprnd1)
3351 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3352 slp_node, -1);
3353 else
3354 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3355 slp_node, -1);
3357 else
3358 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3360 /* Arguments are ready. Create the new vector stmt. */
3361 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3363 vop1 = VEC_index (tree, vec_oprnds1, i);
3364 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3365 new_temp = make_ssa_name (vec_dest, new_stmt);
3366 gimple_assign_set_lhs (new_stmt, new_temp);
3367 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3368 if (slp_node)
3369 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3372 if (slp_node)
3373 continue;
3375 if (j == 0)
3376 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3377 else
3378 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3379 prev_stmt_info = vinfo_for_stmt (new_stmt);
3382 VEC_free (tree, heap, vec_oprnds0);
3383 VEC_free (tree, heap, vec_oprnds1);
3385 return true;
3389 static tree permute_vec_elements (tree, tree, tree, gimple,
3390 gimple_stmt_iterator *);
3393 /* Function vectorizable_operation.
3395 Check if STMT performs a binary, unary or ternary operation that can
3396 be vectorized.
3397 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3398 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3399 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3401 static bool
3402 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3403 gimple *vec_stmt, slp_tree slp_node)
3405 tree vec_dest;
3406 tree scalar_dest;
3407 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3409 tree vectype;
3410 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3411 enum tree_code code;
3412 enum machine_mode vec_mode;
3413 tree new_temp;
3414 int op_type;
3415 optab optab;
3416 int icode;
3417 tree def;
3418 gimple def_stmt;
3419 enum vect_def_type dt[3]
3420 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3421 gimple new_stmt = NULL;
3422 stmt_vec_info prev_stmt_info;
3423 int nunits_in;
3424 int nunits_out;
3425 tree vectype_out;
3426 int ncopies;
3427 int j, i;
3428 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3429 tree vop0, vop1, vop2;
3430 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3431 int vf;
3433 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3434 return false;
3436 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3437 return false;
3439 /* Is STMT a vectorizable binary/unary operation? */
3440 if (!is_gimple_assign (stmt))
3441 return false;
3443 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3444 return false;
3446 code = gimple_assign_rhs_code (stmt);
3448 /* For pointer addition, we should use the normal plus for
3449 the vector addition. */
3450 if (code == POINTER_PLUS_EXPR)
3451 code = PLUS_EXPR;
3453 /* Support only unary, binary and ternary operations. */
3454 op_type = TREE_CODE_LENGTH (code);
3455 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3457 if (vect_print_dump_info (REPORT_DETAILS))
3458 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3459 op_type);
3460 return false;
3463 scalar_dest = gimple_assign_lhs (stmt);
3464 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3466 /* Most operations cannot handle bit-precision types without extra
3467 truncations. */
3468 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3469 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3470 /* Exceptions are bitwise binary operations. */
3471 && code != BIT_IOR_EXPR
3472 && code != BIT_XOR_EXPR
3473 && code != BIT_AND_EXPR)
3475 if (vect_print_dump_info (REPORT_DETAILS))
3476 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3477 return false;
3480 op0 = gimple_assign_rhs1 (stmt);
3481 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3482 &def_stmt, &def, &dt[0], &vectype))
3484 if (vect_print_dump_info (REPORT_DETAILS))
3485 fprintf (vect_dump, "use not simple.");
3486 return false;
3488 /* If op0 is an external or constant def, use a vector type with
3489 the same size as the output vector type. */
3490 if (!vectype)
3491 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3492 if (vec_stmt)
3493 gcc_assert (vectype);
3494 if (!vectype)
3496 if (vect_print_dump_info (REPORT_DETAILS))
3498 fprintf (vect_dump, "no vectype for scalar type ");
3499 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3502 return false;
3505 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3506 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3507 if (nunits_out != nunits_in)
3508 return false;
3510 if (op_type == binary_op || op_type == ternary_op)
3512 op1 = gimple_assign_rhs2 (stmt);
3513 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3514 &def, &dt[1]))
3516 if (vect_print_dump_info (REPORT_DETAILS))
3517 fprintf (vect_dump, "use not simple.");
3518 return false;
3521 if (op_type == ternary_op)
3523 op2 = gimple_assign_rhs3 (stmt);
3524 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3525 &def, &dt[2]))
3527 if (vect_print_dump_info (REPORT_DETAILS))
3528 fprintf (vect_dump, "use not simple.");
3529 return false;
3533 if (loop_vinfo)
3534 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3535 else
3536 vf = 1;
3538 /* Multiple types in SLP are handled by creating the appropriate number of
3539 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3540 case of SLP. */
3541 if (slp_node || PURE_SLP_STMT (stmt_info))
3542 ncopies = 1;
3543 else
3544 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3546 gcc_assert (ncopies >= 1);
3548 /* Shifts are handled in vectorizable_shift (). */
3549 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3550 || code == RROTATE_EXPR)
3551 return false;
3553 /* Supportable by target? */
3555 vec_mode = TYPE_MODE (vectype);
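/* For MULT_HIGHPART_EXPR we do not look up an optab here directly;
can_mult_highpart_p reports whether the target can perform the
operation, either with a highpart-multiply insn or by synthesizing it
(for instance from widening multiplies and a permutation), so we only
record whether it is doable at all.  */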
3556 if (code == MULT_HIGHPART_EXPR)
3558 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3559 icode = LAST_INSN_CODE;
3560 else
3561 icode = CODE_FOR_nothing;
3563 else
3565 optab = optab_for_tree_code (code, vectype, optab_default);
3566 if (!optab)
3568 if (vect_print_dump_info (REPORT_DETAILS))
3569 fprintf (vect_dump, "no optab.");
3570 return false;
3572 icode = (int) optab_handler (optab, vec_mode);
3575 if (icode == CODE_FOR_nothing)
3577 if (vect_print_dump_info (REPORT_DETAILS))
3578 fprintf (vect_dump, "op not supported by target.");
3579 /* Check only during analysis. */
3580 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3581 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3582 return false;
3583 if (vect_print_dump_info (REPORT_DETAILS))
3584 fprintf (vect_dump, "proceeding using word mode.");
3587 /* Worthwhile without SIMD support? Check only during analysis. */
3588 if (!VECTOR_MODE_P (vec_mode)
3589 && !vec_stmt
3590 && vf < vect_min_worthwhile_factor (code))
3592 if (vect_print_dump_info (REPORT_DETAILS))
3593 fprintf (vect_dump, "not worthwhile without SIMD support.");
3594 return false;
3597 if (!vec_stmt) /* transformation not required. */
3599 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3600 if (vect_print_dump_info (REPORT_DETAILS))
3601 fprintf (vect_dump, "=== vectorizable_operation ===");
3602 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3603 return true;
3606 /** Transform. **/
3608 if (vect_print_dump_info (REPORT_DETAILS))
3609 fprintf (vect_dump, "transform binary/unary operation.");
3611 /* Handle def. */
3612 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3614 /* In case the vectorization factor (VF) is bigger than the number
3615 of elements that we can fit in a vectype (nunits), we have to generate
3616 more than one vector stmt; i.e., we need to "unroll" the
3617 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3618 from one copy of the vector stmt to the next, in the field
3619 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3620 stages to find the correct vector defs to be used when vectorizing
3621 stmts that use the defs of the current stmt. The example below
3622 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3623 we need to create 4 vectorized stmts):
3625 before vectorization:
3626 RELATED_STMT VEC_STMT
3627 S1: x = memref - -
3628 S2: z = x + 1 - -
3630 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3631 there):
3632 RELATED_STMT VEC_STMT
3633 VS1_0: vx0 = memref0 VS1_1 -
3634 VS1_1: vx1 = memref1 VS1_2 -
3635 VS1_2: vx2 = memref2 VS1_3 -
3636 VS1_3: vx3 = memref3 - -
3637 S1: x = load - VS1_0
3638 S2: z = x + 1 - -
3640 step2: vectorize stmt S2 (done here):
3641 To vectorize stmt S2 we first need to find the relevant vector
3642 def for the first operand 'x'. This is, as usual, obtained from
3643 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3644 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3645 relevant vector def 'vx0'. Having found 'vx0' we can generate
3646 the vector stmt VS2_0, and as usual, record it in the
3647 STMT_VINFO_VEC_STMT of stmt S2.
3648 When creating the second copy (VS2_1), we obtain the relevant vector
3649 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3650 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3651 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3652 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3653 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3654 chain of stmts and pointers:
3655 RELATED_STMT VEC_STMT
3656 VS1_0: vx0 = memref0 VS1_1 -
3657 VS1_1: vx1 = memref1 VS1_2 -
3658 VS1_2: vx2 = memref2 VS1_3 -
3659 VS1_3: vx3 = memref3 - -
3660 S1: x = load - VS1_0
3661 VS2_0: vz0 = vx0 + v1 VS2_1 -
3662 VS2_1: vz1 = vx1 + v1 VS2_2 -
3663 VS2_2: vz2 = vx2 + v1 VS2_3 -
3664 VS2_3: vz3 = vx3 + v1 - -
3665 S2: z = x + 1 - VS2_0 */
3667 prev_stmt_info = NULL;
3668 for (j = 0; j < ncopies; j++)
3670 /* Handle uses. */
3671 if (j == 0)
3673 if (op_type == binary_op || op_type == ternary_op)
3674 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3675 slp_node, -1);
3676 else
3677 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3678 slp_node, -1);
3679 if (op_type == ternary_op)
3681 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3682 VEC_quick_push (tree, vec_oprnds2,
3683 vect_get_vec_def_for_operand (op2, stmt, NULL));
3686 else
3688 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3689 if (op_type == ternary_op)
3691 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3692 VEC_quick_push (tree, vec_oprnds2,
3693 vect_get_vec_def_for_stmt_copy (dt[2],
3694 vec_oprnd));
3698 /* Arguments are ready. Create the new vector stmt. */
3699 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3701 vop1 = ((op_type == binary_op || op_type == ternary_op)
3702 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3703 vop2 = ((op_type == ternary_op)
3704 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3705 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3706 vop0, vop1, vop2);
3707 new_temp = make_ssa_name (vec_dest, new_stmt);
3708 gimple_assign_set_lhs (new_stmt, new_temp);
3709 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3710 if (slp_node)
3711 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3714 if (slp_node)
3715 continue;
3717 if (j == 0)
3718 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3719 else
3720 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3721 prev_stmt_info = vinfo_for_stmt (new_stmt);
3724 VEC_free (tree, heap, vec_oprnds0);
3725 if (vec_oprnds1)
3726 VEC_free (tree, heap, vec_oprnds1);
3727 if (vec_oprnds2)
3728 VEC_free (tree, heap, vec_oprnds2);
3730 return true;
3734 /* Function vectorizable_store.
3736 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3737 can be vectorized.
3738 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3739 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3740 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
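/* As an illustrative sketch only (not part of the checks below): in a loop

     for (i = 0; i < N; i++)
       a[i] = b[i] + 1;

   the statement "a[i] = ..." is the store handled here; its LHS is an
   ARRAY_REF data-ref.  With nunits == 4 it is conceptually replaced by a
   vector store of the form

     *(vector(4) int *) &a[4*j] = vtmp;

   where the addressing and alignment handling (aligned, unaligned
   supported, or store-lanes) are chosen during the transformation
   below.  */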
3742 static bool
3743 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3744 slp_tree slp_node)
3746 tree scalar_dest;
3747 tree data_ref;
3748 tree op;
3749 tree vec_oprnd = NULL_TREE;
3750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3751 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3752 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3753 tree elem_type;
3754 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3755 struct loop *loop = NULL;
3756 enum machine_mode vec_mode;
3757 tree dummy;
3758 enum dr_alignment_support alignment_support_scheme;
3759 tree def;
3760 gimple def_stmt;
3761 enum vect_def_type dt;
3762 stmt_vec_info prev_stmt_info = NULL;
3763 tree dataref_ptr = NULL_TREE;
3764 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3765 int ncopies;
3766 int j;
3767 gimple next_stmt, first_stmt = NULL;
3768 bool grouped_store = false;
3769 bool store_lanes_p = false;
3770 unsigned int group_size, i;
3771 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3772 bool inv_p;
3773 VEC(tree,heap) *vec_oprnds = NULL;
3774 bool slp = (slp_node != NULL);
3775 unsigned int vec_num;
3776 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3777 tree aggr_type;
3779 if (loop_vinfo)
3780 loop = LOOP_VINFO_LOOP (loop_vinfo);
3782 /* Multiple types in SLP are handled by creating the appropriate number of
3783 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3784 case of SLP. */
3785 if (slp || PURE_SLP_STMT (stmt_info))
3786 ncopies = 1;
3787 else
3788 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3790 gcc_assert (ncopies >= 1);
3792 /* FORNOW. This restriction should be relaxed. */
3793 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3795 if (vect_print_dump_info (REPORT_DETAILS))
3796 fprintf (vect_dump, "multiple types in nested loop.");
3797 return false;
3800 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3801 return false;
3803 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3804 return false;
3806 /* Is vectorizable store? */
3808 if (!is_gimple_assign (stmt))
3809 return false;
3811 scalar_dest = gimple_assign_lhs (stmt);
3812 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3813 && is_pattern_stmt_p (stmt_info))
3814 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3815 if (TREE_CODE (scalar_dest) != ARRAY_REF
3816 && TREE_CODE (scalar_dest) != INDIRECT_REF
3817 && TREE_CODE (scalar_dest) != COMPONENT_REF
3818 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3819 && TREE_CODE (scalar_dest) != REALPART_EXPR
3820 && TREE_CODE (scalar_dest) != MEM_REF)
3821 return false;
3823 gcc_assert (gimple_assign_single_p (stmt));
3824 op = gimple_assign_rhs1 (stmt);
3825 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3826 &def, &dt))
3828 if (vect_print_dump_info (REPORT_DETAILS))
3829 fprintf (vect_dump, "use not simple.");
3830 return false;
3833 elem_type = TREE_TYPE (vectype);
3834 vec_mode = TYPE_MODE (vectype);
3836 /* FORNOW. In some cases we can vectorize even if the data-type is not
3837 supported (e.g. array initialization with 0). */
3838 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3839 return false;
3841 if (!STMT_VINFO_DATA_REF (stmt_info))
3842 return false;
3844 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3845 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3846 size_zero_node) < 0)
3848 if (vect_print_dump_info (REPORT_DETAILS))
3849 fprintf (vect_dump, "negative step for store.");
3850 return false;
3853 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3855 grouped_store = true;
3856 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3857 if (!slp && !PURE_SLP_STMT (stmt_info))
3859 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3860 if (vect_store_lanes_supported (vectype, group_size))
3861 store_lanes_p = true;
3862 else if (!vect_grouped_store_supported (vectype, group_size))
3863 return false;
3866 if (first_stmt == stmt)
3868 /* STMT is the leader of the group. Check the operands of all the
3869 stmts of the group. */
3870 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3871 while (next_stmt)
3873 gcc_assert (gimple_assign_single_p (next_stmt));
3874 op = gimple_assign_rhs1 (next_stmt);
3875 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3876 &def_stmt, &def, &dt))
3878 if (vect_print_dump_info (REPORT_DETAILS))
3879 fprintf (vect_dump, "use not simple.");
3880 return false;
3882 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3887 if (!vec_stmt) /* transformation not required. */
3889 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3890 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3891 NULL, NULL, NULL);
3892 return true;
3895 /** Transform. **/
3897 if (grouped_store)
3899 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3900 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3902 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3904 /* FORNOW */
3905 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3907 /* We vectorize all the stmts of the interleaving group when we
3908 reach the last stmt in the group. */
3909 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3910 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3911 && !slp)
3913 *vec_stmt = NULL;
3914 return true;
3917 if (slp)
3919 grouped_store = false;
3920 /* VEC_NUM is the number of vect stmts to be created for this
3921 group. */
3922 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3923 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3924 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3925 op = gimple_assign_rhs1 (first_stmt);
3927 else
3928 /* VEC_NUM is the number of vect stmts to be created for this
3929 group. */
3930 vec_num = group_size;
3932 else
3934 first_stmt = stmt;
3935 first_dr = dr;
3936 group_size = vec_num = 1;
3939 if (vect_print_dump_info (REPORT_DETAILS))
3940 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3942 dr_chain = VEC_alloc (tree, heap, group_size);
3943 oprnds = VEC_alloc (tree, heap, group_size);
3945 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3946 gcc_assert (alignment_support_scheme);
3947 /* Targets with store-lane instructions must not require explicit
3948 realignment. */
3949 gcc_assert (!store_lanes_p
3950 || alignment_support_scheme == dr_aligned
3951 || alignment_support_scheme == dr_unaligned_supported);
3953 if (store_lanes_p)
3954 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3955 else
3956 aggr_type = vectype;
3958 /* In case the vectorization factor (VF) is bigger than the number
3959 of elements that we can fit in a vectype (nunits), we have to generate
3960 more than one vector stmt - i.e - we need to "unroll" the
3961 vector stmt by a factor VF/nunits. For more details see documentation in
3962 vect_get_vec_def_for_copy_stmt. */
3964 /* In case of interleaving (non-unit grouped access):
3966 S1: &base + 2 = x2
3967 S2: &base = x0
3968 S3: &base + 1 = x1
3969 S4: &base + 3 = x3
3971 We create vectorized stores starting from the base address (the access of
3972 the first stmt in the chain, S2 in the above example) when the last store
3973 stmt of the chain (S4) is reached:
3975 VS1: &base = vx2
3976 VS2: &base + vec_size*1 = vx0
3977 VS3: &base + vec_size*2 = vx1
3978 VS4: &base + vec_size*3 = vx3
3980 Then permutation statements are generated:
3982 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3983 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3986 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3987 (the order of the data-refs in the output of vect_permute_store_chain
3988 corresponds to the order of scalar stmts in the interleaving chain - see
3989 the documentation of vect_permute_store_chain()).
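     (As an illustration of the masks above, assuming 8-element vectors:
     in VS5/VS6 the two input vectors are treated as one 16-element
     sequence, so mask indices 0-7 select elements of the first operand
     and 8-15 elements of the second; the masks {0, 8, 1, 9, ...} and
     {4, 12, 5, 13, ...} therefore interleave the two inputs element by
     element.)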
3991 In case of both multiple types and interleaving, the vector stores and
3992 permutation stmts above are created for every copy. The result vector stmts are
3993 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3994 STMT_VINFO_RELATED_STMT for the next copies.
3997 prev_stmt_info = NULL;
3998 for (j = 0; j < ncopies; j++)
4000 gimple new_stmt;
4001 gimple ptr_incr;
4003 if (j == 0)
4005 if (slp)
4007 /* Get vectorized arguments for SLP_NODE. */
4008 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4009 NULL, slp_node, -1);
4011 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4013 else
4015 /* For interleaved stores we collect vectorized defs for all the
4016 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4017 used as an input to vect_permute_store_chain(), and OPRNDS as
4018 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4020 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4021 OPRNDS are of size 1. */
4022 next_stmt = first_stmt;
4023 for (i = 0; i < group_size; i++)
4025 /* Since gaps are not supported for interleaved stores,
4026 GROUP_SIZE is the exact number of stmts in the chain.
4027 Therefore, NEXT_STMT can't be NULL_TREE. If there is
4028 no interleaving, GROUP_SIZE is 1, and only one
4029 iteration of the loop will be executed. */
4030 gcc_assert (next_stmt
4031 && gimple_assign_single_p (next_stmt));
4032 op = gimple_assign_rhs1 (next_stmt);
4034 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4035 NULL);
4036 VEC_quick_push (tree, dr_chain, vec_oprnd);
4037 VEC_quick_push (tree, oprnds, vec_oprnd);
4038 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4042 /* We should have caught mismatched types earlier. */
4043 gcc_assert (useless_type_conversion_p (vectype,
4044 TREE_TYPE (vec_oprnd)));
4045 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4046 NULL_TREE, &dummy, gsi,
4047 &ptr_incr, false, &inv_p);
4048 gcc_assert (bb_vinfo || !inv_p);
4050 else
4052 /* For interleaved stores we created vectorized defs for all the
4053 defs stored in OPRNDS in the previous iteration (previous copy).
4054 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4055 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4056 next copy.
4057 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4058 OPRNDS are of size 1. */
4059 for (i = 0; i < group_size; i++)
4061 op = VEC_index (tree, oprnds, i);
4062 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4063 &def, &dt);
4064 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4065 VEC_replace (tree, dr_chain, i, vec_oprnd);
4066 VEC_replace (tree, oprnds, i, vec_oprnd);
4068 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4069 TYPE_SIZE_UNIT (aggr_type));
4072 if (store_lanes_p)
4074 tree vec_array;
4076 /* Combine all the vectors into an array. */
4077 vec_array = create_vector_array (vectype, vec_num);
4078 for (i = 0; i < vec_num; i++)
4080 vec_oprnd = VEC_index (tree, dr_chain, i);
4081 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4084 /* Emit:
4085 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4086 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4087 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4088 gimple_call_set_lhs (new_stmt, data_ref);
4089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4091 else
4093 new_stmt = NULL;
4094 if (grouped_store)
4096 result_chain = VEC_alloc (tree, heap, group_size);
4097 /* Permute. */
4098 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4099 &result_chain);
4102 next_stmt = first_stmt;
4103 for (i = 0; i < vec_num; i++)
4105 unsigned align, misalign;
4107 if (i > 0)
4108 /* Bump the vector pointer. */
4109 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4110 stmt, NULL_TREE);
4112 if (slp)
4113 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4114 else if (grouped_store)
4115 /* For grouped stores vectorized defs are interleaved in
4116 vect_permute_store_chain(). */
4117 vec_oprnd = VEC_index (tree, result_chain, i);
4119 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4120 build_int_cst (reference_alias_ptr_type
4121 (DR_REF (first_dr)), 0));
4122 align = TYPE_ALIGN_UNIT (vectype);
4123 if (aligned_access_p (first_dr))
4124 misalign = 0;
4125 else if (DR_MISALIGNMENT (first_dr) == -1)
4127 TREE_TYPE (data_ref)
4128 = build_aligned_type (TREE_TYPE (data_ref),
4129 TYPE_ALIGN (elem_type));
4130 align = TYPE_ALIGN_UNIT (elem_type);
4131 misalign = 0;
4133 else
4135 TREE_TYPE (data_ref)
4136 = build_aligned_type (TREE_TYPE (data_ref),
4137 TYPE_ALIGN (elem_type));
4138 misalign = DR_MISALIGNMENT (first_dr);
4140 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4141 misalign);
4143 /* Arguments are ready. Create the new vector stmt. */
4144 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4145 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4147 if (slp)
4148 continue;
4150 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4151 if (!next_stmt)
4152 break;
4155 if (!slp)
4157 if (j == 0)
4158 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4159 else
4160 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4161 prev_stmt_info = vinfo_for_stmt (new_stmt);
4165 VEC_free (tree, heap, dr_chain);
4166 VEC_free (tree, heap, oprnds);
4167 if (result_chain)
4168 VEC_free (tree, heap, result_chain);
4169 if (vec_oprnds)
4170 VEC_free (tree, heap, vec_oprnds);
4172 return true;
4175 /* Given a vector type VECTYPE and permutation SEL returns
4176 the VECTOR_CST mask that implements the permutation of the
4177 vector elements. If that is impossible to do, returns NULL. */
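/* For example (illustrative only): with a 4-element vector type and
   SEL = {3, 2, 1, 0} the returned mask M makes

     VEC_PERM_EXPR <v, v, M>

   produce the element-reversed copy of v, while SEL = {1, 0, 3, 2}
   would swap adjacent pairs; element i of the result is element SEL[i]
   of the concatenation of the two input vectors.  */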
4179 tree
4180 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4182 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4183 int i, nunits;
4185 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4187 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4188 return NULL;
4190 mask_elt_type = lang_hooks.types.type_for_mode
4191 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4192 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4194 mask_elts = XALLOCAVEC (tree, nunits);
4195 for (i = nunits - 1; i >= 0; i--)
4196 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4197 mask_vec = build_vector (mask_type, mask_elts);
4199 return mask_vec;
4202 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4203 reversal of the vector elements. If that is impossible to do,
4204 returns NULL. */
4206 static tree
4207 perm_mask_for_reverse (tree vectype)
4209 int i, nunits;
4210 unsigned char *sel;
4212 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4213 sel = XALLOCAVEC (unsigned char, nunits);
4215 for (i = 0; i < nunits; ++i)
4216 sel[i] = nunits - 1 - i;
4218 return vect_gen_perm_mask (vectype, sel);
4221 /* Given a vector variable X and Y, that was generated for the scalar
4222 STMT, generate instructions to permute the vector elements of X and Y
4223 using permutation mask MASK_VEC, insert them at *GSI and return the
4224 permuted vector variable. */
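/* Illustrative example: with X = {a, b, c, d}, Y = {e, f, g, h} and
   MASK_VEC = {0, 4, 1, 5} the generated VEC_PERM_EXPR yields
   {a, e, b, f} (mask indices refer to the concatenation of X and Y).  */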
4226 static tree
4227 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4228 gimple_stmt_iterator *gsi)
4230 tree vectype = TREE_TYPE (x);
4231 tree perm_dest, data_ref;
4232 gimple perm_stmt;
4234 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4235 data_ref = make_ssa_name (perm_dest, NULL);
4237 /* Generate the permute statement. */
4238 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4239 x, y, mask_vec);
4240 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4242 return data_ref;
4245 /* vectorizable_load.
4247 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4248 can be vectorized.
4249 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4250 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4251 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4253 static bool
4254 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4255 slp_tree slp_node, slp_instance slp_node_instance)
4257 tree scalar_dest;
4258 tree vec_dest = NULL;
4259 tree data_ref = NULL;
4260 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4261 stmt_vec_info prev_stmt_info;
4262 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4263 struct loop *loop = NULL;
4264 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4265 bool nested_in_vect_loop = false;
4266 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4268 tree elem_type;
4269 tree new_temp;
4270 enum machine_mode mode;
4271 gimple new_stmt = NULL;
4272 tree dummy;
4273 enum dr_alignment_support alignment_support_scheme;
4274 tree dataref_ptr = NULL_TREE;
4275 gimple ptr_incr;
4276 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4277 int ncopies;
4278 int i, j, group_size;
4279 tree msq = NULL_TREE, lsq;
4280 tree offset = NULL_TREE;
4281 tree realignment_token = NULL_TREE;
4282 gimple phi = NULL;
4283 VEC(tree,heap) *dr_chain = NULL;
4284 bool grouped_load = false;
4285 bool load_lanes_p = false;
4286 gimple first_stmt;
4287 bool inv_p;
4288 bool negative = false;
4289 bool compute_in_loop = false;
4290 struct loop *at_loop;
4291 int vec_num;
4292 bool slp = (slp_node != NULL);
4293 bool slp_perm = false;
4294 enum tree_code code;
4295 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4296 int vf;
4297 tree aggr_type;
4298 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4299 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4300 tree stride_base, stride_step;
4301 int gather_scale = 1;
4302 enum vect_def_type gather_dt = vect_unknown_def_type;
4304 if (loop_vinfo)
4306 loop = LOOP_VINFO_LOOP (loop_vinfo);
4307 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4308 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4310 else
4311 vf = 1;
4313 /* Multiple types in SLP are handled by creating the appropriate number of
4314 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4315 case of SLP. */
4316 if (slp || PURE_SLP_STMT (stmt_info))
4317 ncopies = 1;
4318 else
4319 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4321 gcc_assert (ncopies >= 1);
4323 /* FORNOW. This restriction should be relaxed. */
4324 if (nested_in_vect_loop && ncopies > 1)
4326 if (vect_print_dump_info (REPORT_DETAILS))
4327 fprintf (vect_dump, "multiple types in nested loop.");
4328 return false;
4331 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4332 return false;
4334 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4335 return false;
4337 /* Is vectorizable load? */
4338 if (!is_gimple_assign (stmt))
4339 return false;
4341 scalar_dest = gimple_assign_lhs (stmt);
4342 if (TREE_CODE (scalar_dest) != SSA_NAME)
4343 return false;
4345 code = gimple_assign_rhs_code (stmt);
4346 if (code != ARRAY_REF
4347 && code != INDIRECT_REF
4348 && code != COMPONENT_REF
4349 && code != IMAGPART_EXPR
4350 && code != REALPART_EXPR
4351 && code != MEM_REF
4352 && TREE_CODE_CLASS (code) != tcc_declaration)
4353 return false;
4355 if (!STMT_VINFO_DATA_REF (stmt_info))
4356 return false;
4358 elem_type = TREE_TYPE (vectype);
4359 mode = TYPE_MODE (vectype);
4361 /* FORNOW. In some cases we can vectorize even if the data-type is not
4362 supported (e.g. data copies). */
4363 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4365 if (vect_print_dump_info (REPORT_DETAILS))
4366 fprintf (vect_dump, "Aligned load, but unsupported type.");
4367 return false;
4370 /* Check if the load is a part of an interleaving chain. */
4371 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4373 grouped_load = true;
4374 /* FORNOW */
4375 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4377 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4378 if (!slp && !PURE_SLP_STMT (stmt_info))
4380 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4381 if (vect_load_lanes_supported (vectype, group_size))
4382 load_lanes_p = true;
4383 else if (!vect_grouped_load_supported (vectype, group_size))
4384 return false;
4389 if (STMT_VINFO_GATHER_P (stmt_info))
4391 gimple def_stmt;
4392 tree def;
4393 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4394 &gather_off, &gather_scale);
4395 gcc_assert (gather_decl);
4396 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4397 &def_stmt, &def, &gather_dt,
4398 &gather_off_vectype))
4400 if (vect_print_dump_info (REPORT_DETAILS))
4401 fprintf (vect_dump, "gather index use not simple.");
4402 return false;
4405 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4407 if (!vect_check_strided_load (stmt, loop_vinfo,
4408 &stride_base, &stride_step))
4409 return false;
4411 else
4413 negative = tree_int_cst_compare (nested_in_vect_loop
4414 ? STMT_VINFO_DR_STEP (stmt_info)
4415 : DR_STEP (dr),
4416 size_zero_node) < 0;
4417 if (negative && ncopies > 1)
4419 if (vect_print_dump_info (REPORT_DETAILS))
4420 fprintf (vect_dump, "multiple types with negative step.");
4421 return false;
4424 if (negative)
4426 gcc_assert (!grouped_load);
4427 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4428 if (alignment_support_scheme != dr_aligned
4429 && alignment_support_scheme != dr_unaligned_supported)
4431 if (vect_print_dump_info (REPORT_DETAILS))
4432 fprintf (vect_dump, "negative step but alignment required.");
4433 return false;
4435 if (!perm_mask_for_reverse (vectype))
4437 if (vect_print_dump_info (REPORT_DETAILS))
4438 fprintf (vect_dump, "negative step and reversing not supported.");
4439 return false;
4444 if (!vec_stmt) /* transformation not required. */
4446 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4447 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4448 return true;
4451 if (vect_print_dump_info (REPORT_DETAILS))
4452 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4454 /** Transform. **/
4456 if (STMT_VINFO_GATHER_P (stmt_info))
4458 tree vec_oprnd0 = NULL_TREE, op;
4459 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4460 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4461 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4462 edge pe = loop_preheader_edge (loop);
4463 gimple_seq seq;
4464 basic_block new_bb;
4465 enum { NARROW, NONE, WIDEN } modifier;
4466 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
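      /* Summary of the cases below (illustrative): if the offset vector has
	 as many elements as the data vector, no adjustment is needed (NONE).
	 If the offset vector has twice as many elements, each gather consumes
	 only half of it, and PERM_MASK extracts the upper half for the
	 odd-numbered copies (WIDEN).  If the data vector has twice as many
	 elements as the offset vector, two gathers are needed per result
	 vector, so NCOPIES is doubled and consecutive pairs of partial
	 results are merged with PERM_MASK (NARROW).  */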
4468 if (nunits == gather_off_nunits)
4469 modifier = NONE;
4470 else if (nunits == gather_off_nunits / 2)
4472 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4473 modifier = WIDEN;
4475 for (i = 0; i < gather_off_nunits; ++i)
4476 sel[i] = i | nunits;
4478 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4479 gcc_assert (perm_mask != NULL_TREE);
4481 else if (nunits == gather_off_nunits * 2)
4483 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4484 modifier = NARROW;
4486 for (i = 0; i < nunits; ++i)
4487 sel[i] = i < gather_off_nunits
4488 ? i : i + nunits - gather_off_nunits;
4490 perm_mask = vect_gen_perm_mask (vectype, sel);
4491 gcc_assert (perm_mask != NULL_TREE);
4492 ncopies *= 2;
4494 else
4495 gcc_unreachable ();
4497 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4498 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4499 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4500 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4501 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4502 scaletype = TREE_VALUE (arglist);
4503 gcc_checking_assert (types_compatible_p (srctype, rettype)
4504 && types_compatible_p (srctype, masktype));
4506 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4508 ptr = fold_convert (ptrtype, gather_base);
4509 if (!is_gimple_min_invariant (ptr))
4511 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4512 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4513 gcc_assert (!new_bb);
4516 /* Currently we support only unconditional gather loads,
4517 so mask should be all ones. */
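	  /* (The all-ones value is built from its bit pattern: for an integer
	     mask element type -1 has all bits set, and for a floating-point
	     mask element type the same all-ones bit pattern is materialized
	     through real_from_target, since the gather expects a mask whose
	     bits, not whose numeric value, are all ones.)  */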
4518 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4519 mask = build_int_cst (TREE_TYPE (masktype), -1);
4520 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4522 REAL_VALUE_TYPE r;
4523 long tmp[6];
4524 for (j = 0; j < 6; ++j)
4525 tmp[j] = -1;
4526 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4527 mask = build_real (TREE_TYPE (masktype), r);
4529 else
4530 gcc_unreachable ();
4531 mask = build_vector_from_val (masktype, mask);
4532 mask = vect_init_vector (stmt, mask, masktype, NULL);
4534 scale = build_int_cst (scaletype, gather_scale);
4536 prev_stmt_info = NULL;
4537 for (j = 0; j < ncopies; ++j)
4539 if (modifier == WIDEN && (j & 1))
4540 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4541 perm_mask, stmt, gsi);
4542 else if (j == 0)
4543 op = vec_oprnd0
4544 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4545 else
4546 op = vec_oprnd0
4547 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4549 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4551 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4552 == TYPE_VECTOR_SUBPARTS (idxtype));
4553 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4554 var = make_ssa_name (var, NULL);
4555 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4556 new_stmt
4557 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4558 op, NULL_TREE);
4559 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4560 op = var;
4563 new_stmt
4564 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4566 if (!useless_type_conversion_p (vectype, rettype))
4568 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4569 == TYPE_VECTOR_SUBPARTS (rettype));
4570 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4571 op = make_ssa_name (var, new_stmt);
4572 gimple_call_set_lhs (new_stmt, op);
4573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4574 var = make_ssa_name (vec_dest, NULL);
4575 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4576 new_stmt
4577 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4578 NULL_TREE);
4580 else
4582 var = make_ssa_name (vec_dest, new_stmt);
4583 gimple_call_set_lhs (new_stmt, var);
4586 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4588 if (modifier == NARROW)
4590 if ((j & 1) == 0)
4592 prev_res = var;
4593 continue;
4595 var = permute_vec_elements (prev_res, var,
4596 perm_mask, stmt, gsi);
4597 new_stmt = SSA_NAME_DEF_STMT (var);
4600 if (prev_stmt_info == NULL)
4601 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4602 else
4603 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4604 prev_stmt_info = vinfo_for_stmt (new_stmt);
4606 return true;
4608 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4610 gimple_stmt_iterator incr_gsi;
4611 bool insert_after;
4612 gimple incr;
4613 tree offvar;
4614 tree ref = DR_REF (dr);
4615 tree ivstep;
4616 tree running_off;
4617 VEC(constructor_elt, gc) *v = NULL;
4618 gimple_seq stmts = NULL;
4620 gcc_assert (stride_base && stride_step);
4622 /* For a load with loop-invariant (but other than power-of-2)
4623 stride (i.e. not a grouped access) like so:
4625 for (i = 0; i < n; i += stride)
4626 ... = array[i];
4628 we generate a new induction variable and new accesses to
4629 form a new vector (or vectors, depending on ncopies):
4631 for (j = 0; ; j += VF*stride)
4632 tmp1 = array[j];
4633 tmp2 = array[j + stride];
4635 vectemp = {tmp1, tmp2, ...}
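     For instance (illustrative, with nunits == 4 and ncopies == 1) the code
     below emits the four scalar loads array[j], array[j + stride],
     array[j + 2*stride] and array[j + 3*stride], collects them in a
     CONSTRUCTOR, and materializes the vector with vect_init_vector, while
     the induction variable OFFVAR advances by VF*stride per loop
     iteration.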
4638 ivstep = stride_step;
4639 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4640 build_int_cst (TREE_TYPE (ivstep), vf));
4642 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4644 create_iv (stride_base, ivstep, NULL,
4645 loop, &incr_gsi, insert_after,
4646 &offvar, NULL);
4647 incr = gsi_stmt (incr_gsi);
4648 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4650 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4651 if (stmts)
4652 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4654 prev_stmt_info = NULL;
4655 running_off = offvar;
4656 for (j = 0; j < ncopies; j++)
4658 tree vec_inv;
4660 v = VEC_alloc (constructor_elt, gc, nunits);
4661 for (i = 0; i < nunits; i++)
4663 tree newref, newoff;
4664 gimple incr;
4665 if (TREE_CODE (ref) == ARRAY_REF)
4666 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4667 unshare_expr (TREE_OPERAND (ref, 0)),
4668 running_off,
4669 NULL_TREE, NULL_TREE);
4670 else
4671 newref = build2 (MEM_REF, TREE_TYPE (ref),
4672 running_off,
4673 TREE_OPERAND (ref, 1));
4675 newref = force_gimple_operand_gsi (gsi, newref, true,
4676 NULL_TREE, true,
4677 GSI_SAME_STMT);
4678 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4679 newoff = copy_ssa_name (running_off, NULL);
4680 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4681 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4682 running_off, stride_step);
4683 else
4684 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4685 running_off, stride_step);
4686 vect_finish_stmt_generation (stmt, incr, gsi);
4688 running_off = newoff;
4691 vec_inv = build_constructor (vectype, v);
4692 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4693 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4695 if (j == 0)
4696 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4697 else
4698 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4699 prev_stmt_info = vinfo_for_stmt (new_stmt);
4701 return true;
4704 if (grouped_load)
4706 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4707 if (slp
4708 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4709 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4710 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4712 /* Check if the chain of loads is already vectorized. */
4713 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4715 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4716 return true;
4718 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4719 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4721 /* VEC_NUM is the number of vect stmts to be created for this group. */
4722 if (slp)
4724 grouped_load = false;
4725 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4726 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4727 slp_perm = true;
4729 else
4730 vec_num = group_size;
4732 else
4734 first_stmt = stmt;
4735 first_dr = dr;
4736 group_size = vec_num = 1;
4739 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4740 gcc_assert (alignment_support_scheme);
4741 /* Targets with load-lane instructions must not require explicit
4742 realignment. */
4743 gcc_assert (!load_lanes_p
4744 || alignment_support_scheme == dr_aligned
4745 || alignment_support_scheme == dr_unaligned_supported);
4747 /* In case the vectorization factor (VF) is bigger than the number
4748 of elements that we can fit in a vectype (nunits), we have to generate
4749 more than one vector stmt - i.e - we need to "unroll" the
4750 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4751 from one copy of the vector stmt to the next, in the field
4752 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4753 stages to find the correct vector defs to be used when vectorizing
4754 stmts that use the defs of the current stmt. The example below
4755 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4756 need to create 4 vectorized stmts):
4758 before vectorization:
4759 RELATED_STMT VEC_STMT
4760 S1: x = memref - -
4761 S2: z = x + 1 - -
4763 step 1: vectorize stmt S1:
4764 We first create the vector stmt VS1_0, and, as usual, record a
4765 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4766 Next, we create the vector stmt VS1_1, and record a pointer to
4767 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4768 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4769 stmts and pointers:
4770 RELATED_STMT VEC_STMT
4771 VS1_0: vx0 = memref0 VS1_1 -
4772 VS1_1: vx1 = memref1 VS1_2 -
4773 VS1_2: vx2 = memref2 VS1_3 -
4774 VS1_3: vx3 = memref3 - -
4775 S1: x = load - VS1_0
4776 S2: z = x + 1 - -
4778 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4779 information recorded in the RELATED_STMT field is used to vectorize
4780 stmt S2. */
4782 /* In case of interleaving (non-unit grouped access):
4784 S1: x2 = &base + 2
4785 S2: x0 = &base
4786 S3: x1 = &base + 1
4787 S4: x3 = &base + 3
4789 Vectorized loads are created in the order of memory accesses
4790 starting from the access of the first stmt of the chain:
4792 VS1: vx0 = &base
4793 VS2: vx1 = &base + vec_size*1
4794 VS3: vx3 = &base + vec_size*2
4795 VS4: vx4 = &base + vec_size*3
4797 Then permutation statements are generated:
4799 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4800 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4803 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4804 (the order of the data-refs in the output of vect_permute_load_chain
4805 corresponds to the order of scalar stmts in the interleaving chain - see
4806 the documentation of vect_permute_load_chain()).
4807 The generation of permutation stmts and recording them in
4808 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4810 In case of both multiple types and interleaving, the vector loads and
4811 permutation stmts above are created for every copy. The result vector
4812 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4813 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4815 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4816 on a target that supports unaligned accesses (dr_unaligned_supported)
4817 we generate the following code:
4818 p = initial_addr;
4819 indx = 0;
4820 loop {
4821 p = p + indx * vectype_size;
4822 vec_dest = *(p);
4823 indx = indx + 1;
4826 Otherwise, the data reference is potentially unaligned on a target that
4827 does not support unaligned accesses (dr_explicit_realign_optimized);
4828 in that case we generate the following code, in which the data in each iteration is
4829 obtained by two vector loads, one from the previous iteration, and one
4830 from the current iteration:
4831 p1 = initial_addr;
4832 msq_init = *(floor(p1))
4833 p2 = initial_addr + VS - 1;
4834 realignment_token = call target_builtin;
4835 indx = 0;
4836 loop {
4837 p2 = p2 + indx * vectype_size
4838 lsq = *(floor(p2))
4839 vec_dest = realign_load (msq, lsq, realignment_token)
4840 indx = indx + 1;
4841 msq = lsq;
4842 } */
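   /* (Sketch, under that scheme's usual assumptions: with 16-byte vectors and
      an address misaligned by 4 bytes, msq is the aligned vector holding
      bytes 0..15 of the enclosing aligned blocks, lsq the aligned vector
      holding bytes 16..31, and realign_load combines the two according to
      realignment_token so that the result holds the requested bytes
      4..19.)  */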
4844 /* If the misalignment remains the same throughout the execution of the
4845 loop, we can create the init_addr and permutation mask at the loop
4846 preheader. Otherwise, they need to be created inside the loop.
4847 This can only occur when vectorizing memory accesses in the inner-loop
4848 nested within an outer-loop that is being vectorized. */
4850 if (nested_in_vect_loop
4851 && (TREE_INT_CST_LOW (DR_STEP (dr))
4852 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4854 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4855 compute_in_loop = true;
4858 if ((alignment_support_scheme == dr_explicit_realign_optimized
4859 || alignment_support_scheme == dr_explicit_realign)
4860 && !compute_in_loop)
4862 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4863 alignment_support_scheme, NULL_TREE,
4864 &at_loop);
4865 if (alignment_support_scheme == dr_explicit_realign_optimized)
4867 phi = SSA_NAME_DEF_STMT (msq);
4868 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4871 else
4872 at_loop = loop;
4874 if (negative)
4875 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4877 if (load_lanes_p)
4878 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4879 else
4880 aggr_type = vectype;
4882 prev_stmt_info = NULL;
4883 for (j = 0; j < ncopies; j++)
4885 /* 1. Create the vector or array pointer update chain. */
4886 if (j == 0)
4887 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4888 offset, &dummy, gsi,
4889 &ptr_incr, false, &inv_p);
4890 else
4891 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4892 TYPE_SIZE_UNIT (aggr_type));
4894 if (grouped_load || slp_perm)
4895 dr_chain = VEC_alloc (tree, heap, vec_num);
4897 if (load_lanes_p)
4899 tree vec_array;
4901 vec_array = create_vector_array (vectype, vec_num);
4903 /* Emit:
4904 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4905 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4906 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4907 gimple_call_set_lhs (new_stmt, vec_array);
4908 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4910 /* Extract each vector into an SSA_NAME. */
4911 for (i = 0; i < vec_num; i++)
4913 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4914 vec_array, i);
4915 VEC_quick_push (tree, dr_chain, new_temp);
4918 /* Record the mapping between SSA_NAMEs and statements. */
4919 vect_record_grouped_load_vectors (stmt, dr_chain);
4921 else
4923 for (i = 0; i < vec_num; i++)
4925 if (i > 0)
4926 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4927 stmt, NULL_TREE);
4929 /* 2. Create the vector-load in the loop. */
4930 switch (alignment_support_scheme)
4932 case dr_aligned:
4933 case dr_unaligned_supported:
4935 unsigned int align, misalign;
4937 data_ref
4938 = build2 (MEM_REF, vectype, dataref_ptr,
4939 build_int_cst (reference_alias_ptr_type
4940 (DR_REF (first_dr)), 0));
4941 align = TYPE_ALIGN_UNIT (vectype);
4942 if (alignment_support_scheme == dr_aligned)
4944 gcc_assert (aligned_access_p (first_dr));
4945 misalign = 0;
4947 else if (DR_MISALIGNMENT (first_dr) == -1)
4949 TREE_TYPE (data_ref)
4950 = build_aligned_type (TREE_TYPE (data_ref),
4951 TYPE_ALIGN (elem_type));
4952 align = TYPE_ALIGN_UNIT (elem_type);
4953 misalign = 0;
4955 else
4957 TREE_TYPE (data_ref)
4958 = build_aligned_type (TREE_TYPE (data_ref),
4959 TYPE_ALIGN (elem_type));
4960 misalign = DR_MISALIGNMENT (first_dr);
4962 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
4963 align, misalign);
4964 break;
4966 case dr_explicit_realign:
4968 tree ptr, bump;
4969 tree vs_minus_1;
4971 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4973 if (compute_in_loop)
4974 msq = vect_setup_realignment (first_stmt, gsi,
4975 &realignment_token,
4976 dr_explicit_realign,
4977 dataref_ptr, NULL);
4979 ptr = copy_ssa_name (dataref_ptr, NULL);
4980 new_stmt = gimple_build_assign_with_ops
4981 (BIT_AND_EXPR, ptr, dataref_ptr,
4982 build_int_cst
4983 (TREE_TYPE (dataref_ptr),
4984 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4986 data_ref
4987 = build2 (MEM_REF, vectype, ptr,
4988 build_int_cst (reference_alias_ptr_type
4989 (DR_REF (first_dr)), 0));
4990 vec_dest = vect_create_destination_var (scalar_dest,
4991 vectype);
4992 new_stmt = gimple_build_assign (vec_dest, data_ref);
4993 new_temp = make_ssa_name (vec_dest, new_stmt);
4994 gimple_assign_set_lhs (new_stmt, new_temp);
4995 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4996 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4997 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4998 msq = new_temp;
5000 bump = size_binop (MULT_EXPR, vs_minus_1,
5001 TYPE_SIZE_UNIT (elem_type));
5002 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5003 new_stmt = gimple_build_assign_with_ops
5004 (BIT_AND_EXPR, NULL_TREE, ptr,
5005 build_int_cst
5006 (TREE_TYPE (ptr),
5007 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5008 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5009 gimple_assign_set_lhs (new_stmt, ptr);
5010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5011 data_ref
5012 = build2 (MEM_REF, vectype, ptr,
5013 build_int_cst (reference_alias_ptr_type
5014 (DR_REF (first_dr)), 0));
5015 break;
5017 case dr_explicit_realign_optimized:
5018 new_temp = copy_ssa_name (dataref_ptr, NULL);
5019 new_stmt = gimple_build_assign_with_ops
5020 (BIT_AND_EXPR, new_temp, dataref_ptr,
5021 build_int_cst
5022 (TREE_TYPE (dataref_ptr),
5023 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5025 data_ref
5026 = build2 (MEM_REF, vectype, new_temp,
5027 build_int_cst (reference_alias_ptr_type
5028 (DR_REF (first_dr)), 0));
5029 break;
5030 default:
5031 gcc_unreachable ();
5033 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5034 new_stmt = gimple_build_assign (vec_dest, data_ref);
5035 new_temp = make_ssa_name (vec_dest, new_stmt);
5036 gimple_assign_set_lhs (new_stmt, new_temp);
5037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5039 /* 3. Handle explicit realignment if necessary/supported.
5040 Create in loop:
5041 vec_dest = realign_load (msq, lsq, realignment_token) */
5042 if (alignment_support_scheme == dr_explicit_realign_optimized
5043 || alignment_support_scheme == dr_explicit_realign)
5045 lsq = gimple_assign_lhs (new_stmt);
5046 if (!realignment_token)
5047 realignment_token = dataref_ptr;
5048 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5049 new_stmt
5050 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
5051 vec_dest, msq, lsq,
5052 realignment_token);
5053 new_temp = make_ssa_name (vec_dest, new_stmt);
5054 gimple_assign_set_lhs (new_stmt, new_temp);
5055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5057 if (alignment_support_scheme == dr_explicit_realign_optimized)
5059 gcc_assert (phi);
5060 if (i == vec_num - 1 && j == ncopies - 1)
5061 add_phi_arg (phi, lsq,
5062 loop_latch_edge (containing_loop),
5063 UNKNOWN_LOCATION);
5064 msq = lsq;
5068 /* 4. Handle invariant-load. */
5069 if (inv_p && !bb_vinfo)
5071 gimple_stmt_iterator gsi2 = *gsi;
5072 gcc_assert (!grouped_load);
5073 gsi_next (&gsi2);
5074 new_temp = vect_init_vector (stmt, scalar_dest,
5075 vectype, &gsi2);
5076 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5079 if (negative)
5081 tree perm_mask = perm_mask_for_reverse (vectype);
5082 new_temp = permute_vec_elements (new_temp, new_temp,
5083 perm_mask, stmt, gsi);
5084 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5087 /* Collect vector loads and later create their permutation in
5088 vect_transform_grouped_load (). */
5089 if (grouped_load || slp_perm)
5090 VEC_quick_push (tree, dr_chain, new_temp);
5092 /* Store vector loads in the corresponding SLP_NODE. */
5093 if (slp && !slp_perm)
5094 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5095 new_stmt);
5099 if (slp && !slp_perm)
5100 continue;
5102 if (slp_perm)
5104 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5105 slp_node_instance, false))
5107 VEC_free (tree, heap, dr_chain);
5108 return false;
5111 else
5113 if (grouped_load)
5115 if (!load_lanes_p)
5116 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5117 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5119 else
5121 if (j == 0)
5122 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5123 else
5124 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5125 prev_stmt_info = vinfo_for_stmt (new_stmt);
5128 if (dr_chain)
5129 VEC_free (tree, heap, dr_chain);
5132 return true;
5135 /* Function vect_is_simple_cond.
5137 Input:
5138 LOOP - the loop that is being vectorized.
5139 COND - Condition that is checked for simple use.
5141 Output:
5142 *COMP_VECTYPE - the vector type for the comparison.
5144 Returns whether a COND can be vectorized. Checks whether
5145 condition operands are supportable using vect_is_simple_use. */
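/* For instance (illustrative only): for a condition "a_5 > b_7" where both
   operands are SSA names defined by vectorizable stmts this returns true
   and sets *COMP_VECTYPE to their vector type; "a_5 > 3" is also accepted,
   the constant operand contributing no vectype.  A condition whose operand
   is, say, a VAR_DECL is rejected.  */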
5147 static bool
5148 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5149 bb_vec_info bb_vinfo, tree *comp_vectype)
5151 tree lhs, rhs;
5152 tree def;
5153 enum vect_def_type dt;
5154 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5156 if (!COMPARISON_CLASS_P (cond))
5157 return false;
5159 lhs = TREE_OPERAND (cond, 0);
5160 rhs = TREE_OPERAND (cond, 1);
5162 if (TREE_CODE (lhs) == SSA_NAME)
5164 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5165 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5166 &lhs_def_stmt, &def, &dt, &vectype1))
5167 return false;
5169 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5170 && TREE_CODE (lhs) != FIXED_CST)
5171 return false;
5173 if (TREE_CODE (rhs) == SSA_NAME)
5175 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5176 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5177 &rhs_def_stmt, &def, &dt, &vectype2))
5178 return false;
5180 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5181 && TREE_CODE (rhs) != FIXED_CST)
5182 return false;
5184 *comp_vectype = vectype1 ? vectype1 : vectype2;
5185 return true;
5188 /* vectorizable_condition.
5190 Check if STMT is conditional modify expression that can be vectorized.
5191 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5192 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5193 at GSI.
5195 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5196 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5197 the else clause if it is 2).
5199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
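/* Illustrative example of the transformation performed here: the scalar
   statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes, for each copy,

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vector defs of the corresponding scalar
   operands; when STMT is part of a nested cycle, REDUC_DEF takes the place
   of vc or vd according to REDUC_INDEX.  */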
5201 bool
5202 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5203 gimple *vec_stmt, tree reduc_def, int reduc_index,
5204 slp_tree slp_node)
5206 tree scalar_dest = NULL_TREE;
5207 tree vec_dest = NULL_TREE;
5208 tree cond_expr, then_clause, else_clause;
5209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5210 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5211 tree comp_vectype = NULL_TREE;
5212 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5213 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5214 tree vec_compare, vec_cond_expr;
5215 tree new_temp;
5216 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5217 tree def;
5218 enum vect_def_type dt, dts[4];
5219 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5220 int ncopies;
5221 enum tree_code code;
5222 stmt_vec_info prev_stmt_info = NULL;
5223 int i, j;
5224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5225 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5226 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5228 if (slp_node || PURE_SLP_STMT (stmt_info))
5229 ncopies = 1;
5230 else
5231 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5233 gcc_assert (ncopies >= 1);
5234 if (reduc_index && ncopies > 1)
5235 return false; /* FORNOW */
5237 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5238 return false;
5240 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5241 return false;
5243 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5244 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5245 && reduc_def))
5246 return false;
5248 /* FORNOW: not yet supported. */
5249 if (STMT_VINFO_LIVE_P (stmt_info))
5251 if (vect_print_dump_info (REPORT_DETAILS))
5252 fprintf (vect_dump, "value used after loop.");
5253 return false;
5256 /* Is vectorizable conditional operation? */
5257 if (!is_gimple_assign (stmt))
5258 return false;
5260 code = gimple_assign_rhs_code (stmt);
5262 if (code != COND_EXPR)
5263 return false;
5265 cond_expr = gimple_assign_rhs1 (stmt);
5266 then_clause = gimple_assign_rhs2 (stmt);
5267 else_clause = gimple_assign_rhs3 (stmt);
5269 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5270 &comp_vectype)
5271 || !comp_vectype)
5272 return false;
5274 if (TREE_CODE (then_clause) == SSA_NAME)
5276 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5277 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5278 &then_def_stmt, &def, &dt))
5279 return false;
5281 else if (TREE_CODE (then_clause) != INTEGER_CST
5282 && TREE_CODE (then_clause) != REAL_CST
5283 && TREE_CODE (then_clause) != FIXED_CST)
5284 return false;
5286 if (TREE_CODE (else_clause) == SSA_NAME)
5288 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5289 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5290 &else_def_stmt, &def, &dt))
5291 return false;
5293 else if (TREE_CODE (else_clause) != INTEGER_CST
5294 && TREE_CODE (else_clause) != REAL_CST
5295 && TREE_CODE (else_clause) != FIXED_CST)
5296 return false;
5298 if (!vec_stmt)
5300 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5301 return expand_vec_cond_expr_p (vectype, comp_vectype);
5304 /* Transform. */
5306 if (!slp_node)
5308 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5309 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5310 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5311 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5314 /* Handle def. */
5315 scalar_dest = gimple_assign_lhs (stmt);
5316 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5318 /* Handle cond expr. */
5319 for (j = 0; j < ncopies; j++)
5321 gimple new_stmt = NULL;
5322 if (j == 0)
5324 if (slp_node)
5326 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5327 VEC (slp_void_p, heap) *vec_defs;
5329 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5330 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5331 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5332 VEC_safe_push (tree, heap, ops, then_clause);
5333 VEC_safe_push (tree, heap, ops, else_clause);
5334 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5335 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5336 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5337 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5338 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5340 VEC_free (tree, heap, ops);
5341 VEC_free (slp_void_p, heap, vec_defs);
5343 else
5345 gimple gtemp;
5346 vec_cond_lhs =
5347 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5348 stmt, NULL);
5349 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5350 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5352 vec_cond_rhs =
5353 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5354 stmt, NULL);
5355 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5356 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5357 if (reduc_index == 1)
5358 vec_then_clause = reduc_def;
5359 else
5361 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5362 stmt, NULL);
5363 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5364 NULL, &gtemp, &def, &dts[2]);
5366 if (reduc_index == 2)
5367 vec_else_clause = reduc_def;
5368 else
5370 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5371 stmt, NULL);
5372 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5373 NULL, &gtemp, &def, &dts[3]);
5377 else
5379 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5380 VEC_pop (tree, vec_oprnds0));
5381 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5382 VEC_pop (tree, vec_oprnds1));
5383 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5384 VEC_pop (tree, vec_oprnds2));
5385 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5386 VEC_pop (tree, vec_oprnds3));
5389 if (!slp_node)
5391 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5392 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5393 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5394 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5397 /* Arguments are ready. Create the new vector stmt. */
5398 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5400 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5401 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5402 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5404 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5405 vec_cond_lhs, vec_cond_rhs);
5406 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5407 vec_compare, vec_then_clause, vec_else_clause);
5409 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5410 new_temp = make_ssa_name (vec_dest, new_stmt);
5411 gimple_assign_set_lhs (new_stmt, new_temp);
5412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5413 if (slp_node)
5414 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5417 if (slp_node)
5418 continue;
5420 if (j == 0)
5421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5422 else
5423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5425 prev_stmt_info = vinfo_for_stmt (new_stmt);
5428 VEC_free (tree, heap, vec_oprnds0);
5429 VEC_free (tree, heap, vec_oprnds1);
5430 VEC_free (tree, heap, vec_oprnds2);
5431 VEC_free (tree, heap, vec_oprnds3);
5433 return true;
5437 /* Make sure the statement is vectorizable. */
5439 bool
5440 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5443 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5444 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5445 bool ok;
5446 tree scalar_type, vectype;
5447 gimple pattern_stmt;
5448 gimple_seq pattern_def_seq;
5450 if (vect_print_dump_info (REPORT_DETAILS))
5452 fprintf (vect_dump, "==> examining statement: ");
5453 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5456 if (gimple_has_volatile_ops (stmt))
5458 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5459 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5461 return false;
5464 /* Skip stmts that do not need to be vectorized. In loops this is expected
5465 to include:
5466 - the COND_EXPR which is the loop exit condition
5467 - any LABEL_EXPRs in the loop
5468 - computations that are used only for array indexing or loop control.
5469 In basic blocks we only analyze statements that are a part of some SLP
5470 instance; therefore, all the statements are relevant.
5472 A pattern statement needs to be analyzed instead of the original statement
5473 if the original statement is not relevant. Otherwise, we analyze both
5474 statements. In basic blocks we are called from some SLP instance
5475 traversal; in that case we don't analyze the pattern stmts instead, since
5476 the pattern stmts will already be part of the SLP instance. */
5478 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5479 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5480 && !STMT_VINFO_LIVE_P (stmt_info))
5482 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5483 && pattern_stmt
5484 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5485 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5487 /* Analyze PATTERN_STMT instead of the original stmt. */
5488 stmt = pattern_stmt;
5489 stmt_info = vinfo_for_stmt (pattern_stmt);
5490 if (vect_print_dump_info (REPORT_DETAILS))
5492 fprintf (vect_dump, "==> examining pattern statement: ");
5493 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5496 else
5498 if (vect_print_dump_info (REPORT_DETAILS))
5499 fprintf (vect_dump, "irrelevant.");
5501 return true;
5504 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5505 && node == NULL
5506 && pattern_stmt
5507 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5508 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5510 /* Analyze PATTERN_STMT too. */
5511 if (vect_print_dump_info (REPORT_DETAILS))
5513 fprintf (vect_dump, "==> examining pattern statement: ");
5514 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5517 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5518 return false;
5521 if (is_pattern_stmt_p (stmt_info)
5522 && node == NULL
5523 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5525 gimple_stmt_iterator si;
5527 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5529 gimple pattern_def_stmt = gsi_stmt (si);
5530 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5531 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5533 /* Analyze def stmt of STMT if it's a pattern stmt. */
5534 if (vect_print_dump_info (REPORT_DETAILS))
5536 fprintf (vect_dump, "==> examining pattern def statement: ");
5537 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5540 if (!vect_analyze_stmt (pattern_def_stmt,
5541 need_to_vectorize, node))
5542 return false;
5547 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5549 case vect_internal_def:
5550 break;
5552 case vect_reduction_def:
5553 case vect_nested_cycle:
5554 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5555 || relevance == vect_used_in_outer_by_reduction
5556 || relevance == vect_unused_in_scope));
5557 break;
5559 case vect_induction_def:
5560 case vect_constant_def:
5561 case vect_external_def:
5562 case vect_unknown_def_type:
5563 default:
5564 gcc_unreachable ();
5567 if (bb_vinfo)
5569 gcc_assert (PURE_SLP_STMT (stmt_info));
5571 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5572 if (vect_print_dump_info (REPORT_DETAILS))
5574 fprintf (vect_dump, "get vectype for scalar type: ");
5575 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5578 vectype = get_vectype_for_scalar_type (scalar_type);
5579 if (!vectype)
5581 if (vect_print_dump_info (REPORT_DETAILS))
5583 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5584 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5586 return false;
5589 if (vect_print_dump_info (REPORT_DETAILS))
5591 fprintf (vect_dump, "vectype: ");
5592 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5595 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5598 if (STMT_VINFO_RELEVANT_P (stmt_info))
5600 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5601 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5602 *need_to_vectorize = true;
5605 ok = true;
5606 if (!bb_vinfo
5607 && (STMT_VINFO_RELEVANT_P (stmt_info)
5608 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5609 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5610 || vectorizable_shift (stmt, NULL, NULL, NULL)
5611 || vectorizable_operation (stmt, NULL, NULL, NULL)
5612 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5613 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5614 || vectorizable_call (stmt, NULL, NULL, NULL)
5615 || vectorizable_store (stmt, NULL, NULL, NULL)
5616 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5617 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5618 else
5620 if (bb_vinfo)
5621 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5622 || vectorizable_shift (stmt, NULL, NULL, node)
5623 || vectorizable_operation (stmt, NULL, NULL, node)
5624 || vectorizable_assignment (stmt, NULL, NULL, node)
5625 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5626 || vectorizable_call (stmt, NULL, NULL, node)
5627 || vectorizable_store (stmt, NULL, NULL, node)
5628 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5631 if (!ok)
5633 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5635 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5636 fprintf (vect_dump, "supported: ");
5637 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5640 return false;
5643 if (bb_vinfo)
5644 return true;
5646 /* Stmts that are (also) "live" (i.e. used outside the loop)
5647 need extra handling, except for vectorizable reductions. */
5648 if (STMT_VINFO_LIVE_P (stmt_info)
5649 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5650 ok = vectorizable_live_operation (stmt, NULL, NULL);
5652 if (!ok)
5654 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5656 fprintf (vect_dump, "not vectorized: live stmt not ");
5657 fprintf (vect_dump, "supported: ");
5658 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5661 return false;
5664 return true;
5668 /* Function vect_transform_stmt.
5670 Create a vectorized stmt to replace STMT, and insert it at GSI. */
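/* Editorial sketch (hedged, hypothetical call site): the boolean result
   tells the loop/SLP drivers that STMT was a store, so the scalar store
   (or the whole store group, once its last member is reached) can be
   removed after vectorization:

     bool grouped_store = false;
     bool is_store = vect_transform_stmt (stmt, &gsi, &grouped_store,
                                          NULL, NULL);

   The actual removal happens in the callers, not in this function.  */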
5672 bool
5673 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5674 bool *grouped_store, slp_tree slp_node,
5675 slp_instance slp_node_instance)
5677 bool is_store = false;
5678 gimple vec_stmt = NULL;
5679 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5680 bool done;
5682 switch (STMT_VINFO_TYPE (stmt_info))
5684 case type_demotion_vec_info_type:
5685 case type_promotion_vec_info_type:
5686 case type_conversion_vec_info_type:
5687 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5688 gcc_assert (done);
5689 break;
5691 case induc_vec_info_type:
5692 gcc_assert (!slp_node);
5693 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5694 gcc_assert (done);
5695 break;
5697 case shift_vec_info_type:
5698 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5699 gcc_assert (done);
5700 break;
5702 case op_vec_info_type:
5703 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5704 gcc_assert (done);
5705 break;
5707 case assignment_vec_info_type:
5708 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5709 gcc_assert (done);
5710 break;
5712 case load_vec_info_type:
5713 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5714 slp_node_instance);
5715 gcc_assert (done);
5716 break;
5718 case store_vec_info_type:
5719 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5720 gcc_assert (done);
5721 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5723 /* In case of interleaving, the whole chain is vectorized when the
5724 last store in the chain is reached. Store stmts before the last
5725 one are skipped, and their vec_stmt_info shouldn't be freed
5726 meanwhile. */
5727 *grouped_store = true;
5728 if (STMT_VINFO_VEC_STMT (stmt_info))
5729 is_store = true;
5731 else
5732 is_store = true;
5733 break;
5735 case condition_vec_info_type:
5736 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5737 gcc_assert (done);
5738 break;
5740 case call_vec_info_type:
5741 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5742 stmt = gsi_stmt (*gsi);
5743 break;
5745 case reduc_vec_info_type:
5746 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5747 gcc_assert (done);
5748 break;
5750 default:
5751 if (!STMT_VINFO_LIVE_P (stmt_info))
5753 if (vect_print_dump_info (REPORT_DETAILS))
5754 fprintf (vect_dump, "stmt not supported.");
5755 gcc_unreachable ();
5759 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5760 is being vectorized, but outside the immediately enclosing loop. */
5761 if (vec_stmt
5762 && STMT_VINFO_LOOP_VINFO (stmt_info)
5763 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5764 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5765 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5766 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5767 || STMT_VINFO_RELEVANT (stmt_info) ==
5768 vect_used_in_outer_by_reduction))
5770 struct loop *innerloop = LOOP_VINFO_LOOP (
5771 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5772 imm_use_iterator imm_iter;
5773 use_operand_p use_p;
5774 tree scalar_dest;
5775 gimple exit_phi;
5777 if (vect_print_dump_info (REPORT_DETAILS))
5778 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5780 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5781 (to be used when vectorizing outer-loop stmts that use the DEF of
5782 STMT). */
5783 if (gimple_code (stmt) == GIMPLE_PHI)
5784 scalar_dest = PHI_RESULT (stmt);
5785 else
5786 scalar_dest = gimple_assign_lhs (stmt);
5788 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5790 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5792 exit_phi = USE_STMT (use_p);
5793 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5798 /* Handle stmts whose DEF is used outside the loop-nest that is
5799 being vectorized. */
5800 if (STMT_VINFO_LIVE_P (stmt_info)
5801 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5803 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5804 gcc_assert (done);
5807 if (vec_stmt)
5808 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5810 return is_store;
5814 /* Remove a group of stores (for SLP or interleaving), free their
5815 stmt_vec_info. */
5817 void
5818 vect_remove_stores (gimple first_stmt)
5820 gimple next = first_stmt;
5821 gimple tmp;
5822 gimple_stmt_iterator next_si;
5824 while (next)
5826 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5828 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5829 if (is_pattern_stmt_p (stmt_info))
5830 next = STMT_VINFO_RELATED_STMT (stmt_info);
5831 /* Free the attached stmt_vec_info and remove the stmt. */
5832 next_si = gsi_for_stmt (next);
5833 unlink_stmt_vdef (next);
5834 gsi_remove (&next_si, true);
5835 release_defs (next);
5836 free_stmt_vec_info (next);
5837 next = tmp;
5842 /* Function new_stmt_vec_info.
5844 Create and initialize a new stmt_vec_info struct for STMT. */
5846 stmt_vec_info
5847 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5848 bb_vec_info bb_vinfo)
5850 stmt_vec_info res;
5851 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5853 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5854 STMT_VINFO_STMT (res) = stmt;
5855 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5856 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5857 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5858 STMT_VINFO_LIVE_P (res) = false;
5859 STMT_VINFO_VECTYPE (res) = NULL;
5860 STMT_VINFO_VEC_STMT (res) = NULL;
5861 STMT_VINFO_VECTORIZABLE (res) = true;
5862 STMT_VINFO_IN_PATTERN_P (res) = false;
5863 STMT_VINFO_RELATED_STMT (res) = NULL;
5864 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5865 STMT_VINFO_DATA_REF (res) = NULL;
5867 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5868 STMT_VINFO_DR_OFFSET (res) = NULL;
5869 STMT_VINFO_DR_INIT (res) = NULL;
5870 STMT_VINFO_DR_STEP (res) = NULL;
5871 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5873 if (gimple_code (stmt) == GIMPLE_PHI
5874 && is_loop_header_bb_p (gimple_bb (stmt)))
5875 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5876 else
5877 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5879 STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
5880 STMT_SLP_TYPE (res) = loop_vect;
5881 GROUP_FIRST_ELEMENT (res) = NULL;
5882 GROUP_NEXT_ELEMENT (res) = NULL;
5883 GROUP_SIZE (res) = 0;
5884 GROUP_STORE_COUNT (res) = 0;
5885 GROUP_GAP (res) = 0;
5886 GROUP_SAME_DR_STMT (res) = NULL;
5887 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5889 return res;
5893 /* Create the vector holding stmt_vec_info structs. */
5895 void
5896 init_stmt_vec_info_vec (void)
5898 gcc_assert (!stmt_vec_info_vec);
5899 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5903 /* Free the vector holding stmt_vec_info structs. */
5905 void
5906 free_stmt_vec_info_vec (void)
5908 gcc_assert (stmt_vec_info_vec);
5909 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5913 /* Free stmt vectorization related info. */
5915 void
5916 free_stmt_vec_info (gimple stmt)
5918 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5920 if (!stmt_info)
5921 return;
5923 /* Check if this statement has a related "pattern stmt"
5924 (introduced by the vectorizer during the pattern recognition
5925 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5926 too. */
5927 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5929 stmt_vec_info patt_info
5930 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5931 if (patt_info)
5933 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5934 if (seq)
5936 gimple_stmt_iterator si;
5937 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5938 free_stmt_vec_info (gsi_stmt (si));
5940 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5944 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5945 set_vinfo_for_stmt (stmt, NULL);
5946 free (stmt_info);
5950 /* Function get_vectype_for_scalar_type_and_size.
5952 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5953 by the target. */
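/* Editorial example (hedged; assumes a 4-byte 'int' and a target with
   16-byte vectors): a call such as

     tree v = get_vectype_for_scalar_type_and_size (integer_type_node, 16);

   would return a 4-element integer vector type.  Passing SIZE == 0 lets
   the target's preferred SIMD mode choose the width, and NULL_TREE is
   returned when no suitable vector mode exists.  */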
5955 static tree
5956 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5958 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5959 enum machine_mode simd_mode;
5960 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5961 int nunits;
5962 tree vectype;
5964 if (nbytes == 0)
5965 return NULL_TREE;
5967 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5968 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5969 return NULL_TREE;
5971 /* We can't build a vector type of elements with alignment bigger than
5972 their size. */
5973 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5974 return NULL_TREE;
5976 /* For vector types of elements whose mode precision doesn't
5977 match their type's precision we use an element type of mode
5978 precision. The vectorization routines will have to make sure
5979 they support the proper result truncation/extension.
5980 We also make sure to build vector types with INTEGER_TYPE
5981 component type only. */
5982 if (INTEGRAL_TYPE_P (scalar_type)
5983 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5984 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5985 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5986 TYPE_UNSIGNED (scalar_type));
5988 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5989 When the component mode passes the above test simply use a type
5990 corresponding to that mode. The theory is that any use that
5991 would cause problems with this will disable vectorization anyway. */
5992 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5993 && !INTEGRAL_TYPE_P (scalar_type)
5994 && !POINTER_TYPE_P (scalar_type))
5995 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5997 /* If no size was supplied use the mode the target prefers. Otherwise
5998 look up a vector mode of the specified size. */
5999 if (size == 0)
6000 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6001 else
6002 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6003 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6004 if (nunits <= 1)
6005 return NULL_TREE;
6007 vectype = build_vector_type (scalar_type, nunits);
6008 if (vect_print_dump_info (REPORT_DETAILS))
6010 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
6011 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
6014 if (!vectype)
6015 return NULL_TREE;
6017 if (vect_print_dump_info (REPORT_DETAILS))
6019 fprintf (vect_dump, "vectype: ");
6020 print_generic_expr (vect_dump, vectype, TDF_SLIM);
6023 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6024 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6026 if (vect_print_dump_info (REPORT_DETAILS))
6027 fprintf (vect_dump, "mode not supported by target.");
6028 return NULL_TREE;
6031 return vectype;
6034 unsigned int current_vector_size;
6036 /* Function get_vectype_for_scalar_type.
6038 Returns the vector type corresponding to SCALAR_TYPE as supported
6039 by the target. */
6041 tree
6042 get_vectype_for_scalar_type (tree scalar_type)
6044 tree vectype;
6045 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6046 current_vector_size);
6047 if (vectype
6048 && current_vector_size == 0)
6049 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6050 return vectype;
6053 /* Function get_same_sized_vectype
6055 Returns a vector type corresponding to SCALAR_TYPE with the same
6056 size as VECTOR_TYPE, if supported by the target. */
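/* Editorial example (hedged; V4SI_TYPE below is a placeholder for a
   16-byte integer vector type, not a name used in this file):

     tree vecf = get_same_sized_vectype (float_type_node, v4si_type);

   would typically yield a 4 x float vector type, or NULL_TREE if the
   target has no matching vector mode.  */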
6058 tree
6059 get_same_sized_vectype (tree scalar_type, tree vector_type)
6061 return get_vectype_for_scalar_type_and_size
6062 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6065 /* Function vect_is_simple_use.
6067 Input:
6068 LOOP_VINFO - the vect info of the loop that is being vectorized.
6069 BB_VINFO - the vect info of the basic block that is being vectorized.
6070 OPERAND - operand of STMT in the loop or bb.
6071 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6073 Returns whether a stmt with OPERAND can be vectorized.
6074 For loops, supportable operands are constants, loop invariants, and operands
6075 that are defined by the current iteration of the loop. Unsupportable
6076 operands are those that are defined by a previous iteration of the loop (as
6077 is the case in reduction/induction computations).
6078 For basic blocks, supportable operands are constants and bb invariants.
6079 For now, operands defined outside the basic block are not supported. */
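/* Editorial usage sketch (hedged, hypothetical caller): a typical check on
   an assignment operand looks like

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;
     if (!vect_is_simple_use (gimple_assign_rhs1 (stmt), stmt, loop_vinfo,
                              NULL, &def_stmt, &def, &dt))
       return false;

   On success DT classifies the operand (constant, external, internal def,
   reduction, and so on) and DEF/DEF_STMT describe where it is defined.  */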
6081 bool
6082 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6083 bb_vec_info bb_vinfo, gimple *def_stmt,
6084 tree *def, enum vect_def_type *dt)
6086 basic_block bb;
6087 stmt_vec_info stmt_vinfo;
6088 struct loop *loop = NULL;
6090 if (loop_vinfo)
6091 loop = LOOP_VINFO_LOOP (loop_vinfo);
6093 *def_stmt = NULL;
6094 *def = NULL_TREE;
6096 if (vect_print_dump_info (REPORT_DETAILS))
6098 fprintf (vect_dump, "vect_is_simple_use: operand ");
6099 print_generic_expr (vect_dump, operand, TDF_SLIM);
6102 if (CONSTANT_CLASS_P (operand))
6104 *dt = vect_constant_def;
6105 return true;
6108 if (is_gimple_min_invariant (operand))
6110 *def = operand;
6111 *dt = vect_external_def;
6112 return true;
6115 if (TREE_CODE (operand) == PAREN_EXPR)
6117 if (vect_print_dump_info (REPORT_DETAILS))
6118 fprintf (vect_dump, "non-associatable copy.");
6119 operand = TREE_OPERAND (operand, 0);
6122 if (TREE_CODE (operand) != SSA_NAME)
6124 if (vect_print_dump_info (REPORT_DETAILS))
6125 fprintf (vect_dump, "not ssa-name.");
6126 return false;
6129 *def_stmt = SSA_NAME_DEF_STMT (operand);
6130 if (*def_stmt == NULL)
6132 if (vect_print_dump_info (REPORT_DETAILS))
6133 fprintf (vect_dump, "no def_stmt.");
6134 return false;
6137 if (vect_print_dump_info (REPORT_DETAILS))
6139 fprintf (vect_dump, "def_stmt: ");
6140 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
6143 /* An empty stmt is expected only in the case of a function argument
6144 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
6145 if (gimple_nop_p (*def_stmt))
6147 *def = operand;
6148 *dt = vect_external_def;
6149 return true;
6152 bb = gimple_bb (*def_stmt);
6154 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6155 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6156 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6157 *dt = vect_external_def;
6158 else
6160 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6161 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6164 if (*dt == vect_unknown_def_type
6165 || (stmt
6166 && *dt == vect_double_reduction_def
6167 && gimple_code (stmt) != GIMPLE_PHI))
6169 if (vect_print_dump_info (REPORT_DETAILS))
6170 fprintf (vect_dump, "Unsupported pattern.");
6171 return false;
6174 if (vect_print_dump_info (REPORT_DETAILS))
6175 fprintf (vect_dump, "type of def: %d.",*dt);
6177 switch (gimple_code (*def_stmt))
6179 case GIMPLE_PHI:
6180 *def = gimple_phi_result (*def_stmt);
6181 break;
6183 case GIMPLE_ASSIGN:
6184 *def = gimple_assign_lhs (*def_stmt);
6185 break;
6187 case GIMPLE_CALL:
6188 *def = gimple_call_lhs (*def_stmt);
6189 if (*def != NULL)
6190 break;
6191 /* FALLTHRU */
6192 default:
6193 if (vect_print_dump_info (REPORT_DETAILS))
6194 fprintf (vect_dump, "unsupported defining stmt: ");
6195 return false;
6198 return true;
6201 /* Function vect_is_simple_use_1.
6203 Same as vect_is_simple_use but also determines the vector operand
6204 type of OPERAND and stores it to *VECTYPE. If the definition of
6205 OPERAND is vect_uninitialized_def, vect_constant_def or
6206 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6207 is responsible for computing the best suited vector type for the
6208 scalar operand. */
6210 bool
6211 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6212 bb_vec_info bb_vinfo, gimple *def_stmt,
6213 tree *def, enum vect_def_type *dt, tree *vectype)
6215 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6216 def, dt))
6217 return false;
6219 /* Now get a vector type if the def is internal, otherwise supply
6220 NULL_TREE and leave it up to the caller to figure out a proper
6221 type for the use stmt. */
6222 if (*dt == vect_internal_def
6223 || *dt == vect_induction_def
6224 || *dt == vect_reduction_def
6225 || *dt == vect_double_reduction_def
6226 || *dt == vect_nested_cycle)
6228 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6230 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6231 && !STMT_VINFO_RELEVANT (stmt_info)
6232 && !STMT_VINFO_LIVE_P (stmt_info))
6233 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6235 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6236 gcc_assert (*vectype != NULL_TREE);
6238 else if (*dt == vect_uninitialized_def
6239 || *dt == vect_constant_def
6240 || *dt == vect_external_def)
6241 *vectype = NULL_TREE;
6242 else
6243 gcc_unreachable ();
6245 return true;
6249 /* Function supportable_widening_operation
6251 Check whether an operation represented by the code CODE is a
6252 widening operation that is supported by the target platform in
6253 vector form (i.e., when operating on arguments of type VECTYPE_IN
6254 producing a result of type VECTYPE_OUT).
6256 Widening operations we currently support are NOP (CONVERT), FLOAT,
6257 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
6258 by the target platform either directly (via vector tree-codes), or via
6259 target builtins.
6261 Output:
6262 - CODE1 and CODE2 are codes of vector operations to be used when
6263 vectorizing the operation, if available.
6264 - MULTI_STEP_CVT determines the number of required intermediate steps in
6265 case of multi-step conversion (like char->short->int - in that case
6266 MULTI_STEP_CVT will be 1).
6267 - INTERM_TYPES contains the intermediate type required to perform the
6268 widening operation (short in the above example). */
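/* Editorial illustration (hedged, not part of the original file; STMT and
   the vector-type names below are placeholders): widening a QImode multiply
   to HImode on a target that only provides the lo/hi optabs comes back with
   CODE1 = VEC_WIDEN_MULT_LO_EXPR, CODE2 = VEC_WIDEN_MULT_HI_EXPR and
   MULTI_STEP_CVT == 0.  A char -> int conversion instead needs one
   intermediate short vector type, so a hypothetical caller would see

     enum tree_code c1, c2;
     int steps;
     VEC (tree, heap) *itypes = NULL;
     if (supportable_widening_operation (NOP_EXPR, stmt, int_vectype,
                                         char_vectype, &c1, &c2, &steps,
                                         &itypes))
       gcc_assert (steps == 1);

   where INT_VECTYPE and CHAR_VECTYPE stand for the wide and narrow vector
   types, assuming the target supports the unpack optabs involved.  */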
6270 bool
6271 supportable_widening_operation (enum tree_code code, gimple stmt,
6272 tree vectype_out, tree vectype_in,
6273 enum tree_code *code1, enum tree_code *code2,
6274 int *multi_step_cvt,
6275 VEC (tree, heap) **interm_types)
6277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6278 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6279 struct loop *vect_loop = NULL;
6280 enum machine_mode vec_mode;
6281 enum insn_code icode1, icode2;
6282 optab optab1, optab2;
6283 tree vectype = vectype_in;
6284 tree wide_vectype = vectype_out;
6285 enum tree_code c1, c2;
6286 int i;
6287 tree prev_type, intermediate_type;
6288 enum machine_mode intermediate_mode, prev_mode;
6289 optab optab3, optab4;
6291 *multi_step_cvt = 0;
6292 if (loop_info)
6293 vect_loop = LOOP_VINFO_LOOP (loop_info);
6295 switch (code)
6297 case WIDEN_MULT_EXPR:
6298 /* The result of a vectorized widening operation usually requires
6299 two vectors (because the widened results do not fit into one vector).
6300 The generated vector results would normally be expected to be
6301 generated in the same order as in the original scalar computation,
6302 i.e. if 8 results are generated in each vector iteration, they are
6303 to be organized as follows:
6304 vect1: [res1,res2,res3,res4],
6305 vect2: [res5,res6,res7,res8].
6307 However, in the special case that the result of the widening
6308 operation is used in a reduction computation only, the order doesn't
6309 matter (because when vectorizing a reduction we change the order of
6310 the computation). Some targets can take advantage of this and
6311 generate more efficient code. For example, targets like Altivec,
6312 that support widen_mult using a sequence of {mult_even,mult_odd}
6313 generate the following vectors:
6314 vect1: [res1,res3,res5,res7],
6315 vect2: [res2,res4,res6,res8].
6317 When vectorizing outer-loops, we execute the inner-loop sequentially
6318 (each vectorized inner-loop iteration contributes to VF outer-loop
6319 iterations in parallel). We therefore don't allow changing the
6320 order of the computation in the inner-loop during outer-loop
6321 vectorization. */
6322 /* TODO: Another case in which order doesn't *really* matter is when we
6323 widen and then contract again, e.g. (short)((int)x * y >> 8).
6324 Normally, pack_trunc performs an even/odd permute, whereas the
6325 repack from an even/odd expansion would be an interleave, which
6326 would be significantly simpler for e.g. AVX2. */
6327 /* In any case, in order to avoid duplicating the code below, recurse
6328 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6329 are properly set up for the caller. If we fail, we'll continue with
6330 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6331 if (vect_loop
6332 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6333 && !nested_in_vect_loop_p (vect_loop, stmt)
6334 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6335 stmt, vectype_out, vectype_in,
6336 code1, code2, multi_step_cvt,
6337 interm_types))
6338 return true;
6339 c1 = VEC_WIDEN_MULT_LO_EXPR;
6340 c2 = VEC_WIDEN_MULT_HI_EXPR;
6341 break;
6343 case VEC_WIDEN_MULT_EVEN_EXPR:
6344 /* Support the recursion induced just above. */
6345 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6346 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6347 break;
6349 case WIDEN_LSHIFT_EXPR:
6350 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6351 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6352 break;
6354 CASE_CONVERT:
6355 c1 = VEC_UNPACK_LO_EXPR;
6356 c2 = VEC_UNPACK_HI_EXPR;
6357 break;
6359 case FLOAT_EXPR:
6360 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6361 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6362 break;
6364 case FIX_TRUNC_EXPR:
6365 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6366 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6367 computing the operation. */
6368 return false;
6370 default:
6371 gcc_unreachable ();
6374 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6376 enum tree_code ctmp = c1;
6377 c1 = c2;
6378 c2 = ctmp;
6381 if (code == FIX_TRUNC_EXPR)
6383 /* The signedness is determined from the output operand. */
6384 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6385 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6387 else
6389 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6390 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6393 if (!optab1 || !optab2)
6394 return false;
6396 vec_mode = TYPE_MODE (vectype);
6397 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6398 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6399 return false;
6401 *code1 = c1;
6402 *code2 = c2;
6404 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6405 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6406 return true;
6408 /* Check if it's a multi-step conversion that can be done using intermediate
6409 types. */
6411 prev_type = vectype;
6412 prev_mode = vec_mode;
6414 if (!CONVERT_EXPR_CODE_P (code))
6415 return false;
6417 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6418 intermediate steps in the promotion sequence. We try
6419 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6420 not. */
6421 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6422 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6424 intermediate_mode = insn_data[icode1].operand[0].mode;
6425 intermediate_type
6426 = lang_hooks.types.type_for_mode (intermediate_mode,
6427 TYPE_UNSIGNED (prev_type));
6428 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6429 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6431 if (!optab3 || !optab4
6432 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6433 || insn_data[icode1].operand[0].mode != intermediate_mode
6434 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6435 || insn_data[icode2].operand[0].mode != intermediate_mode
6436 || ((icode1 = optab_handler (optab3, intermediate_mode))
6437 == CODE_FOR_nothing)
6438 || ((icode2 = optab_handler (optab4, intermediate_mode))
6439 == CODE_FOR_nothing))
6440 break;
6442 VEC_quick_push (tree, *interm_types, intermediate_type);
6443 (*multi_step_cvt)++;
6445 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6446 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6447 return true;
6449 prev_type = intermediate_type;
6450 prev_mode = intermediate_mode;
6453 VEC_free (tree, heap, *interm_types);
6454 return false;
6458 /* Function supportable_narrowing_operation
6460 Check whether an operation represented by the code CODE is a
6461 narrowing operation that is supported by the target platform in
6462 vector form (i.e., when operating on arguments of type VECTYPE_IN
6463 and producing a result of type VECTYPE_OUT).
6465 Narrowing operations we currently support are NOP (CONVERT) and
6466 FIX_TRUNC. This function checks if these operations are supported by
6467 the target platform directly via vector tree-codes.
6469 Output:
6470 - CODE1 is the code of a vector operation to be used when
6471 vectorizing the operation, if available.
6472 - MULTI_STEP_CVT determines the number of required intermediate steps in
6473 case of multi-step conversion (like int->short->char - in that case
6474 MULTI_STEP_CVT will be 1).
6475 - INTERM_TYPES contains the intermediate type required to perform the
6476 narrowing operation (short in the above example). */
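/* Editorial usage sketch (hedged; CHAR_VECTYPE and INT_VECTYPE are
   placeholder names, not identifiers from this file):

     enum tree_code c1;
     int steps;
     VEC (tree, heap) *itypes = NULL;
     if (supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
                                          &c1, &steps, &itypes))
       gcc_assert (steps == 1 && VEC_length (tree, itypes) == 1);

   For an int -> char conversion the single intermediate type would be the
   short vector type, assuming the target provides VEC_PACK_TRUNC_EXPR for
   the modes involved.  */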
6478 bool
6479 supportable_narrowing_operation (enum tree_code code,
6480 tree vectype_out, tree vectype_in,
6481 enum tree_code *code1, int *multi_step_cvt,
6482 VEC (tree, heap) **interm_types)
6484 enum machine_mode vec_mode;
6485 enum insn_code icode1;
6486 optab optab1, interm_optab;
6487 tree vectype = vectype_in;
6488 tree narrow_vectype = vectype_out;
6489 enum tree_code c1;
6490 tree intermediate_type;
6491 enum machine_mode intermediate_mode, prev_mode;
6492 int i;
6493 bool uns;
6495 *multi_step_cvt = 0;
6496 switch (code)
6498 CASE_CONVERT:
6499 c1 = VEC_PACK_TRUNC_EXPR;
6500 break;
6502 case FIX_TRUNC_EXPR:
6503 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6504 break;
6506 case FLOAT_EXPR:
6507 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6508 tree code and optabs used for computing the operation. */
6509 return false;
6511 default:
6512 gcc_unreachable ();
6515 if (code == FIX_TRUNC_EXPR)
6516 /* The signedness is determined from the output operand. */
6517 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6518 else
6519 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6521 if (!optab1)
6522 return false;
6524 vec_mode = TYPE_MODE (vectype);
6525 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6526 return false;
6528 *code1 = c1;
6530 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6531 return true;
6533 /* Check if it's a multi-step conversion that can be done using intermediate
6534 types. */
6535 prev_mode = vec_mode;
6536 if (code == FIX_TRUNC_EXPR)
6537 uns = TYPE_UNSIGNED (vectype_out);
6538 else
6539 uns = TYPE_UNSIGNED (vectype);
6541 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6542 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6543 costly than signed. */
6544 if (code == FIX_TRUNC_EXPR && uns)
6546 enum insn_code icode2;
6548 intermediate_type
6549 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6550 interm_optab
6551 = optab_for_tree_code (c1, intermediate_type, optab_default);
6552 if (interm_optab != unknown_optab
6553 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
6554 && insn_data[icode1].operand[0].mode
6555 == insn_data[icode2].operand[0].mode)
6557 uns = false;
6558 optab1 = interm_optab;
6559 icode1 = icode2;
6563 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6564 intermediate steps in the narrowing sequence. We try
6565 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6566 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6567 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6569 intermediate_mode = insn_data[icode1].operand[0].mode;
6570 intermediate_type
6571 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6572 interm_optab
6573 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6574 optab_default);
6575 if (!interm_optab
6576 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6577 || insn_data[icode1].operand[0].mode != intermediate_mode
6578 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6579 == CODE_FOR_nothing))
6580 break;
6582 VEC_quick_push (tree, *interm_types, intermediate_type);
6583 (*multi_step_cvt)++;
6585 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6586 return true;
6588 prev_mode = intermediate_mode;
6589 optab1 = interm_optab;
6592 VEC_free (tree, heap, *interm_types);
6593 return false;