gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "optabs.h"
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
39 #include "dumpfile.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
49 return STMT_VINFO_VECTYPE (stmt_info);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
57 gimple stmt = STMT_VINFO_STMT (stmt_info);
58 basic_block bb = gimple_bb (stmt);
59 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60 struct loop* loop;
62 if (!loop_vinfo)
63 return false;
65 loop = LOOP_VINFO_LOOP (loop_vinfo);
67 return (bb->loop_father == loop->inner);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
76 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 int misalign, enum vect_cost_model_location where)
79 if (body_cost_vec)
81 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
82 add_stmt_info_to_vec (body_cost_vec, count, kind,
83 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
84 misalign);
85 return (unsigned)
86 (builtin_vectorization_cost (kind, vectype, misalign) * count);
89 else
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93 void *target_cost_data;
95 if (loop_vinfo)
96 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97 else
98 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
100 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
101 misalign, where);
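/* Editorial sketch, not part of the original sources: the two branches
   above implement deferred versus immediate costing.  While still in the
   analysis phase a caller typically passes a cost vector and receives only
   the generic estimate, e.g.

     unsigned est = record_stmt_cost (&body_cost_vec, 1, vector_stmt,
                                      stmt_info, 0, vect_body);

   whereas passing a NULL vector routes the statement directly to the
   target's add_stmt_cost hook through the per-loop or per-bb cost data.  */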
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
184 enum vect_relevant relevant, bool live_p,
185 bool used_in_pattern)
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190 gimple pattern_stmt;
192 if (vect_print_dump_info (REPORT_DETAILS))
193 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
195 /* If this stmt is an original stmt in a pattern, we might need to mark its
196 related pattern stmt instead of the original stmt. However, such stmts
 197   may have their own uses that are not in any pattern; in such cases the
198 stmt itself should be marked. */
199 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
201 bool found = false;
202 if (!used_in_pattern)
204 imm_use_iterator imm_iter;
205 use_operand_p use_p;
206 gimple use_stmt;
207 tree lhs;
208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
209 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
211 if (is_gimple_assign (stmt))
212 lhs = gimple_assign_lhs (stmt);
213 else
214 lhs = gimple_call_lhs (stmt);
 216       /* This use is outside the pattern; if LHS has other uses that are
217 pattern uses, we should mark the stmt itself, and not the pattern
218 stmt. */
219 if (TREE_CODE (lhs) == SSA_NAME)
220 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
222 if (is_gimple_debug (USE_STMT (use_p)))
223 continue;
224 use_stmt = USE_STMT (use_p);
226 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
227 continue;
229 if (vinfo_for_stmt (use_stmt)
230 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
232 found = true;
233 break;
238 if (!found)
240 /* This is the last stmt in a sequence that was detected as a
241 pattern that can potentially be vectorized. Don't mark the stmt
242 as relevant/live because it's not going to be vectorized.
243 Instead mark the pattern-stmt that replaces it. */
245 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
247 if (vect_print_dump_info (REPORT_DETAILS))
248 fprintf (vect_dump, "last stmt in pattern. don't mark"
249 " relevant/live.");
250 stmt_info = vinfo_for_stmt (pattern_stmt);
251 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
252 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
253 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
254 stmt = pattern_stmt;
258 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
259 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
260 STMT_VINFO_RELEVANT (stmt_info) = relevant;
262 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
263 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
265 if (vect_print_dump_info (REPORT_DETAILS))
266 fprintf (vect_dump, "already marked relevant/live.");
267 return;
270 VEC_safe_push (gimple, heap, *worklist, stmt);
274 /* Function vect_stmt_relevant_p.
276 Return true if STMT in loop that is represented by LOOP_VINFO is
277 "relevant for vectorization".
279 A stmt is considered "relevant for vectorization" if:
280 - it has uses outside the loop.
281 - it has vdefs (it alters memory).
 282    - it is a control stmt in the loop (other than the exit condition).
284 CHECKME: what other side effects would the vectorizer allow? */
286 static bool
287 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
288 enum vect_relevant *relevant, bool *live_p)
290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
291 ssa_op_iter op_iter;
292 imm_use_iterator imm_iter;
293 use_operand_p use_p;
294 def_operand_p def_p;
296 *relevant = vect_unused_in_scope;
297 *live_p = false;
299 /* cond stmt other than loop exit cond. */
300 if (is_ctrl_stmt (stmt)
301 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
302 != loop_exit_ctrl_vec_info_type)
303 *relevant = vect_used_in_scope;
305 /* changing memory. */
306 if (gimple_code (stmt) != GIMPLE_PHI)
307 if (gimple_vdef (stmt))
309 if (vect_print_dump_info (REPORT_DETAILS))
310 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
311 *relevant = vect_used_in_scope;
314 /* uses outside the loop. */
315 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
317 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
319 basic_block bb = gimple_bb (USE_STMT (use_p));
320 if (!flow_bb_inside_loop_p (loop, bb))
322 if (vect_print_dump_info (REPORT_DETAILS))
323 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
325 if (is_gimple_debug (USE_STMT (use_p)))
326 continue;
328 /* We expect all such uses to be in the loop exit phis
 329              (because of loop-closed SSA form).  */
330 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
331 gcc_assert (bb == single_exit (loop)->dest);
333 *live_p = true;
338 return (*live_p || *relevant);
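/* Editorial example, not from the original sources: in a loop such as

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + c[i];    <-- has a vdef, so *relevant is set
         s += b[i];             <-- s is read after the loop
       }

   the store is relevant because it alters memory, while the summation is
   marked live because its result is used outside the loop, through the
   loop-closed exit phi.  */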
342 /* Function exist_non_indexing_operands_for_use_p
344 USE is one of the uses attached to STMT. Check if USE is
345 used in STMT for anything other than indexing an array. */
347 static bool
348 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
350 tree operand;
351 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
353 /* USE corresponds to some operand in STMT. If there is no data
354 reference in STMT, then any operand that corresponds to USE
355 is not indexing an array. */
356 if (!STMT_VINFO_DATA_REF (stmt_info))
357 return true;
 359   /* STMT has a data_ref. FORNOW this means that it is of one of
360 the following forms:
361 -1- ARRAY_REF = var
362 -2- var = ARRAY_REF
363 (This should have been verified in analyze_data_refs).
365 'var' in the second case corresponds to a def, not a use,
366 so USE cannot correspond to any operands that are not used
367 for array indexing.
369 Therefore, all we need to check is if STMT falls into the
370 first case, and whether var corresponds to USE. */
372 if (!gimple_assign_copy_p (stmt))
373 return false;
374 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
375 return false;
376 operand = gimple_assign_rhs1 (stmt);
377 if (TREE_CODE (operand) != SSA_NAME)
378 return false;
380 if (operand == use)
381 return true;
383 return false;
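/* Editorial example, not from the original sources: for a store of the
   form "a[i_3] = x_5" the stmt is a single-rhs assignment whose lhs is not
   an SSA_NAME, so the function returns true exactly when USE is x_5, the
   stored value.  For a load "x_5 = a[i_3]" the lhs is an SSA_NAME and the
   function returns false: any USE reaching such a stmt can only take part
   in the address computation.  */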
388 Function process_use.
390 Inputs:
391 - a USE in STMT in a loop represented by LOOP_VINFO
392 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
393 that defined USE. This is done by calling mark_relevant and passing it
394 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
395 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
396 be performed.
398 Outputs:
399 Generally, LIVE_P and RELEVANT are used to define the liveness and
400 relevance info of the DEF_STMT of this USE:
401 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
402 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
403 Exceptions:
404 - case 1: If USE is used only for address computations (e.g. array indexing),
405 which does not need to be directly vectorized, then the liveness/relevance
406 of the respective DEF_STMT is left unchanged.
407 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 408    skip DEF_STMT because it has already been processed.
409 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
410 be modified accordingly.
412 Return true if everything is as expected. Return false otherwise. */
414 static bool
415 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
416 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
417 bool force)
419 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
420 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
421 stmt_vec_info dstmt_vinfo;
422 basic_block bb, def_bb;
423 tree def;
424 gimple def_stmt;
425 enum vect_def_type dt;
427 /* case 1: we are only interested in uses that need to be vectorized. Uses
428 that are used for address computation are not considered relevant. */
429 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
430 return true;
432 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
435 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
436 return false;
439 if (!def_stmt || gimple_nop_p (def_stmt))
440 return true;
442 def_bb = gimple_bb (def_stmt);
443 if (!flow_bb_inside_loop_p (loop, def_bb))
445 if (vect_print_dump_info (REPORT_DETAILS))
446 fprintf (vect_dump, "def_stmt is out of loop.");
447 return true;
450 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
451 DEF_STMT must have already been processed, because this should be the
452 only way that STMT, which is a reduction-phi, was put in the worklist,
453 as there should be no other uses for DEF_STMT in the loop. So we just
454 check that everything is as expected, and we are done. */
455 dstmt_vinfo = vinfo_for_stmt (def_stmt);
456 bb = gimple_bb (stmt);
457 if (gimple_code (stmt) == GIMPLE_PHI
458 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
459 && gimple_code (def_stmt) != GIMPLE_PHI
460 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
461 && bb->loop_father == def_bb->loop_father)
463 if (vect_print_dump_info (REPORT_DETAILS))
464 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
465 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
466 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
467 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
468 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
469 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
470 return true;
473 /* case 3a: outer-loop stmt defining an inner-loop stmt:
474 outer-loop-header-bb:
475 d = def_stmt
476 inner-loop:
477 stmt # use (d)
478 outer-loop-tail-bb:
479 ... */
480 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
482 if (vect_print_dump_info (REPORT_DETAILS))
483 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
485 switch (relevant)
487 case vect_unused_in_scope:
488 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
489 vect_used_in_scope : vect_unused_in_scope;
490 break;
492 case vect_used_in_outer_by_reduction:
493 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
494 relevant = vect_used_by_reduction;
495 break;
497 case vect_used_in_outer:
498 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
499 relevant = vect_used_in_scope;
500 break;
502 case vect_used_in_scope:
503 break;
505 default:
506 gcc_unreachable ();
510 /* case 3b: inner-loop stmt defining an outer-loop stmt:
511 outer-loop-header-bb:
513 inner-loop:
514 d = def_stmt
515 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
516 stmt # use (d) */
517 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
519 if (vect_print_dump_info (REPORT_DETAILS))
520 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
522 switch (relevant)
524 case vect_unused_in_scope:
525 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
526 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
527 vect_used_in_outer_by_reduction : vect_unused_in_scope;
528 break;
530 case vect_used_by_reduction:
531 relevant = vect_used_in_outer_by_reduction;
532 break;
534 case vect_used_in_scope:
535 relevant = vect_used_in_outer;
536 break;
538 default:
539 gcc_unreachable ();
543 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
544 is_pattern_stmt_p (stmt_vinfo));
545 return true;
549 /* Function vect_mark_stmts_to_be_vectorized.
551 Not all stmts in the loop need to be vectorized. For example:
553 for i...
554 for j...
555 1. T0 = i + j
556 2. T1 = a[T0]
558 3. j = j + 1
 560    Stmts 1 and 3 do not need to be vectorized, because loop control and
561 addressing of vectorized data-refs are handled differently.
563 This pass detects such stmts. */
565 bool
566 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
568 VEC(gimple,heap) *worklist;
569 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
570 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
571 unsigned int nbbs = loop->num_nodes;
572 gimple_stmt_iterator si;
573 gimple stmt;
574 unsigned int i;
575 stmt_vec_info stmt_vinfo;
576 basic_block bb;
577 gimple phi;
578 bool live_p;
579 enum vect_relevant relevant, tmp_relevant;
580 enum vect_def_type def_type;
582 if (vect_print_dump_info (REPORT_DETAILS))
583 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
585 worklist = VEC_alloc (gimple, heap, 64);
587 /* 1. Init worklist. */
588 for (i = 0; i < nbbs; i++)
590 bb = bbs[i];
591 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
593 phi = gsi_stmt (si);
594 if (vect_print_dump_info (REPORT_DETAILS))
596 fprintf (vect_dump, "init: phi relevant? ");
597 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
600 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
601 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
603 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
605 stmt = gsi_stmt (si);
606 if (vect_print_dump_info (REPORT_DETAILS))
608 fprintf (vect_dump, "init: stmt relevant? ");
609 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
612 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
613 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
617 /* 2. Process_worklist */
618 while (VEC_length (gimple, worklist) > 0)
620 use_operand_p use_p;
621 ssa_op_iter iter;
623 stmt = VEC_pop (gimple, worklist);
624 if (vect_print_dump_info (REPORT_DETAILS))
626 fprintf (vect_dump, "worklist: examine stmt: ");
627 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
630 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
631 (DEF_STMT) as relevant/irrelevant and live/dead according to the
632 liveness and relevance properties of STMT. */
633 stmt_vinfo = vinfo_for_stmt (stmt);
634 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
635 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
637 /* Generally, the liveness and relevance properties of STMT are
638 propagated as is to the DEF_STMTs of its USEs:
639 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
640 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
642 One exception is when STMT has been identified as defining a reduction
643 variable; in this case we set the liveness/relevance as follows:
644 live_p = false
645 relevant = vect_used_by_reduction
646 This is because we distinguish between two kinds of relevant stmts -
647 those that are used by a reduction computation, and those that are
648 (also) used by a regular computation. This allows us later on to
649 identify stmts that are used solely by a reduction, and therefore the
650 order of the results that they produce does not have to be kept. */
652 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
653 tmp_relevant = relevant;
654 switch (def_type)
656 case vect_reduction_def:
657 switch (tmp_relevant)
659 case vect_unused_in_scope:
660 relevant = vect_used_by_reduction;
661 break;
663 case vect_used_by_reduction:
664 if (gimple_code (stmt) == GIMPLE_PHI)
665 break;
666 /* fall through */
668 default:
669 if (vect_print_dump_info (REPORT_DETAILS))
670 fprintf (vect_dump, "unsupported use of reduction.");
672 VEC_free (gimple, heap, worklist);
673 return false;
676 live_p = false;
677 break;
679 case vect_nested_cycle:
680 if (tmp_relevant != vect_unused_in_scope
681 && tmp_relevant != vect_used_in_outer_by_reduction
682 && tmp_relevant != vect_used_in_outer)
684 if (vect_print_dump_info (REPORT_DETAILS))
685 fprintf (vect_dump, "unsupported use of nested cycle.");
687 VEC_free (gimple, heap, worklist);
688 return false;
691 live_p = false;
692 break;
694 case vect_double_reduction_def:
695 if (tmp_relevant != vect_unused_in_scope
696 && tmp_relevant != vect_used_by_reduction)
698 if (vect_print_dump_info (REPORT_DETAILS))
699 fprintf (vect_dump, "unsupported use of double reduction.");
701 VEC_free (gimple, heap, worklist);
702 return false;
705 live_p = false;
706 break;
708 default:
709 break;
712 if (is_pattern_stmt_p (stmt_vinfo))
714 /* Pattern statements are not inserted into the code, so
715 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
716 have to scan the RHS or function arguments instead. */
717 if (is_gimple_assign (stmt))
719 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
720 tree op = gimple_assign_rhs1 (stmt);
722 i = 1;
723 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
725 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
726 live_p, relevant, &worklist, false)
727 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
728 live_p, relevant, &worklist, false))
730 VEC_free (gimple, heap, worklist);
731 return false;
733 i = 2;
735 for (; i < gimple_num_ops (stmt); i++)
737 op = gimple_op (stmt, i);
738 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
739 &worklist, false))
741 VEC_free (gimple, heap, worklist);
742 return false;
746 else if (is_gimple_call (stmt))
748 for (i = 0; i < gimple_call_num_args (stmt); i++)
750 tree arg = gimple_call_arg (stmt, i);
751 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
752 &worklist, false))
754 VEC_free (gimple, heap, worklist);
755 return false;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
765 &worklist, false))
767 VEC_free (gimple, heap, worklist);
768 return false;
772 if (STMT_VINFO_GATHER_P (stmt_vinfo))
774 tree off;
775 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
776 gcc_assert (decl);
777 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
778 &worklist, true))
780 VEC_free (gimple, heap, worklist);
781 return false;
784 } /* while worklist */
786 VEC_free (gimple, heap, worklist);
787 return true;
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 void
798 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
799 enum vect_def_type *dt,
800 stmt_vector_for_cost *prologue_cost_vec,
801 stmt_vector_for_cost *body_cost_vec)
803 int i;
804 int inside_cost = 0, prologue_cost = 0;
806 /* The SLP costs were already calculated during SLP tree build. */
807 if (PURE_SLP_STMT (stmt_info))
808 return;
 810   /* FORNOW: Assuming maximum 2 args per stmt.  */
811 for (i = 0; i < 2; i++)
812 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
813 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
814 stmt_info, 0, vect_prologue);
816 /* Pass the inside-of-loop statements to the target-specific cost model. */
817 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
818 stmt_info, 0, vect_body);
820 if (vect_print_dump_info (REPORT_COST))
821 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
822 "prologue_cost = %d .", inside_cost, prologue_cost);
826 /* Model cost for type demotion and promotion operations. PWR is normally
827 zero for single-step promotions and demotions. It will be one if
828 two-step promotion/demotion is required, and so on. Each additional
829 step doubles the number of instructions required. */
831 static void
832 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
833 enum vect_def_type *dt, int pwr)
835 int i, tmp;
836 int inside_cost = 0, prologue_cost = 0;
837 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
838 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
839 void *target_cost_data;
841 /* The SLP costs were already calculated during SLP tree build. */
842 if (PURE_SLP_STMT (stmt_info))
843 return;
845 if (loop_vinfo)
846 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
847 else
848 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
850 for (i = 0; i < pwr + 1; i++)
852 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
853 (i + 1) : i;
854 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
855 vec_promote_demote, stmt_info, 0,
856 vect_body);
 859   /* FORNOW: Assuming maximum 2 args per stmt.  */
860 for (i = 0; i < 2; i++)
861 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
862 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
863 stmt_info, 0, vect_prologue);
865 if (vect_print_dump_info (REPORT_COST))
866 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
867 "prologue_cost = %d .", inside_cost, prologue_cost);
870 /* Function vect_cost_group_size
872 For grouped load or store, return the group_size only if it is the first
873 load or store of a group, else return 1. This ensures that group size is
874 only returned once per group. */
876 static int
877 vect_cost_group_size (stmt_vec_info stmt_info)
879 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
881 if (first_stmt == STMT_VINFO_STMT (stmt_info))
882 return GROUP_SIZE (stmt_info);
884 return 1;
888 /* Function vect_model_store_cost
890 Models cost for stores. In the case of grouped accesses, one access
891 has the overhead of the grouped access attributed to it. */
893 void
894 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
895 bool store_lanes_p, enum vect_def_type dt,
896 slp_tree slp_node,
897 stmt_vector_for_cost *prologue_cost_vec,
898 stmt_vector_for_cost *body_cost_vec)
900 int group_size;
901 unsigned int inside_cost = 0, prologue_cost = 0;
902 struct data_reference *first_dr;
903 gimple first_stmt;
905 /* The SLP costs were already calculated during SLP tree build. */
906 if (PURE_SLP_STMT (stmt_info))
907 return;
909 if (dt == vect_constant_def || dt == vect_external_def)
910 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
911 stmt_info, 0, vect_prologue);
913 /* Grouped access? */
914 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
916 if (slp_node)
918 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
919 group_size = 1;
921 else
923 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
924 group_size = vect_cost_group_size (stmt_info);
927 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
929 /* Not a grouped access. */
930 else
932 group_size = 1;
933 first_dr = STMT_VINFO_DATA_REF (stmt_info);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (!store_lanes_p && group_size > 1)
942 /* Uses a high and low interleave operation for each needed permute. */
944 int nstmts = ncopies * exact_log2 (group_size) * group_size;
945 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
946 stmt_info, 0, vect_body);
948 if (vect_print_dump_info (REPORT_COST))
949 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
950 group_size);
953 /* Costs of the stores. */
954 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
956 if (vect_print_dump_info (REPORT_COST))
957 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
958 "prologue_cost = %d .", inside_cost, prologue_cost);
962 /* Calculate cost of DR's memory access. */
963 void
964 vect_get_store_cost (struct data_reference *dr, int ncopies,
965 unsigned int *inside_cost,
966 stmt_vector_for_cost *body_cost_vec)
968 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
969 gimple stmt = DR_STMT (dr);
970 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
972 switch (alignment_support_scheme)
974 case dr_aligned:
976 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
977 vector_store, stmt_info, 0,
978 vect_body);
980 if (vect_print_dump_info (REPORT_COST))
981 fprintf (vect_dump, "vect_model_store_cost: aligned.");
983 break;
986 case dr_unaligned_supported:
988 /* Here, we assign an additional cost for the unaligned store. */
989 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
990 unaligned_store, stmt_info,
991 DR_MISALIGNMENT (dr), vect_body);
993 if (vect_print_dump_info (REPORT_COST))
994 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
995 "hardware.");
997 break;
1000 case dr_unaligned_unsupported:
1002 *inside_cost = VECT_MAX_COST;
1004 if (vect_print_dump_info (REPORT_COST))
1005 fprintf (vect_dump, "vect_model_store_cost: unsupported access.");
1007 break;
1010 default:
1011 gcc_unreachable ();
1016 /* Function vect_model_load_cost
1018 Models cost for loads. In the case of grouped accesses, the last access
1019 has the overhead of the grouped access attributed to it. Since unaligned
1020 accesses are supported for loads, we also account for the costs of the
1021 access scheme chosen. */
1023 void
1024 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1025 bool load_lanes_p, slp_tree slp_node,
1026 stmt_vector_for_cost *prologue_cost_vec,
1027 stmt_vector_for_cost *body_cost_vec)
1029 int group_size;
1030 gimple first_stmt;
1031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1032 unsigned int inside_cost = 0, prologue_cost = 0;
1034 /* The SLP costs were already calculated during SLP tree build. */
1035 if (PURE_SLP_STMT (stmt_info))
1036 return;
1038 /* Grouped accesses? */
1039 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1040 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1042 group_size = vect_cost_group_size (stmt_info);
1043 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1045 /* Not a grouped access. */
1046 else
1048 group_size = 1;
1049 first_dr = dr;
1052 /* We assume that the cost of a single load-lanes instruction is
1053 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1054 access is instead being provided by a load-and-permute operation,
1055 include the cost of the permutes. */
1056 if (!load_lanes_p && group_size > 1)
1058       /* Uses even and odd extract operations for each needed permute.  */
1059 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1060 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1061 stmt_info, 0, vect_body);
1063 if (vect_print_dump_info (REPORT_COST))
1064 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1065 group_size);
1068 /* The loads themselves. */
1069 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1071 /* N scalar loads plus gathering them into a vector. */
1072 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1073 inside_cost += record_stmt_cost (body_cost_vec,
1074 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1075 scalar_load, stmt_info, 0, vect_body);
1076 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1077 stmt_info, 0, vect_body);
1079 else
1080 vect_get_load_cost (first_dr, ncopies,
1081 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1082 || group_size > 1 || slp_node),
1083 &inside_cost, &prologue_cost,
1084 prologue_cost_vec, body_cost_vec, true);
1086 if (vect_print_dump_info (REPORT_COST))
1087 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1088 "prologue_cost = %d .", inside_cost, prologue_cost);
1092 /* Calculate cost of DR's memory access. */
1093 void
1094 vect_get_load_cost (struct data_reference *dr, int ncopies,
1095 bool add_realign_cost, unsigned int *inside_cost,
1096 unsigned int *prologue_cost,
1097 stmt_vector_for_cost *prologue_cost_vec,
1098 stmt_vector_for_cost *body_cost_vec,
1099 bool record_prologue_costs)
1101 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1102 gimple stmt = DR_STMT (dr);
1103 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1105 switch (alignment_support_scheme)
1107 case dr_aligned:
1109 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1110 stmt_info, 0, vect_body);
1112 if (vect_print_dump_info (REPORT_COST))
1113 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1115 break;
1117 case dr_unaligned_supported:
1119 /* Here, we assign an additional cost for the unaligned load. */
1120 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1121 unaligned_load, stmt_info,
1122 DR_MISALIGNMENT (dr), vect_body);
1124 if (vect_print_dump_info (REPORT_COST))
1125 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1126 "hardware.");
1128 break;
1130 case dr_explicit_realign:
1132 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1133 vector_load, stmt_info, 0, vect_body);
1134 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1135 vec_perm, stmt_info, 0, vect_body);
1137 /* FIXME: If the misalignment remains fixed across the iterations of
1138 the containing loop, the following cost should be added to the
1139 prologue costs. */
1140 if (targetm.vectorize.builtin_mask_for_load)
1141 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1142 stmt_info, 0, vect_body);
1144 if (vect_print_dump_info (REPORT_COST))
1145 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1147 break;
1149 case dr_explicit_realign_optimized:
1151 if (vect_print_dump_info (REPORT_COST))
1152 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1153 "pipelined.");
1155 /* Unaligned software pipeline has a load of an address, an initial
1156 load, and possibly a mask operation to "prime" the loop. However,
1157 if this is an access in a group of loads, which provide grouped
1158 access, then the above cost should only be considered for one
1159 access in the group. Inside the loop, there is a load op
1160 and a realignment op. */
1162 if (add_realign_cost && record_prologue_costs)
1164 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1165 vector_stmt, stmt_info,
1166 0, vect_prologue);
1167 if (targetm.vectorize.builtin_mask_for_load)
1168 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1169 vector_stmt, stmt_info,
1170 0, vect_prologue);
1173 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1174 stmt_info, 0, vect_body);
1175 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1176 stmt_info, 0, vect_body);
1178 if (vect_print_dump_info (REPORT_COST))
1179 fprintf (vect_dump,
1180 "vect_model_load_cost: explicit realign optimized");
1182 break;
1185 case dr_unaligned_unsupported:
1187 *inside_cost = VECT_MAX_COST;
1189 if (vect_print_dump_info (REPORT_COST))
1190 fprintf (vect_dump, "vect_model_load_cost: unsupported access.");
1192 break;
1195 default:
1196 gcc_unreachable ();
1200 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1201 the loop preheader for the vectorized stmt STMT. */
1203 static void
1204 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1206 if (gsi)
1207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1208 else
1210 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1213 if (loop_vinfo)
1215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 basic_block new_bb;
1217 edge pe;
1219 if (nested_in_vect_loop_p (loop, stmt))
1220 loop = loop->inner;
1222 pe = loop_preheader_edge (loop);
1223 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1224 gcc_assert (!new_bb);
1226 else
1228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1229 basic_block bb;
1230 gimple_stmt_iterator gsi_bb_start;
1232 gcc_assert (bb_vinfo);
1233 bb = BB_VINFO_BB (bb_vinfo);
1234 gsi_bb_start = gsi_after_labels (bb);
1235 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1239 if (vect_print_dump_info (REPORT_DETAILS))
1241 fprintf (vect_dump, "created new init_stmt: ");
1242 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1246 /* Function vect_init_vector.
1248 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1249 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1250    vector type, a vector with all elements equal to VAL is created first.
1251 Place the initialization at BSI if it is not NULL. Otherwise, place the
1252 initialization at the loop preheader.
1253 Return the DEF of INIT_STMT.
1254 It will be used in the vectorization of STMT. */
1256 tree
1257 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1259 tree new_var;
1260 gimple init_stmt;
1261 tree vec_oprnd;
1262 tree new_temp;
1264 if (TREE_CODE (type) == VECTOR_TYPE
1265 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1267 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1269 if (CONSTANT_CLASS_P (val))
1270 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1271 else
1273 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1274 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1275 new_temp, val,
1276 NULL_TREE);
1277 vect_init_vector_1 (stmt, init_stmt, gsi);
1278 val = new_temp;
1281 val = build_vector_from_val (type, val);
1284 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1285 init_stmt = gimple_build_assign (new_var, val);
1286 new_temp = make_ssa_name (new_var, init_stmt);
1287 gimple_assign_set_lhs (init_stmt, new_temp);
1288 vect_init_vector_1 (stmt, init_stmt, gsi);
1289 vec_oprnd = gimple_assign_lhs (init_stmt);
1290 return vec_oprnd;
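/* Editorial usage sketch, not from the original sources: a caller that
   needs an invariant or constant operand OP broadcast into vector type
   VECTYPE before the loop can do roughly

     vec_oprnd = vect_init_vector (stmt, op, vectype, NULL);

   which, for loop vectorization, emits the {op, op, ..., op}
   initialization on the loop preheader edge (or after the labels of the
   block for basic-block vectorization); passing a gimple_stmt_iterator
   instead of NULL inserts the init_stmt at that point.  */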
1294 /* Function vect_get_vec_def_for_operand.
1296 OP is an operand in STMT. This function returns a (vector) def that will be
1297 used in the vectorized stmt for STMT.
1299 In the case that OP is an SSA_NAME which is defined in the loop, then
1300 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1302 In case OP is an invariant or constant, a new stmt that creates a vector def
1303 needs to be introduced. */
1305 tree
1306 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1308 tree vec_oprnd;
1309 gimple vec_stmt;
1310 gimple def_stmt;
1311 stmt_vec_info def_stmt_info = NULL;
1312 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1313 unsigned int nunits;
1314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1315 tree def;
1316 enum vect_def_type dt;
1317 bool is_simple_use;
1318 tree vector_type;
1320 if (vect_print_dump_info (REPORT_DETAILS))
1322 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1323 print_generic_expr (vect_dump, op, TDF_SLIM);
1326 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1327 &def_stmt, &def, &dt);
1328 gcc_assert (is_simple_use);
1329 if (vect_print_dump_info (REPORT_DETAILS))
1331 if (def)
1333 fprintf (vect_dump, "def = ");
1334 print_generic_expr (vect_dump, def, TDF_SLIM);
1336 if (def_stmt)
1338 fprintf (vect_dump, " def_stmt = ");
1339 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1343 switch (dt)
1345 /* Case 1: operand is a constant. */
1346 case vect_constant_def:
1348 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1349 gcc_assert (vector_type);
1350 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1352 if (scalar_def)
1353 *scalar_def = op;
1355 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1356 if (vect_print_dump_info (REPORT_DETAILS))
1357 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1359 return vect_init_vector (stmt, op, vector_type, NULL);
1362 /* Case 2: operand is defined outside the loop - loop invariant. */
1363 case vect_external_def:
1365 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1366 gcc_assert (vector_type);
1368 if (scalar_def)
1369 *scalar_def = def;
1371 /* Create 'vec_inv = {inv,inv,..,inv}' */
1372 if (vect_print_dump_info (REPORT_DETAILS))
1373 fprintf (vect_dump, "Create vector_inv.");
1375 return vect_init_vector (stmt, def, vector_type, NULL);
1378 /* Case 3: operand is defined inside the loop. */
1379 case vect_internal_def:
1381 if (scalar_def)
1382 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1384 /* Get the def from the vectorized stmt. */
1385 def_stmt_info = vinfo_for_stmt (def_stmt);
1387 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1388 /* Get vectorized pattern statement. */
1389 if (!vec_stmt
1390 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1391 && !STMT_VINFO_RELEVANT (def_stmt_info))
1392 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1393 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1394 gcc_assert (vec_stmt);
1395 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1396 vec_oprnd = PHI_RESULT (vec_stmt);
1397 else if (is_gimple_call (vec_stmt))
1398 vec_oprnd = gimple_call_lhs (vec_stmt);
1399 else
1400 vec_oprnd = gimple_assign_lhs (vec_stmt);
1401 return vec_oprnd;
1404 /* Case 4: operand is defined by a loop header phi - reduction */
1405 case vect_reduction_def:
1406 case vect_double_reduction_def:
1407 case vect_nested_cycle:
1409 struct loop *loop;
1411 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1412 loop = (gimple_bb (def_stmt))->loop_father;
1414 /* Get the def before the loop */
1415 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1416 return get_initial_def_for_reduction (stmt, op, scalar_def);
1419 /* Case 5: operand is defined by loop-header phi - induction. */
1420 case vect_induction_def:
1422 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1424 /* Get the def from the vectorized stmt. */
1425 def_stmt_info = vinfo_for_stmt (def_stmt);
1426 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1427 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1428 vec_oprnd = PHI_RESULT (vec_stmt);
1429 else
1430 vec_oprnd = gimple_get_lhs (vec_stmt);
1431 return vec_oprnd;
1434 default:
1435 gcc_unreachable ();
1440 /* Function vect_get_vec_def_for_stmt_copy
1442 Return a vector-def for an operand. This function is used when the
1443 vectorized stmt to be created (by the caller to this function) is a "copy"
1444 created in case the vectorized result cannot fit in one vector, and several
1445 copies of the vector-stmt are required. In this case the vector-def is
1446 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1447 of the stmt that defines VEC_OPRND.
1448 DT is the type of the vector def VEC_OPRND.
1450 Context:
1451 In case the vectorization factor (VF) is bigger than the number
1452 of elements that can fit in a vectype (nunits), we have to generate
1453 more than one vector stmt to vectorize the scalar stmt. This situation
1454 arises when there are multiple data-types operated upon in the loop; the
1455 smallest data-type determines the VF, and as a result, when vectorizing
1456 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1457 vector stmt (each computing a vector of 'nunits' results, and together
1458 computing 'VF' results in each iteration). This function is called when
1459 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1460 which VF=16 and nunits=4, so the number of copies required is 4):
1462 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1464 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1465 VS1.1: vx.1 = memref1 VS1.2
1466 VS1.2: vx.2 = memref2 VS1.3
1467 VS1.3: vx.3 = memref3
1469 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1470 VSnew.1: vz1 = vx.1 + ... VSnew.2
1471 VSnew.2: vz2 = vx.2 + ... VSnew.3
1472 VSnew.3: vz3 = vx.3 + ...
1474 The vectorization of S1 is explained in vectorizable_load.
1475 The vectorization of S2:
1476 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1477 the function 'vect_get_vec_def_for_operand' is called to
1478 get the relevant vector-def for each operand of S2. For operand x it
1479 returns the vector-def 'vx.0'.
1481 To create the remaining copies of the vector-stmt (VSnew.j), this
1482 function is called to get the relevant vector-def for each operand. It is
1483 obtained from the respective VS1.j stmt, which is recorded in the
1484 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1486 For example, to obtain the vector-def 'vx.1' in order to create the
1487 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1488 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1489 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1490 and return its def ('vx.1').
1491 Overall, to create the above sequence this function will be called 3 times:
1492 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1493 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1494 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1496 tree
1497 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1499 gimple vec_stmt_for_operand;
1500 stmt_vec_info def_stmt_info;
1502 /* Do nothing; can reuse same def. */
1503 if (dt == vect_external_def || dt == vect_constant_def )
1504 return vec_oprnd;
1506 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1507 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1508 gcc_assert (def_stmt_info);
1509 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1510 gcc_assert (vec_stmt_for_operand);
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1512 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1513 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1514 else
1515 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1516 return vec_oprnd;
1520 /* Get vectorized definitions for the operands to create a copy of an original
1521 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1523 static void
1524 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1525 VEC(tree,heap) **vec_oprnds0,
1526 VEC(tree,heap) **vec_oprnds1)
1528 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1530 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1531 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1533 if (vec_oprnds1 && *vec_oprnds1)
1535 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1536 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1537 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1542 /* Get vectorized definitions for OP0 and OP1.
1543 REDUC_INDEX is the index of reduction operand in case of reduction,
1544 and -1 otherwise. */
1546 void
1547 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1548 VEC (tree, heap) **vec_oprnds0,
1549 VEC (tree, heap) **vec_oprnds1,
1550 slp_tree slp_node, int reduc_index)
1552 if (slp_node)
1554 int nops = (op1 == NULL_TREE) ? 1 : 2;
1555 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1556 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1558 VEC_quick_push (tree, ops, op0);
1559 if (op1)
1560 VEC_quick_push (tree, ops, op1);
1562 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1564 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1565 if (op1)
1566 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1568 VEC_free (tree, heap, ops);
1569 VEC_free (slp_void_p, heap, vec_defs);
1571 else
1573 tree vec_oprnd;
1575 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1576 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1577 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1579 if (op1)
1581 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1582 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1583 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1589 /* Function vect_finish_stmt_generation.
1591 Insert a new stmt. */
1593 void
1594 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1595 gimple_stmt_iterator *gsi)
1597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1598 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1599 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1601 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1603 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1605 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1606 bb_vinfo));
1608 if (vect_print_dump_info (REPORT_DETAILS))
1610 fprintf (vect_dump, "add new stmt: ");
1611 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1614 gimple_set_location (vec_stmt, gimple_location (stmt));
1617 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1618 a function declaration if the target has a vectorized version
1619 of the function, or NULL_TREE if the function cannot be vectorized. */
1621 tree
1622 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1624 tree fndecl = gimple_call_fndecl (call);
1626 /* We only handle functions that do not read or clobber memory -- i.e.
1627 const or novops ones. */
1628 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1629 return NULL_TREE;
1631 if (!fndecl
1632 || TREE_CODE (fndecl) != FUNCTION_DECL
1633 || !DECL_BUILT_IN (fndecl))
1634 return NULL_TREE;
1636 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1637 vectype_in);
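/* Editorial note, not from the original sources: the target hook queried
   above is what maps, for instance, a call to a sqrt builtin onto a
   machine-specific vector square-root routine when the target provides
   one for the given VECTYPE_OUT/VECTYPE_IN pair; a NULL_TREE result makes
   vectorizable_call reject the statement.  */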
1640 /* Function vectorizable_call.
1642 Check if STMT performs a function call that can be vectorized.
1643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1647 static bool
1648 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1649 slp_tree slp_node)
1651 tree vec_dest;
1652 tree scalar_dest;
1653 tree op, type;
1654 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1655 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1656 tree vectype_out, vectype_in;
1657 int nunits_in;
1658 int nunits_out;
1659 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1660 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1661 tree fndecl, new_temp, def, rhs_type;
1662 gimple def_stmt;
1663 enum vect_def_type dt[3]
1664 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1665 gimple new_stmt = NULL;
1666 int ncopies, j;
1667 VEC(tree, heap) *vargs = NULL;
1668 enum { NARROW, NONE, WIDEN } modifier;
1669 size_t i, nargs;
1670 tree lhs;
1672 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1673 return false;
1675 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1676 return false;
1678 /* Is STMT a vectorizable call? */
1679 if (!is_gimple_call (stmt))
1680 return false;
1682 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1683 return false;
1685 if (stmt_can_throw_internal (stmt))
1686 return false;
1688 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1690 /* Process function arguments. */
1691 rhs_type = NULL_TREE;
1692 vectype_in = NULL_TREE;
1693 nargs = gimple_call_num_args (stmt);
1695   /* Bail out if the function has more than three arguments; we do not have
1696 interesting builtin functions to vectorize with more than two arguments
1697 except for fma. No arguments is also not good. */
1698 if (nargs == 0 || nargs > 3)
1699 return false;
1701 for (i = 0; i < nargs; i++)
1703 tree opvectype;
1705 op = gimple_call_arg (stmt, i);
1707 /* We can only handle calls with arguments of the same type. */
1708 if (rhs_type
1709 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1711 if (vect_print_dump_info (REPORT_DETAILS))
1712 fprintf (vect_dump, "argument types differ.");
1713 return false;
1715 if (!rhs_type)
1716 rhs_type = TREE_TYPE (op);
1718 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1719 &def_stmt, &def, &dt[i], &opvectype))
1721 if (vect_print_dump_info (REPORT_DETAILS))
1722 fprintf (vect_dump, "use not simple.");
1723 return false;
1726 if (!vectype_in)
1727 vectype_in = opvectype;
1728 else if (opvectype
1729 && opvectype != vectype_in)
1731 if (vect_print_dump_info (REPORT_DETAILS))
1732 fprintf (vect_dump, "argument vector types differ.");
1733 return false;
1736 /* If all arguments are external or constant defs use a vector type with
1737 the same size as the output vector type. */
1738 if (!vectype_in)
1739 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1740 if (vec_stmt)
1741 gcc_assert (vectype_in);
1742 if (!vectype_in)
1744 if (vect_print_dump_info (REPORT_DETAILS))
1746 fprintf (vect_dump, "no vectype for scalar type ");
1747 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1750 return false;
1753 /* FORNOW */
1754 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1755 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1756 if (nunits_in == nunits_out / 2)
1757 modifier = NARROW;
1758 else if (nunits_out == nunits_in)
1759 modifier = NONE;
1760 else if (nunits_out == nunits_in / 2)
1761 modifier = WIDEN;
1762 else
1763 return false;
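  /* Editorial example, not from the original sources: with V2DF arguments
     and a V4SI result, nunits_in == 2 and nunits_out == 4, so the call is
     classified NARROW and two vector defs per argument feed each vectorized
     call; the inverse ratio gives WIDEN, and equal element counts give the
     one-to-one NONE case.  */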
1765 /* For now, we only vectorize functions if a target specific builtin
1766 is available. TODO -- in some cases, it might be profitable to
1767 insert the calls for pieces of the vector, in order to be able
1768 to vectorize other operations in the loop. */
1769 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1770 if (fndecl == NULL_TREE)
1772 if (vect_print_dump_info (REPORT_DETAILS))
1773 fprintf (vect_dump, "function is not vectorizable.");
1775 return false;
1778 gcc_assert (!gimple_vuse (stmt));
1780 if (slp_node || PURE_SLP_STMT (stmt_info))
1781 ncopies = 1;
1782 else if (modifier == NARROW)
1783 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1784 else
1785 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1787 /* Sanity check: make sure that at least one copy of the vectorized stmt
1788 needs to be generated. */
1789 gcc_assert (ncopies >= 1);
1791 if (!vec_stmt) /* transformation not required. */
1793 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1794 if (vect_print_dump_info (REPORT_DETAILS))
1795 fprintf (vect_dump, "=== vectorizable_call ===");
1796 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1797 return true;
1800 /** Transform. **/
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 fprintf (vect_dump, "transform call.");
1805 /* Handle def. */
1806 scalar_dest = gimple_call_lhs (stmt);
1807 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1809 prev_stmt_info = NULL;
1810 switch (modifier)
1812 case NONE:
1813 for (j = 0; j < ncopies; ++j)
1815 /* Build argument list for the vectorized call. */
1816 if (j == 0)
1817 vargs = VEC_alloc (tree, heap, nargs);
1818 else
1819 VEC_truncate (tree, vargs, 0);
1821 if (slp_node)
1823 VEC (slp_void_p, heap) *vec_defs
1824 = VEC_alloc (slp_void_p, heap, nargs);
1825 VEC (tree, heap) *vec_oprnds0;
1827 for (i = 0; i < nargs; i++)
1828 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1829 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1830 vec_oprnds0
1831 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1833 /* Arguments are ready. Create the new vector stmt. */
1834 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1836 size_t k;
1837 for (k = 0; k < nargs; k++)
1839 VEC (tree, heap) *vec_oprndsk
1840 = (VEC (tree, heap) *)
1841 VEC_index (slp_void_p, vec_defs, k);
1842 VEC_replace (tree, vargs, k,
1843 VEC_index (tree, vec_oprndsk, i));
1845 new_stmt = gimple_build_call_vec (fndecl, vargs);
1846 new_temp = make_ssa_name (vec_dest, new_stmt);
1847 gimple_call_set_lhs (new_stmt, new_temp);
1848 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1849 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1850 new_stmt);
1853 for (i = 0; i < nargs; i++)
1855 VEC (tree, heap) *vec_oprndsi
1856 = (VEC (tree, heap) *)
1857 VEC_index (slp_void_p, vec_defs, i);
1858 VEC_free (tree, heap, vec_oprndsi);
1860 VEC_free (slp_void_p, heap, vec_defs);
1861 continue;
1864 for (i = 0; i < nargs; i++)
1866 op = gimple_call_arg (stmt, i);
1867 if (j == 0)
1868 vec_oprnd0
1869 = vect_get_vec_def_for_operand (op, stmt, NULL);
1870 else
1872 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1873 vec_oprnd0
1874 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1877 VEC_quick_push (tree, vargs, vec_oprnd0);
1880 new_stmt = gimple_build_call_vec (fndecl, vargs);
1881 new_temp = make_ssa_name (vec_dest, new_stmt);
1882 gimple_call_set_lhs (new_stmt, new_temp);
1883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1885 if (j == 0)
1886 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1887 else
1888 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1890 prev_stmt_info = vinfo_for_stmt (new_stmt);
1893 break;
1895 case NARROW:
1896 for (j = 0; j < ncopies; ++j)
1898 /* Build argument list for the vectorized call. */
1899 if (j == 0)
1900 vargs = VEC_alloc (tree, heap, nargs * 2);
1901 else
1902 VEC_truncate (tree, vargs, 0);
1904 if (slp_node)
1906 VEC (slp_void_p, heap) *vec_defs
1907 = VEC_alloc (slp_void_p, heap, nargs);
1908 VEC (tree, heap) *vec_oprnds0;
1910 for (i = 0; i < nargs; i++)
1911 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1912 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1913 vec_oprnds0
1914 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1916 /* Arguments are ready. Create the new vector stmt. */
1917 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1918 i += 2)
1920 size_t k;
1921 VEC_truncate (tree, vargs, 0);
1922 for (k = 0; k < nargs; k++)
1924 VEC (tree, heap) *vec_oprndsk
1925 = (VEC (tree, heap) *)
1926 VEC_index (slp_void_p, vec_defs, k);
1927 VEC_quick_push (tree, vargs,
1928 VEC_index (tree, vec_oprndsk, i));
1929 VEC_quick_push (tree, vargs,
1930 VEC_index (tree, vec_oprndsk, i + 1));
1932 new_stmt = gimple_build_call_vec (fndecl, vargs);
1933 new_temp = make_ssa_name (vec_dest, new_stmt);
1934 gimple_call_set_lhs (new_stmt, new_temp);
1935 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1936 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1937 new_stmt);
1940 for (i = 0; i < nargs; i++)
1942 VEC (tree, heap) *vec_oprndsi
1943 = (VEC (tree, heap) *)
1944 VEC_index (slp_void_p, vec_defs, i);
1945 VEC_free (tree, heap, vec_oprndsi);
1947 VEC_free (slp_void_p, heap, vec_defs);
1948 continue;
1951 for (i = 0; i < nargs; i++)
1953 op = gimple_call_arg (stmt, i);
1954 if (j == 0)
1956 vec_oprnd0
1957 = vect_get_vec_def_for_operand (op, stmt, NULL);
1958 vec_oprnd1
1959 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1961 else
1963 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1964 vec_oprnd0
1965 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1966 vec_oprnd1
1967 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1970 VEC_quick_push (tree, vargs, vec_oprnd0);
1971 VEC_quick_push (tree, vargs, vec_oprnd1);
1974 new_stmt = gimple_build_call_vec (fndecl, vargs);
1975 new_temp = make_ssa_name (vec_dest, new_stmt);
1976 gimple_call_set_lhs (new_stmt, new_temp);
1977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1979 if (j == 0)
1980 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1981 else
1982 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1984 prev_stmt_info = vinfo_for_stmt (new_stmt);
1987 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1989 break;
1991 case WIDEN:
1992 /* No current target implements this case. */
1993 return false;
1996 VEC_free (tree, heap, vargs);
1998 /* Update the exception handling table with the vector stmt if necessary. */
1999 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2000 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2002 /* The call in STMT might prevent it from being removed in DCE.
2003 We however cannot remove it here, due to the way the SSA name
2004 it defines is mapped to the new definition. So just replace
2005 the rhs of the statement with something harmless. */
2007 if (slp_node)
2008 return true;
2010 type = TREE_TYPE (scalar_dest);
2011 if (is_pattern_stmt_p (stmt_info))
2012 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2013 else
2014 lhs = gimple_call_lhs (stmt);
2015 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2016 set_vinfo_for_stmt (new_stmt, stmt_info);
2017 set_vinfo_for_stmt (stmt, NULL);
2018 STMT_VINFO_STMT (stmt_info) = new_stmt;
2019 gsi_replace (gsi, new_stmt, false);
2020 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2022 return true;
2026 /* Function vect_gen_widened_results_half
2028 Create a vector stmt whose code, number of operands, and result
2029 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
2030 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2031 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2032 needs to be created (DECL is a function-decl of a target-builtin).
2033 STMT is the original scalar stmt that we are vectorizing. */
2035 static gimple
2036 vect_gen_widened_results_half (enum tree_code code,
2037 tree decl,
2038 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2039 tree vec_dest, gimple_stmt_iterator *gsi,
2040 gimple stmt)
2042 gimple new_stmt;
2043 tree new_temp;
2045 /* Generate half of the widened result: */
2046 if (code == CALL_EXPR)
2048 /* Target specific support */
2049 if (op_type == binary_op)
2050 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2051 else
2052 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2053 new_temp = make_ssa_name (vec_dest, new_stmt);
2054 gimple_call_set_lhs (new_stmt, new_temp);
2056 else
2058 /* Generic support */
2059 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2060 if (op_type != binary_op)
2061 vec_oprnd1 = NULL;
2062 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2063 vec_oprnd1);
2064 new_temp = make_ssa_name (vec_dest, new_stmt);
2065 gimple_assign_set_lhs (new_stmt, new_temp);
2067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2069 return new_stmt;
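/* Editorial sketch (not part of the original source): a scalar loop whose
   vectorization ends up calling the helper above twice per operand pair.
   The multiply below is typically recognized as WIDEN_MULT_EXPR; assuming
   128-bit vectors, each V8HI operand pair yields two V4SI halves, built
   either with VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR or, when
   CODE is CALL_EXPR, with two target builtin calls.  The function name is
   hypothetical.  */

void
example_widening_multiply (int *c, const short *a, const short *b, int n)
{
  int i;

  for (i = 0; i < n; i++)
    c[i] = a[i] * b[i];
}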
2073 /* Get vectorized definitions for loop-based vectorization. For the first
2074 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2075 scalar operand), and for the rest we get a copy with
2076 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2077 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2078 The vectors are collected into VEC_OPRNDS. */
2080 static void
2081 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2082 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2084 tree vec_oprnd;
2086 /* Get the first vector operand. */
2087 /* All the vector operands except the very first one (which is the scalar
2088 operand) are stmt copies. */
2089 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2090 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2091 else
2092 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2094 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2096 /* Get second vector operand. */
2097 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2098 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2100 *oprnd = vec_oprnd;
2102 /* For conversion in multiple steps, continue to get operands
2103 recursively. */
2104 if (multi_step_cvt)
2105 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2109 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2110 For multi-step conversions store the resulting vectors and call the function
2111 recursively. */
2113 static void
2114 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2115 int multi_step_cvt, gimple stmt,
2116 VEC (tree, heap) *vec_dsts,
2117 gimple_stmt_iterator *gsi,
2118 slp_tree slp_node, enum tree_code code,
2119 stmt_vec_info *prev_stmt_info)
2121 unsigned int i;
2122 tree vop0, vop1, new_tmp, vec_dest;
2123 gimple new_stmt;
2124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2126 vec_dest = VEC_pop (tree, vec_dsts);
2128 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2130 /* Create demotion operation. */
2131 vop0 = VEC_index (tree, *vec_oprnds, i);
2132 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2133 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2134 new_tmp = make_ssa_name (vec_dest, new_stmt);
2135 gimple_assign_set_lhs (new_stmt, new_tmp);
2136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2138 if (multi_step_cvt)
2139 /* Store the resulting vector for the next recursive call. */
2140 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2141 else
2143 /* This is the last step of the conversion sequence. Store the
2144 vectors in SLP_NODE or in vector info of the scalar statement
2145 (or in STMT_VINFO_RELATED_STMT chain). */
2146 if (slp_node)
2147 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2148 else
2150 if (!*prev_stmt_info)
2151 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2152 else
2153 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2155 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2160 /* For multi-step demotion operations we first generate demotion operations
2161 from the source type to the intermediate types, and then combine the
2162 results (stored in VEC_OPRNDS) with a demotion operation to the destination
2163 type. */
2164 if (multi_step_cvt)
2166 /* At each level of recursion we have half of the operands we had at the
2167 previous level. */
2168 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2169 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2170 stmt, vec_dsts, gsi, slp_node,
2171 VEC_PACK_TRUNC_EXPR,
2172 prev_stmt_info);
2175 VEC_quick_push (tree, vec_dsts, vec_dest);
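/* Editorial sketch (not part of the original source): a two-step demotion
   that exercises the recursion above.  Assuming 128-bit vectors, the
   int -> char truncation below consumes four V4SI vectors per V16QI
   result: the first level packs them into two V8HI vectors and the
   recursive call packs those into the final V16QI, using
   VEC_PACK_TRUNC_EXPR at each level.  The function name is hypothetical.  */

void
example_two_step_demotion (signed char *dst, const int *src, int n)
{
  int i;

  for (i = 0; i < n; i++)
    dst[i] = (signed char) src[i];
}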
2179 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2180 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2181 the resulting vectors and call the function recursively. */
2183 static void
2184 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2185 VEC (tree, heap) **vec_oprnds1,
2186 gimple stmt, tree vec_dest,
2187 gimple_stmt_iterator *gsi,
2188 enum tree_code code1,
2189 enum tree_code code2, tree decl1,
2190 tree decl2, int op_type)
2192 int i;
2193 tree vop0, vop1, new_tmp1, new_tmp2;
2194 gimple new_stmt1, new_stmt2;
2195 VEC (tree, heap) *vec_tmp = NULL;
2197 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2198 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2200 if (op_type == binary_op)
2201 vop1 = VEC_index (tree, *vec_oprnds1, i);
2202 else
2203 vop1 = NULL_TREE;
2205 /* Generate the two halves of promotion operation. */
2206 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2207 op_type, vec_dest, gsi, stmt);
2208 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2209 op_type, vec_dest, gsi, stmt);
2210 if (is_gimple_call (new_stmt1))
2212 new_tmp1 = gimple_call_lhs (new_stmt1);
2213 new_tmp2 = gimple_call_lhs (new_stmt2);
2215 else
2217 new_tmp1 = gimple_assign_lhs (new_stmt1);
2218 new_tmp2 = gimple_assign_lhs (new_stmt2);
2221 /* Store the results for the next step. */
2222 VEC_quick_push (tree, vec_tmp, new_tmp1);
2223 VEC_quick_push (tree, vec_tmp, new_tmp2);
2226 VEC_free (tree, heap, *vec_oprnds0);
2227 *vec_oprnds0 = vec_tmp;
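/* Editorial sketch (not part of the original source): the promotion
   counterpart of the demotion helper further above.  For the short -> int
   widening below, and assuming 128-bit vectors, every V8HI operand in
   *VEC_OPRNDS0 yields two V4SI results (the "halves" built by
   vect_gen_widened_results_half), so the operand vector doubles in length
   on each call.  The function name is hypothetical.  */

void
example_promotion (int *dst, const short *src, int n)
{
  int i;

  for (i = 0; i < n; i++)
    dst[i] = src[i];
}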
2231 /* Check if STMT performs a conversion operation that can be vectorized.
2232 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2233 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2234 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2236 static bool
2237 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2238 gimple *vec_stmt, slp_tree slp_node)
2240 tree vec_dest;
2241 tree scalar_dest;
2242 tree op0, op1 = NULL_TREE;
2243 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2244 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2245 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2246 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2247 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2248 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2249 tree new_temp;
2250 tree def;
2251 gimple def_stmt;
2252 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2253 gimple new_stmt = NULL;
2254 stmt_vec_info prev_stmt_info;
2255 int nunits_in;
2256 int nunits_out;
2257 tree vectype_out, vectype_in;
2258 int ncopies, i, j;
2259 tree lhs_type, rhs_type;
2260 enum { NARROW, NONE, WIDEN } modifier;
2261 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2262 tree vop0;
2263 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2264 int multi_step_cvt = 0;
2265 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2266 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2267 int op_type;
2268 enum machine_mode rhs_mode;
2269 unsigned short fltsz;
2271 /* Is STMT a vectorizable conversion? */
2273 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2274 return false;
2276 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2277 return false;
2279 if (!is_gimple_assign (stmt))
2280 return false;
2282 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2283 return false;
2285 code = gimple_assign_rhs_code (stmt);
2286 if (!CONVERT_EXPR_CODE_P (code)
2287 && code != FIX_TRUNC_EXPR
2288 && code != FLOAT_EXPR
2289 && code != WIDEN_MULT_EXPR
2290 && code != WIDEN_LSHIFT_EXPR)
2291 return false;
2293 op_type = TREE_CODE_LENGTH (code);
2295 /* Check types of lhs and rhs. */
2296 scalar_dest = gimple_assign_lhs (stmt);
2297 lhs_type = TREE_TYPE (scalar_dest);
2298 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2300 op0 = gimple_assign_rhs1 (stmt);
2301 rhs_type = TREE_TYPE (op0);
2303 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2304 && !((INTEGRAL_TYPE_P (lhs_type)
2305 && INTEGRAL_TYPE_P (rhs_type))
2306 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2307 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2308 return false;
2310 if ((INTEGRAL_TYPE_P (lhs_type)
2311 && (TYPE_PRECISION (lhs_type)
2312 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2313 || (INTEGRAL_TYPE_P (rhs_type)
2314 && (TYPE_PRECISION (rhs_type)
2315 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2317 if (vect_print_dump_info (REPORT_DETAILS))
2318 fprintf (vect_dump,
2319 "type conversion to/from bit-precision unsupported.");
2320 return false;
2323 /* Check the operands of the operation. */
2324 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2325 &def_stmt, &def, &dt[0], &vectype_in))
2327 if (vect_print_dump_info (REPORT_DETAILS))
2328 fprintf (vect_dump, "use not simple.");
2329 return false;
2331 if (op_type == binary_op)
2333 bool ok;
2335 op1 = gimple_assign_rhs2 (stmt);
2336 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2337 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2338 OP1. */
2339 if (CONSTANT_CLASS_P (op0))
2340 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2341 &def_stmt, &def, &dt[1], &vectype_in);
2342 else
2343 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2344 &def, &dt[1]);
2346 if (!ok)
2348 if (vect_print_dump_info (REPORT_DETAILS))
2349 fprintf (vect_dump, "use not simple.");
2350 return false;
2354 /* If op0 is an external or constant def, use a vector type of
2355 the same size as the output vector type. */
2356 if (!vectype_in)
2357 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2358 if (vec_stmt)
2359 gcc_assert (vectype_in);
2360 if (!vectype_in)
2362 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "no vectype for scalar type ");
2365 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2368 return false;
2371 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2372 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2373 if (nunits_in < nunits_out)
2374 modifier = NARROW;
2375 else if (nunits_out == nunits_in)
2376 modifier = NONE;
2377 else
2378 modifier = WIDEN;
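/* Editorial sketch (not part of the original source): how the modifier
   above falls out for a few common conversions, assuming 128-bit vectors
   (V4SI, V4SF, V8HI, V2DF).  The function name is hypothetical.  */

void
example_conversion_modifiers (float *f, short *h, double *d,
                              const int *a, int n)
{
  int i;

  for (i = 0; i < n; i++)
    f[i] = (float) a[i];    /* V4SI -> V4SF: nunits 4 -> 4, NONE.  */

  for (i = 0; i < n; i++)
    h[i] = (short) a[i];    /* V4SI -> V8HI: nunits 4 -> 8, NARROW.  */

  for (i = 0; i < n; i++)
    d[i] = (double) a[i];   /* V4SI -> V2DF: nunits 4 -> 2, WIDEN.  */
}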
2380 /* Multiple types in SLP are handled by creating the appropriate number of
2381 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2382 case of SLP. */
2383 if (slp_node || PURE_SLP_STMT (stmt_info))
2384 ncopies = 1;
2385 else if (modifier == NARROW)
2386 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2387 else
2388 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2390 /* Sanity check: make sure that at least one copy of the vectorized stmt
2391 needs to be generated. */
2392 gcc_assert (ncopies >= 1);
2394 /* Supportable by target? */
2395 switch (modifier)
2397 case NONE:
2398 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2399 return false;
2400 if (supportable_convert_operation (code, vectype_out, vectype_in,
2401 &decl1, &code1))
2402 break;
2403 /* FALLTHRU */
2404 unsupported:
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "conversion not supported by target.");
2407 return false;
2409 case WIDEN:
2410 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2411 &code1, &code2, &multi_step_cvt,
2412 &interm_types))
2414 /* Binary widening operation can only be supported directly by the
2415 architecture. */
2416 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2417 break;
2420 if (code != FLOAT_EXPR
2421 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2422 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2423 goto unsupported;
2425 rhs_mode = TYPE_MODE (rhs_type);
2426 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2427 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2428 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2429 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2431 cvt_type
2432 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2433 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2434 if (cvt_type == NULL_TREE)
2435 goto unsupported;
2437 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2439 if (!supportable_convert_operation (code, vectype_out,
2440 cvt_type, &decl1, &codecvt1))
2441 goto unsupported;
2443 else if (!supportable_widening_operation (code, stmt, vectype_out,
2444 cvt_type, &codecvt1,
2445 &codecvt2, &multi_step_cvt,
2446 &interm_types))
2447 continue;
2448 else
2449 gcc_assert (multi_step_cvt == 0);
2451 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2452 vectype_in, &code1, &code2,
2453 &multi_step_cvt, &interm_types))
2454 break;
2457 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2458 goto unsupported;
2460 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2461 codecvt2 = ERROR_MARK;
2462 else
2464 multi_step_cvt++;
2465 VEC_safe_push (tree, heap, interm_types, cvt_type);
2466 cvt_type = NULL_TREE;
2468 break;
2470 case NARROW:
2471 gcc_assert (op_type == unary_op);
2472 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2473 &code1, &multi_step_cvt,
2474 &interm_types))
2475 break;
2477 if (code != FIX_TRUNC_EXPR
2478 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2479 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2480 goto unsupported;
2482 rhs_mode = TYPE_MODE (rhs_type);
2483 cvt_type
2484 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2485 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2486 if (cvt_type == NULL_TREE)
2487 goto unsupported;
2488 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2489 &decl1, &codecvt1))
2490 goto unsupported;
2491 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2492 &code1, &multi_step_cvt,
2493 &interm_types))
2494 break;
2495 goto unsupported;
2497 default:
2498 gcc_unreachable ();
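/* Editorial sketch (not part of the original source): conversions that take
   the multi-step paths analysed in the switch above, assuming 128-bit
   vectors and a target that supports the intermediate operations.  For
   short -> double (FLOAT_EXPR, WIDEN) the intermediate CVT_TYPE is int:
   V8HI is first widened to V4SI and each V4SI is then converted to V2DF.
   For double -> short (FIX_TRUNC_EXPR, NARROW) each V2DF is first converted
   to a same-width integer vector (V2DI) and the result is then narrowed
   down to V8HI.  The function name is hypothetical.  */

void
example_multi_step_conversions (double *d, short *h,
                                const short *hs, const double *ds, int n)
{
  int i;

  for (i = 0; i < n; i++)
    d[i] = (double) hs[i];  /* short -> int -> double.  */

  for (i = 0; i < n; i++)
    h[i] = (short) ds[i];   /* double -> 64-bit int -> short.  */
}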
2501 if (!vec_stmt) /* transformation not required. */
2503 if (vect_print_dump_info (REPORT_DETAILS))
2504 fprintf (vect_dump, "=== vectorizable_conversion ===");
2505 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2507 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2508 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2510 else if (modifier == NARROW)
2512 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2513 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2515 else
2517 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2518 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2520 VEC_free (tree, heap, interm_types);
2521 return true;
2524 /** Transform. **/
2525 if (vect_print_dump_info (REPORT_DETAILS))
2526 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2528 if (op_type == binary_op)
2530 if (CONSTANT_CLASS_P (op0))
2531 op0 = fold_convert (TREE_TYPE (op1), op0);
2532 else if (CONSTANT_CLASS_P (op1))
2533 op1 = fold_convert (TREE_TYPE (op0), op1);
2536 /* In case of multi-step conversion, we first generate conversion operations
2537 to the intermediate types, and then from those types to the final one.
2538 We create vector destinations for the intermediate type (TYPES) received
2539 from supportable_*_operation, and store them in the correct order
2540 for future use in vect_create_vectorized_*_stmts (). */
2541 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2542 vec_dest = vect_create_destination_var (scalar_dest,
2543 (cvt_type && modifier == WIDEN)
2544 ? cvt_type : vectype_out);
2545 VEC_quick_push (tree, vec_dsts, vec_dest);
2547 if (multi_step_cvt)
2549 for (i = VEC_length (tree, interm_types) - 1;
2550 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2552 vec_dest = vect_create_destination_var (scalar_dest,
2553 intermediate_type);
2554 VEC_quick_push (tree, vec_dsts, vec_dest);
2558 if (cvt_type)
2559 vec_dest = vect_create_destination_var (scalar_dest,
2560 modifier == WIDEN
2561 ? vectype_out : cvt_type);
2563 if (!slp_node)
2565 if (modifier == NONE)
2566 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2567 else if (modifier == WIDEN)
2569 vec_oprnds0 = VEC_alloc (tree, heap,
2570 (multi_step_cvt
2571 ? vect_pow2 (multi_step_cvt) : 1));
2572 if (op_type == binary_op)
2573 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2575 else
2576 vec_oprnds0 = VEC_alloc (tree, heap,
2577 2 * (multi_step_cvt
2578 ? vect_pow2 (multi_step_cvt) : 1));
2580 else if (code == WIDEN_LSHIFT_EXPR)
2581 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2583 last_oprnd = op0;
2584 prev_stmt_info = NULL;
2585 switch (modifier)
2587 case NONE:
2588 for (j = 0; j < ncopies; j++)
2590 if (j == 0)
2591 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2592 -1);
2593 else
2594 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2596 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2598 /* Arguments are ready, create the new vector stmt. */
2599 if (code1 == CALL_EXPR)
2601 new_stmt = gimple_build_call (decl1, 1, vop0);
2602 new_temp = make_ssa_name (vec_dest, new_stmt);
2603 gimple_call_set_lhs (new_stmt, new_temp);
2605 else
2607 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2608 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2609 vop0, NULL);
2610 new_temp = make_ssa_name (vec_dest, new_stmt);
2611 gimple_assign_set_lhs (new_stmt, new_temp);
2614 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2615 if (slp_node)
2616 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2617 new_stmt);
2620 if (j == 0)
2621 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2622 else
2623 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2624 prev_stmt_info = vinfo_for_stmt (new_stmt);
2626 break;
2628 case WIDEN:
2629 /* In case the vectorization factor (VF) is bigger than the number
2630 of elements that we can fit in a vectype (nunits), we have to
2631 generate more than one vector stmt - i.e - we need to "unroll"
2632 the vector stmt by a factor VF/nunits. */
2633 for (j = 0; j < ncopies; j++)
2635 /* Handle uses. */
2636 if (j == 0)
2638 if (slp_node)
2640 if (code == WIDEN_LSHIFT_EXPR)
2642 unsigned int k;
2644 vec_oprnd1 = op1;
2645 /* Store vec_oprnd1 for every vector stmt to be created
2646 for SLP_NODE. We check during the analysis that all
2647 the shift arguments are the same. */
2648 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2649 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2651 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2652 slp_node, -1);
2654 else
2655 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2656 &vec_oprnds1, slp_node, -1);
2658 else
2660 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2661 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2662 if (op_type == binary_op)
2664 if (code == WIDEN_LSHIFT_EXPR)
2665 vec_oprnd1 = op1;
2666 else
2667 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2668 NULL);
2669 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2673 else
2675 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2676 VEC_truncate (tree, vec_oprnds0, 0);
2677 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2678 if (op_type == binary_op)
2680 if (code == WIDEN_LSHIFT_EXPR)
2681 vec_oprnd1 = op1;
2682 else
2683 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2684 vec_oprnd1);
2685 VEC_truncate (tree, vec_oprnds1, 0);
2686 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2690 /* Arguments are ready. Create the new vector stmts. */
2691 for (i = multi_step_cvt; i >= 0; i--)
2693 tree this_dest = VEC_index (tree, vec_dsts, i);
2694 enum tree_code c1 = code1, c2 = code2;
2695 if (i == 0 && codecvt2 != ERROR_MARK)
2697 c1 = codecvt1;
2698 c2 = codecvt2;
2700 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2701 &vec_oprnds1,
2702 stmt, this_dest, gsi,
2703 c1, c2, decl1, decl2,
2704 op_type);
2707 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2709 if (cvt_type)
2711 if (codecvt1 == CALL_EXPR)
2713 new_stmt = gimple_build_call (decl1, 1, vop0);
2714 new_temp = make_ssa_name (vec_dest, new_stmt);
2715 gimple_call_set_lhs (new_stmt, new_temp);
2717 else
2719 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2720 new_temp = make_ssa_name (vec_dest, NULL);
2721 new_stmt = gimple_build_assign_with_ops (codecvt1,
2722 new_temp,
2723 vop0, NULL);
2726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728 else
2729 new_stmt = SSA_NAME_DEF_STMT (vop0);
2731 if (slp_node)
2732 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2733 new_stmt);
2734 else
2736 if (!prev_stmt_info)
2737 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2738 else
2739 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2740 prev_stmt_info = vinfo_for_stmt (new_stmt);
2745 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2746 break;
2748 case NARROW:
2749 /* In case the vectorization factor (VF) is bigger than the number
2750 of elements that we can fit in a vectype (nunits), we have to
2751 generate more than one vector stmt - i.e - we need to "unroll"
2752 the vector stmt by a factor VF/nunits. */
2753 for (j = 0; j < ncopies; j++)
2755 /* Handle uses. */
2756 if (slp_node)
2757 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2758 slp_node, -1);
2759 else
2761 VEC_truncate (tree, vec_oprnds0, 0);
2762 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2763 vect_pow2 (multi_step_cvt) - 1);
2766 /* Arguments are ready. Create the new vector stmts. */
2767 if (cvt_type)
2768 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2770 if (codecvt1 == CALL_EXPR)
2772 new_stmt = gimple_build_call (decl1, 1, vop0);
2773 new_temp = make_ssa_name (vec_dest, new_stmt);
2774 gimple_call_set_lhs (new_stmt, new_temp);
2776 else
2778 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2779 new_temp = make_ssa_name (vec_dest, NULL);
2780 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2781 vop0, NULL);
2784 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2785 VEC_replace (tree, vec_oprnds0, i, new_temp);
2788 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2789 stmt, vec_dsts, gsi,
2790 slp_node, code1,
2791 &prev_stmt_info);
2794 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2795 break;
2798 VEC_free (tree, heap, vec_oprnds0);
2799 VEC_free (tree, heap, vec_oprnds1);
2800 VEC_free (tree, heap, vec_dsts);
2801 VEC_free (tree, heap, interm_types);
2803 return true;
2807 /* Function vectorizable_assignment.
2809 Check if STMT performs an assignment (copy) that can be vectorized.
2810 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2811 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2812 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2814 static bool
2815 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2816 gimple *vec_stmt, slp_tree slp_node)
2818 tree vec_dest;
2819 tree scalar_dest;
2820 tree op;
2821 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2822 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2823 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2824 tree new_temp;
2825 tree def;
2826 gimple def_stmt;
2827 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2828 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2829 int ncopies;
2830 int i, j;
2831 VEC(tree,heap) *vec_oprnds = NULL;
2832 tree vop;
2833 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2834 gimple new_stmt = NULL;
2835 stmt_vec_info prev_stmt_info = NULL;
2836 enum tree_code code;
2837 tree vectype_in;
2839 /* Multiple types in SLP are handled by creating the appropriate number of
2840 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2841 case of SLP. */
2842 if (slp_node || PURE_SLP_STMT (stmt_info))
2843 ncopies = 1;
2844 else
2845 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2847 gcc_assert (ncopies >= 1);
2849 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2850 return false;
2852 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2853 return false;
2855 /* Is vectorizable assignment? */
2856 if (!is_gimple_assign (stmt))
2857 return false;
2859 scalar_dest = gimple_assign_lhs (stmt);
2860 if (TREE_CODE (scalar_dest) != SSA_NAME)
2861 return false;
2863 code = gimple_assign_rhs_code (stmt);
2864 if (gimple_assign_single_p (stmt)
2865 || code == PAREN_EXPR
2866 || CONVERT_EXPR_CODE_P (code))
2867 op = gimple_assign_rhs1 (stmt);
2868 else
2869 return false;
2871 if (code == VIEW_CONVERT_EXPR)
2872 op = TREE_OPERAND (op, 0);
2874 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2875 &def_stmt, &def, &dt[0], &vectype_in))
2877 if (vect_print_dump_info (REPORT_DETAILS))
2878 fprintf (vect_dump, "use not simple.");
2879 return false;
2882 /* We can handle NOP_EXPR conversions that do not change the number
2883 of elements or the vector size. */
2884 if ((CONVERT_EXPR_CODE_P (code)
2885 || code == VIEW_CONVERT_EXPR)
2886 && (!vectype_in
2887 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2888 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2889 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2890 return false;
2892 /* We do not handle bit-precision changes. */
2893 if ((CONVERT_EXPR_CODE_P (code)
2894 || code == VIEW_CONVERT_EXPR)
2895 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2896 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2897 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2898 || ((TYPE_PRECISION (TREE_TYPE (op))
2899 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2900 /* But a conversion that does not change the bit-pattern is ok. */
2901 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2902 > TYPE_PRECISION (TREE_TYPE (op)))
2903 && TYPE_UNSIGNED (TREE_TYPE (op))))
2905 if (vect_print_dump_info (REPORT_DETAILS))
2906 fprintf (vect_dump, "type conversion to/from bit-precision "
2907 "unsupported.");
2908 return false;
2911 if (!vec_stmt) /* transformation not required. */
2913 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2914 if (vect_print_dump_info (REPORT_DETAILS))
2915 fprintf (vect_dump, "=== vectorizable_assignment ===");
2916 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2917 return true;
2920 /** Transform. **/
2921 if (vect_print_dump_info (REPORT_DETAILS))
2922 fprintf (vect_dump, "transform assignment.");
2924 /* Handle def. */
2925 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2927 /* Handle use. */
2928 for (j = 0; j < ncopies; j++)
2930 /* Handle uses. */
2931 if (j == 0)
2932 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2933 else
2934 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2936 /* Arguments are ready. Create the new vector stmt. */
2937 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2939 if (CONVERT_EXPR_CODE_P (code)
2940 || code == VIEW_CONVERT_EXPR)
2941 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2942 new_stmt = gimple_build_assign (vec_dest, vop);
2943 new_temp = make_ssa_name (vec_dest, new_stmt);
2944 gimple_assign_set_lhs (new_stmt, new_temp);
2945 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2946 if (slp_node)
2947 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2950 if (slp_node)
2951 continue;
2953 if (j == 0)
2954 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2955 else
2956 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2958 prev_stmt_info = vinfo_for_stmt (new_stmt);
2961 VEC_free (tree, heap, vec_oprnds);
2962 return true;
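/* Editorial sketch (not part of the original source): the kind of copy the
   function above handles.  The signed/unsigned cast below changes neither
   the number of lanes nor the vector size, so it is vectorized as a plain
   copy through a VIEW_CONVERT_EXPR on the vector operand.  The function
   name is hypothetical.  */

void
example_assignment (int *dst, const unsigned int *src, int n)
{
  int i;

  for (i = 0; i < n; i++)
    dst[i] = (int) src[i];
}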
2966 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2967 either as shift by a scalar or by a vector. */
2969 bool
2970 vect_supportable_shift (enum tree_code code, tree scalar_type)
2973 enum machine_mode vec_mode;
2974 optab optab;
2975 int icode;
2976 tree vectype;
2978 vectype = get_vectype_for_scalar_type (scalar_type);
2979 if (!vectype)
2980 return false;
2982 optab = optab_for_tree_code (code, vectype, optab_scalar);
2983 if (!optab
2984 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2986 optab = optab_for_tree_code (code, vectype, optab_vector);
2987 if (!optab
2988 || (optab_handler (optab, TYPE_MODE (vectype))
2989 == CODE_FOR_nothing))
2990 return false;
2993 vec_mode = TYPE_MODE (vectype);
2994 icode = (int) optab_handler (optab, vec_mode);
2995 if (icode == CODE_FOR_nothing)
2996 return false;
2998 return true;
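/* Editorial sketch (not part of the original source): how a caller might use
   the predicate above, e.g. when deciding whether a shift introduced by
   pattern recognition can be vectorized.  The wrapper below is
   hypothetical.  */

static bool
example_shift_supported_p (tree scalar_type)
{
  /* True if the target can vectorize a left shift on SCALAR_TYPE, either
     by a scalar amount or by a per-element amount.  */
  return vect_supportable_shift (LSHIFT_EXPR, scalar_type);
}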
3002 /* Function vectorizable_shift.
3004 Check if STMT performs a shift operation that can be vectorized.
3005 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3006 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3007 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3009 static bool
3010 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3011 gimple *vec_stmt, slp_tree slp_node)
3013 tree vec_dest;
3014 tree scalar_dest;
3015 tree op0, op1 = NULL;
3016 tree vec_oprnd1 = NULL_TREE;
3017 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3018 tree vectype;
3019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3020 enum tree_code code;
3021 enum machine_mode vec_mode;
3022 tree new_temp;
3023 optab optab;
3024 int icode;
3025 enum machine_mode optab_op2_mode;
3026 tree def;
3027 gimple def_stmt;
3028 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3029 gimple new_stmt = NULL;
3030 stmt_vec_info prev_stmt_info;
3031 int nunits_in;
3032 int nunits_out;
3033 tree vectype_out;
3034 tree op1_vectype;
3035 int ncopies;
3036 int j, i;
3037 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3038 tree vop0, vop1;
3039 unsigned int k;
3040 bool scalar_shift_arg = true;
3041 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3042 int vf;
3044 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3045 return false;
3047 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3048 return false;
3050 /* Is STMT a vectorizable binary/unary operation? */
3051 if (!is_gimple_assign (stmt))
3052 return false;
3054 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3055 return false;
3057 code = gimple_assign_rhs_code (stmt);
3059 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3060 || code == RROTATE_EXPR))
3061 return false;
3063 scalar_dest = gimple_assign_lhs (stmt);
3064 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3065 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3066 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3068 if (vect_print_dump_info (REPORT_DETAILS))
3069 fprintf (vect_dump, "bit-precision shifts not supported.");
3070 return false;
3073 op0 = gimple_assign_rhs1 (stmt);
3074 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3075 &def_stmt, &def, &dt[0], &vectype))
3077 if (vect_print_dump_info (REPORT_DETAILS))
3078 fprintf (vect_dump, "use not simple.");
3079 return false;
3081 /* If op0 is an external or constant def, use a vector type with
3082 the same size as the output vector type. */
3083 if (!vectype)
3084 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3085 if (vec_stmt)
3086 gcc_assert (vectype);
3087 if (!vectype)
3089 if (vect_print_dump_info (REPORT_DETAILS))
3091 fprintf (vect_dump, "no vectype for scalar type ");
3092 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3095 return false;
3098 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3099 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3100 if (nunits_out != nunits_in)
3101 return false;
3103 op1 = gimple_assign_rhs2 (stmt);
3104 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3105 &def, &dt[1], &op1_vectype))
3107 if (vect_print_dump_info (REPORT_DETAILS))
3108 fprintf (vect_dump, "use not simple.");
3109 return false;
3112 if (loop_vinfo)
3113 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3114 else
3115 vf = 1;
3117 /* Multiple types in SLP are handled by creating the appropriate number of
3118 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3119 case of SLP. */
3120 if (slp_node || PURE_SLP_STMT (stmt_info))
3121 ncopies = 1;
3122 else
3123 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3125 gcc_assert (ncopies >= 1);
3127 /* Determine whether the shift amount is a vector, or scalar. If the
3128 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3130 if (dt[1] == vect_internal_def && !slp_node)
3131 scalar_shift_arg = false;
3132 else if (dt[1] == vect_constant_def
3133 || dt[1] == vect_external_def
3134 || dt[1] == vect_internal_def)
3136 /* In SLP, we need to check whether the shift count is the same;
3137 in loops, if it is a constant or invariant, it is always
3138 a scalar shift. */
3139 if (slp_node)
3141 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3142 gimple slpstmt;
3144 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3145 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3146 scalar_shift_arg = false;
3149 else
3151 if (vect_print_dump_info (REPORT_DETAILS))
3152 fprintf (vect_dump, "operand mode requires invariant argument.");
3153 return false;
3156 /* Vector shifted by vector. */
3157 if (!scalar_shift_arg)
3159 optab = optab_for_tree_code (code, vectype, optab_vector);
3160 if (vect_print_dump_info (REPORT_DETAILS))
3161 fprintf (vect_dump, "vector/vector shift/rotate found.");
3162 if (!op1_vectype)
3163 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3164 if (op1_vectype == NULL_TREE
3165 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3167 if (vect_print_dump_info (REPORT_DETAILS))
3168 fprintf (vect_dump, "unusable type for last operand in"
3169 " vector/vector shift/rotate.");
3170 return false;
3173 /* See if the machine has a vector shifted by scalar insn and if not
3174 then see if it has a vector shifted by vector insn. */
3175 else
3177 optab = optab_for_tree_code (code, vectype, optab_scalar);
3178 if (optab
3179 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3181 if (vect_print_dump_info (REPORT_DETAILS))
3182 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3184 else
3186 optab = optab_for_tree_code (code, vectype, optab_vector);
3187 if (optab
3188 && (optab_handler (optab, TYPE_MODE (vectype))
3189 != CODE_FOR_nothing))
3191 scalar_shift_arg = false;
3193 if (vect_print_dump_info (REPORT_DETAILS))
3194 fprintf (vect_dump, "vector/vector shift/rotate found.");
3196 /* Unlike the other binary operators, shifts/rotates have
3197 an int rhs rather than one of the same type as the lhs,
3198 so make sure the scalar is of the right type when we are
3199 dealing with vectors of long long/long/short/char. */
3200 if (dt[1] == vect_constant_def)
3201 op1 = fold_convert (TREE_TYPE (vectype), op1);
3202 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3203 TREE_TYPE (op1)))
3205 if (slp_node
3206 && TYPE_MODE (TREE_TYPE (vectype))
3207 != TYPE_MODE (TREE_TYPE (op1)))
3209 if (vect_print_dump_info (REPORT_DETAILS))
3210 fprintf (vect_dump, "unusable type for last operand in"
3211 " vector/vector shift/rotate.");
3212 return false;
3214 if (vec_stmt && !slp_node)
3216 op1 = fold_convert (TREE_TYPE (vectype), op1);
3217 op1 = vect_init_vector (stmt, op1,
3218 TREE_TYPE (vectype), NULL);
3225 /* Supportable by target? */
3226 if (!optab)
3228 if (vect_print_dump_info (REPORT_DETAILS))
3229 fprintf (vect_dump, "no optab.");
3230 return false;
3232 vec_mode = TYPE_MODE (vectype);
3233 icode = (int) optab_handler (optab, vec_mode);
3234 if (icode == CODE_FOR_nothing)
3236 if (vect_print_dump_info (REPORT_DETAILS))
3237 fprintf (vect_dump, "op not supported by target.");
3238 /* Check only during analysis. */
3239 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3240 || (vf < vect_min_worthwhile_factor (code)
3241 && !vec_stmt))
3242 return false;
3243 if (vect_print_dump_info (REPORT_DETAILS))
3244 fprintf (vect_dump, "proceeding using word mode.");
3247 /* Worthwhile without SIMD support? Check only during analysis. */
3248 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3249 && vf < vect_min_worthwhile_factor (code)
3250 && !vec_stmt)
3252 if (vect_print_dump_info (REPORT_DETAILS))
3253 fprintf (vect_dump, "not worthwhile without SIMD support.");
3254 return false;
3257 if (!vec_stmt) /* transformation not required. */
3259 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3260 if (vect_print_dump_info (REPORT_DETAILS))
3261 fprintf (vect_dump, "=== vectorizable_shift ===");
3262 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3263 return true;
3266 /** Transform. **/
3268 if (vect_print_dump_info (REPORT_DETAILS))
3269 fprintf (vect_dump, "transform binary/unary operation.");
3271 /* Handle def. */
3272 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3274 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3275 created in the previous stages of the recursion, so no allocation is
3276 needed, except for the case of shift with scalar shift argument. In that
3277 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3278 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3279 In case of loop-based vectorization we allocate VECs of size 1. We
3280 allocate VEC_OPRNDS1 only in case of binary operation. */
3281 if (!slp_node)
3283 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3284 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3286 else if (scalar_shift_arg)
3287 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3289 prev_stmt_info = NULL;
3290 for (j = 0; j < ncopies; j++)
3292 /* Handle uses. */
3293 if (j == 0)
3295 if (scalar_shift_arg)
3297 /* Vector shl and shr insn patterns can be defined with scalar
3298 operand 2 (shift operand). In this case, use constant or loop
3299 invariant op1 directly, without extending it to vector mode
3300 first. */
3301 optab_op2_mode = insn_data[icode].operand[2].mode;
3302 if (!VECTOR_MODE_P (optab_op2_mode))
3304 if (vect_print_dump_info (REPORT_DETAILS))
3305 fprintf (vect_dump, "operand 1 using scalar mode.");
3306 vec_oprnd1 = op1;
3307 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3308 if (slp_node)
3310 /* Store vec_oprnd1 for every vector stmt to be created
3311 for SLP_NODE. We check during the analysis that all
3312 the shift arguments are the same.
3313 TODO: Allow different constants for different vector
3314 stmts generated for an SLP instance. */
3315 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3316 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3321 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3322 (a special case for certain kinds of vector shifts); otherwise,
3323 operand 1 should be of a vector type (the usual case). */
3324 if (vec_oprnd1)
3325 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3326 slp_node, -1);
3327 else
3328 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3329 slp_node, -1);
3331 else
3332 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3334 /* Arguments are ready. Create the new vector stmt. */
3335 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3337 vop1 = VEC_index (tree, vec_oprnds1, i);
3338 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3339 new_temp = make_ssa_name (vec_dest, new_stmt);
3340 gimple_assign_set_lhs (new_stmt, new_temp);
3341 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3342 if (slp_node)
3343 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3346 if (slp_node)
3347 continue;
3349 if (j == 0)
3350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3351 else
3352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3353 prev_stmt_info = vinfo_for_stmt (new_stmt);
3356 VEC_free (tree, heap, vec_oprnds0);
3357 VEC_free (tree, heap, vec_oprnds1);
3359 return true;
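/* Editorial sketch (not part of the original source): the two shapes of
   shift distinguished above.  In the first loop the shift amount is loop
   invariant, so SCALAR_SHIFT_ARG remains true and the vector/scalar optab
   is preferred; in the second it varies per element, so the vector/vector
   optab must be used.  Names are hypothetical.  */

void
example_shifts (unsigned int *a, const unsigned int *b,
                const unsigned int *amount, int shift, int n)
{
  int i;

  for (i = 0; i < n; i++)
    a[i] = b[i] << shift;       /* Invariant (scalar) shift operand.  */

  for (i = 0; i < n; i++)
    a[i] = b[i] << amount[i];   /* Per-element (vector) shift operand.  */
}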
3363 static tree permute_vec_elements (tree, tree, tree, gimple,
3364 gimple_stmt_iterator *);
3367 /* Function vectorizable_operation.
3369 Check if STMT performs a binary, unary or ternary operation that can
3370 be vectorized.
3371 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3372 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3373 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3375 static bool
3376 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3377 gimple *vec_stmt, slp_tree slp_node)
3379 tree vec_dest;
3380 tree scalar_dest;
3381 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3382 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3383 tree vectype;
3384 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3385 enum tree_code code;
3386 enum machine_mode vec_mode;
3387 tree new_temp;
3388 int op_type;
3389 optab optab;
3390 int icode;
3391 tree def;
3392 gimple def_stmt;
3393 enum vect_def_type dt[3]
3394 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3395 gimple new_stmt = NULL;
3396 stmt_vec_info prev_stmt_info;
3397 int nunits_in;
3398 int nunits_out;
3399 tree vectype_out;
3400 int ncopies;
3401 int j, i;
3402 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3403 tree vop0, vop1, vop2;
3404 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3405 int vf;
3407 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3408 return false;
3410 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3411 return false;
3413 /* Is STMT a vectorizable binary/unary operation? */
3414 if (!is_gimple_assign (stmt))
3415 return false;
3417 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3418 return false;
3420 code = gimple_assign_rhs_code (stmt);
3422 /* For pointer addition, we should use the normal plus for
3423 the vector addition. */
3424 if (code == POINTER_PLUS_EXPR)
3425 code = PLUS_EXPR;
3427 /* Support only unary, binary or ternary operations. */
3428 op_type = TREE_CODE_LENGTH (code);
3429 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3431 if (vect_print_dump_info (REPORT_DETAILS))
3432 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3433 op_type);
3434 return false;
3437 scalar_dest = gimple_assign_lhs (stmt);
3438 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3440 /* Most operations cannot handle bit-precision types without extra
3441 truncations. */
3442 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3443 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3444 /* Exception are bitwise binary operations. */
3445 && code != BIT_IOR_EXPR
3446 && code != BIT_XOR_EXPR
3447 && code != BIT_AND_EXPR)
3449 if (vect_print_dump_info (REPORT_DETAILS))
3450 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3451 return false;
3454 op0 = gimple_assign_rhs1 (stmt);
3455 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3456 &def_stmt, &def, &dt[0], &vectype))
3458 if (vect_print_dump_info (REPORT_DETAILS))
3459 fprintf (vect_dump, "use not simple.");
3460 return false;
3462 /* If op0 is an external or constant def, use a vector type with
3463 the same size as the output vector type. */
3464 if (!vectype)
3465 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3466 if (vec_stmt)
3467 gcc_assert (vectype);
3468 if (!vectype)
3470 if (vect_print_dump_info (REPORT_DETAILS))
3472 fprintf (vect_dump, "no vectype for scalar type ");
3473 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3476 return false;
3479 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3480 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3481 if (nunits_out != nunits_in)
3482 return false;
3484 if (op_type == binary_op || op_type == ternary_op)
3486 op1 = gimple_assign_rhs2 (stmt);
3487 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3488 &def, &dt[1]))
3490 if (vect_print_dump_info (REPORT_DETAILS))
3491 fprintf (vect_dump, "use not simple.");
3492 return false;
3495 if (op_type == ternary_op)
3497 op2 = gimple_assign_rhs3 (stmt);
3498 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3499 &def, &dt[2]))
3501 if (vect_print_dump_info (REPORT_DETAILS))
3502 fprintf (vect_dump, "use not simple.");
3503 return false;
3507 if (loop_vinfo)
3508 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3509 else
3510 vf = 1;
3512 /* Multiple types in SLP are handled by creating the appropriate number of
3513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3514 case of SLP. */
3515 if (slp_node || PURE_SLP_STMT (stmt_info))
3516 ncopies = 1;
3517 else
3518 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3520 gcc_assert (ncopies >= 1);
3522 /* Shifts are handled in vectorizable_shift (). */
3523 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3524 || code == RROTATE_EXPR)
3525 return false;
3527 /* Supportable by target? */
3529 vec_mode = TYPE_MODE (vectype);
3530 if (code == MULT_HIGHPART_EXPR)
3532 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3533 icode = LAST_INSN_CODE;
3534 else
3535 icode = CODE_FOR_nothing;
3537 else
3539 optab = optab_for_tree_code (code, vectype, optab_default);
3540 if (!optab)
3542 if (vect_print_dump_info (REPORT_DETAILS))
3543 fprintf (vect_dump, "no optab.");
3544 return false;
3546 icode = (int) optab_handler (optab, vec_mode);
3549 if (icode == CODE_FOR_nothing)
3551 if (vect_print_dump_info (REPORT_DETAILS))
3552 fprintf (vect_dump, "op not supported by target.");
3553 /* Check only during analysis. */
3554 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3555 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3556 return false;
3557 if (vect_print_dump_info (REPORT_DETAILS))
3558 fprintf (vect_dump, "proceeding using word mode.");
3561 /* Worthwhile without SIMD support? Check only during analysis. */
3562 if (!VECTOR_MODE_P (vec_mode)
3563 && !vec_stmt
3564 && vf < vect_min_worthwhile_factor (code))
3566 if (vect_print_dump_info (REPORT_DETAILS))
3567 fprintf (vect_dump, "not worthwhile without SIMD support.");
3568 return false;
3571 if (!vec_stmt) /* transformation not required. */
3573 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3574 if (vect_print_dump_info (REPORT_DETAILS))
3575 fprintf (vect_dump, "=== vectorizable_operation ===");
3576 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3577 return true;
3580 /** Transform. **/
3582 if (vect_print_dump_info (REPORT_DETAILS))
3583 fprintf (vect_dump, "transform binary/unary operation.");
3585 /* Handle def. */
3586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3588 /* In case the vectorization factor (VF) is bigger than the number
3589 of elements that we can fit in a vectype (nunits), we have to generate
3590 more than one vector stmt - i.e - we need to "unroll" the
3591 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3592 from one copy of the vector stmt to the next, in the field
3593 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3594 stages to find the correct vector defs to be used when vectorizing
3595 stmts that use the defs of the current stmt. The example below
3596 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3597 we need to create 4 vectorized stmts):
3599 before vectorization:
3600 RELATED_STMT VEC_STMT
3601 S1: x = memref - -
3602 S2: z = x + 1 - -
3604 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3605 there):
3606 RELATED_STMT VEC_STMT
3607 VS1_0: vx0 = memref0 VS1_1 -
3608 VS1_1: vx1 = memref1 VS1_2 -
3609 VS1_2: vx2 = memref2 VS1_3 -
3610 VS1_3: vx3 = memref3 - -
3611 S1: x = load - VS1_0
3612 S2: z = x + 1 - -
3614 step2: vectorize stmt S2 (done here):
3615 To vectorize stmt S2 we first need to find the relevant vector
3616 def for the first operand 'x'. This is, as usual, obtained from
3617 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3618 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3619 relevant vector def 'vx0'. Having found 'vx0' we can generate
3620 the vector stmt VS2_0, and as usual, record it in the
3621 STMT_VINFO_VEC_STMT of stmt S2.
3622 When creating the second copy (VS2_1), we obtain the relevant vector
3623 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3624 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3625 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3626 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3627 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3628 chain of stmts and pointers:
3629 RELATED_STMT VEC_STMT
3630 VS1_0: vx0 = memref0 VS1_1 -
3631 VS1_1: vx1 = memref1 VS1_2 -
3632 VS1_2: vx2 = memref2 VS1_3 -
3633 VS1_3: vx3 = memref3 - -
3634 S1: x = load - VS1_0
3635 VS2_0: vz0 = vx0 + v1 VS2_1 -
3636 VS2_1: vz1 = vx1 + v1 VS2_2 -
3637 VS2_2: vz2 = vx2 + v1 VS2_3 -
3638 VS2_3: vz3 = vx3 + v1 - -
3639 S2: z = x + 1 - VS2_0 */
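/* Editorial sketch (not part of the original source): a source-level loop
   matching the S1/S2 example in the comment above.  With V4SI vectors and a
   vectorization factor of 16, the single scalar addition S2 becomes the four
   chained vector stmts VS2_0 .. VS2_3.  The function name is hypothetical.  */

void
example_related_stmt_chain (int *z, const int *x, int n)
{
  int i;

  for (i = 0; i < n; i++)
    z[i] = x[i] + 1;    /* S1: x = memref;  S2: z = x + 1.  */
}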
3641 prev_stmt_info = NULL;
3642 for (j = 0; j < ncopies; j++)
3644 /* Handle uses. */
3645 if (j == 0)
3647 if (op_type == binary_op || op_type == ternary_op)
3648 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3649 slp_node, -1);
3650 else
3651 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3652 slp_node, -1);
3653 if (op_type == ternary_op)
3655 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3656 VEC_quick_push (tree, vec_oprnds2,
3657 vect_get_vec_def_for_operand (op2, stmt, NULL));
3660 else
3662 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3663 if (op_type == ternary_op)
3665 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3666 VEC_quick_push (tree, vec_oprnds2,
3667 vect_get_vec_def_for_stmt_copy (dt[2],
3668 vec_oprnd));
3672 /* Arguments are ready. Create the new vector stmt. */
3673 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3675 vop1 = ((op_type == binary_op || op_type == ternary_op)
3676 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3677 vop2 = ((op_type == ternary_op)
3678 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3679 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3680 vop0, vop1, vop2);
3681 new_temp = make_ssa_name (vec_dest, new_stmt);
3682 gimple_assign_set_lhs (new_stmt, new_temp);
3683 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3684 if (slp_node)
3685 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3688 if (slp_node)
3689 continue;
3691 if (j == 0)
3692 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3693 else
3694 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3695 prev_stmt_info = vinfo_for_stmt (new_stmt);
3698 VEC_free (tree, heap, vec_oprnds0);
3699 if (vec_oprnds1)
3700 VEC_free (tree, heap, vec_oprnds1);
3701 if (vec_oprnds2)
3702 VEC_free (tree, heap, vec_oprnds2);
3704 return true;
3708 /* Function vectorizable_store.
3710 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3711 can be vectorized.
3712 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3713 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3714 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3716 static bool
3717 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3718 slp_tree slp_node)
3720 tree scalar_dest;
3721 tree data_ref;
3722 tree op;
3723 tree vec_oprnd = NULL_TREE;
3724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3725 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3726 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3727 tree elem_type;
3728 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3729 struct loop *loop = NULL;
3730 enum machine_mode vec_mode;
3731 tree dummy;
3732 enum dr_alignment_support alignment_support_scheme;
3733 tree def;
3734 gimple def_stmt;
3735 enum vect_def_type dt;
3736 stmt_vec_info prev_stmt_info = NULL;
3737 tree dataref_ptr = NULL_TREE;
3738 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3739 int ncopies;
3740 int j;
3741 gimple next_stmt, first_stmt = NULL;
3742 bool grouped_store = false;
3743 bool store_lanes_p = false;
3744 unsigned int group_size, i;
3745 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3746 bool inv_p;
3747 VEC(tree,heap) *vec_oprnds = NULL;
3748 bool slp = (slp_node != NULL);
3749 unsigned int vec_num;
3750 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3751 tree aggr_type;
3753 if (loop_vinfo)
3754 loop = LOOP_VINFO_LOOP (loop_vinfo);
3756 /* Multiple types in SLP are handled by creating the appropriate number of
3757 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3758 case of SLP. */
3759 if (slp || PURE_SLP_STMT (stmt_info))
3760 ncopies = 1;
3761 else
3762 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3764 gcc_assert (ncopies >= 1);
3766 /* FORNOW. This restriction should be relaxed. */
3767 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3769 if (vect_print_dump_info (REPORT_DETAILS))
3770 fprintf (vect_dump, "multiple types in nested loop.");
3771 return false;
3774 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3775 return false;
3777 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3778 return false;
3780 /* Is vectorizable store? */
3782 if (!is_gimple_assign (stmt))
3783 return false;
3785 scalar_dest = gimple_assign_lhs (stmt);
3786 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3787 && is_pattern_stmt_p (stmt_info))
3788 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3789 if (TREE_CODE (scalar_dest) != ARRAY_REF
3790 && TREE_CODE (scalar_dest) != INDIRECT_REF
3791 && TREE_CODE (scalar_dest) != COMPONENT_REF
3792 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3793 && TREE_CODE (scalar_dest) != REALPART_EXPR
3794 && TREE_CODE (scalar_dest) != MEM_REF)
3795 return false;
3797 gcc_assert (gimple_assign_single_p (stmt));
3798 op = gimple_assign_rhs1 (stmt);
3799 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3800 &def, &dt))
3802 if (vect_print_dump_info (REPORT_DETAILS))
3803 fprintf (vect_dump, "use not simple.");
3804 return false;
3807 elem_type = TREE_TYPE (vectype);
3808 vec_mode = TYPE_MODE (vectype);
3810 /* FORNOW. In some cases we can vectorize even if the data-type is not
3811 supported (e.g. array initialization with 0). */
3812 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3813 return false;
3815 if (!STMT_VINFO_DATA_REF (stmt_info))
3816 return false;
3818 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3819 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3820 size_zero_node) < 0)
3822 if (vect_print_dump_info (REPORT_DETAILS))
3823 fprintf (vect_dump, "negative step for store.");
3824 return false;
3827 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3829 grouped_store = true;
3830 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3831 if (!slp && !PURE_SLP_STMT (stmt_info))
3833 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3834 if (vect_store_lanes_supported (vectype, group_size))
3835 store_lanes_p = true;
3836 else if (!vect_grouped_store_supported (vectype, group_size))
3837 return false;
3840 if (first_stmt == stmt)
3842 /* STMT is the leader of the group. Check the operands of all the
3843 stmts of the group. */
3844 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3845 while (next_stmt)
3847 gcc_assert (gimple_assign_single_p (next_stmt));
3848 op = gimple_assign_rhs1 (next_stmt);
3849 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3850 &def_stmt, &def, &dt))
3852 if (vect_print_dump_info (REPORT_DETAILS))
3853 fprintf (vect_dump, "use not simple.");
3854 return false;
3856 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3861 if (!vec_stmt) /* transformation not required. */
3863 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3864 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3865 NULL, NULL, NULL);
3866 return true;
3869 /** Transform. **/
3871 if (grouped_store)
3873 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3874 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3876 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3878 /* FORNOW */
3879 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3881 /* We vectorize all the stmts of the interleaving group when we
3882 reach the last stmt in the group. */
3883 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3884 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3885 && !slp)
3887 *vec_stmt = NULL;
3888 return true;
3891 if (slp)
3893 grouped_store = false;
3894 /* VEC_NUM is the number of vect stmts to be created for this
3895 group. */
3896 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3897 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3898 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3899 op = gimple_assign_rhs1 (first_stmt);
3901 else
3902 /* VEC_NUM is the number of vect stmts to be created for this
3903 group. */
3904 vec_num = group_size;
3906 else
3908 first_stmt = stmt;
3909 first_dr = dr;
3910 group_size = vec_num = 1;
3913 if (vect_print_dump_info (REPORT_DETAILS))
3914 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3916 dr_chain = VEC_alloc (tree, heap, group_size);
3917 oprnds = VEC_alloc (tree, heap, group_size);
3919 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3920 gcc_assert (alignment_support_scheme);
3921 /* Targets with store-lane instructions must not require explicit
3922 realignment. */
3923 gcc_assert (!store_lanes_p
3924 || alignment_support_scheme == dr_aligned
3925 || alignment_support_scheme == dr_unaligned_supported);
3927 if (store_lanes_p)
3928 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3929 else
3930 aggr_type = vectype;
3932 /* In case the vectorization factor (VF) is bigger than the number
3933 of elements that we can fit in a vectype (nunits), we have to generate
3934 more than one vector stmt - i.e - we need to "unroll" the
3935 vector stmt by a factor VF/nunits. For more details see documentation in
3936 vect_get_vec_def_for_copy_stmt. */
3938 /* In case of interleaving (non-unit grouped access):
3940 S1: &base + 2 = x2
3941 S2: &base = x0
3942 S3: &base + 1 = x1
3943 S4: &base + 3 = x3
3945 We create vectorized stores starting from the base address (the access of
3946 the first stmt in the chain, S2 in the above example) when the last store
3947 stmt of the chain (S4) is reached:
3949 VS1: &base = vx2
3950 VS2: &base + vec_size*1 = vx0
3951 VS3: &base + vec_size*2 = vx1
3952 VS4: &base + vec_size*3 = vx3
3954 Then permutation statements are generated:
3956 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3957 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3960 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3961 (the order of the data-refs in the output of vect_permute_store_chain
3962 corresponds to the order of scalar stmts in the interleaving chain - see
3963 the documentation of vect_permute_store_chain()).
3965 In case of both multiple types and interleaving, above vector stores and
3966 permutation stmts are created for every copy. The result vector stmts are
3967 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3968 STMT_VINFO_RELATED_STMT for the next copies.
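/* As a concrete illustration (assuming 4-element vectors and a group of two
   interleaved stores; the exact masks depend on the target): with the input
   chain {vx0, vx1}, vect_permute_store_chain emits

     low  = VEC_PERM_EXPR <vx0, vx1, { 0, 4, 1, 5 }>
     high = VEC_PERM_EXPR <vx0, vx1, { 2, 6, 3, 7 }>

   so that the two vectors written to consecutive memory locations contain
   the elements of the two scalar stores interleaved.  */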
3971 prev_stmt_info = NULL;
3972 for (j = 0; j < ncopies; j++)
3974 gimple new_stmt;
3975 gimple ptr_incr;
3977 if (j == 0)
3979 if (slp)
3981 /* Get vectorized arguments for SLP_NODE. */
3982 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3983 NULL, slp_node, -1);
3985 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3987 else
3989 /* For interleaved stores we collect vectorized defs for all the
3990 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3991 used as an input to vect_permute_store_chain(), and OPRNDS as
3992 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3994 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
3995 OPRNDS are of size 1. */
3996 next_stmt = first_stmt;
3997 for (i = 0; i < group_size; i++)
3999 /* Since gaps are not supported for interleaved stores,
4000 GROUP_SIZE is the exact number of stmts in the chain.
4001 Therefore, NEXT_STMT can't be NULL_TREE. If there is no
4002 interleaving, GROUP_SIZE is 1, and only one
4003 iteration of the loop will be executed. */
4004 gcc_assert (next_stmt
4005 && gimple_assign_single_p (next_stmt));
4006 op = gimple_assign_rhs1 (next_stmt);
4008 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4009 NULL);
4010 VEC_quick_push(tree, dr_chain, vec_oprnd);
4011 VEC_quick_push(tree, oprnds, vec_oprnd);
4012 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4016 /* We should have caught mismatched types earlier. */
4017 gcc_assert (useless_type_conversion_p (vectype,
4018 TREE_TYPE (vec_oprnd)));
4019 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4020 NULL_TREE, &dummy, gsi,
4021 &ptr_incr, false, &inv_p);
4022 gcc_assert (bb_vinfo || !inv_p);
4024 else
4026 /* For interleaved stores we created vectorized defs for all the
4027 defs stored in OPRNDS in the previous iteration (previous copy).
4028 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4029 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4030 next copy.
4031 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4032 OPRNDS are of size 1. */
4033 for (i = 0; i < group_size; i++)
4035 op = VEC_index (tree, oprnds, i);
4036 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4037 &def, &dt);
4038 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4039 VEC_replace(tree, dr_chain, i, vec_oprnd);
4040 VEC_replace(tree, oprnds, i, vec_oprnd);
4042 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4043 TYPE_SIZE_UNIT (aggr_type));
4046 if (store_lanes_p)
4048 tree vec_array;
4050 /* Combine all the vectors into an array. */
4051 vec_array = create_vector_array (vectype, vec_num);
4052 for (i = 0; i < vec_num; i++)
4054 vec_oprnd = VEC_index (tree, dr_chain, i);
4055 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4058 /* Emit:
4059 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4060 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4061 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4062 gimple_call_set_lhs (new_stmt, data_ref);
4063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4065 else
4067 new_stmt = NULL;
4068 if (grouped_store)
4070 result_chain = VEC_alloc (tree, heap, group_size);
4071 /* Permute. */
4072 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4073 &result_chain);
4076 next_stmt = first_stmt;
4077 for (i = 0; i < vec_num; i++)
4079 unsigned align, misalign;
4081 if (i > 0)
4082 /* Bump the vector pointer. */
4083 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4084 stmt, NULL_TREE);
4086 if (slp)
4087 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4088 else if (grouped_store)
4089 /* For grouped stores vectorized defs are interleaved in
4090 vect_permute_store_chain(). */
4091 vec_oprnd = VEC_index (tree, result_chain, i);
4093 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4094 build_int_cst (reference_alias_ptr_type
4095 (DR_REF (first_dr)), 0));
4096 align = TYPE_ALIGN_UNIT (vectype);
4097 if (aligned_access_p (first_dr))
4098 misalign = 0;
4099 else if (DR_MISALIGNMENT (first_dr) == -1)
4101 TREE_TYPE (data_ref)
4102 = build_aligned_type (TREE_TYPE (data_ref),
4103 TYPE_ALIGN (elem_type));
4104 align = TYPE_ALIGN_UNIT (elem_type);
4105 misalign = 0;
4107 else
4109 TREE_TYPE (data_ref)
4110 = build_aligned_type (TREE_TYPE (data_ref),
4111 TYPE_ALIGN (elem_type));
4112 misalign = DR_MISALIGNMENT (first_dr);
4114 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4115 misalign);
4117 /* Arguments are ready. Create the new vector stmt. */
4118 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4121 if (slp)
4122 continue;
4124 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4125 if (!next_stmt)
4126 break;
4129 if (!slp)
4131 if (j == 0)
4132 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4133 else
4134 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4135 prev_stmt_info = vinfo_for_stmt (new_stmt);
4139 VEC_free (tree, heap, dr_chain);
4140 VEC_free (tree, heap, oprnds);
4141 if (result_chain)
4142 VEC_free (tree, heap, result_chain);
4143 if (vec_oprnds)
4144 VEC_free (tree, heap, vec_oprnds);
4146 return true;
4149 /* Given a vector type VECTYPE and permutation SEL returns
4150 the VECTOR_CST mask that implements the permutation of the
4151 vector elements. If that is impossible to do, returns NULL. */
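/* For example (illustrative; whether a given selector is actually usable is
   decided by the target's can_vec_perm_p hook): on a 4-element vector type,
   SEL = { 1, 0, 3, 2 } yields the VECTOR_CST { 1, 0, 3, 2 } of the matching
   integer vector type, which swaps adjacent pairs of elements when used as
   the third operand of a VEC_PERM_EXPR.  */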
4153 tree
4154 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4156 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4157 int i, nunits;
4159 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4161 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4162 return NULL;
4164 mask_elt_type = lang_hooks.types.type_for_mode
4165 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4166 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4168 mask_elts = XALLOCAVEC (tree, nunits);
4169 for (i = nunits - 1; i >= 0; i--)
4170 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4171 mask_vec = build_vector (mask_type, mask_elts);
4173 return mask_vec;
4176 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4177 reversal of the vector elements. If that is impossible to do,
4178 returns NULL. */
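/* For instance, for a 4-element vector the selector built below is
   { 3, 2, 1, 0 }, i.e. the mask that reverses the element order; it is what
   the negative-step path of vectorizable_load uses to reverse each loaded
   vector.  */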
4180 static tree
4181 perm_mask_for_reverse (tree vectype)
4183 int i, nunits;
4184 unsigned char *sel;
4186 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4187 sel = XALLOCAVEC (unsigned char, nunits);
4189 for (i = 0; i < nunits; ++i)
4190 sel[i] = nunits - 1 - i;
4192 return vect_gen_perm_mask (vectype, sel);
4195 /* Given a vector variable X and Y, that was generated for the scalar
4196 STMT, generate instructions to permute the vector elements of X and Y
4197 using permutation mask MASK_VEC, insert them at *GSI and return the
4198 permuted vector variable. */
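/* A typical use in this file (illustrative): the negative-step load path
   below calls

     permute_vec_elements (v, v, perm_mask_for_reverse (vectype), stmt, gsi)

   which emits VEC_PERM_EXPR <v, v, mask> and returns the element-reversed
   copy of V.  */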
4200 static tree
4201 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4202 gimple_stmt_iterator *gsi)
4204 tree vectype = TREE_TYPE (x);
4205 tree perm_dest, data_ref;
4206 gimple perm_stmt;
4208 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4209 data_ref = make_ssa_name (perm_dest, NULL);
4211 /* Generate the permute statement. */
4212 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4213 x, y, mask_vec);
4214 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4216 return data_ref;
4219 /* vectorizable_load.
4221 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4222 can be vectorized.
4223 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4224 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4225 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4227 static bool
4228 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4229 slp_tree slp_node, slp_instance slp_node_instance)
4231 tree scalar_dest;
4232 tree vec_dest = NULL;
4233 tree data_ref = NULL;
4234 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4235 stmt_vec_info prev_stmt_info;
4236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4237 struct loop *loop = NULL;
4238 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4239 bool nested_in_vect_loop = false;
4240 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4241 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4242 tree elem_type;
4243 tree new_temp;
4244 enum machine_mode mode;
4245 gimple new_stmt = NULL;
4246 tree dummy;
4247 enum dr_alignment_support alignment_support_scheme;
4248 tree dataref_ptr = NULL_TREE;
4249 gimple ptr_incr;
4250 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4251 int ncopies;
4252 int i, j, group_size;
4253 tree msq = NULL_TREE, lsq;
4254 tree offset = NULL_TREE;
4255 tree realignment_token = NULL_TREE;
4256 gimple phi = NULL;
4257 VEC(tree,heap) *dr_chain = NULL;
4258 bool grouped_load = false;
4259 bool load_lanes_p = false;
4260 gimple first_stmt;
4261 bool inv_p;
4262 bool negative = false;
4263 bool compute_in_loop = false;
4264 struct loop *at_loop;
4265 int vec_num;
4266 bool slp = (slp_node != NULL);
4267 bool slp_perm = false;
4268 enum tree_code code;
4269 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4270 int vf;
4271 tree aggr_type;
4272 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4273 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4274 tree stride_base, stride_step;
4275 int gather_scale = 1;
4276 enum vect_def_type gather_dt = vect_unknown_def_type;
4278 if (loop_vinfo)
4280 loop = LOOP_VINFO_LOOP (loop_vinfo);
4281 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4282 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4284 else
4285 vf = 1;
4287 /* Multiple types in SLP are handled by creating the appropriate number of
4288 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4289 case of SLP. */
4290 if (slp || PURE_SLP_STMT (stmt_info))
4291 ncopies = 1;
4292 else
4293 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4295 gcc_assert (ncopies >= 1);
4297 /* FORNOW. This restriction should be relaxed. */
4298 if (nested_in_vect_loop && ncopies > 1)
4300 if (vect_print_dump_info (REPORT_DETAILS))
4301 fprintf (vect_dump, "multiple types in nested loop.");
4302 return false;
4305 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4306 return false;
4308 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4309 return false;
4311 /* Is vectorizable load? */
4312 if (!is_gimple_assign (stmt))
4313 return false;
4315 scalar_dest = gimple_assign_lhs (stmt);
4316 if (TREE_CODE (scalar_dest) != SSA_NAME)
4317 return false;
4319 code = gimple_assign_rhs_code (stmt);
4320 if (code != ARRAY_REF
4321 && code != INDIRECT_REF
4322 && code != COMPONENT_REF
4323 && code != IMAGPART_EXPR
4324 && code != REALPART_EXPR
4325 && code != MEM_REF
4326 && TREE_CODE_CLASS (code) != tcc_declaration)
4327 return false;
4329 if (!STMT_VINFO_DATA_REF (stmt_info))
4330 return false;
4332 elem_type = TREE_TYPE (vectype);
4333 mode = TYPE_MODE (vectype);
4335 /* FORNOW. In some cases we can vectorize even if the data-type is not
4336 supported (e.g. data copies). */
4337 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4339 if (vect_print_dump_info (REPORT_DETAILS))
4340 fprintf (vect_dump, "Aligned load, but unsupported type.");
4341 return false;
4344 /* Check if the load is a part of an interleaving chain. */
4345 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4347 grouped_load = true;
4348 /* FORNOW */
4349 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4351 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4352 if (!slp && !PURE_SLP_STMT (stmt_info))
4354 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4355 if (vect_load_lanes_supported (vectype, group_size))
4356 load_lanes_p = true;
4357 else if (!vect_grouped_load_supported (vectype, group_size))
4358 return false;
4363 if (STMT_VINFO_GATHER_P (stmt_info))
4365 gimple def_stmt;
4366 tree def;
4367 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4368 &gather_off, &gather_scale);
4369 gcc_assert (gather_decl);
4370 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4371 &def_stmt, &def, &gather_dt,
4372 &gather_off_vectype))
4374 if (vect_print_dump_info (REPORT_DETAILS))
4375 fprintf (vect_dump, "gather index use not simple.");
4376 return false;
4379 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4381 if (!vect_check_strided_load (stmt, loop_vinfo,
4382 &stride_base, &stride_step))
4383 return false;
4385 else
4387 negative = tree_int_cst_compare (nested_in_vect_loop
4388 ? STMT_VINFO_DR_STEP (stmt_info)
4389 : DR_STEP (dr),
4390 size_zero_node) < 0;
4391 if (negative && ncopies > 1)
4393 if (vect_print_dump_info (REPORT_DETAILS))
4394 fprintf (vect_dump, "multiple types with negative step.");
4395 return false;
4398 if (negative)
4400 gcc_assert (!grouped_load);
4401 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4402 if (alignment_support_scheme != dr_aligned
4403 && alignment_support_scheme != dr_unaligned_supported)
4405 if (vect_print_dump_info (REPORT_DETAILS))
4406 fprintf (vect_dump, "negative step but alignment required.");
4407 return false;
4409 if (!perm_mask_for_reverse (vectype))
4411 if (vect_print_dump_info (REPORT_DETAILS))
4412 fprintf (vect_dump, "negative step and reversing not supported.");
4413 return false;
4418 if (!vec_stmt) /* transformation not required. */
4420 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4421 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4422 return true;
4425 if (vect_print_dump_info (REPORT_DETAILS))
4426 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4428 /** Transform. **/
4430 if (STMT_VINFO_GATHER_P (stmt_info))
4432 tree vec_oprnd0 = NULL_TREE, op;
4433 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4434 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4435 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4436 edge pe = loop_preheader_edge (loop);
4437 gimple_seq seq;
4438 basic_block new_bb;
4439 enum { NARROW, NONE, WIDEN } modifier;
4440 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4442 if (nunits == gather_off_nunits)
4443 modifier = NONE;
4444 else if (nunits == gather_off_nunits / 2)
4446 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4447 modifier = WIDEN;
4449 for (i = 0; i < gather_off_nunits; ++i)
4450 sel[i] = i | nunits;
4452 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4453 gcc_assert (perm_mask != NULL_TREE);
4455 else if (nunits == gather_off_nunits * 2)
4457 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4458 modifier = NARROW;
4460 for (i = 0; i < nunits; ++i)
4461 sel[i] = i < gather_off_nunits
4462 ? i : i + nunits - gather_off_nunits;
4464 perm_mask = vect_gen_perm_mask (vectype, sel);
4465 gcc_assert (perm_mask != NULL_TREE);
4466 ncopies *= 2;
4468 else
4469 gcc_unreachable ();
4471 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4472 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4473 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4474 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4475 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4476 scaletype = TREE_VALUE (arglist);
4477 gcc_checking_assert (types_compatible_p (srctype, rettype)
4478 && types_compatible_p (srctype, masktype));
4480 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4482 ptr = fold_convert (ptrtype, gather_base);
4483 if (!is_gimple_min_invariant (ptr))
4485 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4486 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4487 gcc_assert (!new_bb);
4490 /* Currently we support only unconditional gather loads,
4491 so mask should be all ones. */
4492 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4493 mask = build_int_cst (TREE_TYPE (masktype), -1);
4494 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4496 REAL_VALUE_TYPE r;
4497 long tmp[6];
4498 for (j = 0; j < 6; ++j)
4499 tmp[j] = -1;
4500 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4501 mask = build_real (TREE_TYPE (masktype), r);
4503 else
4504 gcc_unreachable ();
4505 mask = build_vector_from_val (masktype, mask);
4506 mask = vect_init_vector (stmt, mask, masktype, NULL);
4508 scale = build_int_cst (scaletype, gather_scale);
4510 prev_stmt_info = NULL;
4511 for (j = 0; j < ncopies; ++j)
4513 if (modifier == WIDEN && (j & 1))
4514 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4515 perm_mask, stmt, gsi);
4516 else if (j == 0)
4517 op = vec_oprnd0
4518 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4519 else
4520 op = vec_oprnd0
4521 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4523 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4525 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4526 == TYPE_VECTOR_SUBPARTS (idxtype));
4527 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4528 var = make_ssa_name (var, NULL);
4529 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4530 new_stmt
4531 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4532 op, NULL_TREE);
4533 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4534 op = var;
4537 new_stmt
4538 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4540 if (!useless_type_conversion_p (vectype, rettype))
4542 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4543 == TYPE_VECTOR_SUBPARTS (rettype));
4544 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4545 op = make_ssa_name (var, new_stmt);
4546 gimple_call_set_lhs (new_stmt, op);
4547 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4548 var = make_ssa_name (vec_dest, NULL);
4549 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4550 new_stmt
4551 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4552 NULL_TREE);
4554 else
4556 var = make_ssa_name (vec_dest, new_stmt);
4557 gimple_call_set_lhs (new_stmt, var);
4560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4562 if (modifier == NARROW)
4564 if ((j & 1) == 0)
4566 prev_res = var;
4567 continue;
4569 var = permute_vec_elements (prev_res, var,
4570 perm_mask, stmt, gsi);
4571 new_stmt = SSA_NAME_DEF_STMT (var);
4574 if (prev_stmt_info == NULL)
4575 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4576 else
4577 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4578 prev_stmt_info = vinfo_for_stmt (new_stmt);
4580 return true;
4582 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4584 gimple_stmt_iterator incr_gsi;
4585 bool insert_after;
4586 gimple incr;
4587 tree offvar;
4588 tree ref = DR_REF (dr);
4589 tree ivstep;
4590 tree running_off;
4591 VEC(constructor_elt, gc) *v = NULL;
4592 gimple_seq stmts = NULL;
4594 gcc_assert (stride_base && stride_step);
4596 /* For a load with a loop-invariant stride (other than a power of 2),
4597 i.e. not a grouped access, like so:
4599 for (i = 0; i < n; i += stride)
4600 ... = array[i];
4602 we generate a new induction variable and new accesses to
4603 form a new vector (or vectors, depending on ncopies):
4605 for (j = 0; ; j += VF*stride)
4606 tmp1 = array[j];
4607 tmp2 = array[j + stride];
4609 vectemp = {tmp1, tmp2, ...}
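/* As a concrete sketch (assuming VF == nunits == 4 and a scalar stride of 3):
   the induction variable created below advances by 4*3 elements per vector
   iteration, and each copy builds one CONSTRUCTOR from four scalar loads

     vectemp = { array[j], array[j+3], array[j+6], array[j+9] }

   which vect_init_vector then turns into a vector statement.  */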
4612 ivstep = stride_step;
4613 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4614 build_int_cst (TREE_TYPE (ivstep), vf));
4616 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4618 create_iv (stride_base, ivstep, NULL,
4619 loop, &incr_gsi, insert_after,
4620 &offvar, NULL);
4621 incr = gsi_stmt (incr_gsi);
4622 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4624 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4625 if (stmts)
4626 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4628 prev_stmt_info = NULL;
4629 running_off = offvar;
4630 for (j = 0; j < ncopies; j++)
4632 tree vec_inv;
4634 v = VEC_alloc (constructor_elt, gc, nunits);
4635 for (i = 0; i < nunits; i++)
4637 tree newref, newoff;
4638 gimple incr;
4639 if (TREE_CODE (ref) == ARRAY_REF)
4640 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4641 unshare_expr (TREE_OPERAND (ref, 0)),
4642 running_off,
4643 NULL_TREE, NULL_TREE);
4644 else
4645 newref = build2 (MEM_REF, TREE_TYPE (ref),
4646 running_off,
4647 TREE_OPERAND (ref, 1));
4649 newref = force_gimple_operand_gsi (gsi, newref, true,
4650 NULL_TREE, true,
4651 GSI_SAME_STMT);
4652 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4653 newoff = copy_ssa_name (running_off, NULL);
4654 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4655 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4656 running_off, stride_step);
4657 else
4658 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4659 running_off, stride_step);
4660 vect_finish_stmt_generation (stmt, incr, gsi);
4662 running_off = newoff;
4665 vec_inv = build_constructor (vectype, v);
4666 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4667 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4669 if (j == 0)
4670 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4671 else
4672 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4673 prev_stmt_info = vinfo_for_stmt (new_stmt);
4675 return true;
4678 if (grouped_load)
4680 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4681 if (slp
4682 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4683 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4684 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4686 /* Check if the chain of loads is already vectorized. */
4687 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4689 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4690 return true;
4692 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4693 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4695 /* VEC_NUM is the number of vect stmts to be created for this group. */
4696 if (slp)
4698 grouped_load = false;
4699 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4700 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4701 slp_perm = true;
4703 else
4704 vec_num = group_size;
4706 else
4708 first_stmt = stmt;
4709 first_dr = dr;
4710 group_size = vec_num = 1;
4713 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4714 gcc_assert (alignment_support_scheme);
4715 /* Targets with load-lane instructions must not require explicit
4716 realignment. */
4717 gcc_assert (!load_lanes_p
4718 || alignment_support_scheme == dr_aligned
4719 || alignment_support_scheme == dr_unaligned_supported);
4721 /* In case the vectorization factor (VF) is bigger than the number
4722 of elements that we can fit in a vectype (nunits), we have to generate
4723 more than one vector stmt - i.e - we need to "unroll" the
4724 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4725 from one copy of the vector stmt to the next, in the field
4726 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4727 stages to find the correct vector defs to be used when vectorizing
4728 stmts that use the defs of the current stmt. The example below
4729 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4730 need to create 4 vectorized stmts):
4732 before vectorization:
4733 RELATED_STMT VEC_STMT
4734 S1: x = memref - -
4735 S2: z = x + 1 - -
4737 step 1: vectorize stmt S1:
4738 We first create the vector stmt VS1_0, and, as usual, record a
4739 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4740 Next, we create the vector stmt VS1_1, and record a pointer to
4741 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4742 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4743 stmts and pointers:
4744 RELATED_STMT VEC_STMT
4745 VS1_0: vx0 = memref0 VS1_1 -
4746 VS1_1: vx1 = memref1 VS1_2 -
4747 VS1_2: vx2 = memref2 VS1_3 -
4748 VS1_3: vx3 = memref3 - -
4749 S1: x = load - VS1_0
4750 S2: z = x + 1 - -
4752 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4753 information we recorded in RELATED_STMT field is used to vectorize
4754 stmt S2. */
4756 /* In case of interleaving (non-unit grouped access):
4758 S1: x2 = &base + 2
4759 S2: x0 = &base
4760 S3: x1 = &base + 1
4761 S4: x3 = &base + 3
4763 Vectorized loads are created in the order of memory accesses
4764 starting from the access of the first stmt of the chain:
4766 VS1: vx0 = &base
4767 VS2: vx1 = &base + vec_size*1
4768 VS3: vx3 = &base + vec_size*2
4769 VS4: vx4 = &base + vec_size*3
4771 Then permutation statements are generated:
4773 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4774 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4777 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4778 (the order of the data-refs in the output of vect_permute_load_chain
4779 corresponds to the order of scalar stmts in the interleaving chain - see
4780 the documentation of vect_permute_load_chain()).
4781 The generation of permutation stmts and recording them in
4782 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4784 In case of both multiple types and interleaving, the vector loads and
4785 permutation stmts above are created for every copy. The result vector
4786 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4787 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
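/* As a concrete illustration (assuming 4-element vectors and a group of two
   interleaved loads): vect_permute_load_chain de-interleaves the chain
   {vx0, vx1} with

     even = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>
     odd  = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>

   giving one result vector per scalar load of the group.  */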
4789 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4790 on a target that supports unaligned accesses (dr_unaligned_supported)
4791 we generate the following code:
4792 p = initial_addr;
4793 indx = 0;
4794 loop {
4795 p = p + indx * vectype_size;
4796 vec_dest = *(p);
4797 indx = indx + 1;
4800 Otherwise, the data reference is potentially unaligned on a target that
4801 does not support unaligned accesses (dr_explicit_realign_optimized) -
4802 then generate the following code, in which the data in each iteration is
4803 obtained by two vector loads, one from the previous iteration, and one
4804 from the current iteration:
4805 p1 = initial_addr;
4806 msq_init = *(floor(p1))
4807 p2 = initial_addr + VS - 1;
4808 realignment_token = call target_builtin;
4809 indx = 0;
4810 loop {
4811 p2 = p2 + indx * vectype_size
4812 lsq = *(floor(p2))
4813 vec_dest = realign_load (msq, lsq, realignment_token)
4814 indx = indx + 1;
4815 msq = lsq;
4816 } */
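/* For example (illustrative, the exact shuffle depends on the target):
   with 16-byte vectors of 4-byte elements and an access that starts 4 bytes
   past an aligned boundary, msq = *(floor(p1)) contains the first three
   elements of the desired vector and lsq = *(floor(p2)) contains the last
   one; realign_load (msq, lsq, realignment_token) shifts and merges the two
   aligned loads to reconstruct the unaligned vector.  */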
4818 /* If the misalignment remains the same throughout the execution of the
4819 loop, we can create the init_addr and permutation mask at the loop
4820 preheader. Otherwise, it needs to be created inside the loop.
4821 This can only occur when vectorizing memory accesses in the inner-loop
4822 nested within an outer-loop that is being vectorized. */
4824 if (nested_in_vect_loop
4825 && (TREE_INT_CST_LOW (DR_STEP (dr))
4826 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4828 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4829 compute_in_loop = true;
4832 if ((alignment_support_scheme == dr_explicit_realign_optimized
4833 || alignment_support_scheme == dr_explicit_realign)
4834 && !compute_in_loop)
4836 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4837 alignment_support_scheme, NULL_TREE,
4838 &at_loop);
4839 if (alignment_support_scheme == dr_explicit_realign_optimized)
4841 phi = SSA_NAME_DEF_STMT (msq);
4842 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4845 else
4846 at_loop = loop;
4848 if (negative)
4849 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4851 if (load_lanes_p)
4852 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4853 else
4854 aggr_type = vectype;
4856 prev_stmt_info = NULL;
4857 for (j = 0; j < ncopies; j++)
4859 /* 1. Create the vector or array pointer update chain. */
4860 if (j == 0)
4861 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4862 offset, &dummy, gsi,
4863 &ptr_incr, false, &inv_p);
4864 else
4865 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4866 TYPE_SIZE_UNIT (aggr_type));
4868 if (grouped_load || slp_perm)
4869 dr_chain = VEC_alloc (tree, heap, vec_num);
4871 if (load_lanes_p)
4873 tree vec_array;
4875 vec_array = create_vector_array (vectype, vec_num);
4877 /* Emit:
4878 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4879 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4880 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4881 gimple_call_set_lhs (new_stmt, vec_array);
4882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4884 /* Extract each vector into an SSA_NAME. */
4885 for (i = 0; i < vec_num; i++)
4887 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4888 vec_array, i);
4889 VEC_quick_push (tree, dr_chain, new_temp);
4892 /* Record the mapping between SSA_NAMEs and statements. */
4893 vect_record_grouped_load_vectors (stmt, dr_chain);
4895 else
4897 for (i = 0; i < vec_num; i++)
4899 if (i > 0)
4900 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4901 stmt, NULL_TREE);
4903 /* 2. Create the vector-load in the loop. */
4904 switch (alignment_support_scheme)
4906 case dr_aligned:
4907 case dr_unaligned_supported:
4909 unsigned int align, misalign;
4911 data_ref
4912 = build2 (MEM_REF, vectype, dataref_ptr,
4913 build_int_cst (reference_alias_ptr_type
4914 (DR_REF (first_dr)), 0));
4915 align = TYPE_ALIGN_UNIT (vectype);
4916 if (alignment_support_scheme == dr_aligned)
4918 gcc_assert (aligned_access_p (first_dr));
4919 misalign = 0;
4921 else if (DR_MISALIGNMENT (first_dr) == -1)
4923 TREE_TYPE (data_ref)
4924 = build_aligned_type (TREE_TYPE (data_ref),
4925 TYPE_ALIGN (elem_type));
4926 align = TYPE_ALIGN_UNIT (elem_type);
4927 misalign = 0;
4929 else
4931 TREE_TYPE (data_ref)
4932 = build_aligned_type (TREE_TYPE (data_ref),
4933 TYPE_ALIGN (elem_type));
4934 misalign = DR_MISALIGNMENT (first_dr);
4936 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
4937 align, misalign);
4938 break;
4940 case dr_explicit_realign:
4942 tree ptr, bump;
4943 tree vs_minus_1;
4945 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4947 if (compute_in_loop)
4948 msq = vect_setup_realignment (first_stmt, gsi,
4949 &realignment_token,
4950 dr_explicit_realign,
4951 dataref_ptr, NULL);
4953 ptr = copy_ssa_name (dataref_ptr, NULL);
4954 new_stmt = gimple_build_assign_with_ops
4955 (BIT_AND_EXPR, ptr, dataref_ptr,
4956 build_int_cst
4957 (TREE_TYPE (dataref_ptr),
4958 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4959 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4960 data_ref
4961 = build2 (MEM_REF, vectype, ptr,
4962 build_int_cst (reference_alias_ptr_type
4963 (DR_REF (first_dr)), 0));
4964 vec_dest = vect_create_destination_var (scalar_dest,
4965 vectype);
4966 new_stmt = gimple_build_assign (vec_dest, data_ref);
4967 new_temp = make_ssa_name (vec_dest, new_stmt);
4968 gimple_assign_set_lhs (new_stmt, new_temp);
4969 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4970 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4972 msq = new_temp;
4974 bump = size_binop (MULT_EXPR, vs_minus_1,
4975 TYPE_SIZE_UNIT (elem_type));
4976 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4977 new_stmt = gimple_build_assign_with_ops
4978 (BIT_AND_EXPR, NULL_TREE, ptr,
4979 build_int_cst
4980 (TREE_TYPE (ptr),
4981 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4982 ptr = copy_ssa_name (dataref_ptr, new_stmt);
4983 gimple_assign_set_lhs (new_stmt, ptr);
4984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4985 data_ref
4986 = build2 (MEM_REF, vectype, ptr,
4987 build_int_cst (reference_alias_ptr_type
4988 (DR_REF (first_dr)), 0));
4989 break;
4991 case dr_explicit_realign_optimized:
4992 new_temp = copy_ssa_name (dataref_ptr, NULL);
4993 new_stmt = gimple_build_assign_with_ops
4994 (BIT_AND_EXPR, new_temp, dataref_ptr,
4995 build_int_cst
4996 (TREE_TYPE (dataref_ptr),
4997 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4999 data_ref
5000 = build2 (MEM_REF, vectype, new_temp,
5001 build_int_cst (reference_alias_ptr_type
5002 (DR_REF (first_dr)), 0));
5003 break;
5004 default:
5005 gcc_unreachable ();
5007 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5008 new_stmt = gimple_build_assign (vec_dest, data_ref);
5009 new_temp = make_ssa_name (vec_dest, new_stmt);
5010 gimple_assign_set_lhs (new_stmt, new_temp);
5011 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5013 /* 3. Handle explicit realignment if necessary/supported.
5014 Create in loop:
5015 vec_dest = realign_load (msq, lsq, realignment_token) */
5016 if (alignment_support_scheme == dr_explicit_realign_optimized
5017 || alignment_support_scheme == dr_explicit_realign)
5019 lsq = gimple_assign_lhs (new_stmt);
5020 if (!realignment_token)
5021 realignment_token = dataref_ptr;
5022 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5023 new_stmt
5024 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
5025 vec_dest, msq, lsq,
5026 realignment_token);
5027 new_temp = make_ssa_name (vec_dest, new_stmt);
5028 gimple_assign_set_lhs (new_stmt, new_temp);
5029 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5031 if (alignment_support_scheme == dr_explicit_realign_optimized)
5033 gcc_assert (phi);
5034 if (i == vec_num - 1 && j == ncopies - 1)
5035 add_phi_arg (phi, lsq,
5036 loop_latch_edge (containing_loop),
5037 UNKNOWN_LOCATION);
5038 msq = lsq;
5042 /* 4. Handle invariant-load. */
5043 if (inv_p && !bb_vinfo)
5045 gimple_stmt_iterator gsi2 = *gsi;
5046 gcc_assert (!grouped_load);
5047 gsi_next (&gsi2);
5048 new_temp = vect_init_vector (stmt, scalar_dest,
5049 vectype, &gsi2);
5050 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5053 if (negative)
5055 tree perm_mask = perm_mask_for_reverse (vectype);
5056 new_temp = permute_vec_elements (new_temp, new_temp,
5057 perm_mask, stmt, gsi);
5058 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5061 /* Collect vector loads and later create their permutation in
5062 vect_transform_grouped_load (). */
5063 if (grouped_load || slp_perm)
5064 VEC_quick_push (tree, dr_chain, new_temp);
5066 /* Store vector loads in the corresponding SLP_NODE. */
5067 if (slp && !slp_perm)
5068 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5069 new_stmt);
5073 if (slp && !slp_perm)
5074 continue;
5076 if (slp_perm)
5078 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5079 slp_node_instance, false))
5081 VEC_free (tree, heap, dr_chain);
5082 return false;
5085 else
5087 if (grouped_load)
5089 if (!load_lanes_p)
5090 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5091 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5093 else
5095 if (j == 0)
5096 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5097 else
5098 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5099 prev_stmt_info = vinfo_for_stmt (new_stmt);
5102 if (dr_chain)
5103 VEC_free (tree, heap, dr_chain);
5106 return true;
5109 /* Function vect_is_simple_cond.
5111 Input:
5112 LOOP - the loop that is being vectorized.
5113 COND - Condition that is checked for simple use.
5115 Output:
5116 *COMP_VECTYPE - the vector type for the comparison.
5118 Returns whether a COND can be vectorized. Checks whether
5119 condition operands are supportable using vect_is_simple_use. */
5121 static bool
5122 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5123 bb_vec_info bb_vinfo, tree *comp_vectype)
5125 tree lhs, rhs;
5126 tree def;
5127 enum vect_def_type dt;
5128 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5130 if (!COMPARISON_CLASS_P (cond))
5131 return false;
5133 lhs = TREE_OPERAND (cond, 0);
5134 rhs = TREE_OPERAND (cond, 1);
5136 if (TREE_CODE (lhs) == SSA_NAME)
5138 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5139 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5140 &lhs_def_stmt, &def, &dt, &vectype1))
5141 return false;
5143 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5144 && TREE_CODE (lhs) != FIXED_CST)
5145 return false;
5147 if (TREE_CODE (rhs) == SSA_NAME)
5149 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5150 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5151 &rhs_def_stmt, &def, &dt, &vectype2))
5152 return false;
5154 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5155 && TREE_CODE (rhs) != FIXED_CST)
5156 return false;
5158 *comp_vectype = vectype1 ? vectype1 : vectype2;
5159 return true;
5162 /* vectorizable_condition.
5164 Check if STMT is conditional modify expression that can be vectorized.
5165 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5166 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5167 at GSI.
5169 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5170 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5171 the else clause if it is 2).
5173 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
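/* As an illustrative example of the transformation performed below:

     S:  z = x < y ? a : b

   becomes, for each copy,

     VS: vz = VEC_COND_EXPR <vx < vy, va, vb>

   where the comparison is rebuilt with the scalar COND_EXPR's comparison
   code on the vectorized condition operands.  */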
5175 bool
5176 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5177 gimple *vec_stmt, tree reduc_def, int reduc_index,
5178 slp_tree slp_node)
5180 tree scalar_dest = NULL_TREE;
5181 tree vec_dest = NULL_TREE;
5182 tree cond_expr, then_clause, else_clause;
5183 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5184 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5185 tree comp_vectype = NULL_TREE;
5186 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5187 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5188 tree vec_compare, vec_cond_expr;
5189 tree new_temp;
5190 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5191 tree def;
5192 enum vect_def_type dt, dts[4];
5193 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5194 int ncopies;
5195 enum tree_code code;
5196 stmt_vec_info prev_stmt_info = NULL;
5197 int i, j;
5198 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5199 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5200 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5202 if (slp_node || PURE_SLP_STMT (stmt_info))
5203 ncopies = 1;
5204 else
5205 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5207 gcc_assert (ncopies >= 1);
5208 if (reduc_index && ncopies > 1)
5209 return false; /* FORNOW */
5211 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5212 return false;
5214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5215 return false;
5217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5218 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5219 && reduc_def))
5220 return false;
5222 /* FORNOW: not yet supported. */
5223 if (STMT_VINFO_LIVE_P (stmt_info))
5225 if (vect_print_dump_info (REPORT_DETAILS))
5226 fprintf (vect_dump, "value used after loop.");
5227 return false;
5230 /* Is vectorizable conditional operation? */
5231 if (!is_gimple_assign (stmt))
5232 return false;
5234 code = gimple_assign_rhs_code (stmt);
5236 if (code != COND_EXPR)
5237 return false;
5239 cond_expr = gimple_assign_rhs1 (stmt);
5240 then_clause = gimple_assign_rhs2 (stmt);
5241 else_clause = gimple_assign_rhs3 (stmt);
5243 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5244 &comp_vectype)
5245 || !comp_vectype)
5246 return false;
5248 if (TREE_CODE (then_clause) == SSA_NAME)
5250 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5251 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5252 &then_def_stmt, &def, &dt))
5253 return false;
5255 else if (TREE_CODE (then_clause) != INTEGER_CST
5256 && TREE_CODE (then_clause) != REAL_CST
5257 && TREE_CODE (then_clause) != FIXED_CST)
5258 return false;
5260 if (TREE_CODE (else_clause) == SSA_NAME)
5262 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5263 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5264 &else_def_stmt, &def, &dt))
5265 return false;
5267 else if (TREE_CODE (else_clause) != INTEGER_CST
5268 && TREE_CODE (else_clause) != REAL_CST
5269 && TREE_CODE (else_clause) != FIXED_CST)
5270 return false;
5272 if (!vec_stmt)
5274 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5275 return expand_vec_cond_expr_p (vectype, comp_vectype);
5278 /* Transform. */
5280 if (!slp_node)
5282 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5283 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5284 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5285 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5288 /* Handle def. */
5289 scalar_dest = gimple_assign_lhs (stmt);
5290 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5292 /* Handle cond expr. */
5293 for (j = 0; j < ncopies; j++)
5295 gimple new_stmt = NULL;
5296 if (j == 0)
5298 if (slp_node)
5300 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5301 VEC (slp_void_p, heap) *vec_defs;
5303 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5304 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5305 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5306 VEC_safe_push (tree, heap, ops, then_clause);
5307 VEC_safe_push (tree, heap, ops, else_clause);
5308 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5309 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5310 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5311 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5312 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5314 VEC_free (tree, heap, ops);
5315 VEC_free (slp_void_p, heap, vec_defs);
5317 else
5319 gimple gtemp;
5320 vec_cond_lhs =
5321 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5322 stmt, NULL);
5323 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5324 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5326 vec_cond_rhs =
5327 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5328 stmt, NULL);
5329 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5330 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5331 if (reduc_index == 1)
5332 vec_then_clause = reduc_def;
5333 else
5335 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5336 stmt, NULL);
5337 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5338 NULL, &gtemp, &def, &dts[2]);
5340 if (reduc_index == 2)
5341 vec_else_clause = reduc_def;
5342 else
5344 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5345 stmt, NULL);
5346 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5347 NULL, &gtemp, &def, &dts[3]);
5351 else
5353 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5354 VEC_pop (tree, vec_oprnds0));
5355 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5356 VEC_pop (tree, vec_oprnds1));
5357 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5358 VEC_pop (tree, vec_oprnds2));
5359 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5360 VEC_pop (tree, vec_oprnds3));
5363 if (!slp_node)
5365 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5366 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5367 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5368 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5371 /* Arguments are ready. Create the new vector stmt. */
5372 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5374 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5375 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5376 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5378 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5379 vec_cond_lhs, vec_cond_rhs);
5380 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5381 vec_compare, vec_then_clause, vec_else_clause);
5383 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5384 new_temp = make_ssa_name (vec_dest, new_stmt);
5385 gimple_assign_set_lhs (new_stmt, new_temp);
5386 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5387 if (slp_node)
5388 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5391 if (slp_node)
5392 continue;
5394 if (j == 0)
5395 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5396 else
5397 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5399 prev_stmt_info = vinfo_for_stmt (new_stmt);
5402 VEC_free (tree, heap, vec_oprnds0);
5403 VEC_free (tree, heap, vec_oprnds1);
5404 VEC_free (tree, heap, vec_oprnds2);
5405 VEC_free (tree, heap, vec_oprnds3);
5407 return true;
5411 /* Make sure the statement is vectorizable. */
5413 bool
5414 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5416 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5417 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5418 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5419 bool ok;
5420 tree scalar_type, vectype;
5421 gimple pattern_stmt;
5422 gimple_seq pattern_def_seq;
5424 if (vect_print_dump_info (REPORT_DETAILS))
5426 fprintf (vect_dump, "==> examining statement: ");
5427 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5430 if (gimple_has_volatile_ops (stmt))
5432 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5433 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5435 return false;
5438 /* Skip stmts that do not need to be vectorized. In loops this is expected
5439 to include:
5440 - the COND_EXPR which is the loop exit condition
5441 - any LABEL_EXPRs in the loop
5442 - computations that are used only for array indexing or loop control.
5443 In basic blocks we only analyze statements that are a part of some SLP
5444 instance, therefore, all the statements are relevant.
5446 Pattern statement needs to be analyzed instead of the original statement
5447 if the original statement is not relevant. Otherwise, we analyze both
5448 statements. In basic blocks we are called from some SLP instance
5449 traversal; we don't analyze pattern stmts here, since the pattern stmts
5450 will already be part of an SLP instance. */
5452 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5453 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5454 && !STMT_VINFO_LIVE_P (stmt_info))
5456 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5457 && pattern_stmt
5458 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5459 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5461 /* Analyze PATTERN_STMT instead of the original stmt. */
5462 stmt = pattern_stmt;
5463 stmt_info = vinfo_for_stmt (pattern_stmt);
5464 if (vect_print_dump_info (REPORT_DETAILS))
5466 fprintf (vect_dump, "==> examining pattern statement: ");
5467 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5470 else
5472 if (vect_print_dump_info (REPORT_DETAILS))
5473 fprintf (vect_dump, "irrelevant.");
5475 return true;
5478 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5479 && node == NULL
5480 && pattern_stmt
5481 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5482 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5484 /* Analyze PATTERN_STMT too. */
5485 if (vect_print_dump_info (REPORT_DETAILS))
5487 fprintf (vect_dump, "==> examining pattern statement: ");
5488 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5491 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5492 return false;
5495 if (is_pattern_stmt_p (stmt_info)
5496 && node == NULL
5497 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5499 gimple_stmt_iterator si;
5501 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5503 gimple pattern_def_stmt = gsi_stmt (si);
5504 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5505 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5507 /* Analyze def stmt of STMT if it's a pattern stmt. */
5508 if (vect_print_dump_info (REPORT_DETAILS))
5510 fprintf (vect_dump, "==> examining pattern def statement: ");
5511 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5514 if (!vect_analyze_stmt (pattern_def_stmt,
5515 need_to_vectorize, node))
5516 return false;
5521 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5523 case vect_internal_def:
5524 break;
5526 case vect_reduction_def:
5527 case vect_nested_cycle:
5528 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5529 || relevance == vect_used_in_outer_by_reduction
5530 || relevance == vect_unused_in_scope));
5531 break;
5533 case vect_induction_def:
5534 case vect_constant_def:
5535 case vect_external_def:
5536 case vect_unknown_def_type:
5537 default:
5538 gcc_unreachable ();
5541 if (bb_vinfo)
5543 gcc_assert (PURE_SLP_STMT (stmt_info));
5545 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5546 if (vect_print_dump_info (REPORT_DETAILS))
5548 fprintf (vect_dump, "get vectype for scalar type: ");
5549 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5552 vectype = get_vectype_for_scalar_type (scalar_type);
5553 if (!vectype)
5555 if (vect_print_dump_info (REPORT_DETAILS))
5557 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5558 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5560 return false;
5563 if (vect_print_dump_info (REPORT_DETAILS))
5565 fprintf (vect_dump, "vectype: ");
5566 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5569 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5572 if (STMT_VINFO_RELEVANT_P (stmt_info))
5574 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5575 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5576 *need_to_vectorize = true;
5579 ok = true;
5580 if (!bb_vinfo
5581 && (STMT_VINFO_RELEVANT_P (stmt_info)
5582 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5583 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5584 || vectorizable_shift (stmt, NULL, NULL, NULL)
5585 || vectorizable_operation (stmt, NULL, NULL, NULL)
5586 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5587 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5588 || vectorizable_call (stmt, NULL, NULL, NULL)
5589 || vectorizable_store (stmt, NULL, NULL, NULL)
5590 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5591 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5592 else
5594 if (bb_vinfo)
5595 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5596 || vectorizable_shift (stmt, NULL, NULL, node)
5597 || vectorizable_operation (stmt, NULL, NULL, node)
5598 || vectorizable_assignment (stmt, NULL, NULL, node)
5599 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5600 || vectorizable_call (stmt, NULL, NULL, node)
5601 || vectorizable_store (stmt, NULL, NULL, node)
5602 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5605 if (!ok)
5607 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5609 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5610 fprintf (vect_dump, "supported: ");
5611 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5614 return false;
5617 if (bb_vinfo)
5618 return true;
5620 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
5621 need extra handling, except for vectorizable reductions. */
5622 if (STMT_VINFO_LIVE_P (stmt_info)
5623 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5624 ok = vectorizable_live_operation (stmt, NULL, NULL);
5626 if (!ok)
5628 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5630 fprintf (vect_dump, "not vectorized: live stmt not ");
5631 fprintf (vect_dump, "supported: ");
5632 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5635 return false;
5638 return true;
5642 /* Function vect_transform_stmt.
5644 Create a vectorized stmt to replace STMT, and insert it at GSI. */
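/* Note added for illustration (not from the original source): the
   vectorizable_* routines used below are the same ones called during
   analysis in vect_analyze_stmt above.  There they were passed a NULL
   gimple_stmt_iterator and a NULL VEC_STMT, so they only checked that the
   operation is supported and recorded STMT_VINFO_TYPE; here they receive
   GSI and &VEC_STMT and actually emit the vector statements, with the
   dispatch driven by the STMT_VINFO_TYPE set during analysis.  */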
5646 bool
5647 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5648 bool *grouped_store, slp_tree slp_node,
5649 slp_instance slp_node_instance)
5651 bool is_store = false;
5652 gimple vec_stmt = NULL;
5653 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5654 bool done;
5656 switch (STMT_VINFO_TYPE (stmt_info))
5658 case type_demotion_vec_info_type:
5659 case type_promotion_vec_info_type:
5660 case type_conversion_vec_info_type:
5661 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5662 gcc_assert (done);
5663 break;
5665 case induc_vec_info_type:
5666 gcc_assert (!slp_node);
5667 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5668 gcc_assert (done);
5669 break;
5671 case shift_vec_info_type:
5672 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5673 gcc_assert (done);
5674 break;
5676 case op_vec_info_type:
5677 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5678 gcc_assert (done);
5679 break;
5681 case assignment_vec_info_type:
5682 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5683 gcc_assert (done);
5684 break;
5686 case load_vec_info_type:
5687 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5688 slp_node_instance);
5689 gcc_assert (done);
5690 break;
5692 case store_vec_info_type:
5693 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5694 gcc_assert (done);
5695 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5697 /* In case of interleaving, the whole chain is vectorized when the
5698 last store in the chain is reached. Store stmts before the last
5699 one are skipped, and their vec_stmt_info shouldn't be freed
5700 meanwhile. */
5701 *grouped_store = true;
5702 if (STMT_VINFO_VEC_STMT (stmt_info))
5703 is_store = true;
5705 else
5706 is_store = true;
5707 break;
5709 case condition_vec_info_type:
5710 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5711 gcc_assert (done);
5712 break;
5714 case call_vec_info_type:
5715 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5716 stmt = gsi_stmt (*gsi);
5717 break;
5719 case reduc_vec_info_type:
5720 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5721 gcc_assert (done);
5722 break;
5724 default:
5725 if (!STMT_VINFO_LIVE_P (stmt_info))
5727 if (vect_print_dump_info (REPORT_DETAILS))
5728 fprintf (vect_dump, "stmt not supported.");
5729 gcc_unreachable ();
5733 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5734 is being vectorized, but outside the immediately enclosing loop. */
5735 if (vec_stmt
5736 && STMT_VINFO_LOOP_VINFO (stmt_info)
5737 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5738 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5739 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5740 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5741 || STMT_VINFO_RELEVANT (stmt_info) ==
5742 vect_used_in_outer_by_reduction))
5744 struct loop *innerloop = LOOP_VINFO_LOOP (
5745 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5746 imm_use_iterator imm_iter;
5747 use_operand_p use_p;
5748 tree scalar_dest;
5749 gimple exit_phi;
5751 if (vect_print_dump_info (REPORT_DETAILS))
5752 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5754 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5755 (to be used when vectorizing outer-loop stmts that use the DEF of
5756 STMT). */
5757 if (gimple_code (stmt) == GIMPLE_PHI)
5758 scalar_dest = PHI_RESULT (stmt);
5759 else
5760 scalar_dest = gimple_assign_lhs (stmt);
5762 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5764 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5766 exit_phi = USE_STMT (use_p);
5767 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5772 /* Handle stmts whose DEF is used outside the loop-nest that is
5773 being vectorized. */
5774 if (STMT_VINFO_LIVE_P (stmt_info)
5775 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5777 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5778 gcc_assert (done);
5781 if (vec_stmt)
5782 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5784 return is_store;
5788 /* Remove a group of stores (for SLP or interleaving) and free their
5789 stmt_vec_info. */
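/* Illustrative example (not from the original source): for a group of four
   consecutive scalar stores

       a[i] = x0;  a[i+1] = x1;  a[i+2] = x2;  a[i+3] = x3;

   the corresponding vector store(s) have already been emitted, so this
   function walks the GROUP_NEXT_ELEMENT chain starting at FIRST_STMT and
   removes each now-dead scalar store, freeing its stmt_vec_info.  */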
5791 void
5792 vect_remove_stores (gimple first_stmt)
5794 gimple next = first_stmt;
5795 gimple tmp;
5796 gimple_stmt_iterator next_si;
5798 while (next)
5800 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5802 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5803 if (is_pattern_stmt_p (stmt_info))
5804 next = STMT_VINFO_RELATED_STMT (stmt_info);
5805 /* Free the attached stmt_vec_info and remove the stmt. */
5806 next_si = gsi_for_stmt (next);
5807 unlink_stmt_vdef (next);
5808 gsi_remove (&next_si, true);
5809 release_defs (next);
5810 free_stmt_vec_info (next);
5811 next = tmp;
5816 /* Function new_stmt_vec_info.
5818 Create and initialize a new stmt_vec_info struct for STMT. */
5820 stmt_vec_info
5821 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5822 bb_vec_info bb_vinfo)
5824 stmt_vec_info res;
5825 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5827 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5828 STMT_VINFO_STMT (res) = stmt;
5829 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5830 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5831 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5832 STMT_VINFO_LIVE_P (res) = false;
5833 STMT_VINFO_VECTYPE (res) = NULL;
5834 STMT_VINFO_VEC_STMT (res) = NULL;
5835 STMT_VINFO_VECTORIZABLE (res) = true;
5836 STMT_VINFO_IN_PATTERN_P (res) = false;
5837 STMT_VINFO_RELATED_STMT (res) = NULL;
5838 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5839 STMT_VINFO_DATA_REF (res) = NULL;
5841 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5842 STMT_VINFO_DR_OFFSET (res) = NULL;
5843 STMT_VINFO_DR_INIT (res) = NULL;
5844 STMT_VINFO_DR_STEP (res) = NULL;
5845 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5847 if (gimple_code (stmt) == GIMPLE_PHI
5848 && is_loop_header_bb_p (gimple_bb (stmt)))
5849 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5850 else
5851 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5853 STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
5854 STMT_SLP_TYPE (res) = loop_vect;
5855 GROUP_FIRST_ELEMENT (res) = NULL;
5856 GROUP_NEXT_ELEMENT (res) = NULL;
5857 GROUP_SIZE (res) = 0;
5858 GROUP_STORE_COUNT (res) = 0;
5859 GROUP_GAP (res) = 0;
5860 GROUP_SAME_DR_STMT (res) = NULL;
5861 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5863 return res;
5867 /* Create a vector for stmt_vec_info structs. */
5869 void
5870 init_stmt_vec_info_vec (void)
5872 gcc_assert (!stmt_vec_info_vec);
5873 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5877 /* Free the vector of stmt_vec_info structs. */
5879 void
5880 free_stmt_vec_info_vec (void)
5882 gcc_assert (stmt_vec_info_vec);
5883 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5887 /* Free stmt vectorization related info. */
5889 void
5890 free_stmt_vec_info (gimple stmt)
5892 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5894 if (!stmt_info)
5895 return;
5897 /* Check if this statement has a related "pattern stmt"
5898 (introduced by the vectorizer during the pattern recognition
5899 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5900 too. */
5901 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5903 stmt_vec_info patt_info
5904 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5905 if (patt_info)
5907 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5908 if (seq)
5910 gimple_stmt_iterator si;
5911 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5912 free_stmt_vec_info (gsi_stmt (si));
5914 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5918 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5919 set_vinfo_for_stmt (stmt, NULL);
5920 free (stmt_info);
5924 /* Function get_vectype_for_scalar_type_and_size.
5926 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5927 by the target. */
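/* Illustrative example (target dependent, not from the original source):
   with SCALAR_TYPE == int (SImode, 4 bytes) and SIZE == 16 this yields a
   4-unit vector type such as V4SI, provided the target supports a 16-byte
   integer vector mode; with SIZE == 0 the number of units is derived from
   targetm.vectorize.preferred_simd_mode (SImode) instead.  */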
5929 static tree
5930 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5932 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5933 enum machine_mode simd_mode;
5934 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5935 int nunits;
5936 tree vectype;
5938 if (nbytes == 0)
5939 return NULL_TREE;
5941 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5942 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5943 return NULL_TREE;
5945 /* We can't build a vector type of elements with alignment bigger than
5946 their size. */
5947 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5948 return NULL_TREE;
5950 /* For vector types of elements whose mode precision doesn't
5951 match their type's precision we use an element type of mode
5952 precision. The vectorization routines will have to make sure
5953 they support the proper result truncation/extension.
5954 We also make sure to build vector types with INTEGER_TYPE
5955 component type only. */
5956 if (INTEGRAL_TYPE_P (scalar_type)
5957 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5958 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5959 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5960 TYPE_UNSIGNED (scalar_type));
5962 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5963 When the component mode passes the above test simply use a type
5964 corresponding to that mode. The theory is that any use that
5965 would cause problems with this will disable vectorization anyway. */
5966 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5967 && !INTEGRAL_TYPE_P (scalar_type)
5968 && !POINTER_TYPE_P (scalar_type))
5969 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5971 /* If no size was supplied use the mode the target prefers. Otherwise
5972 look up a vector mode of the specified size. */
5973 if (size == 0)
5974 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5975 else
5976 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5977 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5978 if (nunits <= 1)
5979 return NULL_TREE;
5981 vectype = build_vector_type (scalar_type, nunits);
5982 if (vect_print_dump_info (REPORT_DETAILS))
5984 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5985 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5988 if (!vectype)
5989 return NULL_TREE;
5991 if (vect_print_dump_info (REPORT_DETAILS))
5993 fprintf (vect_dump, "vectype: ");
5994 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5997 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5998 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6000 if (vect_print_dump_info (REPORT_DETAILS))
6001 fprintf (vect_dump, "mode not supported by target.");
6002 return NULL_TREE;
6005 return vectype;
6008 unsigned int current_vector_size;
6010 /* Function get_vectype_for_scalar_type.
6012 Returns the vector type corresponding to SCALAR_TYPE as supported
6013 by the target. */
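/* Note added for illustration (not from the original source): the first
   successful call latches current_vector_size, so later calls pick vector
   types of the same size.  E.g. if the first statement produced a 16-byte
   V4SI, a later request for a char type yields V16QI rather than a vector
   of some other width (assuming the target supports that mode).  */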
6015 tree
6016 get_vectype_for_scalar_type (tree scalar_type)
6018 tree vectype;
6019 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6020 current_vector_size);
6021 if (vectype
6022 && current_vector_size == 0)
6023 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6024 return vectype;
6027 /* Function get_same_sized_vectype
6029 Returns a vector type corresponding to SCALAR_TYPE of size
6030 VECTOR_TYPE if supported by the target. */
6032 tree
6033 get_same_sized_vectype (tree scalar_type, tree vector_type)
6035 return get_vectype_for_scalar_type_and_size
6036 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6039 /* Function vect_is_simple_use.
6041 Input:
6042 LOOP_VINFO - the vect info of the loop that is being vectorized.
6043 BB_VINFO - the vect info of the basic block that is being vectorized.
6044 OPERAND - operand of STMT in the loop or bb.
6045 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6047 Returns whether a stmt with OPERAND can be vectorized.
6048 For loops, supportable operands are constants, loop invariants, and operands
6049 that are defined by the current iteration of the loop. Unsupportable
6050 operands are those that are defined by a previous iteration of the loop (as
6051 is the case in reduction/induction computations).
6052 For basic blocks, supportable operands are constants and bb invariants.
6053 For now, operands defined outside the basic block are not supported. */
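/* Illustrative classification (not from the original source): a literal
   constant operand yields vect_constant_def; an SSA name defined before
   the loop (or outside the basic block) yields vect_external_def; an SSA
   name defined by a statement inside the loop returns whatever
   STMT_VINFO_DEF_TYPE was recorded for that statement, e.g.
   vect_internal_def for an ordinary computation, or vect_induction_def /
   vect_reduction_def for loop-header PHIs classified earlier.  */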
6055 bool
6056 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6057 bb_vec_info bb_vinfo, gimple *def_stmt,
6058 tree *def, enum vect_def_type *dt)
6060 basic_block bb;
6061 stmt_vec_info stmt_vinfo;
6062 struct loop *loop = NULL;
6064 if (loop_vinfo)
6065 loop = LOOP_VINFO_LOOP (loop_vinfo);
6067 *def_stmt = NULL;
6068 *def = NULL_TREE;
6070 if (vect_print_dump_info (REPORT_DETAILS))
6072 fprintf (vect_dump, "vect_is_simple_use: operand ");
6073 print_generic_expr (vect_dump, operand, TDF_SLIM);
6076 if (CONSTANT_CLASS_P (operand))
6078 *dt = vect_constant_def;
6079 return true;
6082 if (is_gimple_min_invariant (operand))
6084 *def = operand;
6085 *dt = vect_external_def;
6086 return true;
6089 if (TREE_CODE (operand) == PAREN_EXPR)
6091 if (vect_print_dump_info (REPORT_DETAILS))
6092 fprintf (vect_dump, "non-associatable copy.");
6093 operand = TREE_OPERAND (operand, 0);
6096 if (TREE_CODE (operand) != SSA_NAME)
6098 if (vect_print_dump_info (REPORT_DETAILS))
6099 fprintf (vect_dump, "not ssa-name.");
6100 return false;
6103 *def_stmt = SSA_NAME_DEF_STMT (operand);
6104 if (*def_stmt == NULL)
6106 if (vect_print_dump_info (REPORT_DETAILS))
6107 fprintf (vect_dump, "no def_stmt.");
6108 return false;
6111 if (vect_print_dump_info (REPORT_DETAILS))
6113 fprintf (vect_dump, "def_stmt: ");
6114 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
6117 /* An empty stmt is expected only in the case of a function argument
6118 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
6119 if (gimple_nop_p (*def_stmt))
6121 *def = operand;
6122 *dt = vect_external_def;
6123 return true;
6126 bb = gimple_bb (*def_stmt);
6128 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6129 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6130 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6131 *dt = vect_external_def;
6132 else
6134 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6135 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6138 if (*dt == vect_unknown_def_type
6139 || (stmt
6140 && *dt == vect_double_reduction_def
6141 && gimple_code (stmt) != GIMPLE_PHI))
6143 if (vect_print_dump_info (REPORT_DETAILS))
6144 fprintf (vect_dump, "Unsupported pattern.");
6145 return false;
6148 if (vect_print_dump_info (REPORT_DETAILS))
6149 fprintf (vect_dump, "type of def: %d.",*dt);
6151 switch (gimple_code (*def_stmt))
6153 case GIMPLE_PHI:
6154 *def = gimple_phi_result (*def_stmt);
6155 break;
6157 case GIMPLE_ASSIGN:
6158 *def = gimple_assign_lhs (*def_stmt);
6159 break;
6161 case GIMPLE_CALL:
6162 *def = gimple_call_lhs (*def_stmt);
6163 if (*def != NULL)
6164 break;
6165 /* FALLTHRU */
6166 default:
6167 if (vect_print_dump_info (REPORT_DETAILS))
6168 fprintf (vect_dump, "unsupported defining stmt: ");
6169 return false;
6172 return true;
6175 /* Function vect_is_simple_use_1.
6177 Same as vect_is_simple_use but also determines the vector operand
6178 type of OPERAND and stores it to *VECTYPE. If the definition of
6179 OPERAND is vect_uninitialized_def, vect_constant_def or
6180 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6181 is responsible for computing the best suited vector type for the
6182 scalar operand. */
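/* Illustrative note (not from the original source): for a def classified
   as internal, induction, reduction, double-reduction or nested-cycle the
   vectype recorded on the defining statement (or on its pattern statement)
   is returned in *VECTYPE; for constant, external or uninitialized defs
   *VECTYPE is NULL_TREE and the caller typically derives a type from the
   other operands of the use statement.  */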
6184 bool
6185 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6186 bb_vec_info bb_vinfo, gimple *def_stmt,
6187 tree *def, enum vect_def_type *dt, tree *vectype)
6189 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6190 def, dt))
6191 return false;
6193 /* Now get a vector type if the def is internal, otherwise supply
6194 NULL_TREE and leave it up to the caller to figure out a proper
6195 type for the use stmt. */
6196 if (*dt == vect_internal_def
6197 || *dt == vect_induction_def
6198 || *dt == vect_reduction_def
6199 || *dt == vect_double_reduction_def
6200 || *dt == vect_nested_cycle)
6202 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6204 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6205 && !STMT_VINFO_RELEVANT (stmt_info)
6206 && !STMT_VINFO_LIVE_P (stmt_info))
6207 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6209 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6210 gcc_assert (*vectype != NULL_TREE);
6212 else if (*dt == vect_uninitialized_def
6213 || *dt == vect_constant_def
6214 || *dt == vect_external_def)
6215 *vectype = NULL_TREE;
6216 else
6217 gcc_unreachable ();
6219 return true;
6223 /* Function supportable_widening_operation
6225 Check whether an operation represented by the code CODE is a
6226 widening operation that is supported by the target platform in
6227 vector form (i.e., when operating on arguments of type VECTYPE_IN
6228 producing a result of type VECTYPE_OUT).
6230 Widening operations we currently support are NOP (CONVERT), FLOAT,
6231 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these
6232 operations are supported by the target platform either directly
6233 (via vector tree-codes), or via target builtins.
6235 Output:
6236 - CODE1 and CODE2 are codes of vector operations to be used when
6237 vectorizing the operation, if available.
6238 - MULTI_STEP_CVT determines the number of required intermediate steps in
6239 case of multi-step conversion (like char->short->int - in that case
6240 MULTI_STEP_CVT will be 1).
6241 - INTERM_TYPES contains the intermediate type required to perform the
6242 widening operation (short in the above example). */
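/* Illustrative example (assuming a target with 16-byte vectors, not taken
   from the original source): for the widening multiply

       short_res[i] = (short) qi_a[i] * (short) qi_b[i];

   VECTYPE_IN is V16QI and VECTYPE_OUT is V8HI; CODE1/CODE2 become
   VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR (or the EVEN/ODD pair
   when the result is used only in a reduction), each consuming the two
   V16QI operands and producing one V8HI vector, so the sixteen widened
   products end up split over two V8HI vectors and MULTI_STEP_CVT is 0.  */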
6244 bool
6245 supportable_widening_operation (enum tree_code code, gimple stmt,
6246 tree vectype_out, tree vectype_in,
6247 enum tree_code *code1, enum tree_code *code2,
6248 int *multi_step_cvt,
6249 VEC (tree, heap) **interm_types)
6251 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6252 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6253 struct loop *vect_loop = NULL;
6254 enum machine_mode vec_mode;
6255 enum insn_code icode1, icode2;
6256 optab optab1, optab2;
6257 tree vectype = vectype_in;
6258 tree wide_vectype = vectype_out;
6259 enum tree_code c1, c2;
6260 int i;
6261 tree prev_type, intermediate_type;
6262 enum machine_mode intermediate_mode, prev_mode;
6263 optab optab3, optab4;
6265 *multi_step_cvt = 0;
6266 if (loop_info)
6267 vect_loop = LOOP_VINFO_LOOP (loop_info);
6269 switch (code)
6271 case WIDEN_MULT_EXPR:
6272 /* The result of a vectorized widening operation usually requires
6273 two vectors (because the widened results do not fit into one vector).
6274 The generated vector results would normally be expected to be
6275 generated in the same order as in the original scalar computation,
6276 i.e. if 8 results are generated in each vector iteration, they are
6277 to be organized as follows:
6278 vect1: [res1,res2,res3,res4],
6279 vect2: [res5,res6,res7,res8].
6281 However, in the special case that the result of the widening
6282 operation is used in a reduction computation only, the order doesn't
6283 matter (because when vectorizing a reduction we change the order of
6284 the computation). Some targets can take advantage of this and
6285 generate more efficient code. For example, targets like Altivec,
6286 that support widen_mult using a sequence of {mult_even,mult_odd}
6287 generate the following vectors:
6288 vect1: [res1,res3,res5,res7],
6289 vect2: [res2,res4,res6,res8].
6291 When vectorizing outer-loops, we execute the inner-loop sequentially
6292 (each vectorized inner-loop iteration contributes to VF outer-loop
6293 iterations in parallel). We therefore don't allow changing the
6294 order of the computation in the inner-loop during outer-loop
6295 vectorization. */
6296 /* TODO: Another case in which order doesn't *really* matter is when we
6297 widen and then contract again, e.g. (short)((int)x * y >> 8).
6298 Normally, pack_trunc performs an even/odd permute, whereas the
6299 repack from an even/odd expansion would be an interleave, which
6300 would be significantly simpler for e.g. AVX2. */
6301 /* In any case, in order to avoid duplicating the code below, recurse
6302 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6303 are properly set up for the caller. If we fail, we'll continue with
6304 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6305 if (vect_loop
6306 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6307 && !nested_in_vect_loop_p (vect_loop, stmt)
6308 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6309 stmt, vectype_out, vectype_in,
6310 code1, code2, multi_step_cvt,
6311 interm_types))
6312 return true;
6313 c1 = VEC_WIDEN_MULT_LO_EXPR;
6314 c2 = VEC_WIDEN_MULT_HI_EXPR;
6315 break;
6317 case VEC_WIDEN_MULT_EVEN_EXPR:
6318 /* Support the recursion induced just above. */
6319 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6320 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6321 break;
6323 case WIDEN_LSHIFT_EXPR:
6324 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6325 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6326 break;
6328 CASE_CONVERT:
6329 c1 = VEC_UNPACK_LO_EXPR;
6330 c2 = VEC_UNPACK_HI_EXPR;
6331 break;
6333 case FLOAT_EXPR:
6334 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6335 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6336 break;
6338 case FIX_TRUNC_EXPR:
6339 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6340 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6341 computing the operation. */
6342 return false;
6344 default:
6345 gcc_unreachable ();
6348 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6350 enum tree_code ctmp = c1;
6351 c1 = c2;
6352 c2 = ctmp;
6355 if (code == FIX_TRUNC_EXPR)
6357 /* The signedness is determined from output operand. */
6358 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6359 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6361 else
6363 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6364 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6367 if (!optab1 || !optab2)
6368 return false;
6370 vec_mode = TYPE_MODE (vectype);
6371 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6372 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6373 return false;
6375 *code1 = c1;
6376 *code2 = c2;
6378 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6379 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6380 return true;
6382 /* Check if it's a multi-step conversion that can be done using intermediate
6383 types. */
6385 prev_type = vectype;
6386 prev_mode = vec_mode;
6388 if (!CONVERT_EXPR_CODE_P (code))
6389 return false;
6391 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6392 intermediate steps in the promotion sequence. We try
6393 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6394 not. */
6395 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6396 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6398 intermediate_mode = insn_data[icode1].operand[0].mode;
6399 intermediate_type
6400 = lang_hooks.types.type_for_mode (intermediate_mode,
6401 TYPE_UNSIGNED (prev_type));
6402 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6403 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6405 if (!optab3 || !optab4
6406 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6407 || insn_data[icode1].operand[0].mode != intermediate_mode
6408 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6409 || insn_data[icode2].operand[0].mode != intermediate_mode
6410 || ((icode1 = optab_handler (optab3, intermediate_mode))
6411 == CODE_FOR_nothing)
6412 || ((icode2 = optab_handler (optab4, intermediate_mode))
6413 == CODE_FOR_nothing))
6414 break;
6416 VEC_quick_push (tree, *interm_types, intermediate_type);
6417 (*multi_step_cvt)++;
6419 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6420 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6421 return true;
6423 prev_type = intermediate_type;
6424 prev_mode = intermediate_mode;
6427 VEC_free (tree, heap, *interm_types);
6428 return false;
6432 /* Function supportable_narrowing_operation
6434 Check whether an operation represented by the code CODE is a
6435 narrowing operation that is supported by the target platform in
6436 vector form (i.e., when operating on arguments of type VECTYPE_IN
6437 and producing a result of type VECTYPE_OUT).
6439 Narrowing operations we currently support are NOP (CONVERT) and
6440 FIX_TRUNC. This function checks if these operations are supported by
6441 the target platform directly via vector tree-codes.
6443 Output:
6444 - CODE1 is the code of a vector operation to be used when
6445 vectorizing the operation, if available.
6446 - MULTI_STEP_CVT determines the number of required intermediate steps in
6447 case of multi-step conversion (like int->short->char - in that case
6448 MULTI_STEP_CVT will be 1).
6449 - INTERM_TYPES contains the intermediate type required to perform the
6450 narrowing operation (short in the above example). */
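/* Illustrative example (assuming a target with 16-byte vectors, not taken
   from the original source): narrowing int to char, i.e. VECTYPE_IN == V4SI
   and VECTYPE_OUT == V16QI, takes two steps: VEC_PACK_TRUNC_EXPR first
   packs pairs of V4SI vectors into V8HI, then pairs of V8HI into V16QI.
   In that case CODE1 == VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1 and
   INTERM_TYPES holds the V8HI intermediate type.  */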
6452 bool
6453 supportable_narrowing_operation (enum tree_code code,
6454 tree vectype_out, tree vectype_in,
6455 enum tree_code *code1, int *multi_step_cvt,
6456 VEC (tree, heap) **interm_types)
6458 enum machine_mode vec_mode;
6459 enum insn_code icode1;
6460 optab optab1, interm_optab;
6461 tree vectype = vectype_in;
6462 tree narrow_vectype = vectype_out;
6463 enum tree_code c1;
6464 tree intermediate_type;
6465 enum machine_mode intermediate_mode, prev_mode;
6466 int i;
6467 bool uns;
6469 *multi_step_cvt = 0;
6470 switch (code)
6472 CASE_CONVERT:
6473 c1 = VEC_PACK_TRUNC_EXPR;
6474 break;
6476 case FIX_TRUNC_EXPR:
6477 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6478 break;
6480 case FLOAT_EXPR:
6481 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6482 tree code and optabs used for computing the operation. */
6483 return false;
6485 default:
6486 gcc_unreachable ();
6489 if (code == FIX_TRUNC_EXPR)
6490 /* The signedness is determined from output operand. */
6491 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6492 else
6493 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6495 if (!optab1)
6496 return false;
6498 vec_mode = TYPE_MODE (vectype);
6499 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6500 return false;
6502 *code1 = c1;
6504 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6505 return true;
6507 /* Check if it's a multi-step conversion that can be done using intermediate
6508 types. */
6509 prev_mode = vec_mode;
6510 if (code == FIX_TRUNC_EXPR)
6511 uns = TYPE_UNSIGNED (vectype_out);
6512 else
6513 uns = TYPE_UNSIGNED (vectype);
6515 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6516 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6517 costly than signed. */
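/* Illustrative rationale (not from the original source): when narrowing,
   e.g., float to unsigned char in several steps, every value in the final
   unsigned char range is also representable in a signed intermediate
   integer type, so the usually cheaper signed float-to-integer conversion
   can be used for the first step before the packing steps below.  */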
6518 if (code == FIX_TRUNC_EXPR && uns)
6520 enum insn_code icode2;
6522 intermediate_type
6523 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6524 interm_optab
6525 = optab_for_tree_code (c1, intermediate_type, optab_default);
6526 if (interm_optab != unknown_optab
6527 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6528 && insn_data[icode1].operand[0].mode
6529 == insn_data[icode2].operand[0].mode)
6531 uns = false;
6532 optab1 = interm_optab;
6533 icode1 = icode2;
6537 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6538 intermediate steps in the narrowing sequence. We try
6539 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6540 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6541 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6543 intermediate_mode = insn_data[icode1].operand[0].mode;
6544 intermediate_type
6545 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6546 interm_optab
6547 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6548 optab_default);
6549 if (!interm_optab
6550 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6551 || insn_data[icode1].operand[0].mode != intermediate_mode
6552 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6553 == CODE_FOR_nothing))
6554 break;
6556 VEC_quick_push (tree, *interm_types, intermediate_type);
6557 (*multi_step_cvt)++;
6559 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6560 return true;
6562 prev_mode = intermediate_mode;
6563 optab1 = interm_optab;
6566 VEC_free (tree, heap, *interm_types);
6567 return false;