[official-gcc.git] / gcc / tree-vect-stmts.c
blob 491b2391f88265dea9c930d2c60dc36ab2bcf173 (2012-08-10, Richard Guenther <rguenther@suse.de>)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "optabs.h"
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
39 #include "dumpfile.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
49 return STMT_VINFO_VECTYPE (stmt_info);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
57 gimple stmt = STMT_VINFO_STMT (stmt_info);
58 basic_block bb = gimple_bb (stmt);
59 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60 struct loop* loop;
62 if (!loop_vinfo)
63 return false;
65 loop = LOOP_VINFO_LOOP (loop_vinfo);
67 return (bb->loop_father == loop->inner);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
76 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 int misalign, enum vect_cost_model_location where)
79 if (body_cost_vec)
81 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
82 add_stmt_info_to_vec (body_cost_vec, count, kind,
83 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
84 misalign);
85 return (unsigned)
86 (builtin_vectorization_cost (kind, vectype, misalign) * count);
89 else
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93 void *target_cost_data;
95 if (loop_vinfo)
96 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97 else
98 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
100 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
101 misalign, where);
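/* A minimal standalone sketch of the preliminary estimate returned above
   when a cost vector is supplied: the target's per-statement cost for KIND
   is simply scaled by COUNT, and the exact cost is settled later when the
   recorded vector is fed to the target cost model.  The helper and its
   numbers are purely illustrative.  */

static unsigned
example_preliminary_cost (unsigned per_stmt_cost, unsigned count)
{
  /* Mirrors builtin_vectorization_cost (kind, vectype, misalign) * count.  */
  return per_stmt_cost * count;
}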
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
184 enum vect_relevant relevant, bool live_p,
185 bool used_in_pattern)
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190 gimple pattern_stmt;
192 if (vect_print_dump_info (REPORT_DETAILS))
193 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
195 /* If this stmt is an original stmt in a pattern, we might need to mark its
196 related pattern stmt instead of the original stmt. However, such stmts
197 may have their own uses that are not in any pattern; in such cases the
198 stmt itself should be marked. */
199 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
201 bool found = false;
202 if (!used_in_pattern)
204 imm_use_iterator imm_iter;
205 use_operand_p use_p;
206 gimple use_stmt;
207 tree lhs;
208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
209 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
211 if (is_gimple_assign (stmt))
212 lhs = gimple_assign_lhs (stmt);
213 else
214 lhs = gimple_call_lhs (stmt);
216 /* This is an out-of-pattern use; if LHS has other uses that are
217 pattern uses, we should mark the stmt itself, and not the pattern
218 stmt. */
219 if (TREE_CODE (lhs) == SSA_NAME)
220 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
222 if (is_gimple_debug (USE_STMT (use_p)))
223 continue;
224 use_stmt = USE_STMT (use_p);
226 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
227 continue;
229 if (vinfo_for_stmt (use_stmt)
230 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
232 found = true;
233 break;
238 if (!found)
240 /* This is the last stmt in a sequence that was detected as a
241 pattern that can potentially be vectorized. Don't mark the stmt
242 as relevant/live because it's not going to be vectorized.
243 Instead mark the pattern-stmt that replaces it. */
245 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
247 if (vect_print_dump_info (REPORT_DETAILS))
248 fprintf (vect_dump, "last stmt in pattern. don't mark"
249 " relevant/live.");
250 stmt_info = vinfo_for_stmt (pattern_stmt);
251 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
252 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
253 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
254 stmt = pattern_stmt;
258 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
259 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
260 STMT_VINFO_RELEVANT (stmt_info) = relevant;
262 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
263 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
265 if (vect_print_dump_info (REPORT_DETAILS))
266 fprintf (vect_dump, "already marked relevant/live.");
267 return;
270 VEC_safe_push (gimple, heap, *worklist, stmt);
274 /* Function vect_stmt_relevant_p.
276 Return true if STMT in loop that is represented by LOOP_VINFO is
277 "relevant for vectorization".
279 A stmt is considered "relevant for vectorization" if:
280 - it has uses outside the loop.
281 - it has vdefs (it alters memory).
282 - it is a control stmt in the loop (except for the exit condition).
284 CHECKME: what other side effects would the vectorizer allow? */
286 static bool
287 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
288 enum vect_relevant *relevant, bool *live_p)
290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
291 ssa_op_iter op_iter;
292 imm_use_iterator imm_iter;
293 use_operand_p use_p;
294 def_operand_p def_p;
296 *relevant = vect_unused_in_scope;
297 *live_p = false;
299 /* cond stmt other than loop exit cond. */
300 if (is_ctrl_stmt (stmt)
301 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
302 != loop_exit_ctrl_vec_info_type)
303 *relevant = vect_used_in_scope;
305 /* changing memory. */
306 if (gimple_code (stmt) != GIMPLE_PHI)
307 if (gimple_vdef (stmt))
309 if (vect_print_dump_info (REPORT_DETAILS))
310 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
311 *relevant = vect_used_in_scope;
314 /* uses outside the loop. */
315 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
317 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
319 basic_block bb = gimple_bb (USE_STMT (use_p));
320 if (!flow_bb_inside_loop_p (loop, bb))
322 if (vect_print_dump_info (REPORT_DETAILS))
323 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
325 if (is_gimple_debug (USE_STMT (use_p)))
326 continue;
328 /* We expect all such uses to be in the loop exit phis
329 (because of loop closed form) */
330 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
331 gcc_assert (bb == single_exit (loop)->dest);
333 *live_p = true;
338 return (*live_p || *relevant);
342 /* Function exist_non_indexing_operands_for_use_p
344 USE is one of the uses attached to STMT. Check if USE is
345 used in STMT for anything other than indexing an array. */
347 static bool
348 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
350 tree operand;
351 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
353 /* USE corresponds to some operand in STMT. If there is no data
354 reference in STMT, then any operand that corresponds to USE
355 is not indexing an array. */
356 if (!STMT_VINFO_DATA_REF (stmt_info))
357 return true;
359 /* STMT has a data_ref. FORNOW this means that it's of one of
360 the following forms:
361 -1- ARRAY_REF = var
362 -2- var = ARRAY_REF
363 (This should have been verified in analyze_data_refs).
365 'var' in the second case corresponds to a def, not a use,
366 so USE cannot correspond to any operands that are not used
367 for array indexing.
369 Therefore, all we need to check is if STMT falls into the
370 first case, and whether var corresponds to USE. */
372 if (!gimple_assign_copy_p (stmt))
373 return false;
374 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
375 return false;
376 operand = gimple_assign_rhs1 (stmt);
377 if (TREE_CODE (operand) != SSA_NAME)
378 return false;
380 if (operand == use)
381 return true;
383 return false;
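/* A minimal scalar illustration of the two forms listed above.  The helper
   is purely illustrative: in the first statement 'x' is a non-indexing use
   (it is the stored value), while 'i' only participates in address
   computation in both statements.  */

static void
example_indexing_forms (int *a, int i, int x, int *res)
{
  a[i] = x;      /* -1-  ARRAY_REF = var  */
  *res = a[i];   /* -2-  var = ARRAY_REF  */
}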
388 /* Function process_use.
390 Inputs:
391 - a USE in STMT in a loop represented by LOOP_VINFO
392 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
393 that defined USE. This is done by calling mark_relevant and passing it
394 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
395 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
396 be performed.
398 Outputs:
399 Generally, LIVE_P and RELEVANT are used to define the liveness and
400 relevance info of the DEF_STMT of this USE:
401 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
402 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
403 Exceptions:
404 - case 1: If USE is used only for address computations (e.g. array indexing),
405 which does not need to be directly vectorized, then the liveness/relevance
406 of the respective DEF_STMT is left unchanged.
407 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
408 skip DEF_STMT because it has already been processed.
409 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
410 be modified accordingly.
412 Return true if everything is as expected. Return false otherwise. */
414 static bool
415 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
416 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
417 bool force)
419 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
420 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
421 stmt_vec_info dstmt_vinfo;
422 basic_block bb, def_bb;
423 tree def;
424 gimple def_stmt;
425 enum vect_def_type dt;
427 /* case 1: we are only interested in uses that need to be vectorized. Uses
428 that are used for address computation are not considered relevant. */
429 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
430 return true;
432 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
435 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
436 return false;
439 if (!def_stmt || gimple_nop_p (def_stmt))
440 return true;
442 def_bb = gimple_bb (def_stmt);
443 if (!flow_bb_inside_loop_p (loop, def_bb))
445 if (vect_print_dump_info (REPORT_DETAILS))
446 fprintf (vect_dump, "def_stmt is out of loop.");
447 return true;
450 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
451 DEF_STMT must have already been processed, because this should be the
452 only way that STMT, which is a reduction-phi, was put in the worklist,
453 as there should be no other uses for DEF_STMT in the loop. So we just
454 check that everything is as expected, and we are done. */
455 dstmt_vinfo = vinfo_for_stmt (def_stmt);
456 bb = gimple_bb (stmt);
457 if (gimple_code (stmt) == GIMPLE_PHI
458 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
459 && gimple_code (def_stmt) != GIMPLE_PHI
460 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
461 && bb->loop_father == def_bb->loop_father)
463 if (vect_print_dump_info (REPORT_DETAILS))
464 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
465 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
466 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
467 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
468 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
469 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
470 return true;
473 /* case 3a: outer-loop stmt defining an inner-loop stmt:
474 outer-loop-header-bb:
475 d = def_stmt
476 inner-loop:
477 stmt # use (d)
478 outer-loop-tail-bb:
479 ... */
480 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
482 if (vect_print_dump_info (REPORT_DETAILS))
483 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
485 switch (relevant)
487 case vect_unused_in_scope:
488 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
489 vect_used_in_scope : vect_unused_in_scope;
490 break;
492 case vect_used_in_outer_by_reduction:
493 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
494 relevant = vect_used_by_reduction;
495 break;
497 case vect_used_in_outer:
498 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
499 relevant = vect_used_in_scope;
500 break;
502 case vect_used_in_scope:
503 break;
505 default:
506 gcc_unreachable ();
510 /* case 3b: inner-loop stmt defining an outer-loop stmt:
511 outer-loop-header-bb:
513 inner-loop:
514 d = def_stmt
515 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
516 stmt # use (d) */
517 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
519 if (vect_print_dump_info (REPORT_DETAILS))
520 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
522 switch (relevant)
524 case vect_unused_in_scope:
525 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
526 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
527 vect_used_in_outer_by_reduction : vect_unused_in_scope;
528 break;
530 case vect_used_by_reduction:
531 relevant = vect_used_in_outer_by_reduction;
532 break;
534 case vect_used_in_scope:
535 relevant = vect_used_in_outer;
536 break;
538 default:
539 gcc_unreachable ();
543 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
544 is_pattern_stmt_p (stmt_vinfo));
545 return true;
549 /* Function vect_mark_stmts_to_be_vectorized.
551 Not all stmts in the loop need to be vectorized. For example:
553 for i...
554 for j...
555 1. T0 = i + j
556 2. T1 = a[T0]
558 3. j = j + 1
560 Stmts 1 and 3 do not need to be vectorized, because loop control and
561 addressing of vectorized data-refs are handled differently.
563 This pass detects such stmts. */
565 bool
566 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
568 VEC(gimple,heap) *worklist;
569 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
570 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
571 unsigned int nbbs = loop->num_nodes;
572 gimple_stmt_iterator si;
573 gimple stmt;
574 unsigned int i;
575 stmt_vec_info stmt_vinfo;
576 basic_block bb;
577 gimple phi;
578 bool live_p;
579 enum vect_relevant relevant, tmp_relevant;
580 enum vect_def_type def_type;
582 if (vect_print_dump_info (REPORT_DETAILS))
583 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
585 worklist = VEC_alloc (gimple, heap, 64);
587 /* 1. Init worklist. */
588 for (i = 0; i < nbbs; i++)
590 bb = bbs[i];
591 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
593 phi = gsi_stmt (si);
594 if (vect_print_dump_info (REPORT_DETAILS))
596 fprintf (vect_dump, "init: phi relevant? ");
597 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
600 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
601 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
603 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
605 stmt = gsi_stmt (si);
606 if (vect_print_dump_info (REPORT_DETAILS))
608 fprintf (vect_dump, "init: stmt relevant? ");
609 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
612 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
613 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
617 /* 2. Process_worklist */
618 while (VEC_length (gimple, worklist) > 0)
620 use_operand_p use_p;
621 ssa_op_iter iter;
623 stmt = VEC_pop (gimple, worklist);
624 if (vect_print_dump_info (REPORT_DETAILS))
626 fprintf (vect_dump, "worklist: examine stmt: ");
627 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
630 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
631 (DEF_STMT) as relevant/irrelevant and live/dead according to the
632 liveness and relevance properties of STMT. */
633 stmt_vinfo = vinfo_for_stmt (stmt);
634 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
635 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
637 /* Generally, the liveness and relevance properties of STMT are
638 propagated as is to the DEF_STMTs of its USEs:
639 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
640 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
642 One exception is when STMT has been identified as defining a reduction
643 variable; in this case we set the liveness/relevance as follows:
644 live_p = false
645 relevant = vect_used_by_reduction
646 This is because we distinguish between two kinds of relevant stmts -
647 those that are used by a reduction computation, and those that are
648 (also) used by a regular computation. This allows us later on to
649 identify stmts that are used solely by a reduction, and therefore the
650 order of the results that they produce does not have to be kept. */
652 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
653 tmp_relevant = relevant;
654 switch (def_type)
656 case vect_reduction_def:
657 switch (tmp_relevant)
659 case vect_unused_in_scope:
660 relevant = vect_used_by_reduction;
661 break;
663 case vect_used_by_reduction:
664 if (gimple_code (stmt) == GIMPLE_PHI)
665 break;
666 /* fall through */
668 default:
669 if (vect_print_dump_info (REPORT_DETAILS))
670 fprintf (vect_dump, "unsupported use of reduction.");
672 VEC_free (gimple, heap, worklist);
673 return false;
676 live_p = false;
677 break;
679 case vect_nested_cycle:
680 if (tmp_relevant != vect_unused_in_scope
681 && tmp_relevant != vect_used_in_outer_by_reduction
682 && tmp_relevant != vect_used_in_outer)
684 if (vect_print_dump_info (REPORT_DETAILS))
685 fprintf (vect_dump, "unsupported use of nested cycle.");
687 VEC_free (gimple, heap, worklist);
688 return false;
691 live_p = false;
692 break;
694 case vect_double_reduction_def:
695 if (tmp_relevant != vect_unused_in_scope
696 && tmp_relevant != vect_used_by_reduction)
698 if (vect_print_dump_info (REPORT_DETAILS))
699 fprintf (vect_dump, "unsupported use of double reduction.");
701 VEC_free (gimple, heap, worklist);
702 return false;
705 live_p = false;
706 break;
708 default:
709 break;
712 if (is_pattern_stmt_p (stmt_vinfo))
714 /* Pattern statements are not inserted into the code, so
715 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
716 have to scan the RHS or function arguments instead. */
717 if (is_gimple_assign (stmt))
719 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
720 tree op = gimple_assign_rhs1 (stmt);
722 i = 1;
723 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
725 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
726 live_p, relevant, &worklist, false)
727 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
728 live_p, relevant, &worklist, false))
730 VEC_free (gimple, heap, worklist);
731 return false;
733 i = 2;
735 for (; i < gimple_num_ops (stmt); i++)
737 op = gimple_op (stmt, i);
738 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
739 &worklist, false))
741 VEC_free (gimple, heap, worklist);
742 return false;
746 else if (is_gimple_call (stmt))
748 for (i = 0; i < gimple_call_num_args (stmt); i++)
750 tree arg = gimple_call_arg (stmt, i);
751 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
752 &worklist, false))
754 VEC_free (gimple, heap, worklist);
755 return false;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
765 &worklist, false))
767 VEC_free (gimple, heap, worklist);
768 return false;
772 if (STMT_VINFO_GATHER_P (stmt_vinfo))
774 tree off;
775 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
776 gcc_assert (decl);
777 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
778 &worklist, true))
780 VEC_free (gimple, heap, worklist);
781 return false;
784 } /* while worklist */
786 VEC_free (gimple, heap, worklist);
787 return true;
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 void
798 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
799 enum vect_def_type *dt,
800 stmt_vector_for_cost *prologue_cost_vec,
801 stmt_vector_for_cost *body_cost_vec)
803 int i;
804 int inside_cost = 0, prologue_cost = 0;
806 /* The SLP costs were already calculated during SLP tree build. */
807 if (PURE_SLP_STMT (stmt_info))
808 return;
810 /* FORNOW: Assuming maximum 2 args per stmt. */
811 for (i = 0; i < 2; i++)
812 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
813 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
814 stmt_info, 0, vect_prologue);
816 /* Pass the inside-of-loop statements to the target-specific cost model. */
817 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
818 stmt_info, 0, vect_body);
820 if (vect_print_dump_info (REPORT_COST))
821 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
822 "prologue_cost = %d .", inside_cost, prologue_cost);
826 /* Model cost for type demotion and promotion operations. PWR is normally
827 zero for single-step promotions and demotions. It will be one if
828 two-step promotion/demotion is required, and so on. Each additional
829 step doubles the number of instructions required. */
831 static void
832 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
833 enum vect_def_type *dt, int pwr)
835 int i, tmp;
836 int inside_cost = 0, prologue_cost = 0;
837 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
838 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
839 void *target_cost_data;
841 /* The SLP costs were already calculated during SLP tree build. */
842 if (PURE_SLP_STMT (stmt_info))
843 return;
845 if (loop_vinfo)
846 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
847 else
848 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
850 for (i = 0; i < pwr + 1; i++)
852 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
853 (i + 1) : i;
854 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
855 vec_promote_demote, stmt_info, 0,
856 vect_body);
859 /* FORNOW: Assuming maximum 2 args per stmt. */
860 for (i = 0; i < 2; i++)
861 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
862 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
863 stmt_info, 0, vect_prologue);
865 if (vect_print_dump_info (REPORT_COST))
866 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
867 "prologue_cost = %d .", inside_cost, prologue_cost);
870 /* Function vect_cost_group_size
872 For grouped load or store, return the group_size only if it is the first
873 load or store of a group, else return 1. This ensures that group size is
874 only returned once per group. */
876 static int
877 vect_cost_group_size (stmt_vec_info stmt_info)
879 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
881 if (first_stmt == STMT_VINFO_STMT (stmt_info))
882 return GROUP_SIZE (stmt_info);
884 return 1;
888 /* Function vect_model_store_cost
890 Models cost for stores. In the case of grouped accesses, one access
891 has the overhead of the grouped access attributed to it. */
893 void
894 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
895 bool store_lanes_p, enum vect_def_type dt,
896 slp_tree slp_node,
897 stmt_vector_for_cost *prologue_cost_vec,
898 stmt_vector_for_cost *body_cost_vec)
900 int group_size;
901 unsigned int inside_cost = 0, prologue_cost = 0;
902 struct data_reference *first_dr;
903 gimple first_stmt;
905 /* The SLP costs were already calculated during SLP tree build. */
906 if (PURE_SLP_STMT (stmt_info))
907 return;
909 if (dt == vect_constant_def || dt == vect_external_def)
910 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
911 stmt_info, 0, vect_prologue);
913 /* Grouped access? */
914 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
916 if (slp_node)
918 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
919 group_size = 1;
921 else
923 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
924 group_size = vect_cost_group_size (stmt_info);
927 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
929 /* Not a grouped access. */
930 else
932 group_size = 1;
933 first_dr = STMT_VINFO_DATA_REF (stmt_info);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (!store_lanes_p && group_size > 1)
942 /* Uses a high and low interleave operation for each needed permute. */
944 int nstmts = ncopies * exact_log2 (group_size) * group_size;
945 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
946 stmt_info, 0, vect_body);
948 if (vect_print_dump_info (REPORT_COST))
949 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
950 group_size);
953 /* Costs of the stores. */
954 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
956 if (vect_print_dump_info (REPORT_COST))
957 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
958 "prologue_cost = %d .", inside_cost, prologue_cost);
962 /* Calculate cost of DR's memory access. */
963 void
964 vect_get_store_cost (struct data_reference *dr, int ncopies,
965 unsigned int *inside_cost,
966 stmt_vector_for_cost *body_cost_vec)
968 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
969 gimple stmt = DR_STMT (dr);
970 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
972 switch (alignment_support_scheme)
974 case dr_aligned:
976 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
977 vector_store, stmt_info, 0,
978 vect_body);
980 if (vect_print_dump_info (REPORT_COST))
981 fprintf (vect_dump, "vect_model_store_cost: aligned.");
983 break;
986 case dr_unaligned_supported:
988 /* Here, we assign an additional cost for the unaligned store. */
989 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
990 unaligned_store, stmt_info,
991 DR_MISALIGNMENT (dr), vect_body);
993 if (vect_print_dump_info (REPORT_COST))
994 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
995 "hardware.");
997 break;
1000 case dr_unaligned_unsupported:
1002 *inside_cost = VECT_MAX_COST;
1004 if (vect_print_dump_info (REPORT_COST))
1005 fprintf (vect_dump, "vect_model_store_cost: unsupported access.");
1007 break;
1010 default:
1011 gcc_unreachable ();
1016 /* Function vect_model_load_cost
1018 Models cost for loads. In the case of grouped accesses, the last access
1019 has the overhead of the grouped access attributed to it. Since unaligned
1020 accesses are supported for loads, we also account for the costs of the
1021 access scheme chosen. */
1023 void
1024 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1025 bool load_lanes_p, slp_tree slp_node,
1026 stmt_vector_for_cost *prologue_cost_vec,
1027 stmt_vector_for_cost *body_cost_vec)
1029 int group_size;
1030 gimple first_stmt;
1031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1032 unsigned int inside_cost = 0, prologue_cost = 0;
1034 /* The SLP costs were already calculated during SLP tree build. */
1035 if (PURE_SLP_STMT (stmt_info))
1036 return;
1038 /* Grouped accesses? */
1039 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1040 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1042 group_size = vect_cost_group_size (stmt_info);
1043 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1045 /* Not a grouped access. */
1046 else
1048 group_size = 1;
1049 first_dr = dr;
1052 /* We assume that the cost of a single load-lanes instruction is
1053 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1054 access is instead being provided by a load-and-permute operation,
1055 include the cost of the permutes. */
1056 if (!load_lanes_p && group_size > 1)
1058 /* Uses even and odd extract operations for each needed permute. */
1059 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1060 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1061 stmt_info, 0, vect_body);
1063 if (vect_print_dump_info (REPORT_COST))
1064 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1065 group_size);
1068 /* The loads themselves. */
1069 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1071 /* N scalar loads plus gathering them into a vector. */
1072 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1073 inside_cost += record_stmt_cost (body_cost_vec,
1074 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1075 scalar_load, stmt_info, 0, vect_body);
1076 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1077 stmt_info, 0, vect_body);
1079 else
1080 vect_get_load_cost (first_dr, ncopies,
1081 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1082 || group_size > 1 || slp_node),
1083 &inside_cost, &prologue_cost,
1084 prologue_cost_vec, body_cost_vec, true);
1086 if (vect_print_dump_info (REPORT_COST))
1087 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1088 "prologue_cost = %d .", inside_cost, prologue_cost);
1092 /* Calculate cost of DR's memory access. */
1093 void
1094 vect_get_load_cost (struct data_reference *dr, int ncopies,
1095 bool add_realign_cost, unsigned int *inside_cost,
1096 unsigned int *prologue_cost,
1097 stmt_vector_for_cost *prologue_cost_vec,
1098 stmt_vector_for_cost *body_cost_vec,
1099 bool record_prologue_costs)
1101 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1102 gimple stmt = DR_STMT (dr);
1103 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1105 switch (alignment_support_scheme)
1107 case dr_aligned:
1109 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1110 stmt_info, 0, vect_body);
1112 if (vect_print_dump_info (REPORT_COST))
1113 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1115 break;
1117 case dr_unaligned_supported:
1119 /* Here, we assign an additional cost for the unaligned load. */
1120 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1121 unaligned_load, stmt_info,
1122 DR_MISALIGNMENT (dr), vect_body);
1124 if (vect_print_dump_info (REPORT_COST))
1125 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1126 "hardware.");
1128 break;
1130 case dr_explicit_realign:
1132 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1133 vector_load, stmt_info, 0, vect_body);
1134 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1135 vec_perm, stmt_info, 0, vect_body);
1137 /* FIXME: If the misalignment remains fixed across the iterations of
1138 the containing loop, the following cost should be added to the
1139 prologue costs. */
1140 if (targetm.vectorize.builtin_mask_for_load)
1141 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1142 stmt_info, 0, vect_body);
1144 if (vect_print_dump_info (REPORT_COST))
1145 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1147 break;
1149 case dr_explicit_realign_optimized:
1151 if (vect_print_dump_info (REPORT_COST))
1152 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1153 "pipelined.");
1155 /* Unaligned software pipeline has a load of an address, an initial
1156 load, and possibly a mask operation to "prime" the loop. However,
1157 if this is an access in a group of loads, which provide grouped
1158 access, then the above cost should only be considered for one
1159 access in the group. Inside the loop, there is a load op
1160 and a realignment op. */
1162 if (add_realign_cost && record_prologue_costs)
1164 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1165 vector_stmt, stmt_info,
1166 0, vect_prologue);
1167 if (targetm.vectorize.builtin_mask_for_load)
1168 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1169 vector_stmt, stmt_info,
1170 0, vect_prologue);
1173 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1174 stmt_info, 0, vect_body);
1175 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1176 stmt_info, 0, vect_body);
1178 if (vect_print_dump_info (REPORT_COST))
1179 fprintf (vect_dump,
1180 "vect_model_load_cost: explicit realign optimized");
1182 break;
1185 case dr_unaligned_unsupported:
1187 *inside_cost = VECT_MAX_COST;
1189 if (vect_print_dump_info (REPORT_COST))
1190 fprintf (vect_dump, "vect_model_load_cost: unsupported access.");
1192 break;
1195 default:
1196 gcc_unreachable ();
1200 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1201 the loop preheader for the vectorized stmt STMT. */
1203 static void
1204 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1206 if (gsi)
1207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1208 else
1210 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1213 if (loop_vinfo)
1215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 basic_block new_bb;
1217 edge pe;
1219 if (nested_in_vect_loop_p (loop, stmt))
1220 loop = loop->inner;
1222 pe = loop_preheader_edge (loop);
1223 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1224 gcc_assert (!new_bb);
1226 else
1228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1229 basic_block bb;
1230 gimple_stmt_iterator gsi_bb_start;
1232 gcc_assert (bb_vinfo);
1233 bb = BB_VINFO_BB (bb_vinfo);
1234 gsi_bb_start = gsi_after_labels (bb);
1235 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1239 if (vect_print_dump_info (REPORT_DETAILS))
1241 fprintf (vect_dump, "created new init_stmt: ");
1242 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1246 /* Function vect_init_vector.
1248 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1249 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1250 a vector type, a vector with all elements equal to VAL is created first.
1251 Place the initialization at GSI if it is not NULL. Otherwise, place the
1252 initialization at the loop preheader.
1253 Return the DEF of INIT_STMT.
1254 It will be used in the vectorization of STMT. */
1256 tree
1257 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1259 tree new_var;
1260 gimple init_stmt;
1261 tree vec_oprnd;
1262 tree new_temp;
1264 if (TREE_CODE (type) == VECTOR_TYPE
1265 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1267 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1269 if (CONSTANT_CLASS_P (val))
1270 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1271 else
1273 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1274 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1275 new_temp, val,
1276 NULL_TREE);
1277 vect_init_vector_1 (stmt, init_stmt, gsi);
1278 val = new_temp;
1281 val = build_vector_from_val (type, val);
1284 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1285 init_stmt = gimple_build_assign (new_var, val);
1286 new_temp = make_ssa_name (new_var, init_stmt);
1287 gimple_assign_set_lhs (init_stmt, new_temp);
1288 vect_init_vector_1 (stmt, init_stmt, gsi);
1289 vec_oprnd = gimple_assign_lhs (init_stmt);
1290 return vec_oprnd;
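/* A standalone sketch, using the GNU vector extension, of the value the
   init_stmt built above computes when TYPE is a four-element integer vector
   and VAL is a scalar: a vector whose elements all equal VAL, i.e.
   'vect_cst_ = {val,val,val,val}'.  Purely illustrative.  */

typedef int example_v4si __attribute__ ((vector_size (16)));

static example_v4si
example_init_vector (int val)
{
  example_v4si cst = { val, val, val, val };
  return cst;
}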
1294 /* Function vect_get_vec_def_for_operand.
1296 OP is an operand in STMT. This function returns a (vector) def that will be
1297 used in the vectorized stmt for STMT.
1299 In the case that OP is an SSA_NAME which is defined in the loop, then
1300 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1302 In case OP is an invariant or constant, a new stmt that creates a vector def
1303 needs to be introduced. */
1305 tree
1306 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1308 tree vec_oprnd;
1309 gimple vec_stmt;
1310 gimple def_stmt;
1311 stmt_vec_info def_stmt_info = NULL;
1312 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1313 unsigned int nunits;
1314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1315 tree def;
1316 enum vect_def_type dt;
1317 bool is_simple_use;
1318 tree vector_type;
1320 if (vect_print_dump_info (REPORT_DETAILS))
1322 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1323 print_generic_expr (vect_dump, op, TDF_SLIM);
1326 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1327 &def_stmt, &def, &dt);
1328 gcc_assert (is_simple_use);
1329 if (vect_print_dump_info (REPORT_DETAILS))
1331 if (def)
1333 fprintf (vect_dump, "def = ");
1334 print_generic_expr (vect_dump, def, TDF_SLIM);
1336 if (def_stmt)
1338 fprintf (vect_dump, " def_stmt = ");
1339 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1343 switch (dt)
1345 /* Case 1: operand is a constant. */
1346 case vect_constant_def:
1348 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1349 gcc_assert (vector_type);
1350 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1352 if (scalar_def)
1353 *scalar_def = op;
1355 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1356 if (vect_print_dump_info (REPORT_DETAILS))
1357 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1359 return vect_init_vector (stmt, op, vector_type, NULL);
1362 /* Case 2: operand is defined outside the loop - loop invariant. */
1363 case vect_external_def:
1365 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1366 gcc_assert (vector_type);
1368 if (scalar_def)
1369 *scalar_def = def;
1371 /* Create 'vec_inv = {inv,inv,..,inv}' */
1372 if (vect_print_dump_info (REPORT_DETAILS))
1373 fprintf (vect_dump, "Create vector_inv.");
1375 return vect_init_vector (stmt, def, vector_type, NULL);
1378 /* Case 3: operand is defined inside the loop. */
1379 case vect_internal_def:
1381 if (scalar_def)
1382 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1384 /* Get the def from the vectorized stmt. */
1385 def_stmt_info = vinfo_for_stmt (def_stmt);
1387 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1388 /* Get vectorized pattern statement. */
1389 if (!vec_stmt
1390 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1391 && !STMT_VINFO_RELEVANT (def_stmt_info))
1392 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1393 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1394 gcc_assert (vec_stmt);
1395 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1396 vec_oprnd = PHI_RESULT (vec_stmt);
1397 else if (is_gimple_call (vec_stmt))
1398 vec_oprnd = gimple_call_lhs (vec_stmt);
1399 else
1400 vec_oprnd = gimple_assign_lhs (vec_stmt);
1401 return vec_oprnd;
1404 /* Case 4: operand is defined by a loop header phi - reduction */
1405 case vect_reduction_def:
1406 case vect_double_reduction_def:
1407 case vect_nested_cycle:
1409 struct loop *loop;
1411 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1412 loop = (gimple_bb (def_stmt))->loop_father;
1414 /* Get the def before the loop */
1415 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1416 return get_initial_def_for_reduction (stmt, op, scalar_def);
1419 /* Case 5: operand is defined by loop-header phi - induction. */
1420 case vect_induction_def:
1422 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1424 /* Get the def from the vectorized stmt. */
1425 def_stmt_info = vinfo_for_stmt (def_stmt);
1426 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1427 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1428 vec_oprnd = PHI_RESULT (vec_stmt);
1429 else
1430 vec_oprnd = gimple_get_lhs (vec_stmt);
1431 return vec_oprnd;
1434 default:
1435 gcc_unreachable ();
1440 /* Function vect_get_vec_def_for_stmt_copy
1442 Return a vector-def for an operand. This function is used when the
1443 vectorized stmt to be created (by the caller to this function) is a "copy"
1444 created in case the vectorized result cannot fit in one vector, and several
1445 copies of the vector-stmt are required. In this case the vector-def is
1446 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1447 of the stmt that defines VEC_OPRND.
1448 DT is the type of the vector def VEC_OPRND.
1450 Context:
1451 In case the vectorization factor (VF) is bigger than the number
1452 of elements that can fit in a vectype (nunits), we have to generate
1453 more than one vector stmt to vectorize the scalar stmt. This situation
1454 arises when there are multiple data-types operated upon in the loop; the
1455 smallest data-type determines the VF, and as a result, when vectorizing
1456 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1457 vector stmt (each computing a vector of 'nunits' results, and together
1458 computing 'VF' results in each iteration). This function is called when
1459 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1460 which VF=16 and nunits=4, so the number of copies required is 4):
1462 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1464 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1465 VS1.1: vx.1 = memref1 VS1.2
1466 VS1.2: vx.2 = memref2 VS1.3
1467 VS1.3: vx.3 = memref3
1469 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1470 VSnew.1: vz1 = vx.1 + ... VSnew.2
1471 VSnew.2: vz2 = vx.2 + ... VSnew.3
1472 VSnew.3: vz3 = vx.3 + ...
1474 The vectorization of S1 is explained in vectorizable_load.
1475 The vectorization of S2:
1476 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1477 the function 'vect_get_vec_def_for_operand' is called to
1478 get the relevant vector-def for each operand of S2. For operand x it
1479 returns the vector-def 'vx.0'.
1481 To create the remaining copies of the vector-stmt (VSnew.j), this
1482 function is called to get the relevant vector-def for each operand. It is
1483 obtained from the respective VS1.j stmt, which is recorded in the
1484 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1486 For example, to obtain the vector-def 'vx.1' in order to create the
1487 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1488 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1489 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1490 and return its def ('vx.1').
1491 Overall, to create the above sequence this function will be called 3 times:
1492 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1493 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1494 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1496 tree
1497 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1499 gimple vec_stmt_for_operand;
1500 stmt_vec_info def_stmt_info;
1502 /* Do nothing; can reuse same def. */
1503 if (dt == vect_external_def || dt == vect_constant_def )
1504 return vec_oprnd;
1506 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1507 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1508 gcc_assert (def_stmt_info);
1509 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1510 gcc_assert (vec_stmt_for_operand);
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1512 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1513 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1514 else
1515 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1516 return vec_oprnd;
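/* A standalone sketch of the copy count the comment above works through:
   with a vectorization factor VF == 16 and a vector type holding
   NUNITS == 4 elements, VF / NUNITS == 4 copies of the vector stmt are
   created and chained through STMT_VINFO_RELATED_STMT.  Purely
   illustrative.  */

static int
example_ncopies (int vf, int nunits)
{
  return vf / nunits;   /* e.g. 16 / 4 == 4 copies  */
}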
1520 /* Get vectorized definitions for the operands to create a copy of an original
1521 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1523 static void
1524 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1525 VEC(tree,heap) **vec_oprnds0,
1526 VEC(tree,heap) **vec_oprnds1)
1528 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1530 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1531 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1533 if (vec_oprnds1 && *vec_oprnds1)
1535 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1536 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1537 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1542 /* Get vectorized definitions for OP0 and OP1.
1543 REDUC_INDEX is the index of reduction operand in case of reduction,
1544 and -1 otherwise. */
1546 void
1547 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1548 VEC (tree, heap) **vec_oprnds0,
1549 VEC (tree, heap) **vec_oprnds1,
1550 slp_tree slp_node, int reduc_index)
1552 if (slp_node)
1554 int nops = (op1 == NULL_TREE) ? 1 : 2;
1555 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1556 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1558 VEC_quick_push (tree, ops, op0);
1559 if (op1)
1560 VEC_quick_push (tree, ops, op1);
1562 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1564 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1565 if (op1)
1566 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1568 VEC_free (tree, heap, ops);
1569 VEC_free (slp_void_p, heap, vec_defs);
1571 else
1573 tree vec_oprnd;
1575 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1576 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1577 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1579 if (op1)
1581 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1582 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1583 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1589 /* Function vect_finish_stmt_generation.
1591 Insert a new stmt. */
1593 void
1594 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1595 gimple_stmt_iterator *gsi)
1597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1598 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1599 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1601 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1603 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1605 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1606 bb_vinfo));
1608 if (vect_print_dump_info (REPORT_DETAILS))
1610 fprintf (vect_dump, "add new stmt: ");
1611 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1614 gimple_set_location (vec_stmt, gimple_location (stmt));
1617 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1618 a function declaration if the target has a vectorized version
1619 of the function, or NULL_TREE if the function cannot be vectorized. */
1621 tree
1622 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1624 tree fndecl = gimple_call_fndecl (call);
1626 /* We only handle functions that do not read or clobber memory -- i.e.
1627 const or novops ones. */
1628 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1629 return NULL_TREE;
1631 if (!fndecl
1632 || TREE_CODE (fndecl) != FUNCTION_DECL
1633 || !DECL_BUILT_IN (fndecl))
1634 return NULL_TREE;
1636 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1637 vectype_in);
1640 /* Function vectorizable_call.
1642 Check if STMT performs a function call that can be vectorized.
1643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1647 static bool
1648 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1649 slp_tree slp_node)
1651 tree vec_dest;
1652 tree scalar_dest;
1653 tree op, type;
1654 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1655 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1656 tree vectype_out, vectype_in;
1657 int nunits_in;
1658 int nunits_out;
1659 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1660 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1661 tree fndecl, new_temp, def, rhs_type;
1662 gimple def_stmt;
1663 enum vect_def_type dt[3]
1664 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1665 gimple new_stmt = NULL;
1666 int ncopies, j;
1667 VEC(tree, heap) *vargs = NULL;
1668 enum { NARROW, NONE, WIDEN } modifier;
1669 size_t i, nargs;
1670 tree lhs;
1672 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1673 return false;
1675 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1676 return false;
1678 /* Is STMT a vectorizable call? */
1679 if (!is_gimple_call (stmt))
1680 return false;
1682 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1683 return false;
1685 if (stmt_can_throw_internal (stmt))
1686 return false;
1688 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1690 /* Process function arguments. */
1691 rhs_type = NULL_TREE;
1692 vectype_in = NULL_TREE;
1693 nargs = gimple_call_num_args (stmt);
1695 /* Bail out if the function has more than three arguments; we do not have
1696 interesting builtin functions to vectorize with more than two arguments
1697 except for fma. A call with no arguments is not handled either. */
1698 if (nargs == 0 || nargs > 3)
1699 return false;
1701 for (i = 0; i < nargs; i++)
1703 tree opvectype;
1705 op = gimple_call_arg (stmt, i);
1707 /* We can only handle calls with arguments of the same type. */
1708 if (rhs_type
1709 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1711 if (vect_print_dump_info (REPORT_DETAILS))
1712 fprintf (vect_dump, "argument types differ.");
1713 return false;
1715 if (!rhs_type)
1716 rhs_type = TREE_TYPE (op);
1718 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1719 &def_stmt, &def, &dt[i], &opvectype))
1721 if (vect_print_dump_info (REPORT_DETAILS))
1722 fprintf (vect_dump, "use not simple.");
1723 return false;
1726 if (!vectype_in)
1727 vectype_in = opvectype;
1728 else if (opvectype
1729 && opvectype != vectype_in)
1731 if (vect_print_dump_info (REPORT_DETAILS))
1732 fprintf (vect_dump, "argument vector types differ.");
1733 return false;
1736 /* If all arguments are external or constant defs, use a vector type with
1737 the same size as the output vector type. */
1738 if (!vectype_in)
1739 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1740 if (vec_stmt)
1741 gcc_assert (vectype_in);
1742 if (!vectype_in)
1744 if (vect_print_dump_info (REPORT_DETAILS))
1746 fprintf (vect_dump, "no vectype for scalar type ");
1747 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1750 return false;
1753 /* FORNOW */
1754 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1755 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1756 if (nunits_in == nunits_out / 2)
1757 modifier = NARROW;
1758 else if (nunits_out == nunits_in)
1759 modifier = NONE;
1760 else if (nunits_out == nunits_in / 2)
1761 modifier = WIDEN;
1762 else
1763 return false;
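  /* The classification just made, by example: V2DF arguments feeding a V4SF
     result (nunits_in == 2, nunits_out == 4) give NARROW, V4SF arguments
     feeding a V2DF result give WIDEN, and equal element counts give NONE.  */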
1765 /* For now, we only vectorize functions if a target specific builtin
1766 is available. TODO -- in some cases, it might be profitable to
1767 insert the calls for pieces of the vector, in order to be able
1768 to vectorize other operations in the loop. */
1769 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1770 if (fndecl == NULL_TREE)
1772 if (vect_print_dump_info (REPORT_DETAILS))
1773 fprintf (vect_dump, "function is not vectorizable.");
1775 return false;
1778 gcc_assert (!gimple_vuse (stmt));
1780 if (slp_node || PURE_SLP_STMT (stmt_info))
1781 ncopies = 1;
1782 else if (modifier == NARROW)
1783 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1784 else
1785 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1787 /* Sanity check: make sure that at least one copy of the vectorized stmt
1788 needs to be generated. */
1789 gcc_assert (ncopies >= 1);
1791 if (!vec_stmt) /* transformation not required. */
1793 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1794 if (vect_print_dump_info (REPORT_DETAILS))
1795 fprintf (vect_dump, "=== vectorizable_call ===");
1796 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1797 return true;
1800 /** Transform. **/
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 fprintf (vect_dump, "transform call.");
1805 /* Handle def. */
1806 scalar_dest = gimple_call_lhs (stmt);
1807 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1809 prev_stmt_info = NULL;
1810 switch (modifier)
1812 case NONE:
1813 for (j = 0; j < ncopies; ++j)
1815 /* Build argument list for the vectorized call. */
1816 if (j == 0)
1817 vargs = VEC_alloc (tree, heap, nargs);
1818 else
1819 VEC_truncate (tree, vargs, 0);
1821 if (slp_node)
1823 VEC (slp_void_p, heap) *vec_defs
1824 = VEC_alloc (slp_void_p, heap, nargs);
1825 VEC (tree, heap) *vec_oprnds0;
1827 for (i = 0; i < nargs; i++)
1828 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1829 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1830 vec_oprnds0
1831 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1833 /* Arguments are ready. Create the new vector stmt. */
1834 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1836 size_t k;
1837 for (k = 0; k < nargs; k++)
1839 VEC (tree, heap) *vec_oprndsk
1840 = (VEC (tree, heap) *)
1841 VEC_index (slp_void_p, vec_defs, k);
1842 VEC_replace (tree, vargs, k,
1843 VEC_index (tree, vec_oprndsk, i));
1845 new_stmt = gimple_build_call_vec (fndecl, vargs);
1846 new_temp = make_ssa_name (vec_dest, new_stmt);
1847 gimple_call_set_lhs (new_stmt, new_temp);
1848 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1849 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1850 new_stmt);
1853 for (i = 0; i < nargs; i++)
1855 VEC (tree, heap) *vec_oprndsi
1856 = (VEC (tree, heap) *)
1857 VEC_index (slp_void_p, vec_defs, i);
1858 VEC_free (tree, heap, vec_oprndsi);
1860 VEC_free (slp_void_p, heap, vec_defs);
1861 continue;
1864 for (i = 0; i < nargs; i++)
1866 op = gimple_call_arg (stmt, i);
1867 if (j == 0)
1868 vec_oprnd0
1869 = vect_get_vec_def_for_operand (op, stmt, NULL);
1870 else
1872 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1873 vec_oprnd0
1874 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1877 VEC_quick_push (tree, vargs, vec_oprnd0);
1880 new_stmt = gimple_build_call_vec (fndecl, vargs);
1881 new_temp = make_ssa_name (vec_dest, new_stmt);
1882 gimple_call_set_lhs (new_stmt, new_temp);
1883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1885 if (j == 0)
1886 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1887 else
1888 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1890 prev_stmt_info = vinfo_for_stmt (new_stmt);
1893 break;
1895 case NARROW:
1896 for (j = 0; j < ncopies; ++j)
1898 /* Build argument list for the vectorized call. */
1899 if (j == 0)
1900 vargs = VEC_alloc (tree, heap, nargs * 2);
1901 else
1902 VEC_truncate (tree, vargs, 0);
1904 if (slp_node)
1906 VEC (slp_void_p, heap) *vec_defs
1907 = VEC_alloc (slp_void_p, heap, nargs);
1908 VEC (tree, heap) *vec_oprnds0;
1910 for (i = 0; i < nargs; i++)
1911 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1912 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1913 vec_oprnds0
1914 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1916 /* Arguments are ready. Create the new vector stmt. */
1917 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1918 i += 2)
1920 size_t k;
1921 VEC_truncate (tree, vargs, 0);
1922 for (k = 0; k < nargs; k++)
1924 VEC (tree, heap) *vec_oprndsk
1925 = (VEC (tree, heap) *)
1926 VEC_index (slp_void_p, vec_defs, k);
1927 VEC_quick_push (tree, vargs,
1928 VEC_index (tree, vec_oprndsk, i));
1929 VEC_quick_push (tree, vargs,
1930 VEC_index (tree, vec_oprndsk, i + 1));
1932 new_stmt = gimple_build_call_vec (fndecl, vargs);
1933 new_temp = make_ssa_name (vec_dest, new_stmt);
1934 gimple_call_set_lhs (new_stmt, new_temp);
1935 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1936 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1937 new_stmt);
1940 for (i = 0; i < nargs; i++)
1942 VEC (tree, heap) *vec_oprndsi
1943 = (VEC (tree, heap) *)
1944 VEC_index (slp_void_p, vec_defs, i);
1945 VEC_free (tree, heap, vec_oprndsi);
1947 VEC_free (slp_void_p, heap, vec_defs);
1948 continue;
1951 for (i = 0; i < nargs; i++)
1953 op = gimple_call_arg (stmt, i);
1954 if (j == 0)
1956 vec_oprnd0
1957 = vect_get_vec_def_for_operand (op, stmt, NULL);
1958 vec_oprnd1
1959 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1961 else
1963 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1964 vec_oprnd0
1965 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1966 vec_oprnd1
1967 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1970 VEC_quick_push (tree, vargs, vec_oprnd0);
1971 VEC_quick_push (tree, vargs, vec_oprnd1);
1974 new_stmt = gimple_build_call_vec (fndecl, vargs);
1975 new_temp = make_ssa_name (vec_dest, new_stmt);
1976 gimple_call_set_lhs (new_stmt, new_temp);
1977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1979 if (j == 0)
1980 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1981 else
1982 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1984 prev_stmt_info = vinfo_for_stmt (new_stmt);
1987 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1989 break;
1991 case WIDEN:
1992 /* No current target implements this case. */
1993 return false;
1996 VEC_free (tree, heap, vargs);
1998 /* Update the exception handling table with the vector stmt if necessary. */
1999 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2000 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2002 /* The call in STMT might prevent it from being removed in dce.
2003 However, we cannot remove it here, due to the way the ssa name
2004 it defines is mapped to the new definition. So just replace
2005 the rhs of the statement with something harmless. */
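/* For example (added note): after vectorization the scalar call
   `x_1 = foo (a_2)' is rewritten below as `x_1 = 0' (a zero constant of
   x_1's type), which keeps the SSA definition in place but lets a later
   DCE pass delete it once nothing uses x_1.  */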
2007 if (slp_node)
2008 return true;
2010 type = TREE_TYPE (scalar_dest);
2011 if (is_pattern_stmt_p (stmt_info))
2012 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2013 else
2014 lhs = gimple_call_lhs (stmt);
2015 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2016 set_vinfo_for_stmt (new_stmt, stmt_info);
2017 set_vinfo_for_stmt (stmt, NULL);
2018 STMT_VINFO_STMT (stmt_info) = new_stmt;
2019 gsi_replace (gsi, new_stmt, false);
2020 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2022 return true;
2026 /* Function vect_gen_widened_results_half
2028 Create a vector stmt whose code, type, number of arguments, and result
2029 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2030 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2031 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2032 needs to be created (DECL is a function-decl of a target-builtin).
2033 STMT is the original scalar stmt that we are vectorizing. */
2035 static gimple
2036 vect_gen_widened_results_half (enum tree_code code,
2037 tree decl,
2038 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2039 tree vec_dest, gimple_stmt_iterator *gsi,
2040 gimple stmt)
2042 gimple new_stmt;
2043 tree new_temp;
2045 /* Generate half of the widened result: */
2046 if (code == CALL_EXPR)
2048 /* Target specific support */
2049 if (op_type == binary_op)
2050 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2051 else
2052 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2053 new_temp = make_ssa_name (vec_dest, new_stmt);
2054 gimple_call_set_lhs (new_stmt, new_temp);
2056 else
2058 /* Generic support */
2059 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2060 if (op_type != binary_op)
2061 vec_oprnd1 = NULL;
2062 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2063 vec_oprnd1);
2064 new_temp = make_ssa_name (vec_dest, new_stmt);
2065 gimple_assign_set_lhs (new_stmt, new_temp);
2067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2069 return new_stmt;
2073 /* Get vectorized definitions for loop-based vectorization. For the first
2074 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2075 scalar operand), and for the rest we get a copy with
2076 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2077 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2078 The vectors are collected into VEC_OPRNDS. */
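/* Worked sketch (added, not part of the original comment): for an argument
   MULTI_STEP_CVT == N the routine pushes two defs at each of its N + 1
   recursion levels, i.e. 2 * (N + 1) vector operands in total.  The NARROW
   path of vectorizable_conversion calls it with vect_pow2 (multi_step_cvt)
   - 1, so a two-step demotion (multi_step_cvt == 1) gathers the four input
   vectors that are later packed pairwise down to a single result.  */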
2080 static void
2081 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2082 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2084 tree vec_oprnd;
2086 /* Get first vector operand. */
2087 /* All the vector operands except the very first one (that is scalar oprnd)
2088 are stmt copies. */
2089 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2090 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2091 else
2092 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2094 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2096 /* Get second vector operand. */
2097 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2098 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2100 *oprnd = vec_oprnd;
2102 /* For conversion in multiple steps, continue to get operands
2103 recursively. */
2104 if (multi_step_cvt)
2105 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2109 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2110 For multi-step conversions store the resulting vectors and call the function
2111 recursively. */
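/* Added example (vector type names are illustrative only): for an
   int -> char demotion with MULTI_STEP_CVT == 1, the first invocation packs
   pairs of V4SI operands into V8HI intermediates (stored back into
   VEC_OPRNDS), and the recursive call then packs those V8HI pairs into the
   final V16QI vectors using VEC_PACK_TRUNC_EXPR.  */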
2113 static void
2114 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2115 int multi_step_cvt, gimple stmt,
2116 VEC (tree, heap) *vec_dsts,
2117 gimple_stmt_iterator *gsi,
2118 slp_tree slp_node, enum tree_code code,
2119 stmt_vec_info *prev_stmt_info)
2121 unsigned int i;
2122 tree vop0, vop1, new_tmp, vec_dest;
2123 gimple new_stmt;
2124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2126 vec_dest = VEC_pop (tree, vec_dsts);
2128 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2130 /* Create demotion operation. */
2131 vop0 = VEC_index (tree, *vec_oprnds, i);
2132 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2133 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2134 new_tmp = make_ssa_name (vec_dest, new_stmt);
2135 gimple_assign_set_lhs (new_stmt, new_tmp);
2136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2138 if (multi_step_cvt)
2139 /* Store the resulting vector for the next recursive call. */
2140 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2141 else
2143 /* This is the last step of the conversion sequence. Store the
2144 vectors in SLP_NODE or in vector info of the scalar statement
2145 (or in STMT_VINFO_RELATED_STMT chain). */
2146 if (slp_node)
2147 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2148 else
2150 if (!*prev_stmt_info)
2151 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2152 else
2153 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2155 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2160 /* For multi-step demotion operations we first generate demotion operations
2161 from the source type to the intermediate types, and then combine the
2162 results (stored in VEC_OPRNDS) in demotion operation to the destination
2163 type. */
2164 if (multi_step_cvt)
2166 /* At each level of recursion we have half of the operands we had at the
2167 previous level. */
2168 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2169 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2170 stmt, vec_dsts, gsi, slp_node,
2171 VEC_PACK_TRUNC_EXPR,
2172 prev_stmt_info);
2175 VEC_quick_push (tree, vec_dsts, vec_dest);
2179 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2180 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2181 the resulting vectors and call the function recursively. */
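/* Added note: each vector in VEC_OPRNDS0 produces two result vectors here,
   one via CODE1/DECL1 and one via CODE2/DECL2 (typically the lo/hi halves
   of the widening operation), and *VEC_OPRNDS0 is replaced by those results
   so the caller can feed them directly into the next promotion step.  */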
2183 static void
2184 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2185 VEC (tree, heap) **vec_oprnds1,
2186 gimple stmt, tree vec_dest,
2187 gimple_stmt_iterator *gsi,
2188 enum tree_code code1,
2189 enum tree_code code2, tree decl1,
2190 tree decl2, int op_type)
2192 int i;
2193 tree vop0, vop1, new_tmp1, new_tmp2;
2194 gimple new_stmt1, new_stmt2;
2195 VEC (tree, heap) *vec_tmp = NULL;
2197 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2198 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2200 if (op_type == binary_op)
2201 vop1 = VEC_index (tree, *vec_oprnds1, i);
2202 else
2203 vop1 = NULL_TREE;
2205 /* Generate the two halves of promotion operation. */
2206 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2207 op_type, vec_dest, gsi, stmt);
2208 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2209 op_type, vec_dest, gsi, stmt);
2210 if (is_gimple_call (new_stmt1))
2212 new_tmp1 = gimple_call_lhs (new_stmt1);
2213 new_tmp2 = gimple_call_lhs (new_stmt2);
2215 else
2217 new_tmp1 = gimple_assign_lhs (new_stmt1);
2218 new_tmp2 = gimple_assign_lhs (new_stmt2);
2221 /* Store the results for the next step. */
2222 VEC_quick_push (tree, vec_tmp, new_tmp1);
2223 VEC_quick_push (tree, vec_tmp, new_tmp2);
2226 VEC_free (tree, heap, *vec_oprnds0);
2227 *vec_oprnds0 = vec_tmp;
2231 /* Check if STMT performs a conversion operation, that can be vectorized.
2232 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2233 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2234 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2236 static bool
2237 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2238 gimple *vec_stmt, slp_tree slp_node)
2240 tree vec_dest;
2241 tree scalar_dest;
2242 tree op0, op1 = NULL_TREE;
2243 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2244 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2245 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2246 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2247 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2248 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2249 tree new_temp;
2250 tree def;
2251 gimple def_stmt;
2252 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2253 gimple new_stmt = NULL;
2254 stmt_vec_info prev_stmt_info;
2255 int nunits_in;
2256 int nunits_out;
2257 tree vectype_out, vectype_in;
2258 int ncopies, i, j;
2259 tree lhs_type, rhs_type;
2260 enum { NARROW, NONE, WIDEN } modifier;
2261 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2262 tree vop0;
2263 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2264 int multi_step_cvt = 0;
2265 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2266 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2267 int op_type;
2268 enum machine_mode rhs_mode;
2269 unsigned short fltsz;
2271 /* Is STMT a vectorizable conversion? */
2273 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2274 return false;
2276 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2277 return false;
2279 if (!is_gimple_assign (stmt))
2280 return false;
2282 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2283 return false;
2285 code = gimple_assign_rhs_code (stmt);
2286 if (!CONVERT_EXPR_CODE_P (code)
2287 && code != FIX_TRUNC_EXPR
2288 && code != FLOAT_EXPR
2289 && code != WIDEN_MULT_EXPR
2290 && code != WIDEN_LSHIFT_EXPR)
2291 return false;
2293 op_type = TREE_CODE_LENGTH (code);
2295 /* Check types of lhs and rhs. */
2296 scalar_dest = gimple_assign_lhs (stmt);
2297 lhs_type = TREE_TYPE (scalar_dest);
2298 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2300 op0 = gimple_assign_rhs1 (stmt);
2301 rhs_type = TREE_TYPE (op0);
2303 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2304 && !((INTEGRAL_TYPE_P (lhs_type)
2305 && INTEGRAL_TYPE_P (rhs_type))
2306 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2307 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2308 return false;
2310 if ((INTEGRAL_TYPE_P (lhs_type)
2311 && (TYPE_PRECISION (lhs_type)
2312 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2313 || (INTEGRAL_TYPE_P (rhs_type)
2314 && (TYPE_PRECISION (rhs_type)
2315 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2317 if (vect_print_dump_info (REPORT_DETAILS))
2318 fprintf (vect_dump,
2319 "type conversion to/from bit-precision unsupported.");
2320 return false;
2323 /* Check the operands of the operation. */
2324 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2325 &def_stmt, &def, &dt[0], &vectype_in))
2327 if (vect_print_dump_info (REPORT_DETAILS))
2328 fprintf (vect_dump, "use not simple.");
2329 return false;
2331 if (op_type == binary_op)
2333 bool ok;
2335 op1 = gimple_assign_rhs2 (stmt);
2336 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2337 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2338 OP1. */
2339 if (CONSTANT_CLASS_P (op0))
2340 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2341 &def_stmt, &def, &dt[1], &vectype_in);
2342 else
2343 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2344 &def, &dt[1]);
2346 if (!ok)
2348 if (vect_print_dump_info (REPORT_DETAILS))
2349 fprintf (vect_dump, "use not simple.");
2350 return false;
2354 /* If op0 is an external or constant def, use a vector type of
2355 the same size as the output vector type. */
2356 if (!vectype_in)
2357 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2358 if (vec_stmt)
2359 gcc_assert (vectype_in);
2360 if (!vectype_in)
2362 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "no vectype for scalar type ");
2365 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2368 return false;
2371 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2372 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2373 if (nunits_in < nunits_out)
2374 modifier = NARROW;
2375 else if (nunits_out == nunits_in)
2376 modifier = NONE;
2377 else
2378 modifier = WIDEN;
2380 /* Multiple types in SLP are handled by creating the appropriate number of
2381 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2382 case of SLP. */
2383 if (slp_node || PURE_SLP_STMT (stmt_info))
2384 ncopies = 1;
2385 else if (modifier == NARROW)
2386 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2387 else
2388 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2390 /* Sanity check: make sure that at least one copy of the vectorized stmt
2391 needs to be generated. */
2392 gcc_assert (ncopies >= 1);
2394 /* Supportable by target? */
2395 switch (modifier)
2397 case NONE:
2398 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2399 return false;
2400 if (supportable_convert_operation (code, vectype_out, vectype_in,
2401 &decl1, &code1))
2402 break;
2403 /* FALLTHRU */
2404 unsupported:
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "conversion not supported by target.");
2407 return false;
2409 case WIDEN:
2410 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2411 &code1, &code2, &multi_step_cvt,
2412 &interm_types))
2414 /* Binary widening operation can only be supported directly by the
2415 architecture. */
2416 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2417 break;
2420 if (code != FLOAT_EXPR
2421 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2422 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2423 goto unsupported;
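/* Added explanatory note: the loop below searches for an intermediate
   integer type for a FLOAT_EXPR whose source is narrower than its
   destination.  For example (types are illustrative), a short -> double
   conversion tries SImode and then DImode as RHS_MODE; the first mode for
   which both the integer widening (NOP_EXPR) and the remaining int-to-float
   step are supportable is used, and CVT_TYPE is recorded as an extra
   intermediate type whenever it is still narrower than the destination.  */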
2425 rhs_mode = TYPE_MODE (rhs_type);
2426 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2427 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2428 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2429 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2431 cvt_type
2432 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2433 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2434 if (cvt_type == NULL_TREE)
2435 goto unsupported;
2437 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2439 if (!supportable_convert_operation (code, vectype_out,
2440 cvt_type, &decl1, &codecvt1))
2441 goto unsupported;
2443 else if (!supportable_widening_operation (code, stmt, vectype_out,
2444 cvt_type, &codecvt1,
2445 &codecvt2, &multi_step_cvt,
2446 &interm_types))
2447 continue;
2448 else
2449 gcc_assert (multi_step_cvt == 0);
2451 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2452 vectype_in, &code1, &code2,
2453 &multi_step_cvt, &interm_types))
2454 break;
2457 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2458 goto unsupported;
2460 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2461 codecvt2 = ERROR_MARK;
2462 else
2464 multi_step_cvt++;
2465 VEC_safe_push (tree, heap, interm_types, cvt_type);
2466 cvt_type = NULL_TREE;
2468 break;
2470 case NARROW:
2471 gcc_assert (op_type == unary_op);
2472 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2473 &code1, &multi_step_cvt,
2474 &interm_types))
2475 break;
2477 if (code != FIX_TRUNC_EXPR
2478 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2479 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2480 goto unsupported;
2482 rhs_mode = TYPE_MODE (rhs_type);
2483 cvt_type
2484 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2485 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2486 if (cvt_type == NULL_TREE)
2487 goto unsupported;
2488 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2489 &decl1, &codecvt1))
2490 goto unsupported;
2491 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2492 &code1, &multi_step_cvt,
2493 &interm_types))
2494 break;
2495 goto unsupported;
2497 default:
2498 gcc_unreachable ();
2501 if (!vec_stmt) /* transformation not required. */
2503 if (vect_print_dump_info (REPORT_DETAILS))
2504 fprintf (vect_dump, "=== vectorizable_conversion ===");
2505 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2507 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2508 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2510 else if (modifier == NARROW)
2512 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2513 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2515 else
2517 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2518 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2520 VEC_free (tree, heap, interm_types);
2521 return true;
2524 /** Transform. **/
2525 if (vect_print_dump_info (REPORT_DETAILS))
2526 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2528 if (op_type == binary_op)
2530 if (CONSTANT_CLASS_P (op0))
2531 op0 = fold_convert (TREE_TYPE (op1), op0);
2532 else if (CONSTANT_CLASS_P (op1))
2533 op1 = fold_convert (TREE_TYPE (op0), op1);
2536 /* In case of multi-step conversion, we first generate conversion operations
2537 to the intermediate types, and then from those types to the final one.
2538 We create vector destinations for the intermediate type (TYPES) received
2539 from supportable_*_operation, and store them in the correct order
2540 for future use in vect_create_vectorized_*_stmts (). */
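/* Added note: VEC_DSTS collects one destination variable per conversion
   step, so each step of a multi-step chain below writes into a temporary
   of the proper vector type before the next step consumes it.  */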
2541 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2542 vec_dest = vect_create_destination_var (scalar_dest,
2543 (cvt_type && modifier == WIDEN)
2544 ? cvt_type : vectype_out);
2545 VEC_quick_push (tree, vec_dsts, vec_dest);
2547 if (multi_step_cvt)
2549 for (i = VEC_length (tree, interm_types) - 1;
2550 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2552 vec_dest = vect_create_destination_var (scalar_dest,
2553 intermediate_type);
2554 VEC_quick_push (tree, vec_dsts, vec_dest);
2558 if (cvt_type)
2559 vec_dest = vect_create_destination_var (scalar_dest,
2560 modifier == WIDEN
2561 ? vectype_out : cvt_type);
2563 if (!slp_node)
2565 if (modifier == NONE)
2566 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2567 else if (modifier == WIDEN)
2569 vec_oprnds0 = VEC_alloc (tree, heap,
2570 (multi_step_cvt
2571 ? vect_pow2 (multi_step_cvt) : 1));
2572 if (op_type == binary_op)
2573 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2575 else
2576 vec_oprnds0 = VEC_alloc (tree, heap,
2577 2 * (multi_step_cvt
2578 ? vect_pow2 (multi_step_cvt) : 1));
2580 else if (code == WIDEN_LSHIFT_EXPR)
2581 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2583 last_oprnd = op0;
2584 prev_stmt_info = NULL;
2585 switch (modifier)
2587 case NONE:
2588 for (j = 0; j < ncopies; j++)
2590 if (j == 0)
2591 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2592 -1);
2593 else
2594 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2596 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2598 /* Arguments are ready, create the new vector stmt. */
2599 if (code1 == CALL_EXPR)
2601 new_stmt = gimple_build_call (decl1, 1, vop0);
2602 new_temp = make_ssa_name (vec_dest, new_stmt);
2603 gimple_call_set_lhs (new_stmt, new_temp);
2605 else
2607 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2608 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2609 vop0, NULL);
2610 new_temp = make_ssa_name (vec_dest, new_stmt);
2611 gimple_assign_set_lhs (new_stmt, new_temp);
2614 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2615 if (slp_node)
2616 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2617 new_stmt);
2620 if (j == 0)
2621 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2622 else
2623 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2624 prev_stmt_info = vinfo_for_stmt (new_stmt);
2626 break;
2628 case WIDEN:
2629 /* In case the vectorization factor (VF) is bigger than the number
2630 of elements that we can fit in a vectype (nunits), we have to
2631 generate more than one vector stmt - i.e - we need to "unroll"
2632 the vector stmt by a factor VF/nunits. */
2633 for (j = 0; j < ncopies; j++)
2635 /* Handle uses. */
2636 if (j == 0)
2638 if (slp_node)
2640 if (code == WIDEN_LSHIFT_EXPR)
2642 unsigned int k;
2644 vec_oprnd1 = op1;
2645 /* Store vec_oprnd1 for every vector stmt to be created
2646 for SLP_NODE. We check during the analysis that all
2647 the shift arguments are the same. */
2648 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2649 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2651 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2652 slp_node, -1);
2654 else
2655 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2656 &vec_oprnds1, slp_node, -1);
2658 else
2660 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2661 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2662 if (op_type == binary_op)
2664 if (code == WIDEN_LSHIFT_EXPR)
2665 vec_oprnd1 = op1;
2666 else
2667 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2668 NULL);
2669 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2673 else
2675 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2676 VEC_truncate (tree, vec_oprnds0, 0);
2677 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2678 if (op_type == binary_op)
2680 if (code == WIDEN_LSHIFT_EXPR)
2681 vec_oprnd1 = op1;
2682 else
2683 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2684 vec_oprnd1);
2685 VEC_truncate (tree, vec_oprnds1, 0);
2686 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2690 /* Arguments are ready. Create the new vector stmts. */
2691 for (i = multi_step_cvt; i >= 0; i--)
2693 tree this_dest = VEC_index (tree, vec_dsts, i);
2694 enum tree_code c1 = code1, c2 = code2;
2695 if (i == 0 && codecvt2 != ERROR_MARK)
2697 c1 = codecvt1;
2698 c2 = codecvt2;
2700 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2701 &vec_oprnds1,
2702 stmt, this_dest, gsi,
2703 c1, c2, decl1, decl2,
2704 op_type);
2707 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2709 if (cvt_type)
2711 if (codecvt1 == CALL_EXPR)
2713 new_stmt = gimple_build_call (decl1, 1, vop0);
2714 new_temp = make_ssa_name (vec_dest, new_stmt);
2715 gimple_call_set_lhs (new_stmt, new_temp);
2717 else
2719 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2720 new_temp = make_ssa_name (vec_dest, NULL);
2721 new_stmt = gimple_build_assign_with_ops (codecvt1,
2722 new_temp,
2723 vop0, NULL);
2726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728 else
2729 new_stmt = SSA_NAME_DEF_STMT (vop0);
2731 if (slp_node)
2732 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2733 new_stmt);
2734 else
2736 if (!prev_stmt_info)
2737 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2738 else
2739 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2740 prev_stmt_info = vinfo_for_stmt (new_stmt);
2745 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2746 break;
2748 case NARROW:
2749 /* In case the vectorization factor (VF) is bigger than the number
2750 of elements that we can fit in a vectype (nunits), we have to
2751 generate more than one vector stmt - i.e - we need to "unroll"
2752 the vector stmt by a factor VF/nunits. */
2753 for (j = 0; j < ncopies; j++)
2755 /* Handle uses. */
2756 if (slp_node)
2757 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2758 slp_node, -1);
2759 else
2761 VEC_truncate (tree, vec_oprnds0, 0);
2762 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2763 vect_pow2 (multi_step_cvt) - 1);
2766 /* Arguments are ready. Create the new vector stmts. */
2767 if (cvt_type)
2768 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2770 if (codecvt1 == CALL_EXPR)
2772 new_stmt = gimple_build_call (decl1, 1, vop0);
2773 new_temp = make_ssa_name (vec_dest, new_stmt);
2774 gimple_call_set_lhs (new_stmt, new_temp);
2776 else
2778 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2779 new_temp = make_ssa_name (vec_dest, NULL);
2780 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2781 vop0, NULL);
2784 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2785 VEC_replace (tree, vec_oprnds0, i, new_temp);
2788 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2789 stmt, vec_dsts, gsi,
2790 slp_node, code1,
2791 &prev_stmt_info);
2794 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2795 break;
2798 VEC_free (tree, heap, vec_oprnds0);
2799 VEC_free (tree, heap, vec_oprnds1);
2800 VEC_free (tree, heap, vec_dsts);
2801 VEC_free (tree, heap, interm_types);
2803 return true;
2807 /* Function vectorizable_assignment.
2809 Check if STMT performs an assignment (copy) that can be vectorized.
2810 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2811 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2812 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2814 static bool
2815 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2816 gimple *vec_stmt, slp_tree slp_node)
2818 tree vec_dest;
2819 tree scalar_dest;
2820 tree op;
2821 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2822 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2823 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2824 tree new_temp;
2825 tree def;
2826 gimple def_stmt;
2827 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2828 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2829 int ncopies;
2830 int i, j;
2831 VEC(tree,heap) *vec_oprnds = NULL;
2832 tree vop;
2833 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2834 gimple new_stmt = NULL;
2835 stmt_vec_info prev_stmt_info = NULL;
2836 enum tree_code code;
2837 tree vectype_in;
2839 /* Multiple types in SLP are handled by creating the appropriate number of
2840 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2841 case of SLP. */
2842 if (slp_node || PURE_SLP_STMT (stmt_info))
2843 ncopies = 1;
2844 else
2845 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2847 gcc_assert (ncopies >= 1);
2849 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2850 return false;
2852 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2853 return false;
2855 /* Is vectorizable assignment? */
2856 if (!is_gimple_assign (stmt))
2857 return false;
2859 scalar_dest = gimple_assign_lhs (stmt);
2860 if (TREE_CODE (scalar_dest) != SSA_NAME)
2861 return false;
2863 code = gimple_assign_rhs_code (stmt);
2864 if (gimple_assign_single_p (stmt)
2865 || code == PAREN_EXPR
2866 || CONVERT_EXPR_CODE_P (code))
2867 op = gimple_assign_rhs1 (stmt);
2868 else
2869 return false;
2871 if (code == VIEW_CONVERT_EXPR)
2872 op = TREE_OPERAND (op, 0);
2874 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2875 &def_stmt, &def, &dt[0], &vectype_in))
2877 if (vect_print_dump_info (REPORT_DETAILS))
2878 fprintf (vect_dump, "use not simple.");
2879 return false;
2882 /* We can handle NOP_EXPR conversions that do not change the number
2883 of elements or the vector size. */
2884 if ((CONVERT_EXPR_CODE_P (code)
2885 || code == VIEW_CONVERT_EXPR)
2886 && (!vectype_in
2887 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2888 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2889 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2890 return false;
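/* Added example (not from the original sources): an assignment such as
   `int_var_1 = (int) unsigned_var_2', or a VIEW_CONVERT_EXPR between
   equally sized types, passes the check above because source and
   destination map to vectors with the same number of equally sized
   elements; the transform below then emits one VIEW_CONVERT_EXPR per
   copied vector.  */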
2892 /* We do not handle bit-precision changes. */
2893 if ((CONVERT_EXPR_CODE_P (code)
2894 || code == VIEW_CONVERT_EXPR)
2895 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2896 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2897 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2898 || ((TYPE_PRECISION (TREE_TYPE (op))
2899 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2900 /* But a conversion that does not change the bit-pattern is ok. */
2901 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2902 > TYPE_PRECISION (TREE_TYPE (op)))
2903 && TYPE_UNSIGNED (TREE_TYPE (op))))
2905 if (vect_print_dump_info (REPORT_DETAILS))
2906 fprintf (vect_dump, "type conversion to/from bit-precision "
2907 "unsupported.");
2908 return false;
2911 if (!vec_stmt) /* transformation not required. */
2913 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2914 if (vect_print_dump_info (REPORT_DETAILS))
2915 fprintf (vect_dump, "=== vectorizable_assignment ===");
2916 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2917 return true;
2920 /** Transform. **/
2921 if (vect_print_dump_info (REPORT_DETAILS))
2922 fprintf (vect_dump, "transform assignment.");
2924 /* Handle def. */
2925 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2927 /* Handle use. */
2928 for (j = 0; j < ncopies; j++)
2930 /* Handle uses. */
2931 if (j == 0)
2932 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2933 else
2934 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2936 /* Arguments are ready. Create the new vector stmt. */
2937 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2939 if (CONVERT_EXPR_CODE_P (code)
2940 || code == VIEW_CONVERT_EXPR)
2941 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2942 new_stmt = gimple_build_assign (vec_dest, vop);
2943 new_temp = make_ssa_name (vec_dest, new_stmt);
2944 gimple_assign_set_lhs (new_stmt, new_temp);
2945 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2946 if (slp_node)
2947 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2950 if (slp_node)
2951 continue;
2953 if (j == 0)
2954 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2955 else
2956 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2958 prev_stmt_info = vinfo_for_stmt (new_stmt);
2961 VEC_free (tree, heap, vec_oprnds);
2962 return true;
2966 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2967 either as shift by a scalar or by a vector. */
2969 bool
2970 vect_supportable_shift (enum tree_code code, tree scalar_type)
2973 enum machine_mode vec_mode;
2974 optab optab;
2975 int icode;
2976 tree vectype;
2978 vectype = get_vectype_for_scalar_type (scalar_type);
2979 if (!vectype)
2980 return false;
2982 optab = optab_for_tree_code (code, vectype, optab_scalar);
2983 if (!optab
2984 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2986 optab = optab_for_tree_code (code, vectype, optab_vector);
2987 if (!optab
2988 || (optab_handler (optab, TYPE_MODE (vectype))
2989 == CODE_FOR_nothing))
2990 return false;
2993 vec_mode = TYPE_MODE (vectype);
2994 icode = (int) optab_handler (optab, vec_mode);
2995 if (icode == CODE_FOR_nothing)
2996 return false;
2998 return true;
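/* Added note: the scalar-form optab is probed first, so a shift like
   `x << 5' is accepted as soon as the target provides a vector-by-scalar
   shift, and the vector-by-vector optab is only consulted as a fallback;
   vectorizable_shift below applies the same preference per statement.  */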
3002 /* Function vectorizable_shift.
3004 Check if STMT performs a shift operation that can be vectorized.
3005 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3006 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3007 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3009 static bool
3010 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3011 gimple *vec_stmt, slp_tree slp_node)
3013 tree vec_dest;
3014 tree scalar_dest;
3015 tree op0, op1 = NULL;
3016 tree vec_oprnd1 = NULL_TREE;
3017 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3018 tree vectype;
3019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3020 enum tree_code code;
3021 enum machine_mode vec_mode;
3022 tree new_temp;
3023 optab optab;
3024 int icode;
3025 enum machine_mode optab_op2_mode;
3026 tree def;
3027 gimple def_stmt;
3028 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3029 gimple new_stmt = NULL;
3030 stmt_vec_info prev_stmt_info;
3031 int nunits_in;
3032 int nunits_out;
3033 tree vectype_out;
3034 tree op1_vectype;
3035 int ncopies;
3036 int j, i;
3037 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3038 tree vop0, vop1;
3039 unsigned int k;
3040 bool scalar_shift_arg = true;
3041 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3042 int vf;
3044 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3045 return false;
3047 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3048 return false;
3050 /* Is STMT a vectorizable binary/unary operation? */
3051 if (!is_gimple_assign (stmt))
3052 return false;
3054 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3055 return false;
3057 code = gimple_assign_rhs_code (stmt);
3059 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3060 || code == RROTATE_EXPR))
3061 return false;
3063 scalar_dest = gimple_assign_lhs (stmt);
3064 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3065 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3066 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3068 if (vect_print_dump_info (REPORT_DETAILS))
3069 fprintf (vect_dump, "bit-precision shifts not supported.");
3070 return false;
3073 op0 = gimple_assign_rhs1 (stmt);
3074 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3075 &def_stmt, &def, &dt[0], &vectype))
3077 if (vect_print_dump_info (REPORT_DETAILS))
3078 fprintf (vect_dump, "use not simple.");
3079 return false;
3081 /* If op0 is an external or constant def, use a vector type with
3082 the same size as the output vector type. */
3083 if (!vectype)
3084 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3085 if (vec_stmt)
3086 gcc_assert (vectype);
3087 if (!vectype)
3089 if (vect_print_dump_info (REPORT_DETAILS))
3091 fprintf (vect_dump, "no vectype for scalar type ");
3092 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3095 return false;
3098 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3099 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3100 if (nunits_out != nunits_in)
3101 return false;
3103 op1 = gimple_assign_rhs2 (stmt);
3104 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3105 &def, &dt[1], &op1_vectype))
3107 if (vect_print_dump_info (REPORT_DETAILS))
3108 fprintf (vect_dump, "use not simple.");
3109 return false;
3112 if (loop_vinfo)
3113 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3114 else
3115 vf = 1;
3117 /* Multiple types in SLP are handled by creating the appropriate number of
3118 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3119 case of SLP. */
3120 if (slp_node || PURE_SLP_STMT (stmt_info))
3121 ncopies = 1;
3122 else
3123 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3125 gcc_assert (ncopies >= 1);
3127 /* Determine whether the shift amount is a vector or a scalar. If the
3128 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3130 if (dt[1] == vect_internal_def && !slp_node)
3131 scalar_shift_arg = false;
3132 else if (dt[1] == vect_constant_def
3133 || dt[1] == vect_external_def
3134 || dt[1] == vect_internal_def)
3136 /* In SLP, need to check whether the shift count is the same,
3137 in loops if it is a constant or invariant, it is always
3138 a scalar shift. */
3139 if (slp_node)
3141 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3142 gimple slpstmt;
3144 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3145 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3146 scalar_shift_arg = false;
3149 else
3151 if (vect_print_dump_info (REPORT_DETAILS))
3152 fprintf (vect_dump, "operand mode requires invariant argument.");
3153 return false;
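/* Added illustration: for `a[i] << b[i]' the shift amount is an internal
   def, so SCALAR_SHIFT_ARG becomes false and a vector/vector shift optab is
   required; for `a[i] << 3' or a loop-invariant amount it stays true and
   the vector/scalar form is preferred below, falling back to vector/vector
   when the target only provides the latter.  */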
3156 /* Vector shifted by vector. */
3157 if (!scalar_shift_arg)
3159 optab = optab_for_tree_code (code, vectype, optab_vector);
3160 if (vect_print_dump_info (REPORT_DETAILS))
3161 fprintf (vect_dump, "vector/vector shift/rotate found.");
3162 if (!op1_vectype)
3163 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3164 if (op1_vectype == NULL_TREE
3165 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3167 if (vect_print_dump_info (REPORT_DETAILS))
3168 fprintf (vect_dump, "unusable type for last operand in"
3169 " vector/vector shift/rotate.");
3170 return false;
3173 /* See if the machine has a vector shifted by scalar insn and if not
3174 then see if it has a vector shifted by vector insn. */
3175 else
3177 optab = optab_for_tree_code (code, vectype, optab_scalar);
3178 if (optab
3179 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3181 if (vect_print_dump_info (REPORT_DETAILS))
3182 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3184 else
3186 optab = optab_for_tree_code (code, vectype, optab_vector);
3187 if (optab
3188 && (optab_handler (optab, TYPE_MODE (vectype))
3189 != CODE_FOR_nothing))
3191 scalar_shift_arg = false;
3193 if (vect_print_dump_info (REPORT_DETAILS))
3194 fprintf (vect_dump, "vector/vector shift/rotate found.");
3196 /* Unlike the other binary operators, shifts/rotates have
3197 the rhs being int, instead of the same type as the lhs,
3198 so make sure the scalar is the right type if we are
3199 dealing with vectors of long long/long/short/char. */
3200 if (dt[1] == vect_constant_def)
3201 op1 = fold_convert (TREE_TYPE (vectype), op1);
3202 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3203 TREE_TYPE (op1)))
3205 if (slp_node
3206 && TYPE_MODE (TREE_TYPE (vectype))
3207 != TYPE_MODE (TREE_TYPE (op1)))
3209 if (vect_print_dump_info (REPORT_DETAILS))
3210 fprintf (vect_dump, "unusable type for last operand in"
3211 " vector/vector shift/rotate.");
3212 return false;
3214 if (vec_stmt && !slp_node)
3216 op1 = fold_convert (TREE_TYPE (vectype), op1);
3217 op1 = vect_init_vector (stmt, op1,
3218 TREE_TYPE (vectype), NULL);
3225 /* Supportable by target? */
3226 if (!optab)
3228 if (vect_print_dump_info (REPORT_DETAILS))
3229 fprintf (vect_dump, "no optab.");
3230 return false;
3232 vec_mode = TYPE_MODE (vectype);
3233 icode = (int) optab_handler (optab, vec_mode);
3234 if (icode == CODE_FOR_nothing)
3236 if (vect_print_dump_info (REPORT_DETAILS))
3237 fprintf (vect_dump, "op not supported by target.");
3238 /* Check only during analysis. */
3239 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3240 || (vf < vect_min_worthwhile_factor (code)
3241 && !vec_stmt))
3242 return false;
3243 if (vect_print_dump_info (REPORT_DETAILS))
3244 fprintf (vect_dump, "proceeding using word mode.");
3247 /* Worthwhile without SIMD support? Check only during analysis. */
3248 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3249 && vf < vect_min_worthwhile_factor (code)
3250 && !vec_stmt)
3252 if (vect_print_dump_info (REPORT_DETAILS))
3253 fprintf (vect_dump, "not worthwhile without SIMD support.");
3254 return false;
3257 if (!vec_stmt) /* transformation not required. */
3259 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3260 if (vect_print_dump_info (REPORT_DETAILS))
3261 fprintf (vect_dump, "=== vectorizable_shift ===");
3262 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3263 return true;
3266 /** Transform. **/
3268 if (vect_print_dump_info (REPORT_DETAILS))
3269 fprintf (vect_dump, "transform binary/unary operation.");
3271 /* Handle def. */
3272 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3274 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3275 created in the previous stages of the recursion, so no allocation is
3276 needed, except for the case of shift with scalar shift argument. In that
3277 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3278 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3279 In case of loop-based vectorization we allocate VECs of size 1. We
3280 allocate VEC_OPRNDS1 only in case of binary operation. */
3281 if (!slp_node)
3283 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3284 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3286 else if (scalar_shift_arg)
3287 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3289 prev_stmt_info = NULL;
3290 for (j = 0; j < ncopies; j++)
3292 /* Handle uses. */
3293 if (j == 0)
3295 if (scalar_shift_arg)
3297 /* Vector shl and shr insn patterns can be defined with scalar
3298 operand 2 (shift operand). In this case, use constant or loop
3299 invariant op1 directly, without extending it to vector mode
3300 first. */
3301 optab_op2_mode = insn_data[icode].operand[2].mode;
3302 if (!VECTOR_MODE_P (optab_op2_mode))
3304 if (vect_print_dump_info (REPORT_DETAILS))
3305 fprintf (vect_dump, "operand 1 using scalar mode.");
3306 vec_oprnd1 = op1;
3307 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3308 if (slp_node)
3310 /* Store vec_oprnd1 for every vector stmt to be created
3311 for SLP_NODE. We check during the analysis that all
3312 the shift arguments are the same.
3313 TODO: Allow different constants for different vector
3314 stmts generated for an SLP instance. */
3315 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3316 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3321 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3322 (a special case for certain kind of vector shifts); otherwise,
3323 operand 1 should be of a vector type (the usual case). */
3324 if (vec_oprnd1)
3325 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3326 slp_node, -1);
3327 else
3328 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3329 slp_node, -1);
3331 else
3332 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3334 /* Arguments are ready. Create the new vector stmt. */
3335 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3337 vop1 = VEC_index (tree, vec_oprnds1, i);
3338 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3339 new_temp = make_ssa_name (vec_dest, new_stmt);
3340 gimple_assign_set_lhs (new_stmt, new_temp);
3341 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3342 if (slp_node)
3343 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3346 if (slp_node)
3347 continue;
3349 if (j == 0)
3350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3351 else
3352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3353 prev_stmt_info = vinfo_for_stmt (new_stmt);
3356 VEC_free (tree, heap, vec_oprnds0);
3357 VEC_free (tree, heap, vec_oprnds1);
3359 return true;
3363 static tree permute_vec_elements (tree, tree, tree, gimple,
3364 gimple_stmt_iterator *);
3367 /* Function vectorizable_operation.
3369 Check if STMT performs a binary, unary or ternary operation that can
3370 be vectorized.
3371 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3372 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3373 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3375 static bool
3376 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3377 gimple *vec_stmt, slp_tree slp_node)
3379 tree vec_dest;
3380 tree scalar_dest;
3381 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3382 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3383 tree vectype;
3384 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3385 enum tree_code code;
3386 enum machine_mode vec_mode;
3387 tree new_temp;
3388 int op_type;
3389 optab optab;
3390 int icode;
3391 tree def;
3392 gimple def_stmt;
3393 enum vect_def_type dt[3]
3394 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3395 gimple new_stmt = NULL;
3396 stmt_vec_info prev_stmt_info;
3397 int nunits_in;
3398 int nunits_out;
3399 tree vectype_out;
3400 int ncopies;
3401 int j, i;
3402 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3403 tree vop0, vop1, vop2;
3404 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3405 int vf;
3407 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3408 return false;
3410 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3411 return false;
3413 /* Is STMT a vectorizable binary/unary operation? */
3414 if (!is_gimple_assign (stmt))
3415 return false;
3417 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3418 return false;
3420 code = gimple_assign_rhs_code (stmt);
3422 /* For pointer addition, we should use the normal plus for
3423 the vector addition. */
3424 if (code == POINTER_PLUS_EXPR)
3425 code = PLUS_EXPR;
3427 /* Support only unary, binary or ternary operations. */
3428 op_type = TREE_CODE_LENGTH (code);
3429 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3431 if (vect_print_dump_info (REPORT_DETAILS))
3432 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3433 op_type);
3434 return false;
3437 scalar_dest = gimple_assign_lhs (stmt);
3438 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3440 /* Most operations cannot handle bit-precision types without extra
3441 truncations. */
3442 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3443 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3444 /* Exceptions are bitwise binary operations. */
3445 && code != BIT_IOR_EXPR
3446 && code != BIT_XOR_EXPR
3447 && code != BIT_AND_EXPR)
3449 if (vect_print_dump_info (REPORT_DETAILS))
3450 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3451 return false;
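/* Added example: arithmetic on a value whose scalar type has, say, 3-bit
   precision inside a 32-bit mode would need an extra truncation after every
   vector operation to keep the excess bits clean, so such statements are
   rejected here; the low PRECISION bits of a bitwise IOR, XOR or AND depend
   only on the low bits of the operands, so those are allowed through.  */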
3454 op0 = gimple_assign_rhs1 (stmt);
3455 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3456 &def_stmt, &def, &dt[0], &vectype))
3458 if (vect_print_dump_info (REPORT_DETAILS))
3459 fprintf (vect_dump, "use not simple.");
3460 return false;
3462 /* If op0 is an external or constant def, use a vector type with
3463 the same size as the output vector type. */
3464 if (!vectype)
3465 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3466 if (vec_stmt)
3467 gcc_assert (vectype);
3468 if (!vectype)
3470 if (vect_print_dump_info (REPORT_DETAILS))
3472 fprintf (vect_dump, "no vectype for scalar type ");
3473 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3476 return false;
3479 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3480 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3481 if (nunits_out != nunits_in)
3482 return false;
3484 if (op_type == binary_op || op_type == ternary_op)
3486 op1 = gimple_assign_rhs2 (stmt);
3487 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3488 &def, &dt[1]))
3490 if (vect_print_dump_info (REPORT_DETAILS))
3491 fprintf (vect_dump, "use not simple.");
3492 return false;
3495 if (op_type == ternary_op)
3497 op2 = gimple_assign_rhs3 (stmt);
3498 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3499 &def, &dt[2]))
3501 if (vect_print_dump_info (REPORT_DETAILS))
3502 fprintf (vect_dump, "use not simple.");
3503 return false;
3507 if (loop_vinfo)
3508 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3509 else
3510 vf = 1;
3512 /* Multiple types in SLP are handled by creating the appropriate number of
3513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3514 case of SLP. */
3515 if (slp_node || PURE_SLP_STMT (stmt_info))
3516 ncopies = 1;
3517 else
3518 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3520 gcc_assert (ncopies >= 1);
3522 /* Shifts are handled in vectorizable_shift (). */
3523 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3524 || code == RROTATE_EXPR)
3525 return false;
3527 /* Supportable by target? */
3529 vec_mode = TYPE_MODE (vectype);
3530 if (code == MULT_HIGHPART_EXPR)
3532 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3533 icode = LAST_INSN_CODE;
3534 else
3535 icode = CODE_FOR_nothing;
3537 else
3539 optab = optab_for_tree_code (code, vectype, optab_default);
3540 if (!optab)
3542 if (vect_print_dump_info (REPORT_DETAILS))
3543 fprintf (vect_dump, "no optab.");
3544 return false;
3546 icode = (int) optab_handler (optab, vec_mode);
3549 if (icode == CODE_FOR_nothing)
3551 if (vect_print_dump_info (REPORT_DETAILS))
3552 fprintf (vect_dump, "op not supported by target.");
3553 /* Check only during analysis. */
3554 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3555 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3556 return false;
3557 if (vect_print_dump_info (REPORT_DETAILS))
3558 fprintf (vect_dump, "proceeding using word mode.");
3561 /* Worthwhile without SIMD support? Check only during analysis. */
3562 if (!VECTOR_MODE_P (vec_mode)
3563 && !vec_stmt
3564 && vf < vect_min_worthwhile_factor (code))
3566 if (vect_print_dump_info (REPORT_DETAILS))
3567 fprintf (vect_dump, "not worthwhile without SIMD support.");
3568 return false;
3571 if (!vec_stmt) /* transformation not required. */
3573 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3574 if (vect_print_dump_info (REPORT_DETAILS))
3575 fprintf (vect_dump, "=== vectorizable_operation ===");
3576 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3577 return true;
3580 /** Transform. **/
3582 if (vect_print_dump_info (REPORT_DETAILS))
3583 fprintf (vect_dump, "transform binary/unary operation.");
3585 /* Handle def. */
3586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3588 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3589 created in the previous stages of the recursion, so no allocation is
3590 needed (shifts, which would need a replicated scalar operand, were
3591 already rejected above and are handled in vectorizable_shift ()).
3592 In case of loop-based vectorization we allocate VECs of size 1. We
3593 allocate VEC_OPRNDS1 only in case of a binary or ternary operation,
3594 and VEC_OPRNDS2 only in case of a ternary operation. */
3595 if (!slp_node)
3597 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3598 if (op_type == binary_op || op_type == ternary_op)
3599 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3600 if (op_type == ternary_op)
3601 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3604 /* In case the vectorization factor (VF) is bigger than the number
3605 of elements that we can fit in a vectype (nunits), we have to generate
3606 more than one vector stmt - i.e - we need to "unroll" the
3607 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3608 from one copy of the vector stmt to the next, in the field
3609 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3610 stages to find the correct vector defs to be used when vectorizing
3611 stmts that use the defs of the current stmt. The example below
3612 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3613 we need to create 4 vectorized stmts):
3615 before vectorization:
3616 RELATED_STMT VEC_STMT
3617 S1: x = memref - -
3618 S2: z = x + 1 - -
3620 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3621 there):
3622 RELATED_STMT VEC_STMT
3623 VS1_0: vx0 = memref0 VS1_1 -
3624 VS1_1: vx1 = memref1 VS1_2 -
3625 VS1_2: vx2 = memref2 VS1_3 -
3626 VS1_3: vx3 = memref3 - -
3627 S1: x = load - VS1_0
3628 S2: z = x + 1 - -
3630 step2: vectorize stmt S2 (done here):
3631 To vectorize stmt S2 we first need to find the relevant vector
3632 def for the first operand 'x'. This is, as usual, obtained from
3633 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3634 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3635 relevant vector def 'vx0'. Having found 'vx0' we can generate
3636 the vector stmt VS2_0, and as usual, record it in the
3637 STMT_VINFO_VEC_STMT of stmt S2.
3638 When creating the second copy (VS2_1), we obtain the relevant vector
3639 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3640 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3641 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3642 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3643 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3644 chain of stmts and pointers:
3645 RELATED_STMT VEC_STMT
3646 VS1_0: vx0 = memref0 VS1_1 -
3647 VS1_1: vx1 = memref1 VS1_2 -
3648 VS1_2: vx2 = memref2 VS1_3 -
3649 VS1_3: vx3 = memref3 - -
3650 S1: x = load - VS1_0
3651 VS2_0: vz0 = vx0 + v1 VS2_1 -
3652 VS2_1: vz1 = vx1 + v1 VS2_2 -
3653 VS2_2: vz2 = vx2 + v1 VS2_3 -
3654 VS2_3: vz3 = vx3 + v1 - -
3655 S2: z = x + 1 - VS2_0 */
3657 prev_stmt_info = NULL;
3658 for (j = 0; j < ncopies; j++)
3660 /* Handle uses. */
3661 if (j == 0)
3663 if (op_type == binary_op || op_type == ternary_op)
3664 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3665 slp_node, -1);
3666 else
3667 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3668 slp_node, -1);
3669 if (op_type == ternary_op)
3671 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3672 VEC_quick_push (tree, vec_oprnds2,
3673 vect_get_vec_def_for_operand (op2, stmt, NULL));
3676 else
3678 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3679 if (op_type == ternary_op)
3681 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3682 VEC_quick_push (tree, vec_oprnds2,
3683 vect_get_vec_def_for_stmt_copy (dt[2],
3684 vec_oprnd));
3688 /* Arguments are ready. Create the new vector stmt. */
3689 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3691 vop1 = ((op_type == binary_op || op_type == ternary_op)
3692 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3693 vop2 = ((op_type == ternary_op)
3694 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3695 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3696 vop0, vop1, vop2);
3697 new_temp = make_ssa_name (vec_dest, new_stmt);
3698 gimple_assign_set_lhs (new_stmt, new_temp);
3699 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3700 if (slp_node)
3701 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3704 if (slp_node)
3705 continue;
3707 if (j == 0)
3708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3709 else
3710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3711 prev_stmt_info = vinfo_for_stmt (new_stmt);
3714 VEC_free (tree, heap, vec_oprnds0);
3715 if (vec_oprnds1)
3716 VEC_free (tree, heap, vec_oprnds1);
3717 if (vec_oprnds2)
3718 VEC_free (tree, heap, vec_oprnds2);
3720 return true;
3724 /* Function vectorizable_store.
3726 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3727 can be vectorized.
3728 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3729 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3730 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3732 static bool
3733 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3734 slp_tree slp_node)
3736 tree scalar_dest;
3737 tree data_ref;
3738 tree op;
3739 tree vec_oprnd = NULL_TREE;
3740 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3741 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3742 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3743 tree elem_type;
3744 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3745 struct loop *loop = NULL;
3746 enum machine_mode vec_mode;
3747 tree dummy;
3748 enum dr_alignment_support alignment_support_scheme;
3749 tree def;
3750 gimple def_stmt;
3751 enum vect_def_type dt;
3752 stmt_vec_info prev_stmt_info = NULL;
3753 tree dataref_ptr = NULL_TREE;
3754 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3755 int ncopies;
3756 int j;
3757 gimple next_stmt, first_stmt = NULL;
3758 bool grouped_store = false;
3759 bool store_lanes_p = false;
3760 unsigned int group_size, i;
3761 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3762 bool inv_p;
3763 VEC(tree,heap) *vec_oprnds = NULL;
3764 bool slp = (slp_node != NULL);
3765 unsigned int vec_num;
3766 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3767 tree aggr_type;
3769 if (loop_vinfo)
3770 loop = LOOP_VINFO_LOOP (loop_vinfo);
3772 /* Multiple types in SLP are handled by creating the appropriate number of
3773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3774 case of SLP. */
3775 if (slp || PURE_SLP_STMT (stmt_info))
3776 ncopies = 1;
3777 else
3778 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3780 gcc_assert (ncopies >= 1);
3782 /* FORNOW. This restriction should be relaxed. */
3783 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3785 if (vect_print_dump_info (REPORT_DETAILS))
3786 fprintf (vect_dump, "multiple types in nested loop.");
3787 return false;
3790 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3791 return false;
3793 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3794 return false;
3796 /* Is vectorizable store? */
3798 if (!is_gimple_assign (stmt))
3799 return false;
3801 scalar_dest = gimple_assign_lhs (stmt);
3802 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3803 && is_pattern_stmt_p (stmt_info))
3804 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3805 if (TREE_CODE (scalar_dest) != ARRAY_REF
3806 && TREE_CODE (scalar_dest) != INDIRECT_REF
3807 && TREE_CODE (scalar_dest) != COMPONENT_REF
3808 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3809 && TREE_CODE (scalar_dest) != REALPART_EXPR
3810 && TREE_CODE (scalar_dest) != MEM_REF)
3811 return false;
3813 gcc_assert (gimple_assign_single_p (stmt));
3814 op = gimple_assign_rhs1 (stmt);
3815 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3816 &def, &dt))
3818 if (vect_print_dump_info (REPORT_DETAILS))
3819 fprintf (vect_dump, "use not simple.");
3820 return false;
3823 elem_type = TREE_TYPE (vectype);
3824 vec_mode = TYPE_MODE (vectype);
3826 /* FORNOW. In some cases can vectorize even if data-type not supported
3827 (e.g. - array initialization with 0). */
3828 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3829 return false;
3831 if (!STMT_VINFO_DATA_REF (stmt_info))
3832 return false;
3834 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3835 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3836 size_zero_node) < 0)
3838 if (vect_print_dump_info (REPORT_DETAILS))
3839 fprintf (vect_dump, "negative step for store.");
3840 return false;
3843 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3845 grouped_store = true;
3846 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3847 if (!slp && !PURE_SLP_STMT (stmt_info))
3849 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3850 if (vect_store_lanes_supported (vectype, group_size))
3851 store_lanes_p = true;
3852 else if (!vect_grouped_store_supported (vectype, group_size))
3853 return false;
3856 if (first_stmt == stmt)
3858 /* STMT is the leader of the group. Check the operands of all the
3859 stmts of the group. */
3860 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3861 while (next_stmt)
3863 gcc_assert (gimple_assign_single_p (next_stmt));
3864 op = gimple_assign_rhs1 (next_stmt);
3865 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3866 &def_stmt, &def, &dt))
3868 if (vect_print_dump_info (REPORT_DETAILS))
3869 fprintf (vect_dump, "use not simple.");
3870 return false;
3872 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3877 if (!vec_stmt) /* transformation not required. */
3879 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3880 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3881 NULL, NULL, NULL);
3882 return true;
3885 /** Transform. **/
3887 if (grouped_store)
3889 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3890 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3892 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3894 /* FORNOW */
3895 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3897 /* We vectorize all the stmts of the interleaving group when we
3898 reach the last stmt in the group. */
3899 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3900 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3901 && !slp)
3903 *vec_stmt = NULL;
3904 return true;
3907 if (slp)
3909 grouped_store = false;
3910 /* VEC_NUM is the number of vect stmts to be created for this
3911 group. */
3912 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3913 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3914 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3915 op = gimple_assign_rhs1 (first_stmt);
3917 else
3918 /* VEC_NUM is the number of vect stmts to be created for this
3919 group. */
3920 vec_num = group_size;
3922 else
3924 first_stmt = stmt;
3925 first_dr = dr;
3926 group_size = vec_num = 1;
3929 if (vect_print_dump_info (REPORT_DETAILS))
3930 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3932 dr_chain = VEC_alloc (tree, heap, group_size);
3933 oprnds = VEC_alloc (tree, heap, group_size);
3935 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3936 gcc_assert (alignment_support_scheme);
3937 /* Targets with store-lane instructions must not require explicit
3938 realignment. */
3939 gcc_assert (!store_lanes_p
3940 || alignment_support_scheme == dr_aligned
3941 || alignment_support_scheme == dr_unaligned_supported);
3943 if (store_lanes_p)
3944 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3945 else
3946 aggr_type = vectype;
3948 /* In case the vectorization factor (VF) is bigger than the number
3949 of elements that we can fit in a vectype (nunits), we have to generate
3950 more than one vector stmt - i.e - we need to "unroll" the
3951 vector stmt by a factor VF/nunits. For more details see documentation in
3952 vect_get_vec_def_for_copy_stmt. */
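   /* Concretely (an illustrative example, not a restriction of this
      function): with VF = 16 and a four-element vectype such as V4SI,
      ncopies computed above is 16 / 4 = 4, so the store is emitted four
      times, once per vector copy.  */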
3954 /* In case of interleaving (non-unit grouped access):
3956 S1: &base + 2 = x2
3957 S2: &base = x0
3958 S3: &base + 1 = x1
3959 S4: &base + 3 = x3
3961 We create vectorized stores starting from the base address (the access of the
3962 first stmt in the chain, S2 in the above example) when the last store stmt
3963 of the chain (S4) is reached:
3965 VS1: &base = vx2
3966 VS2: &base + vec_size*1 = vx0
3967 VS3: &base + vec_size*2 = vx1
3968 VS4: &base + vec_size*3 = vx3
3970 Then permutation statements are generated:
3972 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3973 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3976 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3977 (the order of the data-refs in the output of vect_permute_store_chain
3978 corresponds to the order of scalar stmts in the interleaving chain - see
3979 the documentation of vect_permute_store_chain()).
3981 In case of both multiple types and interleaving, above vector stores and
3982 permutation stmts are created for every copy. The result vector stmts are
3983 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3984 STMT_VINFO_RELATED_STMT for the next copies.
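   /* For illustration only -- a standalone sketch (not the vectorizer's own
      vect_permute_store_chain, NUNITS fixed at 8 for concreteness) of how
      the two interleaving selectors used by VS5 and VS6 above can be
      computed for a pair of NUNITS-element input vectors:

        #include <stdio.h>

        #define NUNITS 8

        int
        main (void)
        {
          unsigned char lo[NUNITS], hi[NUNITS];
          int i;

          for (i = 0; i < NUNITS / 2; i++)
            {
              lo[2 * i] = i;                       // from the first vector
              lo[2 * i + 1] = NUNITS + i;          // from the second vector
              hi[2 * i] = NUNITS / 2 + i;
              hi[2 * i + 1] = NUNITS + NUNITS / 2 + i;
            }
          for (i = 0; i < NUNITS; i++)
            printf ("%d ", lo[i]);
          printf ("\n");
          for (i = 0; i < NUNITS; i++)
            printf ("%d ", hi[i]);
          printf ("\n");
          return 0;
        }

      For NUNITS == 8 this prints 0 8 1 9 2 10 3 11 and 4 12 5 13 6 14 7 15,
      matching the two masks in VS5 and VS6.  */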
3987 prev_stmt_info = NULL;
3988 for (j = 0; j < ncopies; j++)
3990 gimple new_stmt;
3991 gimple ptr_incr;
3993 if (j == 0)
3995 if (slp)
3997 /* Get vectorized arguments for SLP_NODE. */
3998 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3999 NULL, slp_node, -1);
4001 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4003 else
4005 /* For interleaved stores we collect vectorized defs for all the
4006 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4007 used as an input to vect_permute_store_chain(), and OPRNDS as
4008 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4010 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4011 OPRNDS are of size 1. */
4012 next_stmt = first_stmt;
4013 for (i = 0; i < group_size; i++)
4015 /* Since gaps are not supported for interleaved stores,
4016 GROUP_SIZE is the exact number of stmts in the chain.
4017 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4018 there is no interleaving, GROUP_SIZE is 1, and only one
4019 iteration of the loop will be executed. */
4020 gcc_assert (next_stmt
4021 && gimple_assign_single_p (next_stmt));
4022 op = gimple_assign_rhs1 (next_stmt);
4024 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4025 NULL);
4026 VEC_quick_push(tree, dr_chain, vec_oprnd);
4027 VEC_quick_push(tree, oprnds, vec_oprnd);
4028 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4032 /* We should have caught mismatched types earlier. */
4033 gcc_assert (useless_type_conversion_p (vectype,
4034 TREE_TYPE (vec_oprnd)));
4035 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4036 NULL_TREE, &dummy, gsi,
4037 &ptr_incr, false, &inv_p);
4038 gcc_assert (bb_vinfo || !inv_p);
4040 else
4042 /* For interleaved stores we created vectorized defs for all the
4043 defs stored in OPRNDS in the previous iteration (previous copy).
4044 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4045 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4046 next copy.
4047 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4048 OPRNDS are of size 1. */
4049 for (i = 0; i < group_size; i++)
4051 op = VEC_index (tree, oprnds, i);
4052 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4053 &def, &dt);
4054 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4055 VEC_replace(tree, dr_chain, i, vec_oprnd);
4056 VEC_replace(tree, oprnds, i, vec_oprnd);
4058 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4059 TYPE_SIZE_UNIT (aggr_type));
4062 if (store_lanes_p)
4064 tree vec_array;
4066 /* Combine all the vectors into an array. */
4067 vec_array = create_vector_array (vectype, vec_num);
4068 for (i = 0; i < vec_num; i++)
4070 vec_oprnd = VEC_index (tree, dr_chain, i);
4071 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4074 /* Emit:
4075 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4076 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4077 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4078 gimple_call_set_lhs (new_stmt, data_ref);
4079 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4081 else
4083 new_stmt = NULL;
4084 if (grouped_store)
4086 result_chain = VEC_alloc (tree, heap, group_size);
4087 /* Permute. */
4088 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4089 &result_chain);
4092 next_stmt = first_stmt;
4093 for (i = 0; i < vec_num; i++)
4095 unsigned align, misalign;
4097 if (i > 0)
4098 /* Bump the vector pointer. */
4099 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4100 stmt, NULL_TREE);
4102 if (slp)
4103 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4104 else if (grouped_store)
4105 /* For grouped stores vectorized defs are interleaved in
4106 vect_permute_store_chain(). */
4107 vec_oprnd = VEC_index (tree, result_chain, i);
4109 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4110 build_int_cst (reference_alias_ptr_type
4111 (DR_REF (first_dr)), 0));
4112 align = TYPE_ALIGN_UNIT (vectype);
4113 if (aligned_access_p (first_dr))
4114 misalign = 0;
4115 else if (DR_MISALIGNMENT (first_dr) == -1)
4117 TREE_TYPE (data_ref)
4118 = build_aligned_type (TREE_TYPE (data_ref),
4119 TYPE_ALIGN (elem_type));
4120 align = TYPE_ALIGN_UNIT (elem_type);
4121 misalign = 0;
4123 else
4125 TREE_TYPE (data_ref)
4126 = build_aligned_type (TREE_TYPE (data_ref),
4127 TYPE_ALIGN (elem_type));
4128 misalign = DR_MISALIGNMENT (first_dr);
4130 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4131 misalign);
4133 /* Arguments are ready. Create the new vector stmt. */
4134 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4137 if (slp)
4138 continue;
4140 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4141 if (!next_stmt)
4142 break;
4145 if (!slp)
4147 if (j == 0)
4148 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4149 else
4150 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4151 prev_stmt_info = vinfo_for_stmt (new_stmt);
4155 VEC_free (tree, heap, dr_chain);
4156 VEC_free (tree, heap, oprnds);
4157 if (result_chain)
4158 VEC_free (tree, heap, result_chain);
4159 if (vec_oprnds)
4160 VEC_free (tree, heap, vec_oprnds);
4162 return true;
4165 /* Given a vector type VECTYPE and permutation SEL returns
4166 the VECTOR_CST mask that implements the permutation of the
4167 vector elements. If that is impossible to do, returns NULL. */
4169 tree
4170 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4172 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4173 int i, nunits;
4175 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4177 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4178 return NULL;
4180 mask_elt_type = lang_hooks.types.type_for_mode
4181 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4182 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4184 mask_elts = XALLOCAVEC (tree, nunits);
4185 for (i = nunits - 1; i >= 0; i--)
4186 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4187 mask_vec = build_vector (mask_type, mask_elts);
4189 return mask_vec;
4192 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4193 reversal of the vector elements. If that is impossible to do,
4194 returns NULL. */
4196 static tree
4197 perm_mask_for_reverse (tree vectype)
4199 int i, nunits;
4200 unsigned char *sel;
4202 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4203 sel = XALLOCAVEC (unsigned char, nunits);
4205 for (i = 0; i < nunits; ++i)
4206 sel[i] = nunits - 1 - i;
4208 return vect_gen_perm_mask (vectype, sel);
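   /* For illustration only -- a standalone sketch (plain arrays rather than
      trees, NUNITS fixed at 4) of the effect of the reversal selector built
      above: for a four-element vector the selector is {3, 2, 1, 0}, and
      applying it mirrors the vector, which is what a negative-step load
      needs.

        #define NUNITS 4

        static void
        apply_perm (const int *src, const unsigned char *sel, int *dst)
        {
          int i;

          // dst[i] takes the sel[i]-th element of the input vector.
          for (i = 0; i < NUNITS; i++)
            dst[i] = src[sel[i]];
        }

      With src = {10, 11, 12, 13} and sel = {3, 2, 1, 0}, dst becomes
      {13, 12, 11, 10}.  */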
4211 /* Given vector variables X and Y that were generated for the scalar
4212 STMT, generate instructions to permute the vector elements of X and Y
4213 using permutation mask MASK_VEC, insert them at *GSI and return the
4214 permuted vector variable. */
4216 static tree
4217 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4218 gimple_stmt_iterator *gsi)
4220 tree vectype = TREE_TYPE (x);
4221 tree perm_dest, data_ref;
4222 gimple perm_stmt;
4224 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4225 data_ref = make_ssa_name (perm_dest, NULL);
4227 /* Generate the permute statement. */
4228 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4229 x, y, mask_vec);
4230 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4232 return data_ref;
4235 /* vectorizable_load.
4237 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4238 can be vectorized.
4239 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4240 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4241 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4243 static bool
4244 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4245 slp_tree slp_node, slp_instance slp_node_instance)
4247 tree scalar_dest;
4248 tree vec_dest = NULL;
4249 tree data_ref = NULL;
4250 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4251 stmt_vec_info prev_stmt_info;
4252 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4253 struct loop *loop = NULL;
4254 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4255 bool nested_in_vect_loop = false;
4256 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4257 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4258 tree elem_type;
4259 tree new_temp;
4260 enum machine_mode mode;
4261 gimple new_stmt = NULL;
4262 tree dummy;
4263 enum dr_alignment_support alignment_support_scheme;
4264 tree dataref_ptr = NULL_TREE;
4265 gimple ptr_incr;
4266 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4267 int ncopies;
4268 int i, j, group_size;
4269 tree msq = NULL_TREE, lsq;
4270 tree offset = NULL_TREE;
4271 tree realignment_token = NULL_TREE;
4272 gimple phi = NULL;
4273 VEC(tree,heap) *dr_chain = NULL;
4274 bool grouped_load = false;
4275 bool load_lanes_p = false;
4276 gimple first_stmt;
4277 bool inv_p;
4278 bool negative = false;
4279 bool compute_in_loop = false;
4280 struct loop *at_loop;
4281 int vec_num;
4282 bool slp = (slp_node != NULL);
4283 bool slp_perm = false;
4284 enum tree_code code;
4285 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4286 int vf;
4287 tree aggr_type;
4288 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4289 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4290 tree stride_base, stride_step;
4291 int gather_scale = 1;
4292 enum vect_def_type gather_dt = vect_unknown_def_type;
4294 if (loop_vinfo)
4296 loop = LOOP_VINFO_LOOP (loop_vinfo);
4297 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4298 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4300 else
4301 vf = 1;
4303 /* Multiple types in SLP are handled by creating the appropriate number of
4304 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4305 case of SLP. */
4306 if (slp || PURE_SLP_STMT (stmt_info))
4307 ncopies = 1;
4308 else
4309 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4311 gcc_assert (ncopies >= 1);
4313 /* FORNOW. This restriction should be relaxed. */
4314 if (nested_in_vect_loop && ncopies > 1)
4316 if (vect_print_dump_info (REPORT_DETAILS))
4317 fprintf (vect_dump, "multiple types in nested loop.");
4318 return false;
4321 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4322 return false;
4324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4325 return false;
4327 /* Is vectorizable load? */
4328 if (!is_gimple_assign (stmt))
4329 return false;
4331 scalar_dest = gimple_assign_lhs (stmt);
4332 if (TREE_CODE (scalar_dest) != SSA_NAME)
4333 return false;
4335 code = gimple_assign_rhs_code (stmt);
4336 if (code != ARRAY_REF
4337 && code != INDIRECT_REF
4338 && code != COMPONENT_REF
4339 && code != IMAGPART_EXPR
4340 && code != REALPART_EXPR
4341 && code != MEM_REF
4342 && TREE_CODE_CLASS (code) != tcc_declaration)
4343 return false;
4345 if (!STMT_VINFO_DATA_REF (stmt_info))
4346 return false;
4348 elem_type = TREE_TYPE (vectype);
4349 mode = TYPE_MODE (vectype);
4351 /* FORNOW. In some cases can vectorize even if data-type not supported
4352 (e.g. - data copies). */
4353 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4355 if (vect_print_dump_info (REPORT_DETAILS))
4356 fprintf (vect_dump, "Aligned load, but unsupported type.");
4357 return false;
4360 /* Check if the load is a part of an interleaving chain. */
4361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4363 grouped_load = true;
4364 /* FORNOW */
4365 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4367 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4368 if (!slp && !PURE_SLP_STMT (stmt_info))
4370 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4371 if (vect_load_lanes_supported (vectype, group_size))
4372 load_lanes_p = true;
4373 else if (!vect_grouped_load_supported (vectype, group_size))
4374 return false;
4379 if (STMT_VINFO_GATHER_P (stmt_info))
4381 gimple def_stmt;
4382 tree def;
4383 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4384 &gather_off, &gather_scale);
4385 gcc_assert (gather_decl);
4386 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4387 &def_stmt, &def, &gather_dt,
4388 &gather_off_vectype))
4390 if (vect_print_dump_info (REPORT_DETAILS))
4391 fprintf (vect_dump, "gather index use not simple.");
4392 return false;
4395 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4397 if (!vect_check_strided_load (stmt, loop_vinfo,
4398 &stride_base, &stride_step))
4399 return false;
4401 else
4403 negative = tree_int_cst_compare (nested_in_vect_loop
4404 ? STMT_VINFO_DR_STEP (stmt_info)
4405 : DR_STEP (dr),
4406 size_zero_node) < 0;
4407 if (negative && ncopies > 1)
4409 if (vect_print_dump_info (REPORT_DETAILS))
4410 fprintf (vect_dump, "multiple types with negative step.");
4411 return false;
4414 if (negative)
4416 gcc_assert (!grouped_load);
4417 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4418 if (alignment_support_scheme != dr_aligned
4419 && alignment_support_scheme != dr_unaligned_supported)
4421 if (vect_print_dump_info (REPORT_DETAILS))
4422 fprintf (vect_dump, "negative step but alignment required.");
4423 return false;
4425 if (!perm_mask_for_reverse (vectype))
4427 if (vect_print_dump_info (REPORT_DETAILS))
4428 fprintf (vect_dump, "negative step and reversing not supported.");
4429 return false;
4434 if (!vec_stmt) /* transformation not required. */
4436 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4437 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4438 return true;
4441 if (vect_print_dump_info (REPORT_DETAILS))
4442 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4444 /** Transform. **/
4446 if (STMT_VINFO_GATHER_P (stmt_info))
4448 tree vec_oprnd0 = NULL_TREE, op;
4449 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4450 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4451 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4452 edge pe = loop_preheader_edge (loop);
4453 gimple_seq seq;
4454 basic_block new_bb;
4455 enum { NARROW, NONE, WIDEN } modifier;
4456 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4458 if (nunits == gather_off_nunits)
4459 modifier = NONE;
4460 else if (nunits == gather_off_nunits / 2)
4462 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4463 modifier = WIDEN;
4465 for (i = 0; i < gather_off_nunits; ++i)
4466 sel[i] = i | nunits;
4468 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4469 gcc_assert (perm_mask != NULL_TREE);
4471 else if (nunits == gather_off_nunits * 2)
4473 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4474 modifier = NARROW;
4476 for (i = 0; i < nunits; ++i)
4477 sel[i] = i < gather_off_nunits
4478 ? i : i + nunits - gather_off_nunits;
4480 perm_mask = vect_gen_perm_mask (vectype, sel);
4481 gcc_assert (perm_mask != NULL_TREE);
4482 ncopies *= 2;
4484 else
4485 gcc_unreachable ();
4487 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4488 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4489 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4490 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4491 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4492 scaletype = TREE_VALUE (arglist);
4493 gcc_checking_assert (types_compatible_p (srctype, rettype)
4494 && types_compatible_p (srctype, masktype));
4496 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4498 ptr = fold_convert (ptrtype, gather_base);
4499 if (!is_gimple_min_invariant (ptr))
4501 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4502 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4503 gcc_assert (!new_bb);
4506 /* Currently we support only unconditional gather loads,
4507 so mask should be all ones. */
4508 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4509 mask = build_int_cst (TREE_TYPE (masktype), -1);
4510 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4512 REAL_VALUE_TYPE r;
4513 long tmp[6];
4514 for (j = 0; j < 6; ++j)
4515 tmp[j] = -1;
4516 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4517 mask = build_real (TREE_TYPE (masktype), r);
4519 else
4520 gcc_unreachable ();
4521 mask = build_vector_from_val (masktype, mask);
4522 mask = vect_init_vector (stmt, mask, masktype, NULL);
4524 scale = build_int_cst (scaletype, gather_scale);
4526 prev_stmt_info = NULL;
4527 for (j = 0; j < ncopies; ++j)
4529 if (modifier == WIDEN && (j & 1))
4530 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4531 perm_mask, stmt, gsi);
4532 else if (j == 0)
4533 op = vec_oprnd0
4534 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4535 else
4536 op = vec_oprnd0
4537 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4539 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4541 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4542 == TYPE_VECTOR_SUBPARTS (idxtype));
4543 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4544 var = make_ssa_name (var, NULL);
4545 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4546 new_stmt
4547 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4548 op, NULL_TREE);
4549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4550 op = var;
4553 new_stmt
4554 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4556 if (!useless_type_conversion_p (vectype, rettype))
4558 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4559 == TYPE_VECTOR_SUBPARTS (rettype));
4560 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4561 op = make_ssa_name (var, new_stmt);
4562 gimple_call_set_lhs (new_stmt, op);
4563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4564 var = make_ssa_name (vec_dest, NULL);
4565 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4566 new_stmt
4567 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4568 NULL_TREE);
4570 else
4572 var = make_ssa_name (vec_dest, new_stmt);
4573 gimple_call_set_lhs (new_stmt, var);
4576 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4578 if (modifier == NARROW)
4580 if ((j & 1) == 0)
4582 prev_res = var;
4583 continue;
4585 var = permute_vec_elements (prev_res, var,
4586 perm_mask, stmt, gsi);
4587 new_stmt = SSA_NAME_DEF_STMT (var);
4590 if (prev_stmt_info == NULL)
4591 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4592 else
4593 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4594 prev_stmt_info = vinfo_for_stmt (new_stmt);
4596 return true;
4598 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4600 gimple_stmt_iterator incr_gsi;
4601 bool insert_after;
4602 gimple incr;
4603 tree offvar;
4604 tree ref = DR_REF (dr);
4605 tree ivstep;
4606 tree running_off;
4607 VEC(constructor_elt, gc) *v = NULL;
4608 gimple_seq stmts = NULL;
4610 gcc_assert (stride_base && stride_step);
4612 /* For a load with loop-invariant (but other than power-of-2)
4613 stride (i.e. not a grouped access) like so:
4615 for (i = 0; i < n; i += stride)
4616 ... = array[i];
4618 we generate a new induction variable and new accesses to
4619 form a new vector (or vectors, depending on ncopies):
4621 for (j = 0; ; j += VF*stride)
4622 tmp1 = array[j];
4623 tmp2 = array[j + stride];
4625 vectemp = {tmp1, tmp2, ...}
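   /* For illustration only -- a scalar, standalone sketch (VF fixed at 4,
      hypothetical helper name) of the transformation described above: each
      "vector" iteration gathers VF strided scalars into a temporary that
      plays the role of the CONSTRUCTOR built below.

        #define VF 4

        static void
        strided_copy (const int *array, int *out, int n, int stride)
        {
          int vectemp[VF];
          int j, k;

          for (j = 0; j + (VF - 1) * stride < n; j += VF * stride)
            {
              // Gather VF strided elements ...
              for (k = 0; k < VF; k++)
                vectemp[k] = array[j + k * stride];
              // ... and use them as one unit.
              for (k = 0; k < VF; k++)
                *out++ = vectemp[k];
            }
        }
      */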
4628 ivstep = stride_step;
4629 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4630 build_int_cst (TREE_TYPE (ivstep), vf));
4632 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4634 create_iv (stride_base, ivstep, NULL,
4635 loop, &incr_gsi, insert_after,
4636 &offvar, NULL);
4637 incr = gsi_stmt (incr_gsi);
4638 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4640 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4641 if (stmts)
4642 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4644 prev_stmt_info = NULL;
4645 running_off = offvar;
4646 for (j = 0; j < ncopies; j++)
4648 tree vec_inv;
4650 v = VEC_alloc (constructor_elt, gc, nunits);
4651 for (i = 0; i < nunits; i++)
4653 tree newref, newoff;
4654 gimple incr;
4655 if (TREE_CODE (ref) == ARRAY_REF)
4656 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4657 unshare_expr (TREE_OPERAND (ref, 0)),
4658 running_off,
4659 NULL_TREE, NULL_TREE);
4660 else
4661 newref = build2 (MEM_REF, TREE_TYPE (ref),
4662 running_off,
4663 TREE_OPERAND (ref, 1));
4665 newref = force_gimple_operand_gsi (gsi, newref, true,
4666 NULL_TREE, true,
4667 GSI_SAME_STMT);
4668 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4669 newoff = copy_ssa_name (running_off, NULL);
4670 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4671 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4672 running_off, stride_step);
4673 else
4674 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4675 running_off, stride_step);
4676 vect_finish_stmt_generation (stmt, incr, gsi);
4678 running_off = newoff;
4681 vec_inv = build_constructor (vectype, v);
4682 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4683 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4685 if (j == 0)
4686 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4687 else
4688 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4689 prev_stmt_info = vinfo_for_stmt (new_stmt);
4691 return true;
4694 if (grouped_load)
4696 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4697 if (slp
4698 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4699 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4700 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4702 /* Check if the chain of loads is already vectorized. */
4703 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4705 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4706 return true;
4708 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4709 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4711 /* VEC_NUM is the number of vect stmts to be created for this group. */
4712 if (slp)
4714 grouped_load = false;
4715 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4716 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4717 slp_perm = true;
4719 else
4720 vec_num = group_size;
4722 else
4724 first_stmt = stmt;
4725 first_dr = dr;
4726 group_size = vec_num = 1;
4729 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4730 gcc_assert (alignment_support_scheme);
4731 /* Targets with load-lane instructions must not require explicit
4732 realignment. */
4733 gcc_assert (!load_lanes_p
4734 || alignment_support_scheme == dr_aligned
4735 || alignment_support_scheme == dr_unaligned_supported);
4737 /* In case the vectorization factor (VF) is bigger than the number
4738 of elements that we can fit in a vectype (nunits), we have to generate
4739 more than one vector stmt - i.e - we need to "unroll" the
4740 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4741 from one copy of the vector stmt to the next, in the field
4742 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4743 stages to find the correct vector defs to be used when vectorizing
4744 stmts that use the defs of the current stmt. The example below
4745 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4746 need to create 4 vectorized stmts):
4748 before vectorization:
4749 RELATED_STMT VEC_STMT
4750 S1: x = memref - -
4751 S2: z = x + 1 - -
4753 step 1: vectorize stmt S1:
4754 We first create the vector stmt VS1_0, and, as usual, record a
4755 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4756 Next, we create the vector stmt VS1_1, and record a pointer to
4757 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4758 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4759 stmts and pointers:
4760 RELATED_STMT VEC_STMT
4761 VS1_0: vx0 = memref0 VS1_1 -
4762 VS1_1: vx1 = memref1 VS1_2 -
4763 VS1_2: vx2 = memref2 VS1_3 -
4764 VS1_3: vx3 = memref3 - -
4765 S1: x = load - VS1_0
4766 S2: z = x + 1 - -
4768 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4769 information we recorded in RELATED_STMT field is used to vectorize
4770 stmt S2. */
4772 /* In case of interleaving (non-unit grouped access):
4774 S1: x2 = &base + 2
4775 S2: x0 = &base
4776 S3: x1 = &base + 1
4777 S4: x3 = &base + 3
4779 Vectorized loads are created in the order of memory accesses
4780 starting from the access of the first stmt of the chain:
4782 VS1: vx0 = &base
4783 VS2: vx1 = &base + vec_size*1
4784 VS3: vx2 = &base + vec_size*2
4785 VS4: vx3 = &base + vec_size*3
4787 Then permutation statements are generated:
4789 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4790 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4793 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4794 (the order of the data-refs in the output of vect_permute_load_chain
4795 corresponds to the order of scalar stmts in the interleaving chain - see
4796 the documentation of vect_permute_load_chain()).
4797 The generation of permutation stmts and recording them in
4798 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4800 In case of both multiple types and interleaving, the vector loads and
4801 permutation stmts above are created for every copy. The result vector
4802 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4803 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
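   /* For illustration only -- a standalone sketch (not
      vect_permute_load_chain itself, NUNITS fixed at 8) of the even/odd
      extraction selectors used by VS5 and VS6 above, indexing the
      concatenation of two NUNITS-element input vectors:

        #define NUNITS 8

        static void
        extract_even_odd_sel (unsigned char *even, unsigned char *odd)
        {
          int i;

          // Indices 0..NUNITS-1 address the first input vector,
          // NUNITS..2*NUNITS-1 the second.
          for (i = 0; i < NUNITS; i++)
            {
              even[i] = 2 * i;      // 0, 2, 4, ...
              odd[i] = 2 * i + 1;   // 1, 3, 5, ...
            }
        }

      Applying EVEN to <vx0, vx1> collects the first scalar load of each
      pair, and ODD the second.  */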
4805 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4806 on a target that supports unaligned accesses (dr_unaligned_supported)
4807 we generate the following code:
4808 p = initial_addr;
4809 indx = 0;
4810 loop {
4811 p = p + indx * vectype_size;
4812 vec_dest = *(p);
4813 indx = indx + 1;
4816 Otherwise, the data reference is potentially unaligned on a target that
4817 does not support unaligned accesses (dr_explicit_realign_optimized) -
4818 then generate the following code, in which the data in each iteration is
4819 obtained by two vector loads, one from the previous iteration, and one
4820 from the current iteration:
4821 p1 = initial_addr;
4822 msq_init = *(floor(p1))
4823 p2 = initial_addr + VS - 1;
4824 realignment_token = call target_builtin;
4825 indx = 0;
4826 loop {
4827 p2 = p2 + indx * vectype_size
4828 lsq = *(floor(p2))
4829 vec_dest = realign_load (msq, lsq, realignment_token)
4830 indx = indx + 1;
4831 msq = lsq;
4832 } */
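   /* For illustration only -- a minimal sketch (hypothetical helper name) of
      the floor() used in the pseudo code above: flooring an address to a
      vector-size boundary is a bitwise AND with the negated alignment, which
      is what the BIT_AND_EXPRs built for dr_explicit_realign below compute.

        #include <stdint.h>

        // Round PTR down to the nearest ALIGN-byte boundary; ALIGN must be
        // a power of two.
        static void *
        floor_align (void *ptr, uintptr_t align)
        {
          return (void *) ((uintptr_t) ptr & -align);
        }

      E.g. with 16-byte vectors, floor_align (p, 16) clears the low four
      address bits, so both loads *(floor(p1)) and *(floor(p2)) are fully
      aligned.  */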
4834 /* If the misalignment remains the same throughout the execution of the
4835 loop, we can create the init_addr and permutation mask at the loop
4836 preheader. Otherwise, it needs to be created inside the loop.
4837 This can only occur when vectorizing memory accesses in the inner-loop
4838 nested within an outer-loop that is being vectorized. */
4840 if (nested_in_vect_loop
4841 && (TREE_INT_CST_LOW (DR_STEP (dr))
4842 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4844 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4845 compute_in_loop = true;
4848 if ((alignment_support_scheme == dr_explicit_realign_optimized
4849 || alignment_support_scheme == dr_explicit_realign)
4850 && !compute_in_loop)
4852 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4853 alignment_support_scheme, NULL_TREE,
4854 &at_loop);
4855 if (alignment_support_scheme == dr_explicit_realign_optimized)
4857 phi = SSA_NAME_DEF_STMT (msq);
4858 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4861 else
4862 at_loop = loop;
4864 if (negative)
4865 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4867 if (load_lanes_p)
4868 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4869 else
4870 aggr_type = vectype;
4872 prev_stmt_info = NULL;
4873 for (j = 0; j < ncopies; j++)
4875 /* 1. Create the vector or array pointer update chain. */
4876 if (j == 0)
4877 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4878 offset, &dummy, gsi,
4879 &ptr_incr, false, &inv_p);
4880 else
4881 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4882 TYPE_SIZE_UNIT (aggr_type));
4884 if (grouped_load || slp_perm)
4885 dr_chain = VEC_alloc (tree, heap, vec_num);
4887 if (load_lanes_p)
4889 tree vec_array;
4891 vec_array = create_vector_array (vectype, vec_num);
4893 /* Emit:
4894 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4895 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4896 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4897 gimple_call_set_lhs (new_stmt, vec_array);
4898 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4900 /* Extract each vector into an SSA_NAME. */
4901 for (i = 0; i < vec_num; i++)
4903 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4904 vec_array, i);
4905 VEC_quick_push (tree, dr_chain, new_temp);
4908 /* Record the mapping between SSA_NAMEs and statements. */
4909 vect_record_grouped_load_vectors (stmt, dr_chain);
4911 else
4913 for (i = 0; i < vec_num; i++)
4915 if (i > 0)
4916 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4917 stmt, NULL_TREE);
4919 /* 2. Create the vector-load in the loop. */
4920 switch (alignment_support_scheme)
4922 case dr_aligned:
4923 case dr_unaligned_supported:
4925 unsigned int align, misalign;
4927 data_ref
4928 = build2 (MEM_REF, vectype, dataref_ptr,
4929 build_int_cst (reference_alias_ptr_type
4930 (DR_REF (first_dr)), 0));
4931 align = TYPE_ALIGN_UNIT (vectype);
4932 if (alignment_support_scheme == dr_aligned)
4934 gcc_assert (aligned_access_p (first_dr));
4935 misalign = 0;
4937 else if (DR_MISALIGNMENT (first_dr) == -1)
4939 TREE_TYPE (data_ref)
4940 = build_aligned_type (TREE_TYPE (data_ref),
4941 TYPE_ALIGN (elem_type));
4942 align = TYPE_ALIGN_UNIT (elem_type);
4943 misalign = 0;
4945 else
4947 TREE_TYPE (data_ref)
4948 = build_aligned_type (TREE_TYPE (data_ref),
4949 TYPE_ALIGN (elem_type));
4950 misalign = DR_MISALIGNMENT (first_dr);
4952 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
4953 align, misalign);
4954 break;
4956 case dr_explicit_realign:
4958 tree ptr, bump;
4959 tree vs_minus_1;
4961 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4963 if (compute_in_loop)
4964 msq = vect_setup_realignment (first_stmt, gsi,
4965 &realignment_token,
4966 dr_explicit_realign,
4967 dataref_ptr, NULL);
4969 ptr = copy_ssa_name (dataref_ptr, NULL);
4970 new_stmt = gimple_build_assign_with_ops
4971 (BIT_AND_EXPR, ptr, dataref_ptr,
4972 build_int_cst
4973 (TREE_TYPE (dataref_ptr),
4974 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4976 data_ref
4977 = build2 (MEM_REF, vectype, ptr,
4978 build_int_cst (reference_alias_ptr_type
4979 (DR_REF (first_dr)), 0));
4980 vec_dest = vect_create_destination_var (scalar_dest,
4981 vectype);
4982 new_stmt = gimple_build_assign (vec_dest, data_ref);
4983 new_temp = make_ssa_name (vec_dest, new_stmt);
4984 gimple_assign_set_lhs (new_stmt, new_temp);
4985 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4986 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4987 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4988 msq = new_temp;
4990 bump = size_binop (MULT_EXPR, vs_minus_1,
4991 TYPE_SIZE_UNIT (elem_type));
4992 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4993 new_stmt = gimple_build_assign_with_ops
4994 (BIT_AND_EXPR, NULL_TREE, ptr,
4995 build_int_cst
4996 (TREE_TYPE (ptr),
4997 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4998 ptr = copy_ssa_name (dataref_ptr, new_stmt);
4999 gimple_assign_set_lhs (new_stmt, ptr);
5000 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5001 data_ref
5002 = build2 (MEM_REF, vectype, ptr,
5003 build_int_cst (reference_alias_ptr_type
5004 (DR_REF (first_dr)), 0));
5005 break;
5007 case dr_explicit_realign_optimized:
5008 new_temp = copy_ssa_name (dataref_ptr, NULL);
5009 new_stmt = gimple_build_assign_with_ops
5010 (BIT_AND_EXPR, new_temp, dataref_ptr,
5011 build_int_cst
5012 (TREE_TYPE (dataref_ptr),
5013 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5015 data_ref
5016 = build2 (MEM_REF, vectype, new_temp,
5017 build_int_cst (reference_alias_ptr_type
5018 (DR_REF (first_dr)), 0));
5019 break;
5020 default:
5021 gcc_unreachable ();
5023 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5024 new_stmt = gimple_build_assign (vec_dest, data_ref);
5025 new_temp = make_ssa_name (vec_dest, new_stmt);
5026 gimple_assign_set_lhs (new_stmt, new_temp);
5027 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5029 /* 3. Handle explicit realignment if necessary/supported.
5030 Create in loop:
5031 vec_dest = realign_load (msq, lsq, realignment_token) */
5032 if (alignment_support_scheme == dr_explicit_realign_optimized
5033 || alignment_support_scheme == dr_explicit_realign)
5035 lsq = gimple_assign_lhs (new_stmt);
5036 if (!realignment_token)
5037 realignment_token = dataref_ptr;
5038 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5039 new_stmt
5040 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
5041 vec_dest, msq, lsq,
5042 realignment_token);
5043 new_temp = make_ssa_name (vec_dest, new_stmt);
5044 gimple_assign_set_lhs (new_stmt, new_temp);
5045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5047 if (alignment_support_scheme == dr_explicit_realign_optimized)
5049 gcc_assert (phi);
5050 if (i == vec_num - 1 && j == ncopies - 1)
5051 add_phi_arg (phi, lsq,
5052 loop_latch_edge (containing_loop),
5053 UNKNOWN_LOCATION);
5054 msq = lsq;
5058 /* 4. Handle invariant-load. */
5059 if (inv_p && !bb_vinfo)
5061 gimple_stmt_iterator gsi2 = *gsi;
5062 gcc_assert (!grouped_load);
5063 gsi_next (&gsi2);
5064 new_temp = vect_init_vector (stmt, scalar_dest,
5065 vectype, &gsi2);
5066 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5069 if (negative)
5071 tree perm_mask = perm_mask_for_reverse (vectype);
5072 new_temp = permute_vec_elements (new_temp, new_temp,
5073 perm_mask, stmt, gsi);
5074 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5077 /* Collect vector loads and later create their permutation in
5078 vect_transform_grouped_load (). */
5079 if (grouped_load || slp_perm)
5080 VEC_quick_push (tree, dr_chain, new_temp);
5082 /* Store vector loads in the corresponding SLP_NODE. */
5083 if (slp && !slp_perm)
5084 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5085 new_stmt);
5089 if (slp && !slp_perm)
5090 continue;
5092 if (slp_perm)
5094 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5095 slp_node_instance, false))
5097 VEC_free (tree, heap, dr_chain);
5098 return false;
5101 else
5103 if (grouped_load)
5105 if (!load_lanes_p)
5106 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5107 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5109 else
5111 if (j == 0)
5112 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5113 else
5114 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5115 prev_stmt_info = vinfo_for_stmt (new_stmt);
5118 if (dr_chain)
5119 VEC_free (tree, heap, dr_chain);
5122 return true;
5125 /* Function vect_is_simple_cond.
5127 Input:
5128 LOOP - the loop that is being vectorized.
5129 COND - Condition that is checked for simple use.
5131 Output:
5132 *COMP_VECTYPE - the vector type for the comparison.
5134 Returns whether a COND can be vectorized. Checks whether
5135 condition operands are supportable using vect_is_simple_use. */
5137 static bool
5138 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5139 bb_vec_info bb_vinfo, tree *comp_vectype)
5141 tree lhs, rhs;
5142 tree def;
5143 enum vect_def_type dt;
5144 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5146 if (!COMPARISON_CLASS_P (cond))
5147 return false;
5149 lhs = TREE_OPERAND (cond, 0);
5150 rhs = TREE_OPERAND (cond, 1);
5152 if (TREE_CODE (lhs) == SSA_NAME)
5154 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5155 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5156 &lhs_def_stmt, &def, &dt, &vectype1))
5157 return false;
5159 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5160 && TREE_CODE (lhs) != FIXED_CST)
5161 return false;
5163 if (TREE_CODE (rhs) == SSA_NAME)
5165 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5166 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5167 &rhs_def_stmt, &def, &dt, &vectype2))
5168 return false;
5170 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5171 && TREE_CODE (rhs) != FIXED_CST)
5172 return false;
5174 *comp_vectype = vectype1 ? vectype1 : vectype2;
5175 return true;
5178 /* vectorizable_condition.
5180 Check if STMT is conditional modify expression that can be vectorized.
5181 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5182 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5183 at GSI.
5185 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5186 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5187 else clause if it is 2).
5189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
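   /* For illustration only -- a scalar, standalone sketch (NUNITS fixed at
      4, hypothetical names, '<' chosen as an example comparison) of what the
      VEC_COND_EXPR built below computes: an element-wise select driven by
      an element-wise comparison.

        #define NUNITS 4

        static void
        vec_cond_lt (const int *a, const int *b,
                     const int *then_v, const int *else_v, int *dest)
        {
          int i;

          for (i = 0; i < NUNITS; i++)
            dest[i] = a[i] < b[i] ? then_v[i] : else_v[i];
        }

      The comparison comes from the COND_EXPR that is rhs1 of the scalar
      assignment; the THEN and ELSE vectors come from rhs2 and rhs3.  */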
5191 bool
5192 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5193 gimple *vec_stmt, tree reduc_def, int reduc_index,
5194 slp_tree slp_node)
5196 tree scalar_dest = NULL_TREE;
5197 tree vec_dest = NULL_TREE;
5198 tree cond_expr, then_clause, else_clause;
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5201 tree comp_vectype = NULL_TREE;
5202 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5203 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5204 tree vec_compare, vec_cond_expr;
5205 tree new_temp;
5206 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5207 tree def;
5208 enum vect_def_type dt, dts[4];
5209 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5210 int ncopies;
5211 enum tree_code code;
5212 stmt_vec_info prev_stmt_info = NULL;
5213 int i, j;
5214 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5215 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5216 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5218 if (slp_node || PURE_SLP_STMT (stmt_info))
5219 ncopies = 1;
5220 else
5221 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5223 gcc_assert (ncopies >= 1);
5224 if (reduc_index && ncopies > 1)
5225 return false; /* FORNOW */
5227 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5228 return false;
5230 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5231 return false;
5233 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5234 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5235 && reduc_def))
5236 return false;
5238 /* FORNOW: not yet supported. */
5239 if (STMT_VINFO_LIVE_P (stmt_info))
5241 if (vect_print_dump_info (REPORT_DETAILS))
5242 fprintf (vect_dump, "value used after loop.");
5243 return false;
5246 /* Is vectorizable conditional operation? */
5247 if (!is_gimple_assign (stmt))
5248 return false;
5250 code = gimple_assign_rhs_code (stmt);
5252 if (code != COND_EXPR)
5253 return false;
5255 cond_expr = gimple_assign_rhs1 (stmt);
5256 then_clause = gimple_assign_rhs2 (stmt);
5257 else_clause = gimple_assign_rhs3 (stmt);
5259 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5260 &comp_vectype)
5261 || !comp_vectype)
5262 return false;
5264 if (TREE_CODE (then_clause) == SSA_NAME)
5266 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5267 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5268 &then_def_stmt, &def, &dt))
5269 return false;
5271 else if (TREE_CODE (then_clause) != INTEGER_CST
5272 && TREE_CODE (then_clause) != REAL_CST
5273 && TREE_CODE (then_clause) != FIXED_CST)
5274 return false;
5276 if (TREE_CODE (else_clause) == SSA_NAME)
5278 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5279 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5280 &else_def_stmt, &def, &dt))
5281 return false;
5283 else if (TREE_CODE (else_clause) != INTEGER_CST
5284 && TREE_CODE (else_clause) != REAL_CST
5285 && TREE_CODE (else_clause) != FIXED_CST)
5286 return false;
5288 if (!vec_stmt)
5290 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5291 return expand_vec_cond_expr_p (vectype, comp_vectype);
5294 /* Transform. */
5296 if (!slp_node)
5298 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5299 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5300 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5301 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5304 /* Handle def. */
5305 scalar_dest = gimple_assign_lhs (stmt);
5306 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5308 /* Handle cond expr. */
5309 for (j = 0; j < ncopies; j++)
5311 gimple new_stmt = NULL;
5312 if (j == 0)
5314 if (slp_node)
5316 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5317 VEC (slp_void_p, heap) *vec_defs;
5319 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5320 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5321 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5322 VEC_safe_push (tree, heap, ops, then_clause);
5323 VEC_safe_push (tree, heap, ops, else_clause);
5324 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5325 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5326 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5327 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5328 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5330 VEC_free (tree, heap, ops);
5331 VEC_free (slp_void_p, heap, vec_defs);
5333 else
5335 gimple gtemp;
5336 vec_cond_lhs =
5337 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5338 stmt, NULL);
5339 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5340 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5342 vec_cond_rhs =
5343 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5344 stmt, NULL);
5345 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5346 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5347 if (reduc_index == 1)
5348 vec_then_clause = reduc_def;
5349 else
5351 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5352 stmt, NULL);
5353 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5354 NULL, &gtemp, &def, &dts[2]);
5356 if (reduc_index == 2)
5357 vec_else_clause = reduc_def;
5358 else
5360 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5361 stmt, NULL);
5362 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5363 NULL, &gtemp, &def, &dts[3]);
5367 else
5369 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5370 VEC_pop (tree, vec_oprnds0));
5371 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5372 VEC_pop (tree, vec_oprnds1));
5373 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5374 VEC_pop (tree, vec_oprnds2));
5375 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5376 VEC_pop (tree, vec_oprnds3));
5379 if (!slp_node)
5381 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5382 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5383 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5384 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5387 /* Arguments are ready. Create the new vector stmt. */
5388 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5390 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5391 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5392 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5394 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5395 vec_cond_lhs, vec_cond_rhs);
5396 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5397 vec_compare, vec_then_clause, vec_else_clause);
5399 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5400 new_temp = make_ssa_name (vec_dest, new_stmt);
5401 gimple_assign_set_lhs (new_stmt, new_temp);
5402 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5403 if (slp_node)
5404 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5407 if (slp_node)
5408 continue;
5410 if (j == 0)
5411 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5412 else
5413 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5415 prev_stmt_info = vinfo_for_stmt (new_stmt);
5418 VEC_free (tree, heap, vec_oprnds0);
5419 VEC_free (tree, heap, vec_oprnds1);
5420 VEC_free (tree, heap, vec_oprnds2);
5421 VEC_free (tree, heap, vec_oprnds3);
5423 return true;
5427 /* Make sure the statement is vectorizable. */
5429 bool
5430 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5432 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5433 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5434 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5435 bool ok;
5436 tree scalar_type, vectype;
5437 gimple pattern_stmt;
5438 gimple_seq pattern_def_seq;
5440 if (vect_print_dump_info (REPORT_DETAILS))
5442 fprintf (vect_dump, "==> examining statement: ");
5443 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5446 if (gimple_has_volatile_ops (stmt))
5448 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5449 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5451 return false;
5454 /* Skip stmts that do not need to be vectorized. In loops this is expected
5455 to include:
5456 - the COND_EXPR which is the loop exit condition
5457 - any LABEL_EXPRs in the loop
5458 - computations that are used only for array indexing or loop control.
5459 In basic blocks we only analyze statements that are a part of some SLP
5460 instance, therefore, all the statements are relevant.
5462 Pattern statement needs to be analyzed instead of the original statement
5463 if the original statement is not relevant. Otherwise, we analyze both
5464 statements. In basic blocks we are called from some SLP instance
5465 traversal, so we do not analyze pattern stmts in place of the original;
5466 the pattern stmts will already be part of the SLP instance. */
5468 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5469 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5470 && !STMT_VINFO_LIVE_P (stmt_info))
5472 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5473 && pattern_stmt
5474 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5475 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5477 /* Analyze PATTERN_STMT instead of the original stmt. */
5478 stmt = pattern_stmt;
5479 stmt_info = vinfo_for_stmt (pattern_stmt);
5480 if (vect_print_dump_info (REPORT_DETAILS))
5482 fprintf (vect_dump, "==> examining pattern statement: ");
5483 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5486 else
5488 if (vect_print_dump_info (REPORT_DETAILS))
5489 fprintf (vect_dump, "irrelevant.");
5491 return true;
5494 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5495 && node == NULL
5496 && pattern_stmt
5497 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5498 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5500 /* Analyze PATTERN_STMT too. */
5501 if (vect_print_dump_info (REPORT_DETAILS))
5503 fprintf (vect_dump, "==> examining pattern statement: ");
5504 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5507 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5508 return false;
5511 if (is_pattern_stmt_p (stmt_info)
5512 && node == NULL
5513 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5515 gimple_stmt_iterator si;
5517 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5519 gimple pattern_def_stmt = gsi_stmt (si);
5520 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5521 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5523 /* Analyze def stmt of STMT if it's a pattern stmt. */
5524 if (vect_print_dump_info (REPORT_DETAILS))
5526 fprintf (vect_dump, "==> examining pattern def statement: ");
5527 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5530 if (!vect_analyze_stmt (pattern_def_stmt,
5531 need_to_vectorize, node))
5532 return false;
5537 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5539 case vect_internal_def:
5540 break;
5542 case vect_reduction_def:
5543 case vect_nested_cycle:
5544 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5545 || relevance == vect_used_in_outer_by_reduction
5546 || relevance == vect_unused_in_scope));
5547 break;
5549 case vect_induction_def:
5550 case vect_constant_def:
5551 case vect_external_def:
5552 case vect_unknown_def_type:
5553 default:
5554 gcc_unreachable ();
5557 if (bb_vinfo)
5559 gcc_assert (PURE_SLP_STMT (stmt_info));
5561 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5562 if (vect_print_dump_info (REPORT_DETAILS))
5564 fprintf (vect_dump, "get vectype for scalar type: ");
5565 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5568 vectype = get_vectype_for_scalar_type (scalar_type);
5569 if (!vectype)
5571 if (vect_print_dump_info (REPORT_DETAILS))
5573 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5574 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5576 return false;
5579 if (vect_print_dump_info (REPORT_DETAILS))
5581 fprintf (vect_dump, "vectype: ");
5582 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5585 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5588 if (STMT_VINFO_RELEVANT_P (stmt_info))
5590 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5591 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5592 *need_to_vectorize = true;
5595 ok = true;
5596 if (!bb_vinfo
5597 && (STMT_VINFO_RELEVANT_P (stmt_info)
5598 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5599 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5600 || vectorizable_shift (stmt, NULL, NULL, NULL)
5601 || vectorizable_operation (stmt, NULL, NULL, NULL)
5602 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5603 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5604 || vectorizable_call (stmt, NULL, NULL, NULL)
5605 || vectorizable_store (stmt, NULL, NULL, NULL)
5606 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5607 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5608 else
5610 if (bb_vinfo)
5611 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5612 || vectorizable_shift (stmt, NULL, NULL, node)
5613 || vectorizable_operation (stmt, NULL, NULL, node)
5614 || vectorizable_assignment (stmt, NULL, NULL, node)
5615 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5616 || vectorizable_call (stmt, NULL, NULL, node)
5617 || vectorizable_store (stmt, NULL, NULL, node)
5618 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5621 if (!ok)
5623 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5625 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5626 fprintf (vect_dump, "supported: ");
5627 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5630 return false;
5633 if (bb_vinfo)
5634 return true;
5636 /* Stmts that are (also) "live" (i.e., used outside the loop)
5637 need extra handling, except for vectorizable reductions. */
5638 if (STMT_VINFO_LIVE_P (stmt_info)
5639 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5640 ok = vectorizable_live_operation (stmt, NULL, NULL);
5642 if (!ok)
5644 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5646 fprintf (vect_dump, "not vectorized: live stmt not ");
5647 fprintf (vect_dump, "supported: ");
5648 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5651 return false;
5654 return true;
5658 /* Function vect_transform_stmt.
5660 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5662 bool
5663 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5664 bool *grouped_store, slp_tree slp_node,
5665 slp_instance slp_node_instance)
5667 bool is_store = false;
5668 gimple vec_stmt = NULL;
5669 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5670 bool done;
5672 switch (STMT_VINFO_TYPE (stmt_info))
5674 case type_demotion_vec_info_type:
5675 case type_promotion_vec_info_type:
5676 case type_conversion_vec_info_type:
5677 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5678 gcc_assert (done);
5679 break;
5681 case induc_vec_info_type:
5682 gcc_assert (!slp_node);
5683 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5684 gcc_assert (done);
5685 break;
5687 case shift_vec_info_type:
5688 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5689 gcc_assert (done);
5690 break;
5692 case op_vec_info_type:
5693 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5694 gcc_assert (done);
5695 break;
5697 case assignment_vec_info_type:
5698 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5699 gcc_assert (done);
5700 break;
5702 case load_vec_info_type:
5703 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5704 slp_node_instance);
5705 gcc_assert (done);
5706 break;
5708 case store_vec_info_type:
5709 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5710 gcc_assert (done);
5711 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5713 /* In case of interleaving, the whole chain is vectorized when the
5714 last store in the chain is reached. Store stmts before the last
5715 one are skipped, and their stmt_vec_info shouldn't be freed
5716 meanwhile. */
5717 *grouped_store = true;
5718 if (STMT_VINFO_VEC_STMT (stmt_info))
5719 is_store = true;
5721 else
5722 is_store = true;
5723 break;
5725 case condition_vec_info_type:
5726 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5727 gcc_assert (done);
5728 break;
5730 case call_vec_info_type:
5731 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5732 stmt = gsi_stmt (*gsi);
5733 break;
5735 case reduc_vec_info_type:
5736 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5737 gcc_assert (done);
5738 break;
5740 default:
5741 if (!STMT_VINFO_LIVE_P (stmt_info))
5743 if (vect_print_dump_info (REPORT_DETAILS))
5744 fprintf (vect_dump, "stmt not supported.");
5745 gcc_unreachable ();
5749 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5750 is being vectorized, but outside the immediately enclosing loop. */
5751 if (vec_stmt
5752 && STMT_VINFO_LOOP_VINFO (stmt_info)
5753 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5754 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5755 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5756 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5757 || STMT_VINFO_RELEVANT (stmt_info) ==
5758 vect_used_in_outer_by_reduction))
5760 struct loop *innerloop = LOOP_VINFO_LOOP (
5761 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5762 imm_use_iterator imm_iter;
5763 use_operand_p use_p;
5764 tree scalar_dest;
5765 gimple exit_phi;
5767 if (vect_print_dump_info (REPORT_DETAILS))
5768 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5770 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5771 (to be used when vectorizing outer-loop stmts that use the DEF of
5772 STMT). */
5773 if (gimple_code (stmt) == GIMPLE_PHI)
5774 scalar_dest = PHI_RESULT (stmt);
5775 else
5776 scalar_dest = gimple_assign_lhs (stmt);
5778 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5780 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5782 exit_phi = USE_STMT (use_p);
5783 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5788 /* Handle stmts whose DEF is used outside the loop-nest that is
5789 being vectorized. */
5790 if (STMT_VINFO_LIVE_P (stmt_info)
5791 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5793 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5794 gcc_assert (done);
5797 if (vec_stmt)
5798 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5800 return is_store;
5804 /* Remove a group of stores (for SLP or interleaving) and free their
5805 stmt_vec_info. */
5807 void
5808 vect_remove_stores (gimple first_stmt)
5810 gimple next = first_stmt;
5811 gimple tmp;
5812 gimple_stmt_iterator next_si;
5814 while (next)
5816 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5818 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5819 if (is_pattern_stmt_p (stmt_info))
5820 next = STMT_VINFO_RELATED_STMT (stmt_info);
5821 /* Free the attached stmt_vec_info and remove the stmt. */
5822 next_si = gsi_for_stmt (next);
5823 unlink_stmt_vdef (next);
5824 gsi_remove (&next_si, true);
5825 release_defs (next);
5826 free_stmt_vec_info (next);
5827 next = tmp;
5832 /* Function new_stmt_vec_info.
5834 Create and initialize a new stmt_vec_info struct for STMT. */
5836 stmt_vec_info
5837 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5838 bb_vec_info bb_vinfo)
5840 stmt_vec_info res;
5841 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5843 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5844 STMT_VINFO_STMT (res) = stmt;
5845 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5846 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5847 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5848 STMT_VINFO_LIVE_P (res) = false;
5849 STMT_VINFO_VECTYPE (res) = NULL;
5850 STMT_VINFO_VEC_STMT (res) = NULL;
5851 STMT_VINFO_VECTORIZABLE (res) = true;
5852 STMT_VINFO_IN_PATTERN_P (res) = false;
5853 STMT_VINFO_RELATED_STMT (res) = NULL;
5854 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5855 STMT_VINFO_DATA_REF (res) = NULL;
5857 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5858 STMT_VINFO_DR_OFFSET (res) = NULL;
5859 STMT_VINFO_DR_INIT (res) = NULL;
5860 STMT_VINFO_DR_STEP (res) = NULL;
5861 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5863 if (gimple_code (stmt) == GIMPLE_PHI
5864 && is_loop_header_bb_p (gimple_bb (stmt)))
5865 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5866 else
5867 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5869 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5870 STMT_SLP_TYPE (res) = loop_vect;
5871 GROUP_FIRST_ELEMENT (res) = NULL;
5872 GROUP_NEXT_ELEMENT (res) = NULL;
5873 GROUP_SIZE (res) = 0;
5874 GROUP_STORE_COUNT (res) = 0;
5875 GROUP_GAP (res) = 0;
5876 GROUP_SAME_DR_STMT (res) = NULL;
5877 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5879 return res;
5883 /* Create the vector that holds the stmt_vec_info structs. */
5885 void
5886 init_stmt_vec_info_vec (void)
5888 gcc_assert (!stmt_vec_info_vec);
5889 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5893 /* Free the vector that holds the stmt_vec_info structs. */
5895 void
5896 free_stmt_vec_info_vec (void)
5898 gcc_assert (stmt_vec_info_vec);
5899 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5903 /* Free stmt vectorization related info. */
5905 void
5906 free_stmt_vec_info (gimple stmt)
5908 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5910 if (!stmt_info)
5911 return;
5913 /* Check if this statement has a related "pattern stmt"
5914 (introduced by the vectorizer during the pattern recognition
5915 pass). Free the pattern's stmt_vec_info and the def stmts' stmt_vec_infos
5916 too. */
5917 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5919 stmt_vec_info patt_info
5920 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5921 if (patt_info)
5923 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5924 if (seq)
5926 gimple_stmt_iterator si;
5927 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5928 free_stmt_vec_info (gsi_stmt (si));
5930 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5934 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5935 set_vinfo_for_stmt (stmt, NULL);
5936 free (stmt_info);
5940 /* Function get_vectype_for_scalar_type_and_size.
5942 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5943 by the target. */
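/* For instance, with a 4-byte 'int' element and SIZE == 16 this would
   typically produce a 4-unit vector type (nbytes == 4, nunits == 4),
   assuming the target supports such a vector mode.  */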
5945 static tree
5946 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5948 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5949 enum machine_mode simd_mode;
5950 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5951 int nunits;
5952 tree vectype;
5954 if (nbytes == 0)
5955 return NULL_TREE;
5957 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5958 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5959 return NULL_TREE;
5961 /* We can't build a vector type of elements with alignment bigger than
5962 their size. */
5963 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5964 return NULL_TREE;
5966 /* For vector types of elements whose mode precision doesn't
5967 match their type's precision, we use an element type of mode
5968 precision. The vectorization routines will have to make sure
5969 they support the proper result truncation/extension.
5970 We also make sure to build vector types with INTEGER_TYPE
5971 component type only. */
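/* For instance, a boolean type whose TYPE_PRECISION is 1 but whose
   mode is QImode would be replaced here by an 8-bit unsigned integer
   element type.  */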
5972 if (INTEGRAL_TYPE_P (scalar_type)
5973 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5974 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5975 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5976 TYPE_UNSIGNED (scalar_type));
5978 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5979 When the component mode passes the above test, simply use a type
5980 corresponding to that mode. The theory is that any use that
5981 would cause problems with this will disable vectorization anyway. */
5982 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5983 && !INTEGRAL_TYPE_P (scalar_type)
5984 && !POINTER_TYPE_P (scalar_type))
5985 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5987 /* If no size was supplied, use the mode the target prefers. Otherwise
5988 lookup a vector mode of the specified size. */
5989 if (size == 0)
5990 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5991 else
5992 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5993 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5994 if (nunits <= 1)
5995 return NULL_TREE;
5997 vectype = build_vector_type (scalar_type, nunits);
5998 if (vect_print_dump_info (REPORT_DETAILS))
6000 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
6001 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
6004 if (!vectype)
6005 return NULL_TREE;
6007 if (vect_print_dump_info (REPORT_DETAILS))
6009 fprintf (vect_dump, "vectype: ");
6010 print_generic_expr (vect_dump, vectype, TDF_SLIM);
6013 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6014 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6016 if (vect_print_dump_info (REPORT_DETAILS))
6017 fprintf (vect_dump, "mode not supported by target.");
6018 return NULL_TREE;
6021 return vectype;
6024 unsigned int current_vector_size;
6026 /* Function get_vectype_for_scalar_type.
6028 Returns the vector type corresponding to SCALAR_TYPE as supported
6029 by the target. */
6031 tree
6032 get_vectype_for_scalar_type (tree scalar_type)
6034 tree vectype;
6035 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6036 current_vector_size);
6037 if (vectype
6038 && current_vector_size == 0)
6039 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6040 return vectype;
6043 /* Function get_same_sized_vectype
6045 Returns a vector type corresponding to SCALAR_TYPE of size
6046 VECTOR_TYPE if supported by the target. */
6048 tree
6049 get_same_sized_vectype (tree scalar_type, tree vector_type)
6051 return get_vectype_for_scalar_type_and_size
6052 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6055 /* Function vect_is_simple_use.
6057 Input:
6058 LOOP_VINFO - the vect info of the loop that is being vectorized.
6059 BB_VINFO - the vect info of the basic block that is being vectorized.
6060 OPERAND - operand of STMT in the loop or bb.
6061 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6063 Returns whether a stmt with OPERAND can be vectorized.
6064 For loops, supportable operands are constants, loop invariants, and operands
6065 that are defined by the current iteration of the loop. Unsupportable
6066 operands are those that are defined by a previous iteration of the loop (as
6067 is the case in reduction/induction computations).
6068 For basic blocks, supportable operands are constants and bb invariants.
6069 For now, operands defined outside the basic block are not supported. */
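/* For instance, in a loop like
       for (i = 0; i < n; i++)
         a[i] = b[i] + x + 5;
   the constant 5 would typically be classified as vect_constant_def,
   the loop-invariant X as vect_external_def, and the value loaded
   from b[i] as vect_internal_def.  */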
6071 bool
6072 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6073 bb_vec_info bb_vinfo, gimple *def_stmt,
6074 tree *def, enum vect_def_type *dt)
6076 basic_block bb;
6077 stmt_vec_info stmt_vinfo;
6078 struct loop *loop = NULL;
6080 if (loop_vinfo)
6081 loop = LOOP_VINFO_LOOP (loop_vinfo);
6083 *def_stmt = NULL;
6084 *def = NULL_TREE;
6086 if (vect_print_dump_info (REPORT_DETAILS))
6088 fprintf (vect_dump, "vect_is_simple_use: operand ");
6089 print_generic_expr (vect_dump, operand, TDF_SLIM);
6092 if (CONSTANT_CLASS_P (operand))
6094 *dt = vect_constant_def;
6095 return true;
6098 if (is_gimple_min_invariant (operand))
6100 *def = operand;
6101 *dt = vect_external_def;
6102 return true;
6105 if (TREE_CODE (operand) == PAREN_EXPR)
6107 if (vect_print_dump_info (REPORT_DETAILS))
6108 fprintf (vect_dump, "non-associatable copy.");
6109 operand = TREE_OPERAND (operand, 0);
6112 if (TREE_CODE (operand) != SSA_NAME)
6114 if (vect_print_dump_info (REPORT_DETAILS))
6115 fprintf (vect_dump, "not ssa-name.");
6116 return false;
6119 *def_stmt = SSA_NAME_DEF_STMT (operand);
6120 if (*def_stmt == NULL)
6122 if (vect_print_dump_info (REPORT_DETAILS))
6123 fprintf (vect_dump, "no def_stmt.");
6124 return false;
6127 if (vect_print_dump_info (REPORT_DETAILS))
6129 fprintf (vect_dump, "def_stmt: ");
6130 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
6133 /* An empty stmt is expected only in the case of a function argument
6134 (otherwise we expect a phi node or a GIMPLE_ASSIGN). */
6135 if (gimple_nop_p (*def_stmt))
6137 *def = operand;
6138 *dt = vect_external_def;
6139 return true;
6142 bb = gimple_bb (*def_stmt);
6144 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6145 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6146 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6147 *dt = vect_external_def;
6148 else
6150 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6151 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6154 if (*dt == vect_unknown_def_type
6155 || (stmt
6156 && *dt == vect_double_reduction_def
6157 && gimple_code (stmt) != GIMPLE_PHI))
6159 if (vect_print_dump_info (REPORT_DETAILS))
6160 fprintf (vect_dump, "Unsupported pattern.");
6161 return false;
6164 if (vect_print_dump_info (REPORT_DETAILS))
6165 fprintf (vect_dump, "type of def: %d.", *dt);
6167 switch (gimple_code (*def_stmt))
6169 case GIMPLE_PHI:
6170 *def = gimple_phi_result (*def_stmt);
6171 break;
6173 case GIMPLE_ASSIGN:
6174 *def = gimple_assign_lhs (*def_stmt);
6175 break;
6177 case GIMPLE_CALL:
6178 *def = gimple_call_lhs (*def_stmt);
6179 if (*def != NULL)
6180 break;
6181 /* FALLTHRU */
6182 default:
6183 if (vect_print_dump_info (REPORT_DETAILS))
6184 fprintf (vect_dump, "unsupported defining stmt: ");
6185 return false;
6188 return true;
6191 /* Function vect_is_simple_use_1.
6193 Same as vect_is_simple_use but also determines the vector operand
6194 type of OPERAND and stores it to *VECTYPE. If the definition of
6195 OPERAND is vect_uninitialized_def, vect_constant_def or
6196 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6197 is responsible for computing the best suited vector type for the
6198 scalar operand. */
6200 bool
6201 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6202 bb_vec_info bb_vinfo, gimple *def_stmt,
6203 tree *def, enum vect_def_type *dt, tree *vectype)
6205 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6206 def, dt))
6207 return false;
6209 /* Now get a vector type if the def is internal, otherwise supply
6210 NULL_TREE and leave it up to the caller to figure out a proper
6211 type for the use stmt. */
6212 if (*dt == vect_internal_def
6213 || *dt == vect_induction_def
6214 || *dt == vect_reduction_def
6215 || *dt == vect_double_reduction_def
6216 || *dt == vect_nested_cycle)
6218 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6220 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6221 && !STMT_VINFO_RELEVANT (stmt_info)
6222 && !STMT_VINFO_LIVE_P (stmt_info))
6223 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6225 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6226 gcc_assert (*vectype != NULL_TREE);
6228 else if (*dt == vect_uninitialized_def
6229 || *dt == vect_constant_def
6230 || *dt == vect_external_def)
6231 *vectype = NULL_TREE;
6232 else
6233 gcc_unreachable ();
6235 return true;
6239 /* Function supportable_widening_operation
6241 Check whether an operation represented by the code CODE is a
6242 widening operation that is supported by the target platform in
6243 vector form (i.e., when operating on arguments of type VECTYPE_IN
6244 producing a result of type VECTYPE_OUT).
6246 Widening operations we currently support are NOP (CONVERT), FLOAT,
6247 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
6248 are supported by the target platform either directly (via vector
6249 tree-codes), or via target builtins.
6251 Output:
6252 - CODE1 and CODE2 are codes of vector operations to be used when
6253 vectorizing the operation, if available.
6254 - MULTI_STEP_CVT determines the number of required intermediate steps in
6255 case of multi-step conversion (like char->short->int - in that case
6256 MULTI_STEP_CVT will be 1).
6257 - INTERM_TYPES contains the intermediate type required to perform the
6258 widening operation (short in the above example). */
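/* For instance, for a WIDEN_MULT_EXPR taking short vector inputs and
   producing int vector results, CODE1/CODE2 would typically come back
   as VEC_WIDEN_MULT_LO_EXPR/VEC_WIDEN_MULT_HI_EXPR (or the EVEN/ODD
   variants when the result only feeds a reduction), with
   MULTI_STEP_CVT left at 0.  */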
6260 bool
6261 supportable_widening_operation (enum tree_code code, gimple stmt,
6262 tree vectype_out, tree vectype_in,
6263 enum tree_code *code1, enum tree_code *code2,
6264 int *multi_step_cvt,
6265 VEC (tree, heap) **interm_types)
6267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6268 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6269 struct loop *vect_loop = NULL;
6270 enum machine_mode vec_mode;
6271 enum insn_code icode1, icode2;
6272 optab optab1, optab2;
6273 tree vectype = vectype_in;
6274 tree wide_vectype = vectype_out;
6275 enum tree_code c1, c2;
6276 int i;
6277 tree prev_type, intermediate_type;
6278 enum machine_mode intermediate_mode, prev_mode;
6279 optab optab3, optab4;
6281 *multi_step_cvt = 0;
6282 if (loop_info)
6283 vect_loop = LOOP_VINFO_LOOP (loop_info);
6285 switch (code)
6287 case WIDEN_MULT_EXPR:
6288 /* The result of a vectorized widening operation usually requires
6289 two vectors (because the widened results do not fit into one vector).
6290 The generated vector results would normally be expected to be
6291 generated in the same order as in the original scalar computation,
6292 i.e. if 8 results are generated in each vector iteration, they are
6293 to be organized as follows:
6294 vect1: [res1,res2,res3,res4],
6295 vect2: [res5,res6,res7,res8].
6297 However, in the special case that the result of the widening
6298 operation is used in a reduction computation only, the order doesn't
6299 matter (because when vectorizing a reduction we change the order of
6300 the computation). Some targets can take advantage of this and
6301 generate more efficient code. For example, targets like Altivec,
6302 that support widen_mult using a sequence of {mult_even,mult_odd}
6303 generate the following vectors:
6304 vect1: [res1,res3,res5,res7],
6305 vect2: [res2,res4,res6,res8].
6307 When vectorizing outer-loops, we execute the inner-loop sequentially
6308 (each vectorized inner-loop iteration contributes to VF outer-loop
6309 iterations in parallel). We therefore don't allow changing the
6310 order of the computation in the inner-loop during outer-loop
6311 vectorization. */
6312 /* TODO: Another case in which order doesn't *really* matter is when we
6313 widen and then contract again, e.g. (short)((int)x * y >> 8).
6314 Normally, pack_trunc performs an even/odd permute, whereas the
6315 repack from an even/odd expansion would be an interleave, which
6316 would be significantly simpler for e.g. AVX2. */
6317 /* In any case, in order to avoid duplicating the code below, recurse
6318 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6319 are properly set up for the caller. If we fail, we'll continue with
6320 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
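/* A typical candidate for the even/odd form is a reduction such as
       for (i = 0; i < n; i++)
         sum += (int) a[i] * (int) b[i];
   where only the final sum is used, so the order in which the widened
   products appear across the two result vectors does not matter.  */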
6321 if (vect_loop
6322 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6323 && !nested_in_vect_loop_p (vect_loop, stmt)
6324 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6325 stmt, vectype_out, vectype_in,
6326 code1, code2, multi_step_cvt,
6327 interm_types))
6328 return true;
6329 c1 = VEC_WIDEN_MULT_LO_EXPR;
6330 c2 = VEC_WIDEN_MULT_HI_EXPR;
6331 break;
6333 case VEC_WIDEN_MULT_EVEN_EXPR:
6334 /* Support the recursion induced just above. */
6335 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6336 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6337 break;
6339 case WIDEN_LSHIFT_EXPR:
6340 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6341 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6342 break;
6344 CASE_CONVERT:
6345 c1 = VEC_UNPACK_LO_EXPR;
6346 c2 = VEC_UNPACK_HI_EXPR;
6347 break;
6349 case FLOAT_EXPR:
6350 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6351 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6352 break;
6354 case FIX_TRUNC_EXPR:
6355 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6356 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6357 computing the operation. */
6358 return false;
6360 default:
6361 gcc_unreachable ();
6364 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6366 enum tree_code ctmp = c1;
6367 c1 = c2;
6368 c2 = ctmp;
6371 if (code == FIX_TRUNC_EXPR)
6373 /* The signedness is determined from the output operand. */
6374 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6375 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6377 else
6379 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6380 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6383 if (!optab1 || !optab2)
6384 return false;
6386 vec_mode = TYPE_MODE (vectype);
6387 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6388 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6389 return false;
6391 *code1 = c1;
6392 *code2 = c2;
6394 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6395 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6396 return true;
6398 /* Check if it's a multi-step conversion that can be done using intermediate
6399 types. */
6401 prev_type = vectype;
6402 prev_mode = vec_mode;
6404 if (!CONVERT_EXPR_CODE_P (code))
6405 return false;
6407 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6408 intermediate steps in the promotion sequence. We try
6409 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6410 not. */
6411 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6412 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6414 intermediate_mode = insn_data[icode1].operand[0].mode;
6415 intermediate_type
6416 = lang_hooks.types.type_for_mode (intermediate_mode,
6417 TYPE_UNSIGNED (prev_type));
6418 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6419 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6421 if (!optab3 || !optab4
6422 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6423 || insn_data[icode1].operand[0].mode != intermediate_mode
6424 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6425 || insn_data[icode2].operand[0].mode != intermediate_mode
6426 || ((icode1 = optab_handler (optab3, intermediate_mode))
6427 == CODE_FOR_nothing)
6428 || ((icode2 = optab_handler (optab4, intermediate_mode))
6429 == CODE_FOR_nothing))
6430 break;
6432 VEC_quick_push (tree, *interm_types, intermediate_type);
6433 (*multi_step_cvt)++;
6435 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6436 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6437 return true;
6439 prev_type = intermediate_type;
6440 prev_mode = intermediate_mode;
6443 VEC_free (tree, heap, *interm_types);
6444 return false;
6448 /* Function supportable_narrowing_operation
6450 Check whether an operation represented by the code CODE is a
6451 narrowing operation that is supported by the target platform in
6452 vector form (i.e., when operating on arguments of type VECTYPE_IN
6453 and producing a result of type VECTYPE_OUT).
6455 Narrowing operations we currently support are NOP (CONVERT) and
6456 FIX_TRUNC. This function checks if these operations are supported by
6457 the target platform directly via vector tree-codes.
6459 Output:
6460 - CODE1 is the code of a vector operation to be used when
6461 vectorizing the operation, if available.
6462 - MULTI_STEP_CVT determines the number of required intermediate steps in
6463 case of multi-step conversion (like int->short->char - in that case
6464 MULTI_STEP_CVT will be 1).
6465 - INTERM_TYPES contains the intermediate type required to perform the
6466 narrowing operation (short in the above example). */
6468 bool
6469 supportable_narrowing_operation (enum tree_code code,
6470 tree vectype_out, tree vectype_in,
6471 enum tree_code *code1, int *multi_step_cvt,
6472 VEC (tree, heap) **interm_types)
6474 enum machine_mode vec_mode;
6475 enum insn_code icode1;
6476 optab optab1, interm_optab;
6477 tree vectype = vectype_in;
6478 tree narrow_vectype = vectype_out;
6479 enum tree_code c1;
6480 tree intermediate_type;
6481 enum machine_mode intermediate_mode, prev_mode;
6482 int i;
6483 bool uns;
6485 *multi_step_cvt = 0;
6486 switch (code)
6488 CASE_CONVERT:
6489 c1 = VEC_PACK_TRUNC_EXPR;
6490 break;
6492 case FIX_TRUNC_EXPR:
6493 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6494 break;
6496 case FLOAT_EXPR:
6497 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6498 tree code and optabs used for computing the operation. */
6499 return false;
6501 default:
6502 gcc_unreachable ();
6505 if (code == FIX_TRUNC_EXPR)
6506 /* The signedness is determined from the output operand. */
6507 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6508 else
6509 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6511 if (!optab1)
6512 return false;
6514 vec_mode = TYPE_MODE (vectype);
6515 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6516 return false;
6518 *code1 = c1;
6520 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6521 return true;
6523 /* Check if it's a multi-step conversion that can be done using intermediate
6524 types. */
6525 prev_mode = vec_mode;
6526 if (code == FIX_TRUNC_EXPR)
6527 uns = TYPE_UNSIGNED (vectype_out);
6528 else
6529 uns = TYPE_UNSIGNED (vectype);
6531 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6532 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6533 costly than signed. */
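/* For instance, when narrowing a double vector to an unsigned short
   vector in several steps, the float-to-integer step can often be done
   as a signed conversion, since the subsequent truncating pack does
   not depend on the signedness of the intermediate integer type.  */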
6534 if (code == FIX_TRUNC_EXPR && uns)
6536 enum insn_code icode2;
6538 intermediate_type
6539 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6540 interm_optab
6541 = optab_for_tree_code (c1, intermediate_type, optab_default);
6542 if (interm_optab != unknown_optab
6543 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6544 && insn_data[icode1].operand[0].mode
6545 == insn_data[icode2].operand[0].mode)
6547 uns = false;
6548 optab1 = interm_optab;
6549 icode1 = icode2;
6553 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6554 intermediate steps in the narrowing sequence. We try
6555 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6556 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6557 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6559 intermediate_mode = insn_data[icode1].operand[0].mode;
6560 intermediate_type
6561 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6562 interm_optab
6563 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6564 optab_default);
6565 if (!interm_optab
6566 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6567 || insn_data[icode1].operand[0].mode != intermediate_mode
6568 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6569 == CODE_FOR_nothing))
6570 break;
6572 VEC_quick_push (tree, *interm_types, intermediate_type);
6573 (*multi_step_cvt)++;
6575 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6576 return true;
6578 prev_mode = intermediate_mode;
6579 optab1 = interm_optab;
6582 VEC_free (tree, heap, *interm_types);
6583 return false;