official-gcc.git / gcc-4_6 / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
47 /* Function vect_mark_relevant.
49 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
51 static void
52 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
53 enum vect_relevant relevant, bool live_p)
55 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
56 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
57 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
59 if (vect_print_dump_info (REPORT_DETAILS))
60 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
62 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
64 gimple pattern_stmt;
66 /* This is the last stmt in a sequence that was detected as a
67 pattern that can potentially be vectorized. Don't mark the stmt
68 as relevant/live because it's not going to be vectorized.
69 Instead mark the pattern-stmt that replaces it. */
71 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73 if (vect_print_dump_info (REPORT_DETAILS))
74 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
75 stmt_info = vinfo_for_stmt (pattern_stmt);
76 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
77 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
78 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
79 stmt = pattern_stmt;
82 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
83 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
84 STMT_VINFO_RELEVANT (stmt_info) = relevant;
86 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
87 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
89 if (vect_print_dump_info (REPORT_DETAILS))
90 fprintf (vect_dump, "already marked relevant/live.");
91 return;
94 VEC_safe_push (gimple, heap, *worklist, stmt);
98 /* Function vect_stmt_relevant_p.
100 Return true if STMT in loop that is represented by LOOP_VINFO is
101 "relevant for vectorization".
103 A stmt is considered "relevant for vectorization" if:
104 - it has uses outside the loop.
105 - it has vdefs (it alters memory).
106 - it is a control stmt in the loop (except for the exit condition).
108 CHECKME: what other side effects would the vectorizer allow? */
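/* As a rough illustration of these criteria: in a loop such as

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + c[i];     <-- has a vdef, so marked vect_used_in_scope
         sum = sum + b[i];       <-- its only use outside the loop is in the
                                     loop-closed exit phi, so marked live
       }

   the store is relevant because it alters memory, and the accumulation is
   live because its result is still needed after the loop.  */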
110 static bool
111 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
112 enum vect_relevant *relevant, bool *live_p)
114 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
115 ssa_op_iter op_iter;
116 imm_use_iterator imm_iter;
117 use_operand_p use_p;
118 def_operand_p def_p;
120 *relevant = vect_unused_in_scope;
121 *live_p = false;
123 /* cond stmt other than loop exit cond. */
124 if (is_ctrl_stmt (stmt)
125 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
126 != loop_exit_ctrl_vec_info_type)
127 *relevant = vect_used_in_scope;
129 /* changing memory. */
130 if (gimple_code (stmt) != GIMPLE_PHI)
131 if (gimple_vdef (stmt))
133 if (vect_print_dump_info (REPORT_DETAILS))
134 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
135 *relevant = vect_used_in_scope;
138 /* uses outside the loop. */
139 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
141 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
143 basic_block bb = gimple_bb (USE_STMT (use_p));
144 if (!flow_bb_inside_loop_p (loop, bb))
146 if (vect_print_dump_info (REPORT_DETAILS))
147 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
149 if (is_gimple_debug (USE_STMT (use_p)))
150 continue;
152 /* We expect all such uses to be in the loop exit phis
153 (because of loop closed form) */
154 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
155 gcc_assert (bb == single_exit (loop)->dest);
157 *live_p = true;
162 return (*live_p || *relevant);
166 /* Function exist_non_indexing_operands_for_use_p
168 USE is one of the uses attached to STMT. Check if USE is
169 used in STMT for anything other than indexing an array. */
171 static bool
172 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
174 tree operand;
175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
177 /* USE corresponds to some operand in STMT. If there is no data
178 reference in STMT, then any operand that corresponds to USE
179 is not indexing an array. */
180 if (!STMT_VINFO_DATA_REF (stmt_info))
181 return true;
183 /* STMT has a data_ref. FORNOW this means that it is of one of
184 the following forms:
185 -1- ARRAY_REF = var
186 -2- var = ARRAY_REF
187 (This should have been verified in analyze_data_refs).
189 'var' in the second case corresponds to a def, not a use,
190 so USE cannot correspond to any operands that are not used
191 for array indexing.
193 Therefore, all we need to check is if STMT falls into the
194 first case, and whether var corresponds to USE. */
196 if (!gimple_assign_copy_p (stmt))
197 return false;
198 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
199 return false;
200 operand = gimple_assign_rhs1 (stmt);
201 if (TREE_CODE (operand) != SSA_NAME)
202 return false;
204 if (operand == use)
205 return true;
207 return false;
212 /* Function process_use.
214 Inputs:
215 - a USE in STMT in a loop represented by LOOP_VINFO
216 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
217 that defined USE. This is done by calling mark_relevant and passing it
218 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
220 Outputs:
221 Generally, LIVE_P and RELEVANT are used to define the liveness and
222 relevance info of the DEF_STMT of this USE:
223 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
224 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
225 Exceptions:
226 - case 1: If USE is used only for address computations (e.g. array indexing),
227 which does not need to be directly vectorized, then the liveness/relevance
228 of the respective DEF_STMT is left unchanged.
229 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
230 skip DEF_STMT because it had already been processed.
231 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
232 be modified accordingly.
234 Return true if everything is as expected. Return false otherwise. */
236 static bool
237 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
238 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
241 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
242 stmt_vec_info dstmt_vinfo;
243 basic_block bb, def_bb;
244 tree def;
245 gimple def_stmt;
246 enum vect_def_type dt;
248 /* case 1: we are only interested in uses that need to be vectorized. Uses
249 that are used for address computation are not considered relevant. */
250 if (!exist_non_indexing_operands_for_use_p (use, stmt))
251 return true;
253 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
255 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
256 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
257 return false;
260 if (!def_stmt || gimple_nop_p (def_stmt))
261 return true;
263 def_bb = gimple_bb (def_stmt);
264 if (!flow_bb_inside_loop_p (loop, def_bb))
266 if (vect_print_dump_info (REPORT_DETAILS))
267 fprintf (vect_dump, "def_stmt is out of loop.");
268 return true;
271 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
272 DEF_STMT must have already been processed, because this should be the
273 only way that STMT, which is a reduction-phi, was put in the worklist,
274 as there should be no other uses for DEF_STMT in the loop. So we just
275 check that everything is as expected, and we are done. */
276 dstmt_vinfo = vinfo_for_stmt (def_stmt);
277 bb = gimple_bb (stmt);
278 if (gimple_code (stmt) == GIMPLE_PHI
279 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
280 && gimple_code (def_stmt) != GIMPLE_PHI
281 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
282 && bb->loop_father == def_bb->loop_father)
284 if (vect_print_dump_info (REPORT_DETAILS))
285 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
286 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
287 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
288 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
289 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
290 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
291 return true;
294 /* case 3a: outer-loop stmt defining an inner-loop stmt:
295 outer-loop-header-bb:
296 d = def_stmt
297 inner-loop:
298 stmt # use (d)
299 outer-loop-tail-bb:
300 ... */
301 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
303 if (vect_print_dump_info (REPORT_DETAILS))
304 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
306 switch (relevant)
308 case vect_unused_in_scope:
309 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
310 vect_used_in_scope : vect_unused_in_scope;
311 break;
313 case vect_used_in_outer_by_reduction:
314 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
315 relevant = vect_used_by_reduction;
316 break;
318 case vect_used_in_outer:
319 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
320 relevant = vect_used_in_scope;
321 break;
323 case vect_used_in_scope:
324 break;
326 default:
327 gcc_unreachable ();
331 /* case 3b: inner-loop stmt defining an outer-loop stmt:
332 outer-loop-header-bb:
334 inner-loop:
335 d = def_stmt
336 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
337 stmt # use (d) */
338 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
340 if (vect_print_dump_info (REPORT_DETAILS))
341 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
343 switch (relevant)
345 case vect_unused_in_scope:
346 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
347 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
348 vect_used_in_outer_by_reduction : vect_unused_in_scope;
349 break;
351 case vect_used_by_reduction:
352 relevant = vect_used_in_outer_by_reduction;
353 break;
355 case vect_used_in_scope:
356 relevant = vect_used_in_outer;
357 break;
359 default:
360 gcc_unreachable ();
364 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
365 return true;
369 /* Function vect_mark_stmts_to_be_vectorized.
371 Not all stmts in the loop need to be vectorized. For example:
373 for i...
374 for j...
375 1. T0 = i + j
376 2. T1 = a[T0]
378 3. j = j + 1
380 Stmts 1 and 3 do not need to be vectorized, because loop control and
381 addressing of vectorized data-refs are handled differently.
383 This pass detects such stmts. */
385 bool
386 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
388 VEC(gimple,heap) *worklist;
389 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
390 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
391 unsigned int nbbs = loop->num_nodes;
392 gimple_stmt_iterator si;
393 gimple stmt;
394 unsigned int i;
395 stmt_vec_info stmt_vinfo;
396 basic_block bb;
397 gimple phi;
398 bool live_p;
399 enum vect_relevant relevant, tmp_relevant;
400 enum vect_def_type def_type;
402 if (vect_print_dump_info (REPORT_DETAILS))
403 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
405 worklist = VEC_alloc (gimple, heap, 64);
407 /* 1. Init worklist. */
408 for (i = 0; i < nbbs; i++)
410 bb = bbs[i];
411 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
413 phi = gsi_stmt (si);
414 if (vect_print_dump_info (REPORT_DETAILS))
416 fprintf (vect_dump, "init: phi relevant? ");
417 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
420 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
421 vect_mark_relevant (&worklist, phi, relevant, live_p);
423 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
425 stmt = gsi_stmt (si);
426 if (vect_print_dump_info (REPORT_DETAILS))
428 fprintf (vect_dump, "init: stmt relevant? ");
429 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
432 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
433 vect_mark_relevant (&worklist, stmt, relevant, live_p);
437 /* 2. Process_worklist */
438 while (VEC_length (gimple, worklist) > 0)
440 use_operand_p use_p;
441 ssa_op_iter iter;
443 stmt = VEC_pop (gimple, worklist);
444 if (vect_print_dump_info (REPORT_DETAILS))
446 fprintf (vect_dump, "worklist: examine stmt: ");
447 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
450 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
451 (DEF_STMT) as relevant/irrelevant and live/dead according to the
452 liveness and relevance properties of STMT. */
453 stmt_vinfo = vinfo_for_stmt (stmt);
454 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
455 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
457 /* Generally, the liveness and relevance properties of STMT are
458 propagated as is to the DEF_STMTs of its USEs:
459 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
460 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
462 One exception is when STMT has been identified as defining a reduction
463 variable; in this case we set the liveness/relevance as follows:
464 live_p = false
465 relevant = vect_used_by_reduction
466 This is because we distinguish between two kinds of relevant stmts -
467 those that are used by a reduction computation, and those that are
468 (also) used by a regular computation. This allows us later on to
469 identify stmts that are used solely by a reduction, and therefore the
470 order of the results that they produce does not have to be kept. */
472 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
473 tmp_relevant = relevant;
474 switch (def_type)
476 case vect_reduction_def:
477 switch (tmp_relevant)
479 case vect_unused_in_scope:
480 relevant = vect_used_by_reduction;
481 break;
483 case vect_used_by_reduction:
484 if (gimple_code (stmt) == GIMPLE_PHI)
485 break;
486 /* fall through */
488 default:
489 if (vect_print_dump_info (REPORT_DETAILS))
490 fprintf (vect_dump, "unsupported use of reduction.");
492 VEC_free (gimple, heap, worklist);
493 return false;
496 live_p = false;
497 break;
499 case vect_nested_cycle:
500 if (tmp_relevant != vect_unused_in_scope
501 && tmp_relevant != vect_used_in_outer_by_reduction
502 && tmp_relevant != vect_used_in_outer)
504 if (vect_print_dump_info (REPORT_DETAILS))
505 fprintf (vect_dump, "unsupported use of nested cycle.");
507 VEC_free (gimple, heap, worklist);
508 return false;
511 live_p = false;
512 break;
514 case vect_double_reduction_def:
515 if (tmp_relevant != vect_unused_in_scope
516 && tmp_relevant != vect_used_by_reduction)
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "unsupported use of double reduction.");
521 VEC_free (gimple, heap, worklist);
522 return false;
525 live_p = false;
526 break;
528 default:
529 break;
532 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
534 tree op = USE_FROM_PTR (use_p);
535 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
537 VEC_free (gimple, heap, worklist);
538 return false;
541 } /* while worklist */
543 VEC_free (gimple, heap, worklist);
544 return true;
548 /* Get cost by calling cost target builtin. */
550 static inline
551 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
553 tree dummy_type = NULL;
554 int dummy = 0;
556 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
557 dummy_type, dummy);
561 /* Get cost for STMT. */
563 int
564 cost_for_stmt (gimple stmt)
566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
568 switch (STMT_VINFO_TYPE (stmt_info))
570 case load_vec_info_type:
571 return vect_get_stmt_cost (scalar_load);
572 case store_vec_info_type:
573 return vect_get_stmt_cost (scalar_store);
574 case op_vec_info_type:
575 case condition_vec_info_type:
576 case assignment_vec_info_type:
577 case reduc_vec_info_type:
578 case induc_vec_info_type:
579 case type_promotion_vec_info_type:
580 case type_demotion_vec_info_type:
581 case type_conversion_vec_info_type:
582 case call_vec_info_type:
583 return vect_get_stmt_cost (scalar_stmt);
584 case undef_vec_info_type:
585 default:
586 gcc_unreachable ();
590 /* Function vect_model_simple_cost.
592 Models cost for simple operations, i.e. those that only emit ncopies of a
593 single op. Right now, this does not account for multiple insns that could
594 be generated for the single vector op. We will handle that shortly. */
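/* As a worked example of the accounting below: with ncopies == 2 and one
   constant operand (dt[0] == vect_constant_def), the inside-of-loop cost is
   2 * vector_stmt and the outside-of-loop cost is 1 * vector_stmt, the
   latter paying once, before the loop, for building the vector of
   constants.  */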
596 void
597 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
598 enum vect_def_type *dt, slp_tree slp_node)
600 int i;
601 int inside_cost = 0, outside_cost = 0;
603 /* The SLP costs were already calculated during SLP tree build. */
604 if (PURE_SLP_STMT (stmt_info))
605 return;
607 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
609 /* FORNOW: Assuming a maximum of 2 args per stmt. */
610 for (i = 0; i < 2; i++)
612 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
613 outside_cost += vect_get_stmt_cost (vector_stmt);
616 if (vect_print_dump_info (REPORT_COST))
617 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
618 "outside_cost = %d .", inside_cost, outside_cost);
620 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
621 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
622 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
626 /* Model cost for type demotion and promotion operations. PWR is normally
627 zero for single-step promotions and demotions. It will be one if
628 two-step promotion/demotion is required, and so on. Each additional
629 step doubles the number of instructions required. */
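/* As a worked example (reading vect_pow2 (N) as 2**N): for a two-step
   promotion PWR is 1 and the loop below accumulates
   (2**1 + 2**2) * single_stmt_cost = 6 * single_stmt_cost, since every
   promotion step doubles the number of result vectors; for a two-step
   demotion the exponent starts at zero, giving
   (2**0 + 2**1) * single_stmt_cost = 3 * single_stmt_cost.  */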
631 static void
632 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
633 enum vect_def_type *dt, int pwr)
635 int i, tmp;
636 int inside_cost = 0, outside_cost = 0, single_stmt_cost;
638 /* The SLP costs were already calculated during SLP tree build. */
639 if (PURE_SLP_STMT (stmt_info))
640 return;
642 single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
643 for (i = 0; i < pwr + 1; i++)
645 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
646 (i + 1) : i;
647 inside_cost += vect_pow2 (tmp) * single_stmt_cost;
651 /* FORNOW: Assuming a maximum of 2 args per stmt. */
651 for (i = 0; i < 2; i++)
653 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
654 outside_cost += vect_get_stmt_cost (vector_stmt);
657 if (vect_print_dump_info (REPORT_COST))
658 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
659 "outside_cost = %d .", inside_cost, outside_cost);
661 /* Set the costs in STMT_INFO. */
662 stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
663 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
666 /* Function vect_cost_strided_group_size
668 For strided load or store, return the group_size only if it is the first
669 load or store of a group, else return 1. This ensures that group size is
670 only returned once per group. */
672 static int
673 vect_cost_strided_group_size (stmt_vec_info stmt_info)
675 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
677 if (first_stmt == STMT_VINFO_STMT (stmt_info))
678 return DR_GROUP_SIZE (stmt_info);
680 return 1;
684 /* Function vect_model_store_cost
686 Models cost for stores. In the case of strided accesses, one access
687 has the overhead of the strided access attributed to it. */
689 void
690 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
691 enum vect_def_type dt, slp_tree slp_node)
693 int group_size;
694 unsigned int inside_cost = 0, outside_cost = 0;
695 struct data_reference *first_dr;
696 gimple first_stmt;
698 /* The SLP costs were already calculated during SLP tree build. */
699 if (PURE_SLP_STMT (stmt_info))
700 return;
702 if (dt == vect_constant_def || dt == vect_external_def)
703 outside_cost = vect_get_stmt_cost (scalar_to_vec);
705 /* Strided access? */
706 if (DR_GROUP_FIRST_DR (stmt_info))
708 if (slp_node)
710 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
711 group_size = 1;
713 else
715 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
716 group_size = vect_cost_strided_group_size (stmt_info);
719 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
721 /* Not a strided access. */
722 else
724 group_size = 1;
725 first_dr = STMT_VINFO_DATA_REF (stmt_info);
728 /* Is this an access in a group of stores, which provide strided access?
729 If so, add in the cost of the permutes. */
730 if (group_size > 1)
732 /* Uses high and low interleave operations for each needed permute. */
733 inside_cost = ncopies * exact_log2(group_size) * group_size
734 * vect_get_stmt_cost (vec_perm);
736 if (vect_print_dump_info (REPORT_COST))
737 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
738 group_size);
742 /* Costs of the stores. */
743 vect_get_store_cost (first_dr, ncopies, &inside_cost);
745 if (vect_print_dump_info (REPORT_COST))
746 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
747 "outside_cost = %d .", inside_cost, outside_cost);
749 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
750 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
751 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
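/* Rough numbers for the strided-store permute model above: with a group of
   four interleaved stores (group_size == 4) and ncopies == 1, the permute
   cost is exact_log2 (4) * 4 = 8 vec_perm operations -- two rounds of
   high/low interleaving, each round charged one permute per vector in the
   group.  */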
755 /* Calculate cost of DR's memory access. */
756 void
757 vect_get_store_cost (struct data_reference *dr, int ncopies,
758 unsigned int *inside_cost)
760 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
762 switch (alignment_support_scheme)
764 case dr_aligned:
766 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
768 if (vect_print_dump_info (REPORT_COST))
769 fprintf (vect_dump, "vect_model_store_cost: aligned.");
771 break;
774 case dr_unaligned_supported:
776 gimple stmt = DR_STMT (dr);
777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
778 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
780 /* Here, we assign an additional cost for the unaligned store. */
781 *inside_cost += ncopies
782 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
783 vectype, DR_MISALIGNMENT (dr));
785 if (vect_print_dump_info (REPORT_COST))
786 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
787 "hardware.");
789 break;
792 default:
793 gcc_unreachable ();
798 /* Function vect_model_load_cost
800 Models cost for loads. In the case of strided accesses, the last access
801 has the overhead of the strided access attributed to it. Since unaligned
802 accesses are supported for loads, we also account for the costs of the
803 access scheme chosen. */
805 void
806 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
809 int group_size;
810 gimple first_stmt;
811 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
812 unsigned int inside_cost = 0, outside_cost = 0;
814 /* The SLP costs were already calculated during SLP tree build. */
815 if (PURE_SLP_STMT (stmt_info))
816 return;
818 /* Strided accesses? */
819 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
820 if (first_stmt && !slp_node)
822 group_size = vect_cost_strided_group_size (stmt_info);
823 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
825 /* Not a strided access. */
826 else
828 group_size = 1;
829 first_dr = dr;
832 /* Is this an access in a group of loads providing strided access?
833 If so, add in the cost of the permutes. */
834 if (group_size > 1)
836 /* Uses even and odd extract operations for each needed permute. */
837 inside_cost = ncopies * exact_log2(group_size) * group_size
838 * vect_get_stmt_cost (vec_perm);
840 if (vect_print_dump_info (REPORT_COST))
841 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
842 group_size);
845 /* The loads themselves. */
846 vect_get_load_cost (first_dr, ncopies,
847 ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node),
848 &inside_cost, &outside_cost);
850 if (vect_print_dump_info (REPORT_COST))
851 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
852 "outside_cost = %d .", inside_cost, outside_cost);
854 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
855 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
856 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
860 /* Calculate cost of DR's memory access. */
861 void
862 vect_get_load_cost (struct data_reference *dr, int ncopies,
863 bool add_realign_cost, unsigned int *inside_cost,
864 unsigned int *outside_cost)
866 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
868 switch (alignment_support_scheme)
870 case dr_aligned:
872 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
874 if (vect_print_dump_info (REPORT_COST))
875 fprintf (vect_dump, "vect_model_load_cost: aligned.");
877 break;
879 case dr_unaligned_supported:
881 gimple stmt = DR_STMT (dr);
882 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
883 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
885 /* Here, we assign an additional cost for the unaligned load. */
886 *inside_cost += ncopies
887 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
888 vectype, DR_MISALIGNMENT (dr));
889 if (vect_print_dump_info (REPORT_COST))
890 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
891 "hardware.");
893 break;
895 case dr_explicit_realign:
897 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
898 + vect_get_stmt_cost (vec_perm));
900 /* FIXME: If the misalignment remains fixed across the iterations of
901 the containing loop, the following cost should be added to the
902 outside costs. */
903 if (targetm.vectorize.builtin_mask_for_load)
904 *inside_cost += vect_get_stmt_cost (vector_stmt);
906 if (vect_print_dump_info (REPORT_COST))
907 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
909 break;
911 case dr_explicit_realign_optimized:
913 if (vect_print_dump_info (REPORT_COST))
914 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
915 "pipelined.");
917 /* Unaligned software pipeline has a load of an address, an initial
918 load, and possibly a mask operation to "prime" the loop. However,
919 if this is an access in a group of loads, which provide strided
920 access, then the above cost should only be considered for one
921 access in the group. Inside the loop, there is a load op
922 and a realignment op. */
924 if (add_realign_cost)
926 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
927 if (targetm.vectorize.builtin_mask_for_load)
928 *outside_cost += vect_get_stmt_cost (vector_stmt);
931 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
932 + vect_get_stmt_cost (vec_perm));
934 if (vect_print_dump_info (REPORT_COST))
935 fprintf (vect_dump,
936 "vect_model_load_cost: explicit realign optimized");
938 break;
941 default:
942 gcc_unreachable ();
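/* Sketch of the dr_explicit_realign_optimized accounting above: with
   ADD_REALIGN_COST true, ncopies == 2, and a target that provides
   builtin_mask_for_load, the outside-of-loop cost is 3 * vector_stmt
   (address load, initial load, and mask computation), while the
   inside-of-loop cost is 2 * (vector_load + vec_perm).  */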
947 /* Function vect_init_vector.
949 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
950 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
951 is not NULL. Otherwise, place the initialization at the loop preheader.
952 Return the DEF of INIT_STMT.
953 It will be used in the vectorization of STMT. */
955 tree
956 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
957 gimple_stmt_iterator *gsi)
959 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
960 tree new_var;
961 gimple init_stmt;
962 tree vec_oprnd;
963 edge pe;
964 tree new_temp;
965 basic_block new_bb;
967 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
968 add_referenced_var (new_var);
969 init_stmt = gimple_build_assign (new_var, vector_var);
970 new_temp = make_ssa_name (new_var, init_stmt);
971 gimple_assign_set_lhs (init_stmt, new_temp);
973 if (gsi)
974 vect_finish_stmt_generation (stmt, init_stmt, gsi);
975 else
977 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
979 if (loop_vinfo)
981 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
983 if (nested_in_vect_loop_p (loop, stmt))
984 loop = loop->inner;
986 pe = loop_preheader_edge (loop);
987 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
988 gcc_assert (!new_bb);
990 else
992 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
993 basic_block bb;
994 gimple_stmt_iterator gsi_bb_start;
996 gcc_assert (bb_vinfo);
997 bb = BB_VINFO_BB (bb_vinfo);
998 gsi_bb_start = gsi_after_labels (bb);
999 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1003 if (vect_print_dump_info (REPORT_DETAILS))
1005 fprintf (vect_dump, "created new init_stmt: ");
1006 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1009 vec_oprnd = gimple_assign_lhs (init_stmt);
1010 return vec_oprnd;
1014 /* Function vect_get_vec_def_for_operand.
1016 OP is an operand in STMT. This function returns a (vector) def that will be
1017 used in the vectorized stmt for STMT.
1019 In the case that OP is an SSA_NAME which is defined in the loop, then
1020 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1022 In case OP is an invariant or constant, a new stmt that creates a vector def
1023 needs to be introduced. */
1025 tree
1026 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1028 tree vec_oprnd;
1029 gimple vec_stmt;
1030 gimple def_stmt;
1031 stmt_vec_info def_stmt_info = NULL;
1032 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1033 unsigned int nunits;
1034 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1035 tree vec_inv;
1036 tree vec_cst;
1037 tree t = NULL_TREE;
1038 tree def;
1039 int i;
1040 enum vect_def_type dt;
1041 bool is_simple_use;
1042 tree vector_type;
1044 if (vect_print_dump_info (REPORT_DETAILS))
1046 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1047 print_generic_expr (vect_dump, op, TDF_SLIM);
1050 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1051 &dt);
1052 gcc_assert (is_simple_use);
1053 if (vect_print_dump_info (REPORT_DETAILS))
1055 if (def)
1057 fprintf (vect_dump, "def = ");
1058 print_generic_expr (vect_dump, def, TDF_SLIM);
1060 if (def_stmt)
1062 fprintf (vect_dump, " def_stmt = ");
1063 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1067 switch (dt)
1069 /* Case 1: operand is a constant. */
1070 case vect_constant_def:
1072 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1073 gcc_assert (vector_type);
1074 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1076 if (scalar_def)
1077 *scalar_def = op;
1079 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1080 if (vect_print_dump_info (REPORT_DETAILS))
1081 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1083 vec_cst = build_vector_from_val (vector_type, op);
1084 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1087 /* Case 2: operand is defined outside the loop - loop invariant. */
1088 case vect_external_def:
1090 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1091 gcc_assert (vector_type);
1092 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1094 if (scalar_def)
1095 *scalar_def = def;
1097 /* Create 'vec_inv = {inv,inv,..,inv}' */
1098 if (vect_print_dump_info (REPORT_DETAILS))
1099 fprintf (vect_dump, "Create vector_inv.");
1101 for (i = nunits - 1; i >= 0; --i)
1103 t = tree_cons (NULL_TREE, def, t);
1106 /* FIXME: use build_constructor directly. */
1107 vec_inv = build_constructor_from_list (vector_type, t);
1108 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1111 /* Case 3: operand is defined inside the loop. */
1112 case vect_internal_def:
1114 if (scalar_def)
1115 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1117 /* Get the def from the vectorized stmt. */
1118 def_stmt_info = vinfo_for_stmt (def_stmt);
1119 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1120 gcc_assert (vec_stmt);
1121 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1122 vec_oprnd = PHI_RESULT (vec_stmt);
1123 else if (is_gimple_call (vec_stmt))
1124 vec_oprnd = gimple_call_lhs (vec_stmt);
1125 else
1126 vec_oprnd = gimple_assign_lhs (vec_stmt);
1127 return vec_oprnd;
1130 /* Case 4: operand is defined by a loop header phi - reduction */
1131 case vect_reduction_def:
1132 case vect_double_reduction_def:
1133 case vect_nested_cycle:
1135 struct loop *loop;
1137 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1138 loop = (gimple_bb (def_stmt))->loop_father;
1140 /* Get the def before the loop */
1141 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1142 return get_initial_def_for_reduction (stmt, op, scalar_def);
1145 /* Case 5: operand is defined by loop-header phi - induction. */
1146 case vect_induction_def:
1148 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1150 /* Get the def from the vectorized stmt. */
1151 def_stmt_info = vinfo_for_stmt (def_stmt);
1152 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1153 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1154 vec_oprnd = PHI_RESULT (vec_stmt);
1155 else
1156 vec_oprnd = gimple_get_lhs (vec_stmt);
1157 return vec_oprnd;
1160 default:
1161 gcc_unreachable ();
1166 /* Function vect_get_vec_def_for_stmt_copy
1168 Return a vector-def for an operand. This function is used when the
1169 vectorized stmt to be created (by the caller to this function) is a "copy"
1170 created in case the vectorized result cannot fit in one vector, and several
1171 copies of the vector-stmt are required. In this case the vector-def is
1172 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1173 of the stmt that defines VEC_OPRND.
1174 DT is the type of the vector def VEC_OPRND.
1176 Context:
1177 In case the vectorization factor (VF) is bigger than the number
1178 of elements that can fit in a vectype (nunits), we have to generate
1179 more than one vector stmt to vectorize the scalar stmt. This situation
1180 arises when there are multiple data-types operated upon in the loop; the
1181 smallest data-type determines the VF, and as a result, when vectorizing
1182 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1183 vector stmt (each computing a vector of 'nunits' results, and together
1184 computing 'VF' results in each iteration). This function is called when
1185 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1186 which VF=16 and nunits=4, so the number of copies required is 4):
1188 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1190 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1191 VS1.1: vx.1 = memref1 VS1.2
1192 VS1.2: vx.2 = memref2 VS1.3
1193 VS1.3: vx.3 = memref3
1195 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1196 VSnew.1: vz1 = vx.1 + ... VSnew.2
1197 VSnew.2: vz2 = vx.2 + ... VSnew.3
1198 VSnew.3: vz3 = vx.3 + ...
1200 The vectorization of S1 is explained in vectorizable_load.
1201 The vectorization of S2:
1202 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1203 the function 'vect_get_vec_def_for_operand' is called to
1204 get the relevant vector-def for each operand of S2. For operand x it
1205 returns the vector-def 'vx.0'.
1207 To create the remaining copies of the vector-stmt (VSnew.j), this
1208 function is called to get the relevant vector-def for each operand. It is
1209 obtained from the respective VS1.j stmt, which is recorded in the
1210 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1212 For example, to obtain the vector-def 'vx.1' in order to create the
1213 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1214 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1215 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1216 and return its def ('vx.1').
1217 Overall, to create the above sequence this function will be called 3 times:
1218 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1219 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1220 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1222 tree
1223 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1225 gimple vec_stmt_for_operand;
1226 stmt_vec_info def_stmt_info;
1228 /* Do nothing; can reuse same def. */
1229 if (dt == vect_external_def || dt == vect_constant_def )
1230 return vec_oprnd;
1232 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1233 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1234 gcc_assert (def_stmt_info);
1235 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1236 gcc_assert (vec_stmt_for_operand);
1237 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1238 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1239 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1240 else
1241 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1242 return vec_oprnd;
1246 /* Get vectorized definitions for the operands to create a copy of an original
1247 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1249 static void
1250 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1251 VEC(tree,heap) **vec_oprnds0,
1252 VEC(tree,heap) **vec_oprnds1)
1254 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1256 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1257 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1259 if (vec_oprnds1 && *vec_oprnds1)
1261 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1262 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1263 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1268 /* Get vectorized definitions for OP0 and OP1, or from SLP_NODE if it is not
1269 NULL. */
1271 static void
1272 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1273 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1274 slp_tree slp_node)
1276 if (slp_node)
1277 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1278 else
1280 tree vec_oprnd;
1282 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1283 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1284 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1286 if (op1)
1288 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1289 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1290 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1296 /* Function vect_finish_stmt_generation.
1298 Insert a new stmt. */
1300 void
1301 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1302 gimple_stmt_iterator *gsi)
1304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1305 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1306 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1308 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1310 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1312 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1313 bb_vinfo));
1315 if (vect_print_dump_info (REPORT_DETAILS))
1317 fprintf (vect_dump, "add new stmt: ");
1318 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1321 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1324 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1325 a function declaration if the target has a vectorized version
1326 of the function, or NULL_TREE if the function cannot be vectorized. */
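/* For example (illustrative only, target dependent): for a call to
   __builtin_sqrt with a V2DF result type, the
   targetm.vectorize.builtin_vectorized_function hook may return the decl of
   a target builtin that computes two double-precision square roots at once;
   a target without such a builtin returns NULL_TREE and the call is left
   unvectorized.  */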
1328 tree
1329 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1331 tree fndecl = gimple_call_fndecl (call);
1333 /* We only handle functions that do not read or clobber memory -- i.e.
1334 const or novops ones. */
1335 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1336 return NULL_TREE;
1338 if (!fndecl
1339 || TREE_CODE (fndecl) != FUNCTION_DECL
1340 || !DECL_BUILT_IN (fndecl))
1341 return NULL_TREE;
1343 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1344 vectype_in);
1347 /* Function vectorizable_call.
1349 Check if STMT performs a function call that can be vectorized.
1350 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1351 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1352 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1354 static bool
1355 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1357 tree vec_dest;
1358 tree scalar_dest;
1359 tree op, type;
1360 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1362 tree vectype_out, vectype_in;
1363 int nunits_in;
1364 int nunits_out;
1365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1366 tree fndecl, new_temp, def, rhs_type;
1367 gimple def_stmt;
1368 enum vect_def_type dt[3]
1369 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1370 gimple new_stmt = NULL;
1371 int ncopies, j;
1372 VEC(tree, heap) *vargs = NULL;
1373 enum { NARROW, NONE, WIDEN } modifier;
1374 size_t i, nargs;
1376 /* FORNOW: unsupported in basic block SLP. */
1377 gcc_assert (loop_vinfo);
1379 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1380 return false;
1382 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1383 return false;
1385 /* FORNOW: SLP not supported. */
1386 if (STMT_SLP_TYPE (stmt_info))
1387 return false;
1389 /* Is STMT a vectorizable call? */
1390 if (!is_gimple_call (stmt))
1391 return false;
1393 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1394 return false;
1396 if (stmt_can_throw_internal (stmt))
1397 return false;
1399 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1401 /* Process function arguments. */
1402 rhs_type = NULL_TREE;
1403 vectype_in = NULL_TREE;
1404 nargs = gimple_call_num_args (stmt);
1406 /* Bail out if the function has more than three arguments; we do not have
1407 interesting builtin functions to vectorize with more than two arguments,
1408 except for fma. Having no arguments is also not supported. */
1409 if (nargs == 0 || nargs > 3)
1410 return false;
1412 for (i = 0; i < nargs; i++)
1414 tree opvectype;
1416 op = gimple_call_arg (stmt, i);
1418 /* We can only handle calls with arguments of the same type. */
1419 if (rhs_type
1420 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1422 if (vect_print_dump_info (REPORT_DETAILS))
1423 fprintf (vect_dump, "argument types differ.");
1424 return false;
1426 if (!rhs_type)
1427 rhs_type = TREE_TYPE (op);
1429 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1430 &def_stmt, &def, &dt[i], &opvectype))
1432 if (vect_print_dump_info (REPORT_DETAILS))
1433 fprintf (vect_dump, "use not simple.");
1434 return false;
1437 if (!vectype_in)
1438 vectype_in = opvectype;
1439 else if (opvectype
1440 && opvectype != vectype_in)
1442 if (vect_print_dump_info (REPORT_DETAILS))
1443 fprintf (vect_dump, "argument vector types differ.");
1444 return false;
1447 /* If all arguments are external or constant defs, use a vector type with
1448 the same size as the output vector type. */
1449 if (!vectype_in)
1450 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1451 if (vec_stmt)
1452 gcc_assert (vectype_in);
1453 if (!vectype_in)
1455 if (vect_print_dump_info (REPORT_DETAILS))
1457 fprintf (vect_dump, "no vectype for scalar type ");
1458 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1461 return false;
1464 /* FORNOW */
1465 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1466 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1467 if (nunits_in == nunits_out / 2)
1468 modifier = NARROW;
1469 else if (nunits_out == nunits_in)
1470 modifier = NONE;
1471 else if (nunits_out == nunits_in / 2)
1472 modifier = WIDEN;
1473 else
1474 return false;
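/* A concrete reading of the classification above (illustrative numbers):
   with vectype_in V4SI and vectype_out V4SF the unit counts match and the
   call is a NONE (same-width) operation; with vectype_in V4SI and
   vectype_out V8HI we have nunits_in == nunits_out / 2, so the call
   narrows and each vectorized call consumes two input vectors per result
   vector -- which is why the NARROW case divides the vectorization factor
   by nunits_out when computing ncopies below.  */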
1476 /* For now, we only vectorize functions if a target specific builtin
1477 is available. TODO -- in some cases, it might be profitable to
1478 insert the calls for pieces of the vector, in order to be able
1479 to vectorize other operations in the loop. */
1480 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1481 if (fndecl == NULL_TREE)
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "function is not vectorizable.");
1486 return false;
1489 gcc_assert (!gimple_vuse (stmt));
1491 if (modifier == NARROW)
1492 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1493 else
1494 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1496 /* Sanity check: make sure that at least one copy of the vectorized stmt
1497 needs to be generated. */
1498 gcc_assert (ncopies >= 1);
1500 if (!vec_stmt) /* transformation not required. */
1502 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1503 if (vect_print_dump_info (REPORT_DETAILS))
1504 fprintf (vect_dump, "=== vectorizable_call ===");
1505 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1506 return true;
1509 /** Transform. **/
1511 if (vect_print_dump_info (REPORT_DETAILS))
1512 fprintf (vect_dump, "transform operation.");
1514 /* Handle def. */
1515 scalar_dest = gimple_call_lhs (stmt);
1516 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1518 prev_stmt_info = NULL;
1519 switch (modifier)
1521 case NONE:
1522 for (j = 0; j < ncopies; ++j)
1524 /* Build argument list for the vectorized call. */
1525 if (j == 0)
1526 vargs = VEC_alloc (tree, heap, nargs);
1527 else
1528 VEC_truncate (tree, vargs, 0);
1530 for (i = 0; i < nargs; i++)
1532 op = gimple_call_arg (stmt, i);
1533 if (j == 0)
1534 vec_oprnd0
1535 = vect_get_vec_def_for_operand (op, stmt, NULL);
1536 else
1538 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1539 vec_oprnd0
1540 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1543 VEC_quick_push (tree, vargs, vec_oprnd0);
1546 new_stmt = gimple_build_call_vec (fndecl, vargs);
1547 new_temp = make_ssa_name (vec_dest, new_stmt);
1548 gimple_call_set_lhs (new_stmt, new_temp);
1550 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1551 mark_symbols_for_renaming (new_stmt);
1553 if (j == 0)
1554 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1555 else
1556 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1558 prev_stmt_info = vinfo_for_stmt (new_stmt);
1561 break;
1563 case NARROW:
1564 for (j = 0; j < ncopies; ++j)
1566 /* Build argument list for the vectorized call. */
1567 if (j == 0)
1568 vargs = VEC_alloc (tree, heap, nargs * 2);
1569 else
1570 VEC_truncate (tree, vargs, 0);
1572 for (i = 0; i < nargs; i++)
1574 op = gimple_call_arg (stmt, i);
1575 if (j == 0)
1577 vec_oprnd0
1578 = vect_get_vec_def_for_operand (op, stmt, NULL);
1579 vec_oprnd1
1580 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1582 else
1584 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1585 vec_oprnd0
1586 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1587 vec_oprnd1
1588 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1591 VEC_quick_push (tree, vargs, vec_oprnd0);
1592 VEC_quick_push (tree, vargs, vec_oprnd1);
1595 new_stmt = gimple_build_call_vec (fndecl, vargs);
1596 new_temp = make_ssa_name (vec_dest, new_stmt);
1597 gimple_call_set_lhs (new_stmt, new_temp);
1599 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1600 mark_symbols_for_renaming (new_stmt);
1602 if (j == 0)
1603 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1604 else
1605 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1607 prev_stmt_info = vinfo_for_stmt (new_stmt);
1610 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1612 break;
1614 case WIDEN:
1615 /* No current target implements this case. */
1616 return false;
1619 VEC_free (tree, heap, vargs);
1621 /* Update the exception handling table with the vector stmt if necessary. */
1622 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1623 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1625 /* The call in STMT might prevent it from being removed in dce.
1626 We however cannot remove it here, due to the way the ssa name
1627 it defines is mapped to the new definition. So just replace
1628 rhs of the statement with something harmless. */
1630 type = TREE_TYPE (scalar_dest);
1631 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1632 build_zero_cst (type));
1633 set_vinfo_for_stmt (new_stmt, stmt_info);
1634 /* For pattern statements make the related statement point to
1635 NEW_STMT in order to be able to retrieve the original statement
1636 information later. */
1637 if (is_pattern_stmt_p (stmt_info))
1639 gimple related = STMT_VINFO_RELATED_STMT (stmt_info);
1640 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (related)) = new_stmt;
1642 set_vinfo_for_stmt (stmt, NULL);
1643 STMT_VINFO_STMT (stmt_info) = new_stmt;
1644 gsi_replace (gsi, new_stmt, false);
1645 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1647 return true;
1651 /* Function vect_gen_widened_results_half
1653 Create a vector stmt whose code, type, number of arguments, and result
1654 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1655 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1656 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1657 needs to be created (DECL is a function-decl of a target-builtin).
1658 STMT is the original scalar stmt that we are vectorizing. */
1660 static gimple
1661 vect_gen_widened_results_half (enum tree_code code,
1662 tree decl,
1663 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1664 tree vec_dest, gimple_stmt_iterator *gsi,
1665 gimple stmt)
1667 gimple new_stmt;
1668 tree new_temp;
1670 /* Generate half of the widened result: */
1671 if (code == CALL_EXPR)
1673 /* Target specific support */
1674 if (op_type == binary_op)
1675 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1676 else
1677 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1678 new_temp = make_ssa_name (vec_dest, new_stmt);
1679 gimple_call_set_lhs (new_stmt, new_temp);
1681 else
1683 /* Generic support */
1684 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1685 if (op_type != binary_op)
1686 vec_oprnd1 = NULL;
1687 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1688 vec_oprnd1);
1689 new_temp = make_ssa_name (vec_dest, new_stmt);
1690 gimple_assign_set_lhs (new_stmt, new_temp);
1692 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1694 return new_stmt;
1698 /* Check if STMT performs a conversion operation that can be vectorized.
1699 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1700 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1701 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1703 static bool
1704 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1705 gimple *vec_stmt, slp_tree slp_node)
1707 tree vec_dest;
1708 tree scalar_dest;
1709 tree op0;
1710 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1711 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1712 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1713 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1714 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1715 tree new_temp;
1716 tree def;
1717 gimple def_stmt;
1718 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1719 gimple new_stmt = NULL;
1720 stmt_vec_info prev_stmt_info;
1721 int nunits_in;
1722 int nunits_out;
1723 tree vectype_out, vectype_in;
1724 int ncopies, j;
1725 tree rhs_type;
1726 tree builtin_decl;
1727 enum { NARROW, NONE, WIDEN } modifier;
1728 int i;
1729 VEC(tree,heap) *vec_oprnds0 = NULL;
1730 tree vop0;
1731 VEC(tree,heap) *dummy = NULL;
1732 int dummy_int;
1734 /* Is STMT a vectorizable conversion? */
1736 /* FORNOW: unsupported in basic block SLP. */
1737 gcc_assert (loop_vinfo);
1739 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1740 return false;
1742 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1743 return false;
1745 if (!is_gimple_assign (stmt))
1746 return false;
1748 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1749 return false;
1751 code = gimple_assign_rhs_code (stmt);
1752 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1753 return false;
1755 /* Check types of lhs and rhs. */
1756 scalar_dest = gimple_assign_lhs (stmt);
1757 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1759 op0 = gimple_assign_rhs1 (stmt);
1760 rhs_type = TREE_TYPE (op0);
1761 /* Check the operands of the operation. */
1762 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1763 &def_stmt, &def, &dt[0], &vectype_in))
1765 if (vect_print_dump_info (REPORT_DETAILS))
1766 fprintf (vect_dump, "use not simple.");
1767 return false;
1769 /* If op0 is an external or constant def, use a vector type of
1770 the same size as the output vector type. */
1771 if (!vectype_in)
1772 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1773 if (vec_stmt)
1774 gcc_assert (vectype_in);
1775 if (!vectype_in)
1777 if (vect_print_dump_info (REPORT_DETAILS))
1779 fprintf (vect_dump, "no vectype for scalar type ");
1780 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1783 return false;
1786 /* FORNOW */
1787 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1788 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1789 if (nunits_in == nunits_out / 2)
1790 modifier = NARROW;
1791 else if (nunits_out == nunits_in)
1792 modifier = NONE;
1793 else if (nunits_out == nunits_in / 2)
1794 modifier = WIDEN;
1795 else
1796 return false;
1798 if (modifier == NARROW)
1799 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1800 else
1801 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1803 /* Multiple types in SLP are handled by creating the appropriate number of
1804 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1805 case of SLP. */
1806 if (slp_node)
1807 ncopies = 1;
1809 /* Sanity check: make sure that at least one copy of the vectorized stmt
1810 needs to be generated. */
1811 gcc_assert (ncopies >= 1);
1813 /* Supportable by target? */
1814 if ((modifier == NONE
1815 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1816 || (modifier == WIDEN
1817 && !supportable_widening_operation (code, stmt,
1818 vectype_out, vectype_in,
1819 &decl1, &decl2,
1820 &code1, &code2,
1821 &dummy_int, &dummy))
1822 || (modifier == NARROW
1823 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1824 &code1, &dummy_int, &dummy)))
1826 if (vect_print_dump_info (REPORT_DETAILS))
1827 fprintf (vect_dump, "conversion not supported by target.");
1828 return false;
1831 if (modifier != NONE)
1833 /* FORNOW: SLP not supported. */
1834 if (STMT_SLP_TYPE (stmt_info))
1835 return false;
1838 if (!vec_stmt) /* transformation not required. */
1840 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1841 return true;
1844 /** Transform. **/
1845 if (vect_print_dump_info (REPORT_DETAILS))
1846 fprintf (vect_dump, "transform conversion.");
1848 /* Handle def. */
1849 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1851 if (modifier == NONE && !slp_node)
1852 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1854 prev_stmt_info = NULL;
1855 switch (modifier)
1857 case NONE:
1858 for (j = 0; j < ncopies; j++)
1860 if (j == 0)
1861 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1862 else
1863 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1865 builtin_decl =
1866 targetm.vectorize.builtin_conversion (code,
1867 vectype_out, vectype_in);
1868 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1870 /* Arguments are ready. Create the new vector stmt. */
1871 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1872 new_temp = make_ssa_name (vec_dest, new_stmt);
1873 gimple_call_set_lhs (new_stmt, new_temp);
1874 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1875 if (slp_node)
1876 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1879 if (j == 0)
1880 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1881 else
1882 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1883 prev_stmt_info = vinfo_for_stmt (new_stmt);
1885 break;
1887 case WIDEN:
1888 /* In case the vectorization factor (VF) is bigger than the number
1889 of elements that we can fit in a vectype (nunits), we have to
1890 generate more than one vector stmt - i.e - we need to "unroll"
1891 the vector stmt by a factor VF/nunits. */
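 /* As an illustration, for a V4SF -> V2DF widening with VF == 4:
    ncopies == VF / nunits_in == 1, and the single input vector is expanded
    below into two V2DF results - the two halves generated with CODE1/DECL1
    and CODE2/DECL2 - chained together through STMT_VINFO_RELATED_STMT.  */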
1892 for (j = 0; j < ncopies; j++)
1894 if (j == 0)
1895 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1896 else
1897 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1899 /* Generate first half of the widened result: */
1900 new_stmt
1901 = vect_gen_widened_results_half (code1, decl1,
1902 vec_oprnd0, vec_oprnd1,
1903 unary_op, vec_dest, gsi, stmt);
1904 if (j == 0)
1905 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1906 else
1907 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1908 prev_stmt_info = vinfo_for_stmt (new_stmt);
1910 /* Generate second half of the widened result: */
1911 new_stmt
1912 = vect_gen_widened_results_half (code2, decl2,
1913 vec_oprnd0, vec_oprnd1,
1914 unary_op, vec_dest, gsi, stmt);
1915 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1916 prev_stmt_info = vinfo_for_stmt (new_stmt);
1918 break;
1920 case NARROW:
1921 /* In case the vectorization factor (VF) is bigger than the number
1922 of elements that we can fit in a vectype (nunits), we have to
1923 generate more than one vector stmt - i.e - we need to "unroll"
1924 the vector stmt by a factor VF/nunits. */
1925 for (j = 0; j < ncopies; j++)
1927 /* Handle uses. */
1928 if (j == 0)
1930 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1931 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1933 else
1935 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1936 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1939 /* Arguments are ready. Create the new vector stmt. */
1940 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1941 vec_oprnd1);
1942 new_temp = make_ssa_name (vec_dest, new_stmt);
1943 gimple_assign_set_lhs (new_stmt, new_temp);
1944 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1946 if (j == 0)
1947 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1948 else
1949 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1951 prev_stmt_info = vinfo_for_stmt (new_stmt);
1954 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1957 if (vec_oprnds0)
1958 VEC_free (tree, heap, vec_oprnds0);
1960 return true;
1964 /* Function vectorizable_assignment.
1966 Check if STMT performs an assignment (copy) that can be vectorized.
1967 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1968 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1969 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1971 static bool
1972 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1973 gimple *vec_stmt, slp_tree slp_node)
1975 tree vec_dest;
1976 tree scalar_dest;
1977 tree op;
1978 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1979 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1980 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1981 tree new_temp;
1982 tree def;
1983 gimple def_stmt;
1984 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1985 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1986 int ncopies;
1987 int i, j;
1988 VEC(tree,heap) *vec_oprnds = NULL;
1989 tree vop;
1990 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1991 gimple new_stmt = NULL;
1992 stmt_vec_info prev_stmt_info = NULL;
1993 enum tree_code code;
1994 tree vectype_in;
1996 /* Multiple types in SLP are handled by creating the appropriate number of
1997 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1998 case of SLP. */
1999 if (slp_node)
2000 ncopies = 1;
2001 else
2002 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2004 gcc_assert (ncopies >= 1);
2006 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2007 return false;
2009 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2010 return false;
2012 /* Is vectorizable assignment? */
2013 if (!is_gimple_assign (stmt))
2014 return false;
2016 scalar_dest = gimple_assign_lhs (stmt);
2017 if (TREE_CODE (scalar_dest) != SSA_NAME)
2018 return false;
2020 code = gimple_assign_rhs_code (stmt);
2021 if (gimple_assign_single_p (stmt)
2022 || code == PAREN_EXPR
2023 || CONVERT_EXPR_CODE_P (code))
2024 op = gimple_assign_rhs1 (stmt);
2025 else
2026 return false;
2028 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2029 &def_stmt, &def, &dt[0], &vectype_in))
2031 if (vect_print_dump_info (REPORT_DETAILS))
2032 fprintf (vect_dump, "use not simple.");
2033 return false;
2036 /* We can handle NOP_EXPR conversions that do not change the number
2037 of elements or the vector size. */
2038 if (CONVERT_EXPR_CODE_P (code)
2039 && (!vectype_in
2040 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2041 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2042 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2043 return false;
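 /* E.g. (illustrative), a conversion between int and unsigned int lanes
    satisfies the check above: the element count and the vector size are
    unchanged, so the copy can be emitted below as a VIEW_CONVERT_EXPR of
    the source vector.  A conversion that changes the element width, such
    as int -> short, is rejected here and left to the type
    demotion/promotion routines.  */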
2045 if (!vec_stmt) /* transformation not required. */
2047 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2048 if (vect_print_dump_info (REPORT_DETAILS))
2049 fprintf (vect_dump, "=== vectorizable_assignment ===");
2050 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2051 return true;
2054 /** Transform. **/
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "transform assignment.");
2058 /* Handle def. */
2059 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2061 /* Handle use. */
2062 for (j = 0; j < ncopies; j++)
2064 /* Handle uses. */
2065 if (j == 0)
2066 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2067 else
2068 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2070 /* Arguments are ready. Create the new vector stmt. */
2071 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2073 if (CONVERT_EXPR_CODE_P (code))
2074 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2075 new_stmt = gimple_build_assign (vec_dest, vop);
2076 new_temp = make_ssa_name (vec_dest, new_stmt);
2077 gimple_assign_set_lhs (new_stmt, new_temp);
2078 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2079 if (slp_node)
2080 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2083 if (slp_node)
2084 continue;
2086 if (j == 0)
2087 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2088 else
2089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2091 prev_stmt_info = vinfo_for_stmt (new_stmt);
2094 VEC_free (tree, heap, vec_oprnds);
2095 return true;
2099 /* Function vectorizable_shift.
2101 Check if STMT performs a shift operation that can be vectorized.
2102 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2103 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2104 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2106 static bool
2107 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2108 gimple *vec_stmt, slp_tree slp_node)
2110 tree vec_dest;
2111 tree scalar_dest;
2112 tree op0, op1 = NULL;
2113 tree vec_oprnd1 = NULL_TREE;
2114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2115 tree vectype;
2116 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2117 enum tree_code code;
2118 enum machine_mode vec_mode;
2119 tree new_temp;
2120 optab optab;
2121 int icode;
2122 enum machine_mode optab_op2_mode;
2123 tree def;
2124 gimple def_stmt;
2125 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2126 gimple new_stmt = NULL;
2127 stmt_vec_info prev_stmt_info;
2128 int nunits_in;
2129 int nunits_out;
2130 tree vectype_out;
2131 int ncopies;
2132 int j, i;
2133 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2134 tree vop0, vop1;
2135 unsigned int k;
2136 bool scalar_shift_arg = true;
2137 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2138 int vf;
2140 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2141 return false;
2143 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2144 return false;
2146 /* Is STMT a vectorizable binary/unary operation? */
2147 if (!is_gimple_assign (stmt))
2148 return false;
2150 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2151 return false;
2153 code = gimple_assign_rhs_code (stmt);
2155 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2156 || code == RROTATE_EXPR))
2157 return false;
2159 scalar_dest = gimple_assign_lhs (stmt);
2160 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2162 op0 = gimple_assign_rhs1 (stmt);
2163 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2164 &def_stmt, &def, &dt[0], &vectype))
2166 if (vect_print_dump_info (REPORT_DETAILS))
2167 fprintf (vect_dump, "use not simple.");
2168 return false;
2170 /* If op0 is an external or constant def use a vector type with
2171 the same size as the output vector type. */
2172 if (!vectype)
2173 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2174 if (vec_stmt)
2175 gcc_assert (vectype);
2176 if (!vectype)
2178 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "no vectype for scalar type ");
2181 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2184 return false;
2187 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2188 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2189 if (nunits_out != nunits_in)
2190 return false;
2192 op1 = gimple_assign_rhs2 (stmt);
2193 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2195 if (vect_print_dump_info (REPORT_DETAILS))
2196 fprintf (vect_dump, "use not simple.");
2197 return false;
2200 if (loop_vinfo)
2201 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2202 else
2203 vf = 1;
2205 /* Multiple types in SLP are handled by creating the appropriate number of
2206 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2207 case of SLP. */
2208 if (slp_node)
2209 ncopies = 1;
2210 else
2211 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2213 gcc_assert (ncopies >= 1);
2215 /* Determine whether the shift amount is a vector, or scalar. If the
2216 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2218 if (dt[1] == vect_internal_def && !slp_node)
2219 scalar_shift_arg = false;
2220 else if (dt[1] == vect_constant_def
2221 || dt[1] == vect_external_def
2222 || dt[1] == vect_internal_def)
2224 /* In SLP, we need to check whether the shift count is the same for
2225 all the stmts of the node; in loops, a constant or invariant shift
2226 count is always a scalar shift. */
2227 if (slp_node)
2229 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2230 gimple slpstmt;
2232 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2233 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2234 scalar_shift_arg = false;
2237 else
2239 if (vect_print_dump_info (REPORT_DETAILS))
2240 fprintf (vect_dump, "operand mode requires invariant argument.");
2241 return false;
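 /* For example (illustrative statements), a[i] = b[i] << c[i] has a
    loop-variant shift count (vect_internal_def), so SCALAR_SHIFT_ARG is
    false and the vector/vector optab is required; a[i] = b[i] << 3 keeps
    SCALAR_SHIFT_ARG true and can use the vector/scalar optab probed
    below.  */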
2244 /* Vector shifted by vector. */
2245 if (!scalar_shift_arg)
2247 optab = optab_for_tree_code (code, vectype, optab_vector);
2248 if (vect_print_dump_info (REPORT_DETAILS))
2249 fprintf (vect_dump, "vector/vector shift/rotate found.");
2251 /* See if the machine has a vector shifted by scalar insn and if not
2252 then see if it has a vector shifted by vector insn. */
2253 else
2255 optab = optab_for_tree_code (code, vectype, optab_scalar);
2256 if (optab
2257 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2259 if (vect_print_dump_info (REPORT_DETAILS))
2260 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2262 else
2264 optab = optab_for_tree_code (code, vectype, optab_vector);
2265 if (optab
2266 && (optab_handler (optab, TYPE_MODE (vectype))
2267 != CODE_FOR_nothing))
2269 scalar_shift_arg = false;
2271 if (vect_print_dump_info (REPORT_DETAILS))
2272 fprintf (vect_dump, "vector/vector shift/rotate found.");
2274 /* Unlike the other binary operators, shifts/rotates have
2275 the rhs being int, instead of the same type as the lhs,
2276 so make sure the scalar is the right type if we are
2277 dealing with vectors of short/char. */
2278 if (dt[1] == vect_constant_def)
2279 op1 = fold_convert (TREE_TYPE (vectype), op1);
2284 /* Supportable by target? */
2285 if (!optab)
2287 if (vect_print_dump_info (REPORT_DETAILS))
2288 fprintf (vect_dump, "no optab.");
2289 return false;
2291 vec_mode = TYPE_MODE (vectype);
2292 icode = (int) optab_handler (optab, vec_mode);
2293 if (icode == CODE_FOR_nothing)
2295 if (vect_print_dump_info (REPORT_DETAILS))
2296 fprintf (vect_dump, "op not supported by target.");
2297 /* Check only during analysis. */
2298 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2299 || (vf < vect_min_worthwhile_factor (code)
2300 && !vec_stmt))
2301 return false;
2302 if (vect_print_dump_info (REPORT_DETAILS))
2303 fprintf (vect_dump, "proceeding using word mode.");
2306 /* Worthwhile without SIMD support? Check only during analysis. */
2307 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2308 && vf < vect_min_worthwhile_factor (code)
2309 && !vec_stmt)
2311 if (vect_print_dump_info (REPORT_DETAILS))
2312 fprintf (vect_dump, "not worthwhile without SIMD support.");
2313 return false;
2316 if (!vec_stmt) /* transformation not required. */
2318 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2319 if (vect_print_dump_info (REPORT_DETAILS))
2320 fprintf (vect_dump, "=== vectorizable_shift ===");
2321 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2322 return true;
2325 /** Transform. **/
2327 if (vect_print_dump_info (REPORT_DETAILS))
2328 fprintf (vect_dump, "transform binary/unary operation.");
2330 /* Handle def. */
2331 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2333 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2334 created in the previous stages of the recursion, so no allocation is
2335 needed, except for the case of shift with scalar shift argument. In that
2336 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2337 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2338 In case of loop-based vectorization we allocate VECs of size 1. We
2339 allocate VEC_OPRNDS1 only in case of binary operation. */
2340 if (!slp_node)
2342 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2343 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2345 else if (scalar_shift_arg)
2346 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2348 prev_stmt_info = NULL;
2349 for (j = 0; j < ncopies; j++)
2351 /* Handle uses. */
2352 if (j == 0)
2354 if (scalar_shift_arg)
2356 /* Vector shl and shr insn patterns can be defined with scalar
2357 operand 2 (shift operand). In this case, use constant or loop
2358 invariant op1 directly, without extending it to vector mode
2359 first. */
2360 optab_op2_mode = insn_data[icode].operand[2].mode;
2361 if (!VECTOR_MODE_P (optab_op2_mode))
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "operand 1 using scalar mode.");
2365 vec_oprnd1 = op1;
2366 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2367 if (slp_node)
2369 /* Store vec_oprnd1 for every vector stmt to be created
2370 for SLP_NODE. We check during the analysis that all
2371 the shift arguments are the same.
2372 TODO: Allow different constants for different vector
2373 stmts generated for an SLP instance. */
2374 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2375 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2380 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2381 (a special case for certain kind of vector shifts); otherwise,
2382 operand 1 should be of a vector type (the usual case). */
2383 if (vec_oprnd1)
2384 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2385 slp_node);
2386 else
2387 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2388 slp_node);
2390 else
2391 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2393 /* Arguments are ready. Create the new vector stmt. */
2394 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2396 vop1 = VEC_index (tree, vec_oprnds1, i);
2397 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2398 new_temp = make_ssa_name (vec_dest, new_stmt);
2399 gimple_assign_set_lhs (new_stmt, new_temp);
2400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2401 if (slp_node)
2402 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2405 if (slp_node)
2406 continue;
2408 if (j == 0)
2409 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2410 else
2411 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2412 prev_stmt_info = vinfo_for_stmt (new_stmt);
2415 VEC_free (tree, heap, vec_oprnds0);
2416 VEC_free (tree, heap, vec_oprnds1);
2418 return true;
2422 /* Function vectorizable_operation.
2424 Check if STMT performs a binary, unary or ternary operation that can
2425 be vectorized.
2426 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2427 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2428 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2430 static bool
2431 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2432 gimple *vec_stmt, slp_tree slp_node)
2434 tree vec_dest;
2435 tree scalar_dest;
2436 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2437 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2438 tree vectype;
2439 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2440 enum tree_code code;
2441 enum machine_mode vec_mode;
2442 tree new_temp;
2443 int op_type;
2444 optab optab;
2445 int icode;
2446 tree def;
2447 gimple def_stmt;
2448 enum vect_def_type dt[3]
2449 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2450 gimple new_stmt = NULL;
2451 stmt_vec_info prev_stmt_info;
2452 int nunits_in;
2453 int nunits_out;
2454 tree vectype_out;
2455 int ncopies;
2456 int j, i;
2457 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2458 tree vop0, vop1, vop2;
2459 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2460 int vf;
2462 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2463 return false;
2465 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2466 return false;
2468 /* Is STMT a vectorizable binary/unary operation? */
2469 if (!is_gimple_assign (stmt))
2470 return false;
2472 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2473 return false;
2475 code = gimple_assign_rhs_code (stmt);
2477 /* For pointer addition, we should use the normal plus for
2478 the vector addition. */
2479 if (code == POINTER_PLUS_EXPR)
2480 code = PLUS_EXPR;
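 /* E.g., a scalar statement such as q_1 = p_2 + 4 (a POINTER_PLUS_EXPR;
    illustrative only) is handled below exactly like an ordinary addition:
    the optab lookup and the generated vector stmts use PLUS_EXPR.  */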
2482 /* Support only unary, binary or ternary operations. */
2483 op_type = TREE_CODE_LENGTH (code);
2484 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2486 if (vect_print_dump_info (REPORT_DETAILS))
2487 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2488 op_type);
2489 return false;
2492 scalar_dest = gimple_assign_lhs (stmt);
2493 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2495 op0 = gimple_assign_rhs1 (stmt);
2496 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2497 &def_stmt, &def, &dt[0], &vectype))
2499 if (vect_print_dump_info (REPORT_DETAILS))
2500 fprintf (vect_dump, "use not simple.");
2501 return false;
2503 /* If op0 is an external or constant def use a vector type with
2504 the same size as the output vector type. */
2505 if (!vectype)
2506 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2507 if (vec_stmt)
2508 gcc_assert (vectype);
2509 if (!vectype)
2511 if (vect_print_dump_info (REPORT_DETAILS))
2513 fprintf (vect_dump, "no vectype for scalar type ");
2514 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2517 return false;
2520 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2521 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2522 if (nunits_out != nunits_in)
2523 return false;
2525 if (op_type == binary_op || op_type == ternary_op)
2527 op1 = gimple_assign_rhs2 (stmt);
2528 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2529 &dt[1]))
2531 if (vect_print_dump_info (REPORT_DETAILS))
2532 fprintf (vect_dump, "use not simple.");
2533 return false;
2536 if (op_type == ternary_op)
2538 op2 = gimple_assign_rhs3 (stmt);
2539 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2540 &dt[2]))
2542 if (vect_print_dump_info (REPORT_DETAILS))
2543 fprintf (vect_dump, "use not simple.");
2544 return false;
2548 if (loop_vinfo)
2549 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2550 else
2551 vf = 1;
2553 /* Multiple types in SLP are handled by creating the appropriate number of
2554 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2555 case of SLP. */
2556 if (slp_node)
2557 ncopies = 1;
2558 else
2559 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2561 gcc_assert (ncopies >= 1);
2563 /* Shifts are handled in vectorizable_shift (). */
2564 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2565 || code == RROTATE_EXPR)
2566 return false;
2568 optab = optab_for_tree_code (code, vectype, optab_default);
2570 /* Supportable by target? */
2571 if (!optab)
2573 if (vect_print_dump_info (REPORT_DETAILS))
2574 fprintf (vect_dump, "no optab.");
2575 return false;
2577 vec_mode = TYPE_MODE (vectype);
2578 icode = (int) optab_handler (optab, vec_mode);
2579 if (icode == CODE_FOR_nothing)
2581 if (vect_print_dump_info (REPORT_DETAILS))
2582 fprintf (vect_dump, "op not supported by target.");
2583 /* Check only during analysis. */
2584 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2585 || (vf < vect_min_worthwhile_factor (code)
2586 && !vec_stmt))
2587 return false;
2588 if (vect_print_dump_info (REPORT_DETAILS))
2589 fprintf (vect_dump, "proceeding using word mode.");
2592 /* Worthwhile without SIMD support? Check only during analysis. */
2593 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2594 && vf < vect_min_worthwhile_factor (code)
2595 && !vec_stmt)
2597 if (vect_print_dump_info (REPORT_DETAILS))
2598 fprintf (vect_dump, "not worthwhile without SIMD support.");
2599 return false;
2602 if (!vec_stmt) /* transformation not required. */
2604 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2605 if (vect_print_dump_info (REPORT_DETAILS))
2606 fprintf (vect_dump, "=== vectorizable_operation ===");
2607 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2608 return true;
2611 /** Transform. **/
2613 if (vect_print_dump_info (REPORT_DETAILS))
2614 fprintf (vect_dump, "transform binary/unary operation.");
2616 /* Handle def. */
2617 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2619 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2620 created in the previous stages of the recursion, so no allocation is
2621 needed, except for the case of shift with scalar shift argument. In that
2622 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2623 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2624 In case of loop-based vectorization we allocate VECs of size 1. We
2625 allocate VEC_OPRNDS1 only in case of binary operation. */
2626 if (!slp_node)
2628 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2629 if (op_type == binary_op || op_type == ternary_op)
2630 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2631 if (op_type == ternary_op)
2632 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2635 /* In case the vectorization factor (VF) is bigger than the number
2636 of elements that we can fit in a vectype (nunits), we have to generate
2637 more than one vector stmt - i.e - we need to "unroll" the
2638 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2639 from one copy of the vector stmt to the next, in the field
2640 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2641 stages to find the correct vector defs to be used when vectorizing
2642 stmts that use the defs of the current stmt. The example below
2643 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2644 we need to create 4 vectorized stmts):
2646 before vectorization:
2647 RELATED_STMT VEC_STMT
2648 S1: x = memref - -
2649 S2: z = x + 1 - -
2651 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2652 there):
2653 RELATED_STMT VEC_STMT
2654 VS1_0: vx0 = memref0 VS1_1 -
2655 VS1_1: vx1 = memref1 VS1_2 -
2656 VS1_2: vx2 = memref2 VS1_3 -
2657 VS1_3: vx3 = memref3 - -
2658 S1: x = load - VS1_0
2659 S2: z = x + 1 - -
2661 step2: vectorize stmt S2 (done here):
2662 To vectorize stmt S2 we first need to find the relevant vector
2663 def for the first operand 'x'. This is, as usual, obtained from
2664 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2665 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2666 relevant vector def 'vx0'. Having found 'vx0' we can generate
2667 the vector stmt VS2_0, and as usual, record it in the
2668 STMT_VINFO_VEC_STMT of stmt S2.
2669 When creating the second copy (VS2_1), we obtain the relevant vector
2670 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2671 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2672 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2673 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2674 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2675 chain of stmts and pointers:
2676 RELATED_STMT VEC_STMT
2677 VS1_0: vx0 = memref0 VS1_1 -
2678 VS1_1: vx1 = memref1 VS1_2 -
2679 VS1_2: vx2 = memref2 VS1_3 -
2680 VS1_3: vx3 = memref3 - -
2681 S1: x = load - VS1_0
2682 VS2_0: vz0 = vx0 + v1 VS2_1 -
2683 VS2_1: vz1 = vx1 + v1 VS2_2 -
2684 VS2_2: vz2 = vx2 + v1 VS2_3 -
2685 VS2_3: vz3 = vx3 + v1 - -
2686 S2: z = x + 1 - VS2_0 */
2688 prev_stmt_info = NULL;
2689 for (j = 0; j < ncopies; j++)
2691 /* Handle uses. */
2692 if (j == 0)
2694 if (op_type == binary_op || op_type == ternary_op)
2695 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2696 slp_node);
2697 else
2698 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2699 slp_node);
2700 if (op_type == ternary_op)
2702 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2703 VEC_quick_push (tree, vec_oprnds2,
2704 vect_get_vec_def_for_operand (op2, stmt, NULL));
2707 else
2709 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2710 if (op_type == ternary_op)
2712 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2713 VEC_quick_push (tree, vec_oprnds2,
2714 vect_get_vec_def_for_stmt_copy (dt[2],
2715 vec_oprnd));
2719 /* Arguments are ready. Create the new vector stmt. */
2720 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2722 vop1 = ((op_type == binary_op || op_type == ternary_op)
2723 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2724 vop2 = ((op_type == ternary_op)
2725 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2726 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2727 vop0, vop1, vop2);
2728 new_temp = make_ssa_name (vec_dest, new_stmt);
2729 gimple_assign_set_lhs (new_stmt, new_temp);
2730 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2731 if (slp_node)
2732 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2735 if (slp_node)
2736 continue;
2738 if (j == 0)
2739 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2740 else
2741 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2742 prev_stmt_info = vinfo_for_stmt (new_stmt);
2745 VEC_free (tree, heap, vec_oprnds0);
2746 if (vec_oprnds1)
2747 VEC_free (tree, heap, vec_oprnds1);
2748 if (vec_oprnds2)
2749 VEC_free (tree, heap, vec_oprnds2);
2751 return true;
2755 /* Get vectorized definitions for loop-based vectorization. For the first
2756 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2757 scalar operand), and for the rest we get a copy with
2758 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2759 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2760 The vectors are collected into VEC_OPRNDS. */
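 /* For example (illustrative), when demoting int to char through short on
    a target with 128-bit vectors, four V4SI defs are needed for each V16QI
    result; vectorizable_type_demotion calls this function with
    MULTI_STEP_CVT == vect_pow2 (1) - 1 == 1, so 2 * (1 + 1) == 4
    consecutive vector defs are collected into VEC_OPRNDS.  */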
2762 static void
2763 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2764 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2766 tree vec_oprnd;
2768 /* Get first vector operand. */
2769 /* All the vector operands except the very first one (that is scalar oprnd)
2770 are stmt copies. */
2771 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2772 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2773 else
2774 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2776 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2778 /* Get second vector operand. */
2779 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2780 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2782 *oprnd = vec_oprnd;
2784 /* For conversion in multiple steps, continue to get operands
2785 recursively. */
2786 if (multi_step_cvt)
2787 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2791 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2792 For multi-step conversions store the resulting vectors and call the function
2793 recursively. */
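 /* E.g. (illustrative), for an int -> char demotion through short: the
    first invocation pairs the four V4SI operands into two V8HI vectors
    using CODE (a vector pack operation such as VEC_PACK_TRUNC_EXPR on
    typical integer targets), stores them back into VEC_OPRNDS, and the
    recursive call packs those into the final V16QI vector.  */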
2795 static void
2796 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2797 int multi_step_cvt, gimple stmt,
2798 VEC (tree, heap) *vec_dsts,
2799 gimple_stmt_iterator *gsi,
2800 slp_tree slp_node, enum tree_code code,
2801 stmt_vec_info *prev_stmt_info)
2803 unsigned int i;
2804 tree vop0, vop1, new_tmp, vec_dest;
2805 gimple new_stmt;
2806 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2808 vec_dest = VEC_pop (tree, vec_dsts);
2810 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2812 /* Create demotion operation. */
2813 vop0 = VEC_index (tree, *vec_oprnds, i);
2814 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2815 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2816 new_tmp = make_ssa_name (vec_dest, new_stmt);
2817 gimple_assign_set_lhs (new_stmt, new_tmp);
2818 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2820 if (multi_step_cvt)
2821 /* Store the resulting vector for the next recursive call. */
2822 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2823 else
2825 /* This is the last step of the conversion sequence. Store the
2826 vectors in SLP_NODE or in vector info of the scalar statement
2827 (or in STMT_VINFO_RELATED_STMT chain). */
2828 if (slp_node)
2829 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2830 else
2832 if (!*prev_stmt_info)
2833 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2834 else
2835 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2837 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2842 /* For multi-step demotion operations we first generate demotion operations
2843 from the source type to the intermediate types, and then combine the
2844 results (stored in VEC_OPRNDS) with a demotion operation to the
2845 destination type. */
2846 if (multi_step_cvt)
2848 /* At each level of recursion we have half of the operands we had at the
2849 previous level. */
2850 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2851 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2852 stmt, vec_dsts, gsi, slp_node,
2853 code, prev_stmt_info);
2858 /* Function vectorizable_type_demotion.
2860 Check if STMT performs a binary or unary operation that involves
2861 type demotion, and if it can be vectorized.
2862 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2863 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2864 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2866 static bool
2867 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2868 gimple *vec_stmt, slp_tree slp_node)
2870 tree vec_dest;
2871 tree scalar_dest;
2872 tree op0;
2873 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2874 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2875 enum tree_code code, code1 = ERROR_MARK;
2876 tree def;
2877 gimple def_stmt;
2878 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2879 stmt_vec_info prev_stmt_info;
2880 int nunits_in;
2881 int nunits_out;
2882 tree vectype_out;
2883 int ncopies;
2884 int j, i;
2885 tree vectype_in;
2886 int multi_step_cvt = 0;
2887 VEC (tree, heap) *vec_oprnds0 = NULL;
2888 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2889 tree last_oprnd, intermediate_type;
2891 /* FORNOW: not supported by basic block SLP vectorization. */
2892 gcc_assert (loop_vinfo);
2894 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2895 return false;
2897 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2898 return false;
2900 /* Is STMT a vectorizable type-demotion operation? */
2901 if (!is_gimple_assign (stmt))
2902 return false;
2904 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2905 return false;
2907 code = gimple_assign_rhs_code (stmt);
2908 if (!CONVERT_EXPR_CODE_P (code))
2909 return false;
2911 scalar_dest = gimple_assign_lhs (stmt);
2912 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2914 /* Check the operands of the operation. */
2915 op0 = gimple_assign_rhs1 (stmt);
2916 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2917 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2918 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2919 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2920 && CONVERT_EXPR_CODE_P (code))))
2921 return false;
2922 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
2923 &def_stmt, &def, &dt[0], &vectype_in))
2925 if (vect_print_dump_info (REPORT_DETAILS))
2926 fprintf (vect_dump, "use not simple.");
2927 return false;
2929 /* If op0 is an external def use a vector type with the
2930 same size as the output vector type if possible. */
2931 if (!vectype_in)
2932 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2933 if (vec_stmt)
2934 gcc_assert (vectype_in);
2935 if (!vectype_in)
2937 if (vect_print_dump_info (REPORT_DETAILS))
2939 fprintf (vect_dump, "no vectype for scalar type ");
2940 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2943 return false;
2946 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2947 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2948 if (nunits_in >= nunits_out)
2949 return false;
2951 /* Multiple types in SLP are handled by creating the appropriate number of
2952 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2953 case of SLP. */
2954 if (slp_node)
2955 ncopies = 1;
2956 else
2957 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2958 gcc_assert (ncopies >= 1);
2960 /* Supportable by target? */
2961 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
2962 &code1, &multi_step_cvt, &interm_types))
2963 return false;
2965 if (!vec_stmt) /* transformation not required. */
2967 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2968 if (vect_print_dump_info (REPORT_DETAILS))
2969 fprintf (vect_dump, "=== vectorizable_demotion ===");
2970 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2971 return true;
2974 /** Transform. **/
2975 if (vect_print_dump_info (REPORT_DETAILS))
2976 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2977 ncopies);
2979 /* In case of multi-step demotion, we first generate demotion operations to
2980 the intermediate types, and then from those types to the final one.
2981 We create vector destinations for the intermediate types (TYPES) received
2982 from supportable_narrowing_operation, and store them in the correct order
2983 for future use in vect_create_vectorized_demotion_stmts(). */
2984 if (multi_step_cvt)
2985 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2986 else
2987 vec_dsts = VEC_alloc (tree, heap, 1);
2989 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2990 VEC_quick_push (tree, vec_dsts, vec_dest);
2992 if (multi_step_cvt)
2994 for (i = VEC_length (tree, interm_types) - 1;
2995 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2997 vec_dest = vect_create_destination_var (scalar_dest,
2998 intermediate_type);
2999 VEC_quick_push (tree, vec_dsts, vec_dest);
3003 /* In case the vectorization factor (VF) is bigger than the number
3004 of elements that we can fit in a vectype (nunits), we have to generate
3005 more than one vector stmt - i.e - we need to "unroll" the
3006 vector stmt by a factor VF/nunits. */
3007 last_oprnd = op0;
3008 prev_stmt_info = NULL;
3009 for (j = 0; j < ncopies; j++)
3011 /* Handle uses. */
3012 if (slp_node)
3013 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3014 else
3016 VEC_free (tree, heap, vec_oprnds0);
3017 vec_oprnds0 = VEC_alloc (tree, heap,
3018 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3019 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3020 vect_pow2 (multi_step_cvt) - 1);
3023 /* Arguments are ready. Create the new vector stmts. */
3024 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3025 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3026 multi_step_cvt, stmt, tmp_vec_dsts,
3027 gsi, slp_node, code1,
3028 &prev_stmt_info);
3031 VEC_free (tree, heap, vec_oprnds0);
3032 VEC_free (tree, heap, vec_dsts);
3033 VEC_free (tree, heap, tmp_vec_dsts);
3034 VEC_free (tree, heap, interm_types);
3036 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3037 return true;
3041 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3042 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3043 the resulting vectors and call the function recursively. */
3045 static void
3046 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3047 VEC (tree, heap) **vec_oprnds1,
3048 int multi_step_cvt, gimple stmt,
3049 VEC (tree, heap) *vec_dsts,
3050 gimple_stmt_iterator *gsi,
3051 slp_tree slp_node, enum tree_code code1,
3052 enum tree_code code2, tree decl1,
3053 tree decl2, int op_type,
3054 stmt_vec_info *prev_stmt_info)
3056 int i;
3057 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3058 gimple new_stmt1, new_stmt2;
3059 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3060 VEC (tree, heap) *vec_tmp;
3062 vec_dest = VEC_pop (tree, vec_dsts);
3063 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3065 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3067 if (op_type == binary_op)
3068 vop1 = VEC_index (tree, *vec_oprnds1, i);
3069 else
3070 vop1 = NULL_TREE;
3072 /* Generate the two halves of promotion operation. */
3073 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3074 op_type, vec_dest, gsi, stmt);
3075 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3076 op_type, vec_dest, gsi, stmt);
3077 if (is_gimple_call (new_stmt1))
3079 new_tmp1 = gimple_call_lhs (new_stmt1);
3080 new_tmp2 = gimple_call_lhs (new_stmt2);
3082 else
3084 new_tmp1 = gimple_assign_lhs (new_stmt1);
3085 new_tmp2 = gimple_assign_lhs (new_stmt2);
3088 if (multi_step_cvt)
3090 /* Store the results for the recursive call. */
3091 VEC_quick_push (tree, vec_tmp, new_tmp1);
3092 VEC_quick_push (tree, vec_tmp, new_tmp2);
3094 else
3096 /* Last step of the promotion sequence - store the results. */
3097 if (slp_node)
3099 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3100 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3102 else
3104 if (!*prev_stmt_info)
3105 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3106 else
3107 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3109 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3110 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3111 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3116 if (multi_step_cvt)
3118 /* For a multi-step promotion operation we call the function
3119 recursively for every stage. We start from the input type,
3120 create promotion operations to the intermediate types, and then
3121 create promotions to the output type. */
3122 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3123 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3124 multi_step_cvt - 1, stmt,
3125 vec_dsts, gsi, slp_node, code1,
3126 code2, decl2, decl2, op_type,
3127 prev_stmt_info);
3130 VEC_free (tree, heap, vec_tmp);
3134 /* Function vectorizable_type_promotion.
3136 Check if STMT performs a binary or unary operation that involves
3137 type promotion, and if it can be vectorized.
3138 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3139 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3140 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3142 static bool
3143 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3144 gimple *vec_stmt, slp_tree slp_node)
3146 tree vec_dest;
3147 tree scalar_dest;
3148 tree op0, op1 = NULL;
3149 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3151 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3152 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3153 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3154 int op_type;
3155 tree def;
3156 gimple def_stmt;
3157 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3158 stmt_vec_info prev_stmt_info;
3159 int nunits_in;
3160 int nunits_out;
3161 tree vectype_out;
3162 int ncopies;
3163 int j, i;
3164 tree vectype_in;
3165 tree intermediate_type = NULL_TREE;
3166 int multi_step_cvt = 0;
3167 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3168 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3170 /* FORNOW: not supported by basic block SLP vectorization. */
3171 gcc_assert (loop_vinfo);
3173 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3174 return false;
3176 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3177 return false;
3179 /* Is STMT a vectorizable type-promotion operation? */
3180 if (!is_gimple_assign (stmt))
3181 return false;
3183 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3184 return false;
3186 code = gimple_assign_rhs_code (stmt);
3187 if (!CONVERT_EXPR_CODE_P (code)
3188 && code != WIDEN_MULT_EXPR)
3189 return false;
3191 scalar_dest = gimple_assign_lhs (stmt);
3192 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3194 /* Check the operands of the operation. */
3195 op0 = gimple_assign_rhs1 (stmt);
3196 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3197 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3198 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3199 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3200 && CONVERT_EXPR_CODE_P (code))))
3201 return false;
3202 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
3203 &def_stmt, &def, &dt[0], &vectype_in))
3205 if (vect_print_dump_info (REPORT_DETAILS))
3206 fprintf (vect_dump, "use not simple.");
3207 return false;
3209 /* If op0 is an external or constant def use a vector type with
3210 the same size as the output vector type. */
3211 if (!vectype_in)
3212 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3213 if (vec_stmt)
3214 gcc_assert (vectype_in);
3215 if (!vectype_in)
3217 if (vect_print_dump_info (REPORT_DETAILS))
3219 fprintf (vect_dump, "no vectype for scalar type ");
3220 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3223 return false;
3226 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3227 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3228 if (nunits_in <= nunits_out)
3229 return false;
3231 /* Multiple types in SLP are handled by creating the appropriate number of
3232 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3233 case of SLP. */
3234 if (slp_node)
3235 ncopies = 1;
3236 else
3237 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3239 gcc_assert (ncopies >= 1);
3241 op_type = TREE_CODE_LENGTH (code);
3242 if (op_type == binary_op)
3244 op1 = gimple_assign_rhs2 (stmt);
3245 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
3247 if (vect_print_dump_info (REPORT_DETAILS))
3248 fprintf (vect_dump, "use not simple.");
3249 return false;
3253 /* Supportable by target? */
3254 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3255 &decl1, &decl2, &code1, &code2,
3256 &multi_step_cvt, &interm_types))
3257 return false;
3259 /* Binary widening operation can only be supported directly by the
3260 architecture. */
3261 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3263 if (!vec_stmt) /* transformation not required. */
3265 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3266 if (vect_print_dump_info (REPORT_DETAILS))
3267 fprintf (vect_dump, "=== vectorizable_promotion ===");
3268 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3269 return true;
3272 /** Transform. **/
3274 if (vect_print_dump_info (REPORT_DETAILS))
3275 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3276 ncopies);
3278 /* Handle def. */
3279 /* In case of multi-step promotion, we first generate promotion operations
3280 to the intermediate types, and then from those types to the final one.
3281 We store the vector destinations in VEC_DSTS in the correct order for
3282 recursive creation of promotion operations in
3283 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3284 according to TYPES received from supportable_widening_operation(). */
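 /* E.g. (illustrative), when promoting char to int through short: VEC_DSTS
    ends up holding { int_dest, short_dest }, so the first level of
    unpacking (char -> short) pops and uses the short destination, and the
    recursive call in vect_create_vectorized_promotion_stmts () uses the
    int destination for the final short -> int step.  */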
3285 if (multi_step_cvt)
3286 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3287 else
3288 vec_dsts = VEC_alloc (tree, heap, 1);
3290 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3291 VEC_quick_push (tree, vec_dsts, vec_dest);
3293 if (multi_step_cvt)
3295 for (i = VEC_length (tree, interm_types) - 1;
3296 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3298 vec_dest = vect_create_destination_var (scalar_dest,
3299 intermediate_type);
3300 VEC_quick_push (tree, vec_dsts, vec_dest);
3304 if (!slp_node)
3306 vec_oprnds0 = VEC_alloc (tree, heap,
3307 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3308 if (op_type == binary_op)
3309 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3312 /* In case the vectorization factor (VF) is bigger than the number
3313 of elements that we can fit in a vectype (nunits), we have to generate
3314 more than one vector stmt - i.e - we need to "unroll" the
3315 vector stmt by a factor VF/nunits. */
3317 prev_stmt_info = NULL;
3318 for (j = 0; j < ncopies; j++)
3320 /* Handle uses. */
3321 if (j == 0)
3323 if (slp_node)
3324 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3325 &vec_oprnds1, -1);
3326 else
3328 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3329 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3330 if (op_type == binary_op)
3332 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3333 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3337 else
3339 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3340 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3341 if (op_type == binary_op)
3343 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3344 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3348 /* Arguments are ready. Create the new vector stmts. */
3349 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3350 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3351 multi_step_cvt, stmt,
3352 tmp_vec_dsts,
3353 gsi, slp_node, code1, code2,
3354 decl1, decl2, op_type,
3355 &prev_stmt_info);
3358 VEC_free (tree, heap, vec_dsts);
3359 VEC_free (tree, heap, tmp_vec_dsts);
3360 VEC_free (tree, heap, interm_types);
3361 VEC_free (tree, heap, vec_oprnds0);
3362 VEC_free (tree, heap, vec_oprnds1);
3364 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3365 return true;
3368 /* Return true if the vector load/store described by STMT_VINFO
3369 is slow. If it is a strided access, STRIDED is true. */
3371 static bool
3372 is_vector_load_store_slow (stmt_vec_info stmt_info, bool strided)
3374 struct data_reference *dr;
3376 if (!targetm.slow_unaligned_vector_memop
3377 || !targetm.slow_unaligned_vector_memop ())
3378 return false;
3380 if (strided)
3381 dr = STMT_VINFO_DATA_REF (
3382 vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
3383 else
3384 dr = STMT_VINFO_DATA_REF (stmt_info);
3386 if (!aligned_access_p (dr))
3388 if (vect_print_dump_info (REPORT_DETAILS))
3390 fprintf (vect_dump, "Unaligned vectorizable load/store: "
3391 "slow & not allowed. ");
3392 print_gimple_stmt (vect_dump, STMT_VINFO_STMT (stmt_info),
3393 0, TDF_SLIM);
3395 return true;
3398 return false;
3401 /* Function vectorizable_store.
3403 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3404 can be vectorized.
3405 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3406 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3407 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3409 static bool
3410 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3411 slp_tree slp_node)
3413 tree scalar_dest;
3414 tree data_ref;
3415 tree op;
3416 tree vec_oprnd = NULL_TREE;
3417 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3418 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3419 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3420 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3421 struct loop *loop = NULL;
3422 enum machine_mode vec_mode;
3423 tree dummy;
3424 enum dr_alignment_support alignment_support_scheme;
3425 tree def;
3426 gimple def_stmt;
3427 enum vect_def_type dt;
3428 stmt_vec_info prev_stmt_info = NULL;
3429 tree dataref_ptr = NULL_TREE;
3430 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3431 int ncopies;
3432 int j;
3433 gimple next_stmt, first_stmt = NULL;
3434 bool strided_store = false;
3435 unsigned int group_size, i;
3436 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3437 bool inv_p;
3438 VEC(tree,heap) *vec_oprnds = NULL;
3439 bool slp = (slp_node != NULL);
3440 unsigned int vec_num;
3441 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3443 if (loop_vinfo)
3444 loop = LOOP_VINFO_LOOP (loop_vinfo);
3446 /* Multiple types in SLP are handled by creating the appropriate number of
3447 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3448 case of SLP. */
3449 if (slp)
3450 ncopies = 1;
3451 else
3452 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3454 gcc_assert (ncopies >= 1);
3456 /* FORNOW. This restriction should be relaxed. */
3457 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3459 if (vect_print_dump_info (REPORT_DETAILS))
3460 fprintf (vect_dump, "multiple types in nested loop.");
3461 return false;
3464 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3465 return false;
3467 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3468 return false;
3470 /* Is vectorizable store? */
3472 if (!is_gimple_assign (stmt))
3473 return false;
3475 scalar_dest = gimple_assign_lhs (stmt);
3476 if (TREE_CODE (scalar_dest) != ARRAY_REF
3477 && TREE_CODE (scalar_dest) != INDIRECT_REF
3478 && TREE_CODE (scalar_dest) != COMPONENT_REF
3479 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3480 && TREE_CODE (scalar_dest) != REALPART_EXPR
3481 && TREE_CODE (scalar_dest) != MEM_REF)
3482 return false;
3484 gcc_assert (gimple_assign_single_p (stmt));
3485 op = gimple_assign_rhs1 (stmt);
3486 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3488 if (vect_print_dump_info (REPORT_DETAILS))
3489 fprintf (vect_dump, "use not simple.");
3490 return false;
3493 /* The scalar rhs type needs to be trivially convertible to the vector
3494 component type. This should always be the case. */
3495 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
3497 if (vect_print_dump_info (REPORT_DETAILS))
3498 fprintf (vect_dump, "??? operands of different types");
3499 return false;
3502 vec_mode = TYPE_MODE (vectype);
3503 /* FORNOW. In some cases can vectorize even if data-type not supported
3504 (e.g. - array initialization with 0). */
3505 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3506 return false;
3508 if (!STMT_VINFO_DATA_REF (stmt_info))
3509 return false;
3511 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3513 if (vect_print_dump_info (REPORT_DETAILS))
3514 fprintf (vect_dump, "negative step for store.");
3515 return false;
3518 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3520 strided_store = true;
3521 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3522 if (!vect_strided_store_supported (vectype)
3523 && !PURE_SLP_STMT (stmt_info) && !slp)
3524 return false;
3526 if (first_stmt == stmt)
3528 /* STMT is the leader of the group. Check the operands of all the
3529 stmts of the group. */
3530 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
3531 while (next_stmt)
3533 gcc_assert (gimple_assign_single_p (next_stmt));
3534 op = gimple_assign_rhs1 (next_stmt);
3535 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3536 &def, &dt))
3538 if (vect_print_dump_info (REPORT_DETAILS))
3539 fprintf (vect_dump, "use not simple.");
3540 return false;
3542 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3547 /* Return false if unaligned vector stores are expensive. */
3548 if (is_vector_load_store_slow (stmt_info, strided_store))
3549 return false;
3551 if (!vec_stmt) /* transformation not required. */
3553 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3554 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
3555 return true;
3558 /** Transform. **/
3560 if (strided_store)
3562 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3563 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3565 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3567 /* FORNOW */
3568 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3570 /* We vectorize all the stmts of the interleaving group when we
3571 reach the last stmt in the group. */
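 /* E.g. (illustrative), for a group of four interleaved stores like the
    S1..S4 chain shown in the comment further below: in the non-SLP case
    the first three calls into this function only bump DR_GROUP_STORE_COUNT
    and return with *VEC_STMT set to NULL; all the vector stores and the
    permutation stmts are emitted only when the last store of the group is
    reached.  */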
3572 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3573 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
3574 && !slp)
3576 *vec_stmt = NULL;
3577 return true;
3580 if (slp)
3582 strided_store = false;
3583 /* VEC_NUM is the number of vect stmts to be created for this
3584 group. */
3585 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3586 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3587 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3589 else
3590 /* VEC_NUM is the number of vect stmts to be created for this
3591 group. */
3592 vec_num = group_size;
3594 else
3596 first_stmt = stmt;
3597 first_dr = dr;
3598 group_size = vec_num = 1;
3601 if (vect_print_dump_info (REPORT_DETAILS))
3602 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3604 dr_chain = VEC_alloc (tree, heap, group_size);
3605 oprnds = VEC_alloc (tree, heap, group_size);
3607 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3608 gcc_assert (alignment_support_scheme);
3610 /* In case the vectorization factor (VF) is bigger than the number
3611 of elements that we can fit in a vectype (nunits), we have to generate
3612 more than one vector stmt - i.e - we need to "unroll" the
3613 vector stmt by a factor VF/nunits. For more details see documentation in
3614 vect_get_vec_def_for_copy_stmt. */
3616 /* In case of interleaving (non-unit strided access):
3618 S1: &base + 2 = x2
3619 S2: &base = x0
3620 S3: &base + 1 = x1
3621 S4: &base + 3 = x3
3623 We create vectorized stores starting from base address (the access of the
3624 first stmt in the chain (S2 in the above example), when the last store stmt
3625 of the chain (S4) is reached:
3627 VS1: &base = vx2
3628 VS2: &base + vec_size*1 = vx0
3629 VS3: &base + vec_size*2 = vx1
3630 VS4: &base + vec_size*3 = vx3
3632 Then permutation statements are generated:
3634 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3635 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3638 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3639 (the order of the data-refs in the output of vect_permute_store_chain
3640 corresponds to the order of scalar stmts in the interleaving chain - see
3641 the documentation of vect_permute_store_chain()).
3643 In case of both multiple types and interleaving, above vector stores and
3644 permutation stmts are created for every copy. The result vector stmts are
3645 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3646 STMT_VINFO_RELATED_STMT for the next copies.
3649 prev_stmt_info = NULL;
3650 for (j = 0; j < ncopies; j++)
3652 gimple new_stmt;
3653 gimple ptr_incr;
3655 if (j == 0)
3657 if (slp)
3659 /* Get vectorized arguments for SLP_NODE. */
3660 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3661 NULL, -1);
3663 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3665 else
3667 /* For interleaved stores we collect vectorized defs for all the
3668 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3669 used as an input to vect_permute_store_chain(), and OPRNDS as
3670 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3672 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3673 OPRNDS are of size 1. */
3674 next_stmt = first_stmt;
3675 for (i = 0; i < group_size; i++)
3677 /* Since gaps are not supported for interleaved stores,
3678 GROUP_SIZE is the exact number of stmts in the chain.
3679 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3680 there is no interleaving, GROUP_SIZE is 1, and only one
3681 iteration of the loop will be executed. */
3682 gcc_assert (next_stmt
3683 && gimple_assign_single_p (next_stmt));
3684 op = gimple_assign_rhs1 (next_stmt);
3686 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3687 NULL);
3688 VEC_quick_push(tree, dr_chain, vec_oprnd);
3689 VEC_quick_push(tree, oprnds, vec_oprnd);
3690 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3694 /* We should have caught mismatched types earlier. */
3695 gcc_assert (useless_type_conversion_p (vectype,
3696 TREE_TYPE (vec_oprnd)));
3697 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3698 &dummy, &ptr_incr, false,
3699 &inv_p);
3700 gcc_assert (bb_vinfo || !inv_p);
3702 else
3704 /* For interleaved stores we created vectorized defs for all the
3705 defs stored in OPRNDS in the previous iteration (previous copy).
3706 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3707 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3708 next copy.
3709 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3710 OPRNDS are of size 1. */
3711 for (i = 0; i < group_size; i++)
3713 op = VEC_index (tree, oprnds, i);
3714 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3715 &dt);
3716 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3717 VEC_replace(tree, dr_chain, i, vec_oprnd);
3718 VEC_replace(tree, oprnds, i, vec_oprnd);
3720 dataref_ptr =
3721 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3724 if (strided_store)
3726 result_chain = VEC_alloc (tree, heap, group_size);
3727 /* Permute. */
3728 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3729 &result_chain))
3730 return false;
3733 next_stmt = first_stmt;
3734 for (i = 0; i < vec_num; i++)
3736 struct ptr_info_def *pi;
3738 if (i > 0)
3739 /* Bump the vector pointer. */
3740 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3741 NULL_TREE);
3743 if (slp)
3744 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3745 else if (strided_store)
3746 /* For strided stores vectorized defs are interleaved in
3747 vect_permute_store_chain(). */
3748 vec_oprnd = VEC_index (tree, result_chain, i);
3750 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3751 build_int_cst (reference_alias_ptr_type
3752 (DR_REF (first_dr)), 0));
3753 pi = get_ptr_info (dataref_ptr);
3754 pi->align = TYPE_ALIGN_UNIT (vectype);
3755 if (aligned_access_p (first_dr))
3756 pi->misalign = 0;
3757 else if (DR_MISALIGNMENT (first_dr) == -1)
3759 TREE_TYPE (data_ref)
3760 = build_aligned_type (TREE_TYPE (data_ref),
3761 TYPE_ALIGN (TREE_TYPE (vectype)));
3762 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
3763 pi->misalign = 0;
3765 else
3767 TREE_TYPE (data_ref)
3768 = build_aligned_type (TREE_TYPE (data_ref),
3769 TYPE_ALIGN (TREE_TYPE (vectype)));
3770 pi->misalign = DR_MISALIGNMENT (first_dr);
3773 /* Arguments are ready. Create the new vector stmt. */
3774 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3775 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3776 mark_symbols_for_renaming (new_stmt);
3778 if (slp)
3779 continue;
3781 if (j == 0)
3782 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3783 else
3784 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3786 prev_stmt_info = vinfo_for_stmt (new_stmt);
3787 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3788 if (!next_stmt)
3789 break;
3793 VEC_free (tree, heap, dr_chain);
3794 VEC_free (tree, heap, oprnds);
3795 if (result_chain)
3796 VEC_free (tree, heap, result_chain);
3797 if (vec_oprnds)
3798 VEC_free (tree, heap, vec_oprnds);
3800 return true;
3803 /* Given a vector type VECTYPE returns a builtin DECL to be used
3804 for vector permutation and stores a mask into *MASK that implements
3805 reversal of the vector elements. If that is impossible to do,
3806 returns NULL (and *MASK is unchanged). */
3808 static tree
3809 perm_mask_for_reverse (tree vectype, tree *mask)
3811 tree builtin_decl;
3812 tree mask_element_type, mask_type;
3813 tree mask_vec = NULL;
3814 int i;
3815 int nunits;
3816 if (!targetm.vectorize.builtin_vec_perm)
3817 return NULL;
3819 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
3820 &mask_element_type);
3821 if (!builtin_decl || !mask_element_type)
3822 return NULL;
3824 mask_type = get_vectype_for_scalar_type (mask_element_type);
3825 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3826 if (!mask_type
3827 || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
3828 return NULL;
3830 for (i = 0; i < nunits; i++)
3831 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
3832 mask_vec = build_vector (mask_type, mask_vec);
3834 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
3835 return NULL;
3836 if (mask)
3837 *mask = mask_vec;
3838 return builtin_decl;
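/* A minimal scalar sketch (an illustration, not used by the vectorizer)
   of the mask built above: tree_cons prepends, so the constructed mask
   reads { nunits-1, ..., 1, 0 }, and applying it with the target's
   vec_perm builtin selects result[i] = input[mask[i]], which reverses
   the element order.  */

static void
apply_reverse_mask_sketch (const int *input, const int *mask, int *result,
                           int nunits)
{
  int i;

  /* mask = {3,2,1,0} turns {a,b,c,d} into {d,c,b,a}.  */
  for (i = 0; i < nunits; i++)
    result[i] = input[mask[i]];
}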
3841 /* Given a vector variable X, that was generated for the scalar LHS of
3842 STMT, generate instructions to reverse the vector elements of X,
3843 insert them at *GSI and return the permuted vector variable. */
3845 static tree
3846 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
3848 tree vectype = TREE_TYPE (x);
3849 tree mask_vec, builtin_decl;
3850 tree perm_dest, data_ref;
3851 gimple perm_stmt;
3853 builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
3855 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3857 /* Generate the permute statement. */
3858 perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
3859 if (!useless_type_conversion_p (vectype,
3860 TREE_TYPE (TREE_TYPE (builtin_decl))))
3862 tree tem = create_tmp_reg (TREE_TYPE (TREE_TYPE (builtin_decl)), NULL);
3863 tem = make_ssa_name (tem, perm_stmt);
3864 gimple_call_set_lhs (perm_stmt, tem);
3865 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3866 perm_stmt = gimple_build_assign (NULL_TREE,
3867 build1 (VIEW_CONVERT_EXPR,
3868 vectype, tem));
3870 data_ref = make_ssa_name (perm_dest, perm_stmt);
3871 gimple_set_lhs (perm_stmt, data_ref);
3872 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3874 return data_ref;
3877 /* vectorizable_load.
3879 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3880 can be vectorized.
3881 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3882 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3883 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3885 static bool
3886 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3887 slp_tree slp_node, slp_instance slp_node_instance)
3889 tree scalar_dest;
3890 tree vec_dest = NULL;
3891 tree data_ref = NULL;
3892 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3893 stmt_vec_info prev_stmt_info;
3894 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3895 struct loop *loop = NULL;
3896 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3897 bool nested_in_vect_loop = false;
3898 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3899 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3900 tree new_temp;
3901 enum machine_mode mode;
3902 gimple new_stmt = NULL;
3903 tree dummy;
3904 enum dr_alignment_support alignment_support_scheme;
3905 tree dataref_ptr = NULL_TREE;
3906 gimple ptr_incr;
3907 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3908 int ncopies;
3909 int i, j, group_size;
3910 tree msq = NULL_TREE, lsq;
3911 tree offset = NULL_TREE;
3912 tree realignment_token = NULL_TREE;
3913 gimple phi = NULL;
3914 VEC(tree,heap) *dr_chain = NULL;
3915 bool strided_load = false;
3916 gimple first_stmt;
3917 tree scalar_type;
3918 bool inv_p;
3919 bool negative;
3920 bool compute_in_loop = false;
3921 struct loop *at_loop;
3922 int vec_num;
3923 bool slp = (slp_node != NULL);
3924 bool slp_perm = false;
3925 enum tree_code code;
3926 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3927 int vf;
3929 if (loop_vinfo)
3931 loop = LOOP_VINFO_LOOP (loop_vinfo);
3932 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3933 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3935 else
3936 vf = 1;
3938 /* Multiple types in SLP are handled by creating the appropriate number of
3939 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3940 case of SLP. */
3941 if (slp)
3942 ncopies = 1;
3943 else
3944 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3946 gcc_assert (ncopies >= 1);
3948 /* FORNOW. This restriction should be relaxed. */
3949 if (nested_in_vect_loop && ncopies > 1)
3951 if (vect_print_dump_info (REPORT_DETAILS))
3952 fprintf (vect_dump, "multiple types in nested loop.");
3953 return false;
3956 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3957 return false;
3959 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3960 return false;
3962 /* Is vectorizable load? */
3963 if (!is_gimple_assign (stmt))
3964 return false;
3966 scalar_dest = gimple_assign_lhs (stmt);
3967 if (TREE_CODE (scalar_dest) != SSA_NAME)
3968 return false;
3970 code = gimple_assign_rhs_code (stmt);
3971 if (code != ARRAY_REF
3972 && code != INDIRECT_REF
3973 && code != COMPONENT_REF
3974 && code != IMAGPART_EXPR
3975 && code != REALPART_EXPR
3976 && code != MEM_REF)
3977 return false;
3979 if (!STMT_VINFO_DATA_REF (stmt_info))
3980 return false;
3982 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
3983 if (negative && ncopies > 1)
3985 if (vect_print_dump_info (REPORT_DETAILS))
3986 fprintf (vect_dump, "multiple types with negative step.");
3987 return false;
3990 scalar_type = TREE_TYPE (DR_REF (dr));
3991 mode = TYPE_MODE (vectype);
3993 /* FORNOW. In some cases can vectorize even if data-type not supported
3994 (e.g. - data copies). */
3995 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
3997 if (vect_print_dump_info (REPORT_DETAILS))
3998 fprintf (vect_dump, "Aligned load, but unsupported type.");
3999 return false;
4002 /* The vector component type needs to be trivially convertible to the
4003 scalar lhs. This should always be the case. */
4004 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
4006 if (vect_print_dump_info (REPORT_DETAILS))
4007 fprintf (vect_dump, "??? operands of different types");
4008 return false;
4011 /* Check if the load is a part of an interleaving chain. */
4012 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4014 strided_load = true;
4015 /* FORNOW */
4016 gcc_assert (! nested_in_vect_loop);
4018 /* Check if interleaving is supported. */
4019 if (!vect_strided_load_supported (vectype)
4020 && !PURE_SLP_STMT (stmt_info) && !slp)
4021 return false;
4024 if (negative)
4026 gcc_assert (!strided_load);
4027 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4028 if (alignment_support_scheme != dr_aligned
4029 && alignment_support_scheme != dr_unaligned_supported)
4031 if (vect_print_dump_info (REPORT_DETAILS))
4032 fprintf (vect_dump, "negative step but alignment required.");
4033 return false;
4035 if (!perm_mask_for_reverse (vectype, NULL))
4037 if (vect_print_dump_info (REPORT_DETAILS))
4038 fprintf (vect_dump, "negative step and reversing not supported.");
4039 return false;
4043 /* Return false if unaligned vector loads are expensive. */
4044 if (is_vector_load_store_slow (stmt_info, strided_load))
4045 return false;
4047 if (!vec_stmt) /* transformation not required. */
4049 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4050 vect_model_load_cost (stmt_info, ncopies, NULL);
4051 return true;
4054 if (vect_print_dump_info (REPORT_DETAILS))
4055 fprintf (vect_dump, "transform load.");
4057 /** Transform. **/
4059 if (strided_load)
4061 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4062 /* Check if the chain of loads is already vectorized. */
4063 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4065 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4066 return true;
4068 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4069 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4071 /* VEC_NUM is the number of vect stmts to be created for this group. */
4072 if (slp)
4074 strided_load = false;
4075 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4076 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4077 slp_perm = true;
4079 else
4080 vec_num = group_size;
4082 dr_chain = VEC_alloc (tree, heap, vec_num);
4084 else
4086 first_stmt = stmt;
4087 first_dr = dr;
4088 group_size = vec_num = 1;
4091 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4092 gcc_assert (alignment_support_scheme);
4094 /* In case the vectorization factor (VF) is bigger than the number
4095 of elements that we can fit in a vectype (nunits), we have to generate
4096 more than one vector stmt - i.e - we need to "unroll" the
4097 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4098 from one copy of the vector stmt to the next, in the field
4099 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4100 stages to find the correct vector defs to be used when vectorizing
4101 stmts that use the defs of the current stmt. The example below
4102 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4103 need to create 4 vectorized stmts):
4105 before vectorization:
4106 RELATED_STMT VEC_STMT
4107 S1: x = memref - -
4108 S2: z = x + 1 - -
4110 step 1: vectorize stmt S1:
4111 We first create the vector stmt VS1_0, and, as usual, record a
4112 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4113 Next, we create the vector stmt VS1_1, and record a pointer to
4114 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4115 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4116 stmts and pointers:
4117 RELATED_STMT VEC_STMT
4118 VS1_0: vx0 = memref0 VS1_1 -
4119 VS1_1: vx1 = memref1 VS1_2 -
4120 VS1_2: vx2 = memref2 VS1_3 -
4121 VS1_3: vx3 = memref3 - -
4122 S1: x = load - VS1_0
4123 S2: z = x + 1 - -
4125 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4126 information we recorded in RELATED_STMT field is used to vectorize
4127 stmt S2. */
4129 /* In case of interleaving (non-unit strided access):
4131 S1: x2 = &base + 2
4132 S2: x0 = &base
4133 S3: x1 = &base + 1
4134 S4: x3 = &base + 3
4136 Vectorized loads are created in the order of memory accesses
4137 starting from the access of the first stmt of the chain:
4139 VS1: vx0 = &base
4140 VS2: vx1 = &base + vec_size*1
4141 VS3: vx3 = &base + vec_size*2
4142 VS4: vx4 = &base + vec_size*3
4144 Then permutation statements are generated:
4146 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4147 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4150 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4151 (the order of the data-refs in the output of vect_permute_load_chain
4152 corresponds to the order of scalar stmts in the interleaving chain - see
4153 the documentation of vect_permute_load_chain()).
4154 The generation of permutation stmts and recording them in
4155 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4157 In case of both multiple types and interleaving, the vector loads and
4158 permutation stmts above are created for every copy. The result vector
4159 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4160 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
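/* Worked example of the deinterleaving above (an illustration, assuming
   group_size == 2 and nunits == 4): the two adjacent vector loads see
   memory as {x0,y0,x1,y1} and {x2,y2,x3,y3}; VEC_EXTRACT_EVEN_EXPR then
   yields {x0,x1,x2,x3} and VEC_EXTRACT_ODD_EXPR yields {y0,y1,y2,y3},
   matching the order of the scalar stmts in the interleaving chain.  */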
4162 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4163 on a target that supports unaligned accesses (dr_unaligned_supported)
4164 we generate the following code:
4165 p = initial_addr;
4166 indx = 0;
4167 loop {
4168 p = p + indx * vectype_size;
4169 vec_dest = *(p);
4170 indx = indx + 1;
4173 Otherwise, the data reference is potentially unaligned on a target that
4174 does not support unaligned accesses (dr_explicit_realign_optimized) -
4175 then generate the following code, in which the data in each iteration is
4176 obtained by two vector loads, one from the previous iteration, and one
4177 from the current iteration:
4178 p1 = initial_addr;
4179 msq_init = *(floor(p1))
4180 p2 = initial_addr + VS - 1;
4181 realignment_token = call target_builtin;
4182 indx = 0;
4183 loop {
4184 p2 = p2 + indx * vectype_size
4185 lsq = *(floor(p2))
4186 vec_dest = realign_load (msq, lsq, realignment_token)
4187 indx = indx + 1;
4188 msq = lsq;
4189 } */
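/* Worked example of the realignment scheme (an illustration, assuming
   16-byte vectors): for an access at p1 == 0x1008, floor (p1) == 0x1000,
   so msq covers bytes 0x1000..0x100f and lsq covers 0x1010..0x101f;
   realign_load then extracts the 16 bytes starting at offset 8, i.e.
   0x1008..0x1017, using the shift encoded in realignment_token.  */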
4191 /* If the misalignment remains the same throughout the execution of the
4192 loop, we can create the init_addr and permutation mask at the loop
4193 preheader. Otherwise, it needs to be created inside the loop.
4194 This can only occur when vectorizing memory accesses in the inner-loop
4195 nested within an outer-loop that is being vectorized. */
4197 if (loop && nested_in_vect_loop_p (loop, stmt)
4198 && (TREE_INT_CST_LOW (DR_STEP (dr))
4199 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4201 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4202 compute_in_loop = true;
4205 if ((alignment_support_scheme == dr_explicit_realign_optimized
4206 || alignment_support_scheme == dr_explicit_realign)
4207 && !compute_in_loop)
4209 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4210 alignment_support_scheme, NULL_TREE,
4211 &at_loop);
4212 if (alignment_support_scheme == dr_explicit_realign_optimized)
4214 phi = SSA_NAME_DEF_STMT (msq);
4215 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4218 else
4219 at_loop = loop;
4221 if (negative)
4222 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4224 prev_stmt_info = NULL;
4225 for (j = 0; j < ncopies; j++)
4227 /* 1. Create the vector pointer update chain. */
4228 if (j == 0)
4229 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
4230 at_loop, offset,
4231 &dummy, &ptr_incr, false,
4232 &inv_p);
4233 else
4234 dataref_ptr =
4235 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
4237 for (i = 0; i < vec_num; i++)
4239 if (i > 0)
4240 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4241 NULL_TREE);
4243 /* 2. Create the vector-load in the loop. */
4244 switch (alignment_support_scheme)
4246 case dr_aligned:
4247 case dr_unaligned_supported:
4249 struct ptr_info_def *pi;
4250 data_ref
4251 = build2 (MEM_REF, vectype, dataref_ptr,
4252 build_int_cst (reference_alias_ptr_type
4253 (DR_REF (first_dr)), 0));
4254 pi = get_ptr_info (dataref_ptr);
4255 pi->align = TYPE_ALIGN_UNIT (vectype);
4256 if (alignment_support_scheme == dr_aligned)
4258 gcc_assert (aligned_access_p (first_dr));
4259 pi->misalign = 0;
4261 else if (DR_MISALIGNMENT (first_dr) == -1)
4263 TREE_TYPE (data_ref)
4264 = build_aligned_type (TREE_TYPE (data_ref),
4265 TYPE_ALIGN (TREE_TYPE (vectype)));
4266 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
4267 pi->misalign = 0;
4269 else
4271 TREE_TYPE (data_ref)
4272 = build_aligned_type (TREE_TYPE (data_ref),
4273 TYPE_ALIGN (TREE_TYPE (vectype)));
4274 pi->misalign = DR_MISALIGNMENT (first_dr);
4276 break;
4278 case dr_explicit_realign:
4280 tree ptr, bump;
4281 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4283 if (compute_in_loop)
4284 msq = vect_setup_realignment (first_stmt, gsi,
4285 &realignment_token,
4286 dr_explicit_realign,
4287 dataref_ptr, NULL);
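              /* The BIT_AND built below computes
                 DATAREF_PTR & -TYPE_ALIGN_UNIT (vectype), i.e. it clears
                 the low address bits and rounds the pointer down to the
                 previous vector-aligned address (the floor (p1) of the
                 scheme sketched above), assuming the alignment is a
                 power of two.  */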
4289 new_stmt = gimple_build_assign_with_ops
4290 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4291 build_int_cst
4292 (TREE_TYPE (dataref_ptr),
4293 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4294 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4295 gimple_assign_set_lhs (new_stmt, ptr);
4296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4297 data_ref
4298 = build2 (MEM_REF, vectype, ptr,
4299 build_int_cst (reference_alias_ptr_type
4300 (DR_REF (first_dr)), 0));
4301 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4302 new_stmt = gimple_build_assign (vec_dest, data_ref);
4303 new_temp = make_ssa_name (vec_dest, new_stmt);
4304 gimple_assign_set_lhs (new_stmt, new_temp);
4305 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4306 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4307 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4308 msq = new_temp;
4310 bump = size_binop (MULT_EXPR, vs_minus_1,
4311 TYPE_SIZE_UNIT (scalar_type));
4312 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4313 new_stmt = gimple_build_assign_with_ops
4314 (BIT_AND_EXPR, NULL_TREE, ptr,
4315 build_int_cst
4316 (TREE_TYPE (ptr),
4317 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4318 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4319 gimple_assign_set_lhs (new_stmt, ptr);
4320 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4321 data_ref
4322 = build2 (MEM_REF, vectype, ptr,
4323 build_int_cst (reference_alias_ptr_type
4324 (DR_REF (first_dr)), 0));
4325 break;
4327 case dr_explicit_realign_optimized:
4328 new_stmt = gimple_build_assign_with_ops
4329 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4330 build_int_cst
4331 (TREE_TYPE (dataref_ptr),
4332 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4333 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4334 gimple_assign_set_lhs (new_stmt, new_temp);
4335 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4336 data_ref
4337 = build2 (MEM_REF, vectype, new_temp,
4338 build_int_cst (reference_alias_ptr_type
4339 (DR_REF (first_dr)), 0));
4340 break;
4341 default:
4342 gcc_unreachable ();
4344 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4345 new_stmt = gimple_build_assign (vec_dest, data_ref);
4346 new_temp = make_ssa_name (vec_dest, new_stmt);
4347 gimple_assign_set_lhs (new_stmt, new_temp);
4348 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4349 mark_symbols_for_renaming (new_stmt);
4351 /* 3. Handle explicit realignment if necessary/supported. Create in
4352 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
4353 if (alignment_support_scheme == dr_explicit_realign_optimized
4354 || alignment_support_scheme == dr_explicit_realign)
4356 tree tmp;
4358 lsq = gimple_assign_lhs (new_stmt);
4359 if (!realignment_token)
4360 realignment_token = dataref_ptr;
4361 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4362 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
4363 realignment_token);
4364 new_stmt = gimple_build_assign (vec_dest, tmp);
4365 new_temp = make_ssa_name (vec_dest, new_stmt);
4366 gimple_assign_set_lhs (new_stmt, new_temp);
4367 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4369 if (alignment_support_scheme == dr_explicit_realign_optimized)
4371 gcc_assert (phi);
4372 if (i == vec_num - 1 && j == ncopies - 1)
4373 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
4374 UNKNOWN_LOCATION);
4375 msq = lsq;
4379 /* 4. Handle invariant-load. */
4380 if (inv_p && !bb_vinfo)
4382 gcc_assert (!strided_load);
4383 gcc_assert (nested_in_vect_loop_p (loop, stmt));
4384 if (j == 0)
4386 int k;
4387 tree t = NULL_TREE;
4388 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
4390 /* CHECKME: bitpos depends on endianness? */
4391 bitpos = bitsize_zero_node;
4392 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4393 bitsize, bitpos);
4394 vec_dest =
4395 vect_create_destination_var (scalar_dest, NULL_TREE);
4396 new_stmt = gimple_build_assign (vec_dest, vec_inv);
4397 new_temp = make_ssa_name (vec_dest, new_stmt);
4398 gimple_assign_set_lhs (new_stmt, new_temp);
4399 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4401 for (k = nunits - 1; k >= 0; --k)
4402 t = tree_cons (NULL_TREE, new_temp, t);
4403 /* FIXME: use build_constructor directly. */
4404 vec_inv = build_constructor_from_list (vectype, t);
4405 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4406 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4408 else
4409 gcc_unreachable (); /* FORNOW. */
4412 if (negative)
4414 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4415 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4418 /* Collect vector loads and later create their permutation in
4419 vect_transform_strided_load (). */
4420 if (strided_load || slp_perm)
4421 VEC_quick_push (tree, dr_chain, new_temp);
4423 /* Store vector loads in the corresponding SLP_NODE. */
4424 if (slp && !slp_perm)
4425 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4428 if (slp && !slp_perm)
4429 continue;
4431 if (slp_perm)
4433 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4434 slp_node_instance, false))
4436 VEC_free (tree, heap, dr_chain);
4437 return false;
4440 else
4442 if (strided_load)
4444 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
4445 return false;
4447 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4448 VEC_free (tree, heap, dr_chain);
4449 dr_chain = VEC_alloc (tree, heap, group_size);
4451 else
4453 if (j == 0)
4454 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4455 else
4456 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4457 prev_stmt_info = vinfo_for_stmt (new_stmt);
4462 if (dr_chain)
4463 VEC_free (tree, heap, dr_chain);
4465 return true;
4468 /* Function vect_is_simple_cond.
4470 Input:
4471 LOOP - the loop that is being vectorized.
4472 COND - Condition that is checked for simple use.
4474 Returns whether a COND can be vectorized. Checks whether
4475 condition operands are supportable using vect_is_simple_use. */
4477 static bool
4478 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4480 tree lhs, rhs;
4481 tree def;
4482 enum vect_def_type dt;
4484 if (!COMPARISON_CLASS_P (cond))
4485 return false;
4487 lhs = TREE_OPERAND (cond, 0);
4488 rhs = TREE_OPERAND (cond, 1);
4490 if (TREE_CODE (lhs) == SSA_NAME)
4492 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4493 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4494 &dt))
4495 return false;
4497 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4498 && TREE_CODE (lhs) != FIXED_CST)
4499 return false;
4501 if (TREE_CODE (rhs) == SSA_NAME)
4503 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4504 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4505 &dt))
4506 return false;
4508 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4509 && TREE_CODE (rhs) != FIXED_CST)
4510 return false;
4512 return true;
4515 /* vectorizable_condition.
4517 Check if STMT is conditional modify expression that can be vectorized.
4518 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4519 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4520 at GSI.
4522 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
4523 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
4524 else clause if it is 2).
4526 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4528 bool
4529 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4530 gimple *vec_stmt, tree reduc_def, int reduc_index)
4532 tree scalar_dest = NULL_TREE;
4533 tree vec_dest = NULL_TREE;
4534 tree op = NULL_TREE;
4535 tree cond_expr, then_clause, else_clause;
4536 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4537 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4538 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4539 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4540 tree vec_compare, vec_cond_expr;
4541 tree new_temp;
4542 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4543 enum machine_mode vec_mode;
4544 tree def;
4545 enum vect_def_type dt, dts[4];
4546 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4547 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4548 enum tree_code code;
4549 stmt_vec_info prev_stmt_info = NULL;
4550 int j;
4552 /* FORNOW: unsupported in basic block SLP. */
4553 gcc_assert (loop_vinfo);
4555 gcc_assert (ncopies >= 1);
4556 if (reduc_index && ncopies > 1)
4557 return false; /* FORNOW */
4559 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4560 return false;
4562 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4563 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4564 && reduc_def))
4565 return false;
4567 /* FORNOW: SLP not supported. */
4568 if (STMT_SLP_TYPE (stmt_info))
4569 return false;
4571 /* FORNOW: not yet supported. */
4572 if (STMT_VINFO_LIVE_P (stmt_info))
4574 if (vect_print_dump_info (REPORT_DETAILS))
4575 fprintf (vect_dump, "value used after loop.");
4576 return false;
4579 /* Is vectorizable conditional operation? */
4580 if (!is_gimple_assign (stmt))
4581 return false;
4583 code = gimple_assign_rhs_code (stmt);
4585 if (code != COND_EXPR)
4586 return false;
4588 gcc_assert (gimple_assign_single_p (stmt));
4589 op = gimple_assign_rhs1 (stmt);
4590 cond_expr = TREE_OPERAND (op, 0);
4591 then_clause = TREE_OPERAND (op, 1);
4592 else_clause = TREE_OPERAND (op, 2);
4594 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4595 return false;
4597 /* We do not handle two different vector types for the condition
4598 and the values. */
4599 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
4600 TREE_TYPE (vectype)))
4601 return false;
4603 if (TREE_CODE (then_clause) == SSA_NAME)
4605 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4606 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4607 &then_def_stmt, &def, &dt))
4608 return false;
4610 else if (TREE_CODE (then_clause) != INTEGER_CST
4611 && TREE_CODE (then_clause) != REAL_CST
4612 && TREE_CODE (then_clause) != FIXED_CST)
4613 return false;
4615 if (TREE_CODE (else_clause) == SSA_NAME)
4617 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4618 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4619 &else_def_stmt, &def, &dt))
4620 return false;
4622 else if (TREE_CODE (else_clause) != INTEGER_CST
4623 && TREE_CODE (else_clause) != REAL_CST
4624 && TREE_CODE (else_clause) != FIXED_CST)
4625 return false;
4628 vec_mode = TYPE_MODE (vectype);
4630 if (!vec_stmt)
4632 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4633 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
4636 /* Transform */
4638 /* Handle def. */
4639 scalar_dest = gimple_assign_lhs (stmt);
4640 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4642 /* Handle cond expr. */
4643 for (j = 0; j < ncopies; j++)
4645 gimple new_stmt;
4646 if (j == 0)
4648 gimple gtemp;
4649 vec_cond_lhs =
4650 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4651 stmt, NULL);
4652 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4653 NULL, &gtemp, &def, &dts[0]);
4654 vec_cond_rhs =
4655 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4656 stmt, NULL);
4657 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4658 NULL, &gtemp, &def, &dts[1]);
4659 if (reduc_index == 1)
4660 vec_then_clause = reduc_def;
4661 else
4663 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4664 stmt, NULL);
4665 vect_is_simple_use (then_clause, loop_vinfo,
4666 NULL, &gtemp, &def, &dts[2]);
4668 if (reduc_index == 2)
4669 vec_else_clause = reduc_def;
4670 else
4672 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4673 stmt, NULL);
4674 vect_is_simple_use (else_clause, loop_vinfo,
4675 NULL, &gtemp, &def, &dts[3]);
4678 else
4680 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4681 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4682 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4683 vec_then_clause);
4684 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4685 vec_else_clause);
4688 /* Arguments are ready. Create the new vector stmt. */
4689 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4690 vec_cond_lhs, vec_cond_rhs);
4691 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4692 vec_compare, vec_then_clause, vec_else_clause);
4694 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4695 new_temp = make_ssa_name (vec_dest, new_stmt);
4696 gimple_assign_set_lhs (new_stmt, new_temp);
4697 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4698 if (j == 0)
4699 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4700 else
4701 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4703 prev_stmt_info = vinfo_for_stmt (new_stmt);
4706 return true;
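/* A minimal scalar model (an illustration, not used by the vectorizer)
   of the transformation performed by vectorizable_condition above: a
   scalar COND_EXPR such as d = a < b ? c : e becomes a vector compare
   feeding a VEC_COND_EXPR, i.e. each lane independently picks the
   then-value or the else-value.  */

static void
vec_cond_sketch (const int *a, const int *b, const int *c, const int *e,
                 int *d, int nunits)
{
  int i;

  for (i = 0; i < nunits; i++)
    d[i] = (a[i] < b[i]) ? c[i] : e[i];  /* VEC_COND_EXPR, lane by lane.  */
}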
4710 /* Make sure the statement is vectorizable. */
4712 bool
4713 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4715 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4716 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4717 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4718 bool ok;
4719 tree scalar_type, vectype;
4721 if (vect_print_dump_info (REPORT_DETAILS))
4723 fprintf (vect_dump, "==> examining statement: ");
4724 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4727 if (gimple_has_volatile_ops (stmt))
4729 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4730 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4732 return false;
4735 /* Skip stmts that do not need to be vectorized. In loops this is expected
4736 to include:
4737 - the COND_EXPR which is the loop exit condition
4738 - any LABEL_EXPRs in the loop
4739 - computations that are used only for array indexing or loop control.
4740 In basic blocks we only analyze statements that are a part of some SLP
4741 instance, therefore, all the statements are relevant. */
4743 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4744 && !STMT_VINFO_LIVE_P (stmt_info))
4746 if (vect_print_dump_info (REPORT_DETAILS))
4747 fprintf (vect_dump, "irrelevant.");
4749 return true;
4752 switch (STMT_VINFO_DEF_TYPE (stmt_info))
4754 case vect_internal_def:
4755 break;
4757 case vect_reduction_def:
4758 case vect_nested_cycle:
4759 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
4760 || relevance == vect_used_in_outer_by_reduction
4761 || relevance == vect_unused_in_scope));
4762 break;
4764 case vect_induction_def:
4765 case vect_constant_def:
4766 case vect_external_def:
4767 case vect_unknown_def_type:
4768 default:
4769 gcc_unreachable ();
4772 if (bb_vinfo)
4774 gcc_assert (PURE_SLP_STMT (stmt_info));
4776 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
4777 if (vect_print_dump_info (REPORT_DETAILS))
4779 fprintf (vect_dump, "get vectype for scalar type: ");
4780 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4783 vectype = get_vectype_for_scalar_type (scalar_type);
4784 if (!vectype)
4786 if (vect_print_dump_info (REPORT_DETAILS))
4788 fprintf (vect_dump, "not SLPed: unsupported data-type ");
4789 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4791 return false;
4794 if (vect_print_dump_info (REPORT_DETAILS))
4796 fprintf (vect_dump, "vectype: ");
4797 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4800 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4803 if (STMT_VINFO_RELEVANT_P (stmt_info))
4805 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
4806 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4807 *need_to_vectorize = true;
4810 ok = true;
4811 if (!bb_vinfo
4812 && (STMT_VINFO_RELEVANT_P (stmt_info)
4813 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4814 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4815 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4816 || vectorizable_conversion (stmt, NULL, NULL, NULL)
4817 || vectorizable_shift (stmt, NULL, NULL, NULL)
4818 || vectorizable_operation (stmt, NULL, NULL, NULL)
4819 || vectorizable_assignment (stmt, NULL, NULL, NULL)
4820 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4821 || vectorizable_call (stmt, NULL, NULL)
4822 || vectorizable_store (stmt, NULL, NULL, NULL)
4823 || vectorizable_reduction (stmt, NULL, NULL, NULL)
4824 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4825 else
4827 if (bb_vinfo)
4828 ok = (vectorizable_shift (stmt, NULL, NULL, node)
4829 || vectorizable_operation (stmt, NULL, NULL, node)
4830 || vectorizable_assignment (stmt, NULL, NULL, node)
4831 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4832 || vectorizable_store (stmt, NULL, NULL, node));
4835 if (!ok)
4837 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4839 fprintf (vect_dump, "not vectorized: relevant stmt not ");
4840 fprintf (vect_dump, "supported: ");
4841 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4844 return false;
4847 if (bb_vinfo)
4848 return true;
4850 /* Stmts that are (also) "live" (i.e. - that are used outside the loop)
4851 need extra handling, except for vectorizable reductions. */
4852 if (STMT_VINFO_LIVE_P (stmt_info)
4853 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4854 ok = vectorizable_live_operation (stmt, NULL, NULL);
4856 if (!ok)
4858 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4860 fprintf (vect_dump, "not vectorized: live stmt not ");
4861 fprintf (vect_dump, "supported: ");
4862 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4865 return false;
4868 if (!PURE_SLP_STMT (stmt_info))
4870 /* Groups of strided accesses whose size is not a power of 2 are not
4871 vectorizable yet using loop-vectorization. Therefore, if this stmt
4872 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4873 loop-based vectorized), the loop cannot be vectorized. */
4874 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4875 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4876 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4878 if (vect_print_dump_info (REPORT_DETAILS))
4880 fprintf (vect_dump, "not vectorized: the size of group "
4881 "of strided accesses is not a power of 2");
4882 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4885 return false;
4889 return true;
4893 /* Function vect_transform_stmt.
4895 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4897 bool
4898 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4899 bool *strided_store, slp_tree slp_node,
4900 slp_instance slp_node_instance)
4902 bool is_store = false;
4903 gimple vec_stmt = NULL;
4904 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4905 gimple orig_stmt_in_pattern;
4906 bool done;
4908 switch (STMT_VINFO_TYPE (stmt_info))
4910 case type_demotion_vec_info_type:
4911 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4912 gcc_assert (done);
4913 break;
4915 case type_promotion_vec_info_type:
4916 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4917 gcc_assert (done);
4918 break;
4920 case type_conversion_vec_info_type:
4921 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4922 gcc_assert (done);
4923 break;
4925 case induc_vec_info_type:
4926 gcc_assert (!slp_node);
4927 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4928 gcc_assert (done);
4929 break;
4931 case shift_vec_info_type:
4932 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
4933 gcc_assert (done);
4934 break;
4936 case op_vec_info_type:
4937 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4938 gcc_assert (done);
4939 break;
4941 case assignment_vec_info_type:
4942 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4943 gcc_assert (done);
4944 break;
4946 case load_vec_info_type:
4947 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4948 slp_node_instance);
4949 gcc_assert (done);
4950 break;
4952 case store_vec_info_type:
4953 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4954 gcc_assert (done);
4955 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4957 /* In case of interleaving, the whole chain is vectorized when the
4958 last store in the chain is reached. Store stmts before the last
4959 one are skipped, and their vec_stmt_info shouldn't be freed
4960 meanwhile. */
4961 *strided_store = true;
4962 if (STMT_VINFO_VEC_STMT (stmt_info))
4963 is_store = true;
4965 else
4966 is_store = true;
4967 break;
4969 case condition_vec_info_type:
4970 gcc_assert (!slp_node);
4971 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
4972 gcc_assert (done);
4973 break;
4975 case call_vec_info_type:
4976 gcc_assert (!slp_node);
4977 done = vectorizable_call (stmt, gsi, &vec_stmt);
4978 stmt = gsi_stmt (*gsi);
4979 break;
4981 case reduc_vec_info_type:
4982 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
4983 gcc_assert (done);
4984 break;
4986 default:
4987 if (!STMT_VINFO_LIVE_P (stmt_info))
4989 if (vect_print_dump_info (REPORT_DETAILS))
4990 fprintf (vect_dump, "stmt not supported.");
4991 gcc_unreachable ();
4995 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4996 is being vectorized, but outside the immediately enclosing loop. */
4997 if (vec_stmt
4998 && STMT_VINFO_LOOP_VINFO (stmt_info)
4999 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5000 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5001 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5002 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5003 || STMT_VINFO_RELEVANT (stmt_info) ==
5004 vect_used_in_outer_by_reduction))
5006 struct loop *innerloop = LOOP_VINFO_LOOP (
5007 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5008 imm_use_iterator imm_iter;
5009 use_operand_p use_p;
5010 tree scalar_dest;
5011 gimple exit_phi;
5013 if (vect_print_dump_info (REPORT_DETAILS))
5014 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5016 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5017 (to be used when vectorizing outer-loop stmts that use the DEF of
5018 STMT). */
5019 if (gimple_code (stmt) == GIMPLE_PHI)
5020 scalar_dest = PHI_RESULT (stmt);
5021 else
5022 scalar_dest = gimple_assign_lhs (stmt);
5024 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5026 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5028 exit_phi = USE_STMT (use_p);
5029 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5034 /* Handle stmts whose DEF is used outside the loop-nest that is
5035 being vectorized. */
5036 if (STMT_VINFO_LIVE_P (stmt_info)
5037 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5039 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5040 gcc_assert (done);
5043 if (vec_stmt)
5045 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5046 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
5047 if (orig_stmt_in_pattern)
5049 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
5050 /* STMT was inserted by the vectorizer to replace a computation idiom.
5051 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
5052 computed this idiom. We need to record a pointer to VEC_STMT in
5053 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
5054 documentation of vect_pattern_recog. */
5055 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
5056 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
5060 return is_store;
5064 /* Remove a group of stores (for SLP or interleaving), free their
5065 stmt_vec_info. */
5067 void
5068 vect_remove_stores (gimple first_stmt)
5070 gimple next = first_stmt;
5071 gimple tmp;
5072 gimple_stmt_iterator next_si;
5074 while (next)
5076 /* Free the attached stmt_vec_info and remove the stmt. */
5077 next_si = gsi_for_stmt (next);
5078 gsi_remove (&next_si, true);
5079 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
5080 free_stmt_vec_info (next);
5081 next = tmp;
5086 /* Function new_stmt_vec_info.
5088 Create and initialize a new stmt_vec_info struct for STMT. */
5090 stmt_vec_info
5091 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5092 bb_vec_info bb_vinfo)
5094 stmt_vec_info res;
5095 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5097 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5098 STMT_VINFO_STMT (res) = stmt;
5099 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5100 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5101 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5102 STMT_VINFO_LIVE_P (res) = false;
5103 STMT_VINFO_VECTYPE (res) = NULL;
5104 STMT_VINFO_VEC_STMT (res) = NULL;
5105 STMT_VINFO_VECTORIZABLE (res) = true;
5106 STMT_VINFO_IN_PATTERN_P (res) = false;
5107 STMT_VINFO_RELATED_STMT (res) = NULL;
5108 STMT_VINFO_DATA_REF (res) = NULL;
5110 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5111 STMT_VINFO_DR_OFFSET (res) = NULL;
5112 STMT_VINFO_DR_INIT (res) = NULL;
5113 STMT_VINFO_DR_STEP (res) = NULL;
5114 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5116 if (gimple_code (stmt) == GIMPLE_PHI
5117 && is_loop_header_bb_p (gimple_bb (stmt)))
5118 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5119 else
5120 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5122 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5123 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5124 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5125 STMT_SLP_TYPE (res) = loop_vect;
5126 DR_GROUP_FIRST_DR (res) = NULL;
5127 DR_GROUP_NEXT_DR (res) = NULL;
5128 DR_GROUP_SIZE (res) = 0;
5129 DR_GROUP_STORE_COUNT (res) = 0;
5130 DR_GROUP_GAP (res) = 0;
5131 DR_GROUP_SAME_DR_STMT (res) = NULL;
5132 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
5134 return res;
5138 /* Create a vector for stmt_vec_info structs. */
5140 void
5141 init_stmt_vec_info_vec (void)
5143 gcc_assert (!stmt_vec_info_vec);
5144 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5148 /* Free the vector of stmt_vec_info structs. */
5150 void
5151 free_stmt_vec_info_vec (void)
5153 gcc_assert (stmt_vec_info_vec);
5154 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5158 /* Free stmt vectorization related info. */
5160 void
5161 free_stmt_vec_info (gimple stmt)
5163 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5165 if (!stmt_info)
5166 return;
5168 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5169 set_vinfo_for_stmt (stmt, NULL);
5170 free (stmt_info);
5174 /* Function get_vectype_for_scalar_type_and_size.
5176 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5177 by the target. */
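/* Worked example (an illustration): for a 4-byte int scalar type and
   SIZE == 16 the lookup below yields a 16-byte integer vector mode, so
   nunits == 16 / 4 == 4 and the result is a 4-unit vector type; with
   SIZE == 0 the target's preferred SIMD mode is used instead.  */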
5179 static tree
5180 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5182 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5183 enum machine_mode simd_mode;
5184 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5185 int nunits;
5186 tree vectype;
5188 if (nbytes == 0)
5189 return NULL_TREE;
5191 /* We can't build a vector type of elements with alignment bigger than
5192 their size. */
5193 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5194 return NULL_TREE;
5196 /* If we'd build a vector type of elements whose mode precision doesn't
5197 match their type's precision, we'll get mismatched types on vector
5198 extracts via BIT_FIELD_REFs. This effectively means we disable
5199 vectorization of bool and/or enum types in some languages. */
5200 if (INTEGRAL_TYPE_P (scalar_type)
5201 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5202 return NULL_TREE;
5204 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5205 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5206 return NULL_TREE;
5208 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5209 When the component mode passes the above test simply use a type
5210 corresponding to that mode. The theory is that any use that
5211 would cause problems with this will disable vectorization anyway. */
5212 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5213 && !INTEGRAL_TYPE_P (scalar_type)
5214 && !POINTER_TYPE_P (scalar_type))
5215 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5217 /* If no size was supplied use the mode the target prefers. Otherwise
5218 lookup a vector mode of the specified size. */
5219 if (size == 0)
5220 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5221 else
5222 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5223 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5224 if (nunits <= 1)
5225 return NULL_TREE;
5227 vectype = build_vector_type (scalar_type, nunits);
5228 if (vect_print_dump_info (REPORT_DETAILS))
5230 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5231 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5234 if (!vectype)
5235 return NULL_TREE;
5237 if (vect_print_dump_info (REPORT_DETAILS))
5239 fprintf (vect_dump, "vectype: ");
5240 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5243 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5244 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5246 if (vect_print_dump_info (REPORT_DETAILS))
5247 fprintf (vect_dump, "mode not supported by target.");
5248 return NULL_TREE;
5251 return vectype;
5254 unsigned int current_vector_size;
5256 /* Function get_vectype_for_scalar_type.
5258 Returns the vector type corresponding to SCALAR_TYPE as supported
5259 by the target. */
5261 tree
5262 get_vectype_for_scalar_type (tree scalar_type)
5264 tree vectype;
5265 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5266 current_vector_size);
5267 if (vectype
5268 && current_vector_size == 0)
5269 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5270 return vectype;
5273 /* Function get_same_sized_vectype
5275 Returns a vector type corresponding to SCALAR_TYPE of size
5276 VECTOR_TYPE if supported by the target. */
5278 tree
5279 get_same_sized_vectype (tree scalar_type, tree vector_type)
5281 return get_vectype_for_scalar_type_and_size
5282 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5285 /* Function vect_is_simple_use.
5287 Input:
5288 LOOP_VINFO - the vect info of the loop that is being vectorized.
5289 BB_VINFO - the vect info of the basic block that is being vectorized.
5290 OPERAND - operand of a stmt in the loop or bb.
5291 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5293 Returns whether a stmt with OPERAND can be vectorized.
5294 For loops, supportable operands are constants, loop invariants, and operands
5295 that are defined by the current iteration of the loop. Unsupportable
5296 operands are those that are defined by a previous iteration of the loop (as
5297 is the case in reduction/induction computations).
5298 For basic blocks, supportable operands are constants and bb invariants.
5299 For now, operands defined outside the basic block are not supported. */
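/* Worked example (an illustration): when vectorizing
   a[i] = b[i] * x + 5 inside a loop, the constant 5 is classified as
   vect_constant_def, the loop-invariant X defined before the loop as
   vect_external_def, and the value loaded from b[i] (defined by a stmt
   of the current iteration) as vect_internal_def.  */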
5301 bool
5302 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5303 bb_vec_info bb_vinfo, gimple *def_stmt,
5304 tree *def, enum vect_def_type *dt)
5306 basic_block bb;
5307 stmt_vec_info stmt_vinfo;
5308 struct loop *loop = NULL;
5310 if (loop_vinfo)
5311 loop = LOOP_VINFO_LOOP (loop_vinfo);
5313 *def_stmt = NULL;
5314 *def = NULL_TREE;
5316 if (vect_print_dump_info (REPORT_DETAILS))
5318 fprintf (vect_dump, "vect_is_simple_use: operand ");
5319 print_generic_expr (vect_dump, operand, TDF_SLIM);
5322 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5324 *dt = vect_constant_def;
5325 return true;
5328 if (is_gimple_min_invariant (operand))
5330 *def = operand;
5331 *dt = vect_external_def;
5332 return true;
5335 if (TREE_CODE (operand) == PAREN_EXPR)
5337 if (vect_print_dump_info (REPORT_DETAILS))
5338 fprintf (vect_dump, "non-associatable copy.");
5339 operand = TREE_OPERAND (operand, 0);
5342 if (TREE_CODE (operand) != SSA_NAME)
5344 if (vect_print_dump_info (REPORT_DETAILS))
5345 fprintf (vect_dump, "not ssa-name.");
5346 return false;
5349 *def_stmt = SSA_NAME_DEF_STMT (operand);
5350 if (*def_stmt == NULL)
5352 if (vect_print_dump_info (REPORT_DETAILS))
5353 fprintf (vect_dump, "no def_stmt.");
5354 return false;
5357 if (vect_print_dump_info (REPORT_DETAILS))
5359 fprintf (vect_dump, "def_stmt: ");
5360 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5363 /* Empty stmt is expected only in case of a function argument.
5364 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5365 if (gimple_nop_p (*def_stmt))
5367 *def = operand;
5368 *dt = vect_external_def;
5369 return true;
5372 bb = gimple_bb (*def_stmt);
5374 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5375 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5376 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5377 *dt = vect_external_def;
5378 else
5380 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5381 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5384 if (*dt == vect_unknown_def_type)
5386 if (vect_print_dump_info (REPORT_DETAILS))
5387 fprintf (vect_dump, "Unsupported pattern.");
5388 return false;
5391 if (vect_print_dump_info (REPORT_DETAILS))
5392 fprintf (vect_dump, "type of def: %d.",*dt);
5394 switch (gimple_code (*def_stmt))
5396 case GIMPLE_PHI:
5397 *def = gimple_phi_result (*def_stmt);
5398 break;
5400 case GIMPLE_ASSIGN:
5401 *def = gimple_assign_lhs (*def_stmt);
5402 break;
5404 case GIMPLE_CALL:
5405 *def = gimple_call_lhs (*def_stmt);
5406 if (*def != NULL)
5407 break;
5408 /* FALLTHRU */
5409 default:
5410 if (vect_print_dump_info (REPORT_DETAILS))
5411 fprintf (vect_dump, "unsupported defining stmt: ");
5412 return false;
5415 return true;
5418 /* Function vect_is_simple_use_1.
5420 Same as vect_is_simple_use but also determines the vector operand
5421 type of OPERAND and stores it to *VECTYPE. If the definition of
5422 OPERAND is vect_uninitialized_def, vect_constant_def or
5423 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5424 is responsible to compute the best suited vector type for the
5425 scalar operand. */
5427 bool
5428 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5429 bb_vec_info bb_vinfo, gimple *def_stmt,
5430 tree *def, enum vect_def_type *dt, tree *vectype)
5432 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5433 return false;
5435 /* Now get a vector type if the def is internal, otherwise supply
5436 NULL_TREE and leave it up to the caller to figure out a proper
5437 type for the use stmt. */
5438 if (*dt == vect_internal_def
5439 || *dt == vect_induction_def
5440 || *dt == vect_reduction_def
5441 || *dt == vect_double_reduction_def
5442 || *dt == vect_nested_cycle)
5444 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5445 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5446 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5447 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5448 gcc_assert (*vectype != NULL_TREE);
5450 else if (*dt == vect_uninitialized_def
5451 || *dt == vect_constant_def
5452 || *dt == vect_external_def)
5453 *vectype = NULL_TREE;
5454 else
5455 gcc_unreachable ();
5457 return true;
5461 /* Function supportable_widening_operation
5463 Check whether an operation represented by the code CODE is a
5464 widening operation that is supported by the target platform in
5465 vector form (i.e., when operating on arguments of type VECTYPE_IN
5466 producing a result of type VECTYPE_OUT).
5468 Widening operations we currently support are NOP (CONVERT), FLOAT
5469 and WIDEN_MULT. This function checks if these operations are supported
5470 by the target platform either directly (via vector tree-codes), or via
5471 target builtins.
5473 Output:
5474 - CODE1 and CODE2 are codes of vector operations to be used when
5475 vectorizing the operation, if available.
5476 - DECL1 and DECL2 are decls of target builtin functions to be used
5477 when vectorizing the operation, if available. In this case,
5478 CODE1 and CODE2 are CALL_EXPR.
5479 - MULTI_STEP_CVT determines the number of required intermediate steps in
5480 case of multi-step conversion (like char->short->int - in that case
5481 MULTI_STEP_CVT will be 1).
5482 - INTERM_TYPES contains the intermediate type required to perform the
5483 widening operation (short in the above example). */
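/* Worked example (an illustration): widening a vector of 8 chars to
   ints goes char -> short -> int, so MULTI_STEP_CVT == 1 with SHORT as
   the intermediate type; at each step {c0,...,c7} is split into two
   widened halves, e.g. {c0,c1,c2,c3} and {c4,c5,c6,c7} promoted to
   shorts.  Which half is produced by the HI and which by the LO code
   depends on BYTES_BIG_ENDIAN, as the switch below shows.  */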
5485 bool
5486 supportable_widening_operation (enum tree_code code, gimple stmt,
5487 tree vectype_out, tree vectype_in,
5488 tree *decl1, tree *decl2,
5489 enum tree_code *code1, enum tree_code *code2,
5490 int *multi_step_cvt,
5491 VEC (tree, heap) **interm_types)
5493 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5494 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5495 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
5496 bool ordered_p;
5497 enum machine_mode vec_mode;
5498 enum insn_code icode1, icode2;
5499 optab optab1, optab2;
5500 tree vectype = vectype_in;
5501 tree wide_vectype = vectype_out;
5502 enum tree_code c1, c2;
5504 /* The result of a vectorized widening operation usually requires two vectors
5505 (because the widened results do not fit in one vector). The generated
5506 vector results would normally be expected to be generated in the same
5507 order as in the original scalar computation, i.e. if 8 results are
5508 generated in each vector iteration, they are to be organized as follows:
5509 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5511 However, in the special case that the result of the widening operation is
5512 used in a reduction computation only, the order doesn't matter (because
5513 when vectorizing a reduction we change the order of the computation).
5514 Some targets can take advantage of this and generate more efficient code.
5515 For example, targets like Altivec, that support widen_mult using a sequence
5516 of {mult_even,mult_odd} generate the following vectors:
5517 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5519 When vectorizing outer-loops, we execute the inner-loop sequentially
5520 (each vectorized inner-loop iteration contributes to VF outer-loop
5521 iterations in parallel). We therefore don't allow changing the order
5522 of the computation in the inner-loop during outer-loop vectorization. */
5524 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5525 && !nested_in_vect_loop_p (vect_loop, stmt))
5526 ordered_p = false;
5527 else
5528 ordered_p = true;
5530 if (!ordered_p
5531 && code == WIDEN_MULT_EXPR
5532 && targetm.vectorize.builtin_mul_widen_even
5533 && targetm.vectorize.builtin_mul_widen_even (vectype)
5534 && targetm.vectorize.builtin_mul_widen_odd
5535 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5536 {
5537 if (vect_print_dump_info (REPORT_DETAILS))
5538 fprintf (vect_dump, "Unordered widening operation detected.");
5540 *code1 = *code2 = CALL_EXPR;
5541 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5542 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5543 return true;
5544 }
5546 switch (code)
5547 {
5548 case WIDEN_MULT_EXPR:
5549 if (BYTES_BIG_ENDIAN)
5550 {
5551 c1 = VEC_WIDEN_MULT_HI_EXPR;
5552 c2 = VEC_WIDEN_MULT_LO_EXPR;
5553 }
5554 else
5555 {
5556 c2 = VEC_WIDEN_MULT_HI_EXPR;
5557 c1 = VEC_WIDEN_MULT_LO_EXPR;
5558 }
5559 break;
5561 CASE_CONVERT:
5562 if (BYTES_BIG_ENDIAN)
5563 {
5564 c1 = VEC_UNPACK_HI_EXPR;
5565 c2 = VEC_UNPACK_LO_EXPR;
5566 }
5567 else
5568 {
5569 c2 = VEC_UNPACK_HI_EXPR;
5570 c1 = VEC_UNPACK_LO_EXPR;
5571 }
5572 break;
5574 case FLOAT_EXPR:
5575 if (BYTES_BIG_ENDIAN)
5576 {
5577 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5578 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5579 }
5580 else
5581 {
5582 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5583 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5584 }
5585 break;
5587 case FIX_TRUNC_EXPR:
5588 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5589 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5590 computing the operation. */
5591 return false;
5593 default:
5594 gcc_unreachable ();
5595 }
5597 if (code == FIX_TRUNC_EXPR)
5598 {
5599 /* The signedness is determined from the output operand. */
5600 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5601 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5602 }
5603 else
5604 {
5605 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5606 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5607 }
5609 if (!optab1 || !optab2)
5610 return false;
5612 vec_mode = TYPE_MODE (vectype);
5613 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5614 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5615 return false;
5617 /* Check if it's a multi-step conversion that can be done using intermediate
5618 types. */
5619 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5620 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5621 {
5622 int i;
5623 tree prev_type = vectype, intermediate_type;
5624 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5625 optab optab3, optab4;
5627 if (!CONVERT_EXPR_CODE_P (code))
5628 return false;
5630 *code1 = c1;
5631 *code2 = c2;
5633 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5634 intermediate steps in the promotion sequence. We try
5635 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
5636 not. */
5637 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5638 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
5639 {
5640 intermediate_mode = insn_data[icode1].operand[0].mode;
5641 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5642 TYPE_UNSIGNED (prev_type));
5643 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5644 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5646 if (!optab3 || !optab4
5647 || ((icode1 = optab_handler (optab1, prev_mode))
5648 == CODE_FOR_nothing)
5649 || insn_data[icode1].operand[0].mode != intermediate_mode
5650 || ((icode2 = optab_handler (optab2, prev_mode))
5651 == CODE_FOR_nothing)
5652 || insn_data[icode2].operand[0].mode != intermediate_mode
5653 || ((icode1 = optab_handler (optab3, intermediate_mode))
5654 == CODE_FOR_nothing)
5655 || ((icode2 = optab_handler (optab4, intermediate_mode))
5656 == CODE_FOR_nothing))
5657 return false;
5659 VEC_quick_push (tree, *interm_types, intermediate_type);
5660 (*multi_step_cvt)++;
5662 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5663 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5664 return true;
5666 prev_type = intermediate_type;
5667 prev_mode = intermediate_mode;
5668 }
5670 return false;
5671 }
5673 *code1 = c1;
5674 *code2 = c2;
5675 return true;
5676 }
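/* A minimal sketch of how a caller might query the routine above, e.g.
   for a char->int widening conversion: on success CODE1/CODE2 hold the
   hi/lo unpacking codes, MULTI_STEP_CVT is 1 and INTERM_TYPES holds the
   intermediate (short) vector type.  For a WIDEN_MULT_EXPR that feeds only
   a reduction, CODE1/CODE2 may instead be CALL_EXPR with DECL1/DECL2 set
   to the even/odd multiply builtins.  The helper name is hypothetical;
   STMT is assumed to be the scalar widening statement.  */

static bool
example_check_widening (gimple stmt, tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  int multi_step_cvt = 0;
  VEC (tree, heap) *interm_types = NULL;
  bool ok;

  ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                       vectype_in, &decl1, &decl2,
                                       &code1, &code2, &multi_step_cvt,
                                       &interm_types);
  if (ok && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "widening supported with %d intermediate step(s).",
             multi_step_cvt);

  VEC_free (tree, heap, interm_types);
  return ok;
}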
5679 /* Function supportable_narrowing_operation
5681 Check whether an operation represented by the code CODE is a
5682 narrowing operation that is supported by the target platform in
5683 vector form (i.e., when operating on arguments of type VECTYPE_IN
5684 and producing a result of type VECTYPE_OUT).
5686 Narrowing operations we currently support are NOP (CONVERT) and
5687 FIX_TRUNC. This function checks if these operations are supported by
5688 the target platform directly via vector tree-codes.
5690 Output:
5691 - CODE1 is the code of a vector operation to be used when
5692 vectorizing the operation, if available.
5693 - MULTI_STEP_CVT determines the number of required intermediate steps in
5694 case of multi-step conversion (like int->short->char - in that case
5695 MULTI_STEP_CVT will be 1).
5696 - INTERM_TYPES contains the intermediate type required to perform the
5697 narrowing operation (short in the above example). */
5699 bool
5700 supportable_narrowing_operation (enum tree_code code,
5701 tree vectype_out, tree vectype_in,
5702 enum tree_code *code1, int *multi_step_cvt,
5703 VEC (tree, heap) **interm_types)
5704 {
5705 enum machine_mode vec_mode;
5706 enum insn_code icode1;
5707 optab optab1, interm_optab;
5708 tree vectype = vectype_in;
5709 tree narrow_vectype = vectype_out;
5710 enum tree_code c1;
5711 tree intermediate_type, prev_type;
5712 int i;
5714 switch (code)
5715 {
5716 CASE_CONVERT:
5717 c1 = VEC_PACK_TRUNC_EXPR;
5718 break;
5720 case FIX_TRUNC_EXPR:
5721 c1 = VEC_PACK_FIX_TRUNC_EXPR;
5722 break;
5724 case FLOAT_EXPR:
5725 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
5726 tree code and optabs used for computing the operation. */
5727 return false;
5729 default:
5730 gcc_unreachable ();
5731 }
5733 if (code == FIX_TRUNC_EXPR)
5735 /* The signedness is determined from the output operand. */
5735 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5736 else
5737 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5739 if (!optab1)
5740 return false;
5742 vec_mode = TYPE_MODE (vectype);
5743 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
5744 return false;
5746 /* Check if it's a multi-step conversion that can be done using intermediate
5747 types. */
5748 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
5749 {
5750 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5752 *code1 = c1;
5753 prev_type = vectype;
5754 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5755 intermediate steps in the narrowing sequence. We try
5756 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5757 not. */
5758 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5759 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
5760 {
5761 intermediate_mode = insn_data[icode1].operand[0].mode;
5762 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5763 TYPE_UNSIGNED (prev_type));
5764 interm_optab = optab_for_tree_code (c1, intermediate_type,
5765 optab_default);
5766 if (!interm_optab
5767 || ((icode1 = optab_handler (optab1, prev_mode))
5768 == CODE_FOR_nothing)
5769 || insn_data[icode1].operand[0].mode != intermediate_mode
5770 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
5771 == CODE_FOR_nothing))
5772 return false;
5774 VEC_quick_push (tree, *interm_types, intermediate_type);
5775 (*multi_step_cvt)++;
5777 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5778 return true;
5780 prev_type = intermediate_type;
5781 prev_mode = intermediate_mode;
5782 }
5784 return false;
5785 }
5787 *code1 = c1;
5788 return true;
5789 }
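/* A minimal sketch of the narrowing counterpart, e.g. for an int->char
   conversion: on success CODE1 is typically VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the intermediate (short)
   vector type.  The helper name is hypothetical; VECTYPE_OUT/VECTYPE_IN
   are assumed to be the char and int vector types respectively.  */

static bool
example_check_narrowing (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  VEC (tree, heap) *interm_types = NULL;
  bool ok;

  ok = supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code1, &multi_step_cvt,
                                        &interm_types);
  if (ok && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "narrowing supported with %d intermediate step(s).",
             multi_step_cvt);

  VEC_free (tree, heap, interm_types);
  return ok;
}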