* configure.ac: Don't test for [build] __cxa_atexit when building a
[official-gcc.git] / gcc / tree-vectorizer.c
blob656c612e29286b34395e9dcee905248981592d87
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
 26    simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 79    the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 112    Since we only vectorize operations whose vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static void vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info, tree niters);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static void vect_align_data_ref (tree);
169 static void vect_enhance_data_refs_alignment (loop_vec_info);
171 /* Utility functions for the analyses. */
172 static bool vect_is_simple_use (tree , struct loop *, tree *);
173 static bool exist_non_indexing_operands_for_use_p (tree, tree);
174 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
175 static void vect_mark_relevant (varray_type, tree);
176 static bool vect_stmt_relevant_p (tree, loop_vec_info);
177 static tree vect_get_loop_niters (struct loop *, tree *);
178 static bool vect_compute_data_ref_alignment
179 (struct data_reference *, loop_vec_info);
180 static bool vect_analyze_data_ref_access (struct data_reference *);
181 static bool vect_get_first_index (tree, tree *);
182 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
183 static struct data_reference * vect_analyze_pointer_ref_access
184 (tree, tree, bool);
185 static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
186 struct loop *loop);
187 static tree vect_get_base_and_bit_offset
188 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
189 static struct data_reference * vect_analyze_pointer_ref_access
190 (tree, tree, bool);
191 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
192 static tree vect_compute_array_ref_alignment
193 (struct data_reference *, loop_vec_info, tree, tree *);
194 static tree vect_get_ptr_offset (tree, tree, tree *);
195 static tree vect_get_symbl_and_dr
196 (tree, tree, bool, loop_vec_info, struct data_reference **);
198 /* Utility functions for the code transformation. */
199 static tree vect_create_destination_var (tree, tree);
200 static tree vect_create_data_ref_ptr
201 (tree, block_stmt_iterator *, tree, tree *, bool);
202 static tree vect_create_index_for_vector_ref
203 (struct loop *, block_stmt_iterator *);
204 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
205 static tree get_vectype_for_scalar_type (tree);
206 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
207 static tree vect_get_vec_def_for_operand (tree, tree);
208 static tree vect_init_vector (tree, tree);
209 static tree vect_build_symbol_bound (tree, int, struct loop *);
210 static void vect_finish_stmt_generation
211 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
213 static void vect_generate_tmps_on_preheader (loop_vec_info,
214 tree *, tree *,
215 tree *);
216 static tree vect_build_loop_niters (loop_vec_info);
217 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
219 /* Loop transformations prior to vectorization. */
221 /* Loop transformations entry point function.
222 It can be used outside of the vectorizer
223 in case the loop to be manipulated answers conditions specified
224 in function documentation. */
225 struct loop *tree_duplicate_loop_to_edge (struct loop *,
226 struct loops *, edge,
227 tree, tree, bool);
229 static void allocate_new_names (bitmap);
230 static void rename_use_op (use_operand_p);
231 static void rename_def_op (def_operand_p, tree);
232 static void rename_variables_in_bb (basic_block);
233 static void free_new_names (bitmap);
234 static void rename_variables_in_loop (struct loop *);
235 static void copy_phi_nodes (struct loop *, struct loop *, bool);
236 static void update_phis_for_duplicate_loop (struct loop *,
237 struct loop *,
238 bool after);
239 static void update_phi_nodes_for_guard (edge, struct loop *);
240 static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
241 static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
242 struct loops *,
243 edge);
244 static edge add_loop_guard (basic_block, tree, basic_block);
245 static bool verify_loop_for_duplication (struct loop *, bool, edge);
247 /* Utilities dealing with loop peeling (not peeling itself). */
248 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
249 static void vect_update_niters_after_peeling (loop_vec_info, tree);
250 static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
251 tree niters);
252 static void vect_update_inits_of_drs (loop_vec_info, tree);
253 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
255 /* Utilities for creation and deletion of vec_info structs. */
256 loop_vec_info new_loop_vec_info (struct loop *loop);
257 void destroy_loop_vec_info (loop_vec_info);
258 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
260 static bool vect_debug_stats (struct loop *loop);
261 static bool vect_debug_details (struct loop *loop);
264 /* Utilities to support loop peeling for vectorization purposes. */
267 /* For each definition in DEFINITIONS this function allocates
268 new ssa name. */
270 static void
271 allocate_new_names (bitmap definitions)
273 unsigned ver;
274 bitmap_iterator bi;
276 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
278 tree def = ssa_name (ver);
279 tree *new_name_ptr = xmalloc (sizeof (tree));
281 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
283 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
284 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
286 SSA_NAME_AUX (def) = new_name_ptr;
291 /* Renames the use *OP_P. */
293 static void
294 rename_use_op (use_operand_p op_p)
296 tree *new_name_ptr;
298 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
299 return;
301 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
303 /* Something defined outside of the loop. */
304 if (!new_name_ptr)
305 return;
307 /* An ordinary ssa name defined in the loop. */
309 SET_USE (op_p, *new_name_ptr);
313 /* Renames the def *OP_P in statement STMT. */
315 static void
316 rename_def_op (def_operand_p op_p, tree stmt)
318 tree *new_name_ptr;
320 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
321 return;
323 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
325 /* Something defined outside of the loop. */
326 if (!new_name_ptr)
327 return;
329 /* An ordinary ssa name defined in the loop. */
331 SET_DEF (op_p, *new_name_ptr);
332 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
336 /* Renames the variables in basic block BB. */
338 static void
339 rename_variables_in_bb (basic_block bb)
341 tree phi;
342 block_stmt_iterator bsi;
343 tree stmt;
344 stmt_ann_t ann;
345 use_optype uses;
346 vuse_optype vuses;
347 def_optype defs;
348 v_may_def_optype v_may_defs;
349 v_must_def_optype v_must_defs;
350 unsigned i;
351 edge e;
352 edge_iterator ei;
354 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
355 rename_def_op (PHI_RESULT_PTR (phi), phi);
357 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
359 stmt = bsi_stmt (bsi);
360 get_stmt_operands (stmt);
361 ann = stmt_ann (stmt);
363 uses = USE_OPS (ann);
364 for (i = 0; i < NUM_USES (uses); i++)
365 rename_use_op (USE_OP_PTR (uses, i));
367 defs = DEF_OPS (ann);
368 for (i = 0; i < NUM_DEFS (defs); i++)
369 rename_def_op (DEF_OP_PTR (defs, i), stmt);
371 vuses = VUSE_OPS (ann);
372 for (i = 0; i < NUM_VUSES (vuses); i++)
373 rename_use_op (VUSE_OP_PTR (vuses, i));
375 v_may_defs = V_MAY_DEF_OPS (ann);
376 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
378 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
379 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
382 v_must_defs = V_MUST_DEF_OPS (ann);
383 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
384 rename_def_op (V_MUST_DEF_OP_PTR (v_must_defs, i), stmt);
387 FOR_EACH_EDGE (e, ei, bb->succs)
388 for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi))
389 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
393 /* Releases the structures holding the new ssa names. */
395 static void
396 free_new_names (bitmap definitions)
398 unsigned ver;
399 bitmap_iterator bi;
401 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
403 tree def = ssa_name (ver);
405 if (SSA_NAME_AUX (def))
407 free (SSA_NAME_AUX (def));
408 SSA_NAME_AUX (def) = NULL;
414 /* Renames variables in new generated LOOP. */
416 static void
417 rename_variables_in_loop (struct loop *loop)
419 unsigned i;
420 basic_block *bbs;
422 bbs = get_loop_body (loop);
424 for (i = 0; i < loop->num_nodes; i++)
425 rename_variables_in_bb (bbs[i]);
427 free (bbs);
431 /* This function copies phis from LOOP header to
432 NEW_LOOP header. AFTER is as
433 in update_phis_for_duplicate_loop function. */
435 static void
436 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
437 bool after)
439 tree phi, new_phi, def;
440 edge new_e;
441 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
443 /* Second add arguments to newly created phi nodes. */
444 for (phi = phi_nodes (loop->header),
445 new_phi = phi_nodes (new_loop->header);
446 phi;
447 phi = TREE_CHAIN (phi),
448 new_phi = TREE_CHAIN (new_phi))
450 new_e = loop_preheader_edge (new_loop);
451 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
452 add_phi_arg (&new_phi, def, new_e);
457 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
458 executes after LOOP, and false if it executes before it. */
460 static void
461 update_phis_for_duplicate_loop (struct loop *loop,
462 struct loop *new_loop, bool after)
464 edge old_latch;
465 tree *new_name_ptr, new_ssa_name;
466 tree phi_new, phi_old, def;
467 edge orig_entry_e = loop_preheader_edge (loop);
469 /* Copy phis from loop->header to new_loop->header. */
470 copy_phi_nodes (loop, new_loop, after);
472 old_latch = loop_latch_edge (loop);
474 /* Update PHI args for the new loop latch edge, and
475 the old loop preheader edge, we know that the PHI nodes
476 are ordered appropriately in copy_phi_nodes. */
477 for (phi_new = phi_nodes (new_loop->header),
478 phi_old = phi_nodes (loop->header);
479 phi_new && phi_old;
480 phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
482 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
484 if (TREE_CODE (def) != SSA_NAME)
485 continue;
487 new_name_ptr = SSA_NAME_AUX (def);
489 /* Something defined outside of the loop. */
490 if (!new_name_ptr)
491 continue;
493 /* An ordinary ssa name defined in the loop. */
494 new_ssa_name = *new_name_ptr;
496 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
498 /* Update PHI args for the original loop pre-header edge. */
499 if (! after)
500 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
501 new_ssa_name);
506 /* Update PHI nodes for a guard of the LOOP.
508 LOOP is supposed to have a preheader bb at which a guard condition is
509 located. The true edge of this condition skips the LOOP and ends
510 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
511 to be an empty bb (created by this transformation) with one successor.
513 This function creates phi nodes at the LOOP exit bb. These phis need to be
514 created as a result of adding true edge coming from guard.
516 FORNOW: Only phis which have corresponding phi nodes at the header of the
517 LOOP are created. Here we use the assumption that after the LOOP there
518 are no uses of defs generated in LOOP.
520 After the phis creation, the function updates the values of phi nodes at
521 the LOOP exit successor bb:
523 Original loop:
525 bb0: loop preheader
526 goto bb1
527 bb1: loop header
528 if (exit_cond) goto bb3 else goto bb2
529 bb2: loop latch
530 goto bb1
531 bb3:
534 After guard creation (the loop before this function):
536 bb0: loop preheader
537 if (guard_condition) goto bb4 else goto bb1
538 bb1: loop header
539 if (exit_cond) goto bb4 else goto bb2
540 bb2: loop latch
541 goto bb1
542 bb4: loop exit
543 (new empty bb)
544 goto bb3
545 bb3:
547 This function updates the phi nodes in bb4 and in bb3, to account for the
548 new edge from bb0 to bb4. */
550 static void
551 update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
553 tree phi, phi1;
555 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
557 tree new_phi;
558 tree phi_arg;
560 /* Generate new phi node. */
561 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
562 loop->exit_edges[0]->dest);
564 /* Add argument coming from guard true edge. */
565 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
566 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
568 /* Add argument coming from loop exit edge. */
569 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
570 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
572 /* Update all phi nodes at the loop exit successor. */
573 for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest);
574 phi1;
575 phi1 = TREE_CHAIN (phi1))
577 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1,
578 EDGE_SUCC (loop->exit_edges[0]->dest, 0));
579 if (old_arg == phi_arg)
581 edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0);
583 SET_PHI_ARG_DEF (phi1,
584 phi_arg_from_edge (phi1, e),
585 PHI_RESULT (new_phi));
592 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
593 that starts at zero, increases by one and its limit is NITERS. */
595 static void
596 make_loop_iterate_ntimes (struct loop *loop, tree niters,
597 tree begin_label, tree exit_label)
599 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
600 tree orig_cond;
601 edge exit_edge = loop->exit_edges[0];
602 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
604 /* Flow loop scan does not update loop->single_exit field. */
605 loop->single_exit = loop->exit_edges[0];
606 orig_cond = get_loop_exit_condition (loop);
607 gcc_assert (orig_cond);
608 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
609 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
611 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
612 back to the exit condition statement. */
613 bsi_next (&loop_exit_bsi);
614 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
617 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
618 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
619 else /* 'then' edge loops back. */
620 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
622 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
623 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
624 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
625 begin_label, exit_label);
626 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
628 /* Remove old loop exit test: */
629 bsi_remove (&loop_exit_bsi);
631 if (vect_debug_stats (loop) || vect_debug_details (loop))
632 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
636 /* Given LOOP this function generates a new copy of it and puts it
637 on E which is either the entry or exit of LOOP. */
639 static struct loop *
640 tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
641 edge e)
643 struct loop *new_loop;
644 basic_block *new_bbs, *bbs;
645 bool at_exit;
646 bool was_imm_dom;
647 basic_block exit_dest;
648 tree phi, phi_arg;
650 at_exit = (e == loop->exit_edges[0]);
651 if (!at_exit && e != loop_preheader_edge (loop))
653 if (dump_file && (dump_flags & TDF_DETAILS))
654 fprintf (dump_file,
655 "Edge is not an entry nor an exit edge.\n");
656 return NULL;
659 bbs = get_loop_body (loop);
661 /* Check whether duplication is possible. */
662 if (!can_copy_bbs_p (bbs, loop->num_nodes))
664 if (vect_debug_stats (loop) || vect_debug_details (loop))
665 fprintf (dump_file,
666 "Cannot copy basic blocks.\n");
667 free (bbs);
668 return NULL;
671 /* Generate new loop structure. */
672 new_loop = duplicate_loop (loops, loop, loop->outer);
673 if (!new_loop)
675 if (vect_debug_stats (loop) || vect_debug_details (loop))
676 fprintf (dump_file,
677 "The duplicate_loop returns NULL.\n");
678 free (bbs);
679 return NULL;
682 exit_dest = loop->exit_edges[0]->dest;
683 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
684 exit_dest) == loop->header ?
685 true : false);
687 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
689 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
691 /* Duplicating phi args at exit bbs as coming
692 also from exit of duplicated loop. */
693 for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi))
695 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
696 if (phi_arg)
698 edge new_loop_exit_edge;
700 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
701 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
702 else
703 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
705 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
709 if (at_exit) /* Add the loop copy at exit. */
711 redirect_edge_and_branch_force (e, new_loop->header);
712 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
713 if (was_imm_dom)
714 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
716 else /* Add the copy at entry. */
718 edge new_exit_e;
719 edge entry_e = loop_preheader_edge (loop);
720 basic_block preheader = entry_e->src;
722 if (!flow_bb_inside_loop_p (new_loop,
723 EDGE_SUCC (new_loop->header, 0)->dest))
724 new_exit_e = EDGE_SUCC (new_loop->header, 0);
725 else
726 new_exit_e = EDGE_SUCC (new_loop->header, 1);
728 redirect_edge_and_branch_force (new_exit_e, loop->header);
729 set_immediate_dominator (CDI_DOMINATORS, loop->header,
730 new_exit_e->src);
732 /* We have to add phi args to the loop->header here as coming
733 from new_exit_e edge. */
734 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
736 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
737 if (phi_arg)
738 add_phi_arg (&phi, phi_arg, new_exit_e);
741 redirect_edge_and_branch_force (entry_e, new_loop->header);
742 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
745 flow_loop_scan (new_loop, LOOP_ALL);
746 flow_loop_scan (loop, LOOP_ALL);
747 free (new_bbs);
748 free (bbs);
750 return new_loop;
754 /* Given the condition statement COND, put it as the last statement
755 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
756 Assumes that this is the single exit of the guarded loop.
757 Returns the skip edge. */
759 static edge
760 add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
762 block_stmt_iterator bsi;
763 edge new_e, enter_e;
764 tree cond_stmt, then_label, else_label;
766 enter_e = EDGE_SUCC (guard_bb, 0);
767 enter_e->flags &= ~EDGE_FALLTHRU;
768 enter_e->flags |= EDGE_FALSE_VALUE;
769 bsi = bsi_last (guard_bb);
771 then_label = build1 (GOTO_EXPR, void_type_node,
772 tree_block_label (exit_bb));
773 else_label = build1 (GOTO_EXPR, void_type_node,
774 tree_block_label (enter_e->dest));
775 cond_stmt = build (COND_EXPR, void_type_node, cond,
776 then_label, else_label);
777 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
778 /* Add new edge to connect entry block to the second loop. */
779 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
780 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
781 return new_e;
785 /* This function verifies that certain restrictions apply to LOOP. */
787 static bool
788 verify_loop_for_duplication (struct loop *loop,
789 bool update_first_loop_count, edge e)
791 edge exit_e = loop->exit_edges [0];
792 edge entry_e = loop_preheader_edge (loop);
794 /* We duplicate only innermost loops. */
795 if (loop->inner)
797 if (vect_debug_stats (loop) || vect_debug_details (loop))
798 fprintf (dump_file,
799 "Loop duplication failed. Loop is not innermost.\n");
800 return false;
803 /* Only loops with 1 exit. */
804 if (loop->num_exits != 1)
806 if (vect_debug_stats (loop) || vect_debug_details (loop))
807 fprintf (dump_file,
808 "More than one exit from loop.\n");
809 return false;
812 /* Only loops with 1 entry. */
813 if (loop->num_entries != 1)
815 if (vect_debug_stats (loop) || vect_debug_details (loop))
816 fprintf (dump_file,
817 "More than one exit from loop.\n");
818 return false;
821 /* All loops has outers, the only case loop->outer is NULL is for
822 the function itself. */
823 if (!loop->outer)
825 if (vect_debug_stats (loop) || vect_debug_details (loop))
826 fprintf (dump_file,
827 "Loop is outer-most loop.\n");
828 return false;
831 /* Verify that new IV can be created and loop condition
832 can be changed to make first loop iterate first_niters times. */
833 if (!update_first_loop_count)
835 tree orig_cond = get_loop_exit_condition (loop);
836 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
838 if (!orig_cond)
840 if (vect_debug_stats (loop) || vect_debug_details (loop))
841 fprintf (dump_file,
842 "Loop has no exit condition.\n");
843 return false;
845 if (orig_cond != bsi_stmt (loop_exit_bsi))
847 if (vect_debug_stats (loop) || vect_debug_details (loop))
848 fprintf (dump_file,
849 "Loop exit condition is not loop header last stmt.\n");
850 return false;
854 /* Make sure E is either an entry or an exit edge. */
855 if (e != exit_e && e != entry_e)
857 if (vect_debug_stats (loop) || vect_debug_details (loop))
858 fprintf (dump_file,
859 "E is not loop entry or exit edge.\n");
860 return false;
863 return true;
867 /* Given LOOP this function duplicates it to the edge E.
869 This transformation takes place before the loop is vectorized.
870 For now, there are two main cases when it's used
871 by the vectorizer: to support loops with unknown loop bounds
872 (or loop bounds indivisible by vectorization factor) and to force the
873 alignment of data references in the loop. In the first case, LOOP is
874 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
875 is duplicated to the preheader edge thus generating prolog loop. In both
876 cases, the original loop will be vectorized after the transformation.
878 The edge E is supposed to be either preheader edge of the LOOP or
879 its exit edge. If preheader edge is specified, the LOOP copy
880 will precede the original one. Otherwise the copy will be located
881 at the exit of the LOOP.
883 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
884 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
885 loop will be iterated FIRST_NITERS times by introducing additional
886 induction variable and replacing loop exit condition. If
887 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
888 the caller to tree_duplicate_loop_to_edge is responsible for updating
889 the first loop count.
891 NITERS (also SSA_NAME) parameter defines the number of iteration the
892 original loop iterated. The function generates two if-then guards:
893 one prior to the first loop and the other prior to the second loop.
894 The first guard will be:
896 if (FIRST_NITERS == 0) then skip the first loop
898 The second guard will be:
900 if (FIRST_NITERS == NITERS) then skip the second loop
902 Thus the equivalence to the original code is guaranteed by correct values
903 of NITERS and FIRST_NITERS and generation of if-then loop guards.
905 For now this function supports only loop forms that are candidate for
906 vectorization. Such types are the following:
908 (1) only innermost loops
909 (2) loops built from 2 basic blocks
910 (3) loops with one entry and one exit
911 (4) loops without function calls
912 (5) loops without defs that are used after the loop
914 (1), (3) are checked in this function; (2) - in function
915 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
916 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
917 when excluding induction/reduction support.
919 The function returns NULL in case one of these checks or
920 transformations failed. */
922 struct loop*
923 tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
924 edge e, tree first_niters,
925 tree niters, bool update_first_loop_count)
927 struct loop *new_loop = NULL, *first_loop, *second_loop;
928 edge skip_e;
929 tree pre_condition;
930 bitmap definitions;
931 basic_block first_exit_bb, second_exit_bb;
932 basic_block pre_header_bb;
933 edge exit_e = loop->exit_edges [0];
935 gcc_assert (!any_marked_for_rewrite_p ());
937 if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
938 return NULL;
940 /* We have to initialize cfg_hooks. Then, when calling
941 cfg_hooks->split_edge, the function tree_split_edge
942 is actually called and, when calling cfg_hooks->duplicate_block,
943 the function tree_duplicate_bb is called. */
944 tree_register_cfg_hooks ();
946 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
947 if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
949 if (vect_debug_stats (loop) || vect_debug_details (loop))
950 fprintf (dump_file,
951 "The tree_duplicate_loop_to_edge_cfg failed.\n");
952 return NULL;
955 definitions = marked_ssa_names ();
956 allocate_new_names (definitions);
957 update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
958 /* Here, using assumption (5), we do not propagate new names further
959 than on phis of the exit from the second loop. */
960 rename_variables_in_loop (new_loop);
961 free_new_names (definitions);
963 if (e == exit_e)
965 first_loop = loop;
966 second_loop = new_loop;
968 else
970 first_loop = new_loop;
971 second_loop = loop;
974 /* 2. Generate bb between the loops. */
975 first_exit_bb = split_edge (first_loop->exit_edges[0]);
976 add_bb_to_loop (first_exit_bb, first_loop->outer);
978 /* We need to update here first loop exit edge
979 and second loop preheader edge. */
980 flow_loop_scan (first_loop, LOOP_ALL);
981 flow_loop_scan (second_loop, LOOP_ALL);
983 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
984 if (!update_first_loop_count)
986 tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
987 tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
989 make_loop_iterate_ntimes (first_loop, first_niters,
990 first_loop_latch_lbl,
991 first_loop_exit_lbl);
994 /* 4. Add the guard before first loop:
996 if FIRST_NITERS == 0
997 skip first loop
998 else
999 enter first loop */
1001 /* 4a. Generate bb before first loop. */
1002 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
1003 add_bb_to_loop (pre_header_bb, first_loop->outer);
1005 /* First loop preheader edge is changed. */
1006 flow_loop_scan (first_loop, LOOP_ALL);
1008 /* 4b. Generate guard condition. */
1009 pre_condition = build (LE_EXPR, boolean_type_node,
1010 first_niters, integer_zero_node);
1012 /* 4c. Add condition at the end of preheader bb. */
1013 skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
1015 /* 4d. Update phis at first loop exit and propagate changes
1016 to the phis of second loop. */
1017 update_phi_nodes_for_guard (skip_e, first_loop);
1019 /* 5. Add the guard before second loop:
1021 if FIRST_NITERS == NITERS SKIP
1022 skip second loop
1023 else
1024 enter second loop */
1026 /* 5a. Generate empty bb at the exit from the second loop. */
1027 second_exit_bb = split_edge (second_loop->exit_edges[0]);
1028 add_bb_to_loop (second_exit_bb, second_loop->outer);
1030 /* Second loop preheader edge is changed. */
1031 flow_loop_scan (second_loop, LOOP_ALL);
1033 /* 5b. Generate guard condition. */
1034 pre_condition = build (EQ_EXPR, boolean_type_node,
1035 first_niters, niters);
1037 /* 5c. Add condition at the end of preheader bb. */
1038 skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1039 update_phi_nodes_for_guard (skip_e, second_loop);
1041 BITMAP_XFREE (definitions);
1042 unmark_all_for_rewrite ();
1044 return new_loop;
1049 /* Here the proper Vectorizer starts. */
1051 /* Function new_stmt_vec_info.
1053 Create and initialize a new stmt_vec_info struct for STMT. */
1055 stmt_vec_info
1056 new_stmt_vec_info (tree stmt, struct loop *loop)
1058 stmt_vec_info res;
1059 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1061 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1062 STMT_VINFO_STMT (res) = stmt;
1063 STMT_VINFO_LOOP (res) = loop;
1064 STMT_VINFO_RELEVANT_P (res) = 0;
1065 STMT_VINFO_VECTYPE (res) = NULL;
1066 STMT_VINFO_VEC_STMT (res) = NULL;
1067 STMT_VINFO_DATA_REF (res) = NULL;
1068 STMT_VINFO_MEMTAG (res) = NULL;
1069 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1071 return res;
1075 /* Function new_loop_vec_info.
1077 Create and initialize a new loop_vec_info struct for LOOP, as well as
1078 stmt_vec_info structs for all the stmts in LOOP. */
1080 loop_vec_info
1081 new_loop_vec_info (struct loop *loop)
1083 loop_vec_info res;
1084 basic_block *bbs;
1085 block_stmt_iterator si;
1086 unsigned int i;
1088 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1090 bbs = get_loop_body (loop);
1092 /* Create stmt_info for all stmts in the loop. */
1093 for (i = 0; i < loop->num_nodes; i++)
1095 basic_block bb = bbs[i];
1096 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1098 tree stmt = bsi_stmt (si);
1099 stmt_ann_t ann;
1101 get_stmt_operands (stmt);
1102 ann = stmt_ann (stmt);
1103 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1107 LOOP_VINFO_LOOP (res) = loop;
1108 LOOP_VINFO_BBS (res) = bbs;
1109 LOOP_VINFO_EXIT_COND (res) = NULL;
1110 LOOP_VINFO_NITERS (res) = NULL;
1111 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1112 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1113 LOOP_VINFO_VECT_FACTOR (res) = 0;
1114 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1115 "loop_write_datarefs");
1116 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1117 "loop_read_datarefs");
1119 for (i=0; i<MAX_NUMBER_OF_UNALIGNED_DATA_REFS; i++)
1120 LOOP_UNALIGNED_DR (res, i) = NULL;
1121 return res;
1125 /* Function destroy_loop_vec_info.
1127 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1128 stmts in the loop. */
1130 void
1131 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1133 struct loop *loop;
1134 basic_block *bbs;
1135 int nbbs;
1136 block_stmt_iterator si;
1137 int j;
1139 if (!loop_vinfo)
1140 return;
1142 loop = LOOP_VINFO_LOOP (loop_vinfo);
1144 bbs = LOOP_VINFO_BBS (loop_vinfo);
1145 nbbs = loop->num_nodes;
1147 for (j = 0; j < nbbs; j++)
1149 basic_block bb = bbs[j];
1150 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1152 tree stmt = bsi_stmt (si);
1153 stmt_ann_t ann = stmt_ann (stmt);
1154 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1155 free (stmt_info);
1156 set_stmt_info (ann, NULL);
1160 free (LOOP_VINFO_BBS (loop_vinfo));
1161 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1162 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1164 free (loop_vinfo);
1168 /* Function debug_loop_stats.
1170 For vectorization statistics dumps. */
1172 static bool
1173 vect_debug_stats (struct loop *loop)
1175 basic_block bb;
1176 block_stmt_iterator si;
1177 tree node = NULL_TREE;
1179 if (!dump_file || !(dump_flags & TDF_STATS))
1180 return false;
1182 if (!loop)
1184 fprintf (dump_file, "\n");
1185 return true;
1188 if (!loop->header)
1189 return false;
1191 bb = loop->header;
1193 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1195 node = bsi_stmt (si);
1196 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1197 break;
1200 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1201 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1203 fprintf (dump_file, "\nloop at %s:%d: ",
1204 EXPR_FILENAME (node), EXPR_LINENO (node));
1205 return true;
1208 return false;
1212 /* Function debug_loop_details.
1214 For vectorization debug dumps. */
1216 static bool
1217 vect_debug_details (struct loop *loop)
1219 basic_block bb;
1220 block_stmt_iterator si;
1221 tree node = NULL_TREE;
1223 if (!dump_file || !(dump_flags & TDF_DETAILS))
1224 return false;
1226 if (!loop)
1228 fprintf (dump_file, "\n");
1229 return true;
1232 if (!loop->header)
1233 return false;
1235 bb = loop->header;
1237 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1239 node = bsi_stmt (si);
1240 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1241 break;
1244 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1245 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1247 fprintf (dump_file, "\nloop at %s:%d: ",
1248 EXPR_FILENAME (node), EXPR_LINENO (node));
1249 return true;
1252 return false;
1256 /* Function vect_get_ptr_offset
1258 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1260 static tree
1261 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1262 tree vectype ATTRIBUTE_UNUSED,
1263 tree *offset ATTRIBUTE_UNUSED)
1265 /* TODO: Use alignment information. */
1266 return NULL_TREE;
1270 /* Function vect_get_base_and_bit_offset
1272 Return the BASE of the data reference EXPR.
1273 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1274 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1275 bits of 'a.b[i] + 4B' from a.
1277 Input:
1278 EXPR - the memory reference that is being analyzed
1279 DR - the data_reference struct of the _original_ memory reference
1280 (Note: DR_REF (DR) is not necessarily EXPR)
1281 VECTYPE - the type that defines the alignment (i.e, we compute
1282 alignment relative to TYPE_ALIGN(VECTYPE))
1284 Output:
1285 BASE (returned value) - the base of the data reference EXPR.
1286 E.g, if EXPR is a.b[k].c[i][j] the returned
1287 base is a.
1288 OFFSET - offset of EXPR from BASE in bits
1289 BASE_ALIGNED_P - indicates if BASE is aligned
1291 If something unexpected is encountered (an unsupported form of data-ref),
1292 or if VECTYPE is given but OFFSET cannot be determined:
1293 then NULL_TREE is returned. */
1295 static tree
1296 vect_get_base_and_bit_offset (struct data_reference *dr,
1297 tree expr,
1298 tree vectype,
1299 loop_vec_info loop_vinfo,
1300 tree *offset,
1301 bool *base_aligned_p)
1303 tree this_offset = size_zero_node;
1304 tree base = NULL_TREE;
1305 tree next_ref;
1306 tree oprnd0, oprnd1;
1307 struct data_reference *array_dr;
1308 enum tree_code code = TREE_CODE (expr);
1310 *base_aligned_p = false;
1312 switch (code)
1314 /* These cases end the recursion: */
1315 case VAR_DECL:
1316 *offset = size_zero_node;
1317 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1318 *base_aligned_p = true;
1319 return expr;
1321 case SSA_NAME:
1322 if (!vectype)
1323 return expr;
1325 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1326 return NULL_TREE;
1328 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1330 base = vect_get_ptr_offset (expr, vectype, offset);
1331 if (base)
1332 *base_aligned_p = true;
1334 else
1336 *base_aligned_p = true;
1337 *offset = size_zero_node;
1338 base = expr;
1340 return base;
1342 case INTEGER_CST:
1343 *offset = int_const_binop (MULT_EXPR, expr,
1344 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1345 return expr;
1347 /* These cases continue the recursion: */
1348 case COMPONENT_REF:
1349 oprnd0 = TREE_OPERAND (expr, 0);
1350 oprnd1 = TREE_OPERAND (expr, 1);
1352 this_offset = bit_position (oprnd1);
1353 if (vectype && !host_integerp (this_offset, 1))
1354 return NULL_TREE;
1355 next_ref = oprnd0;
1356 break;
1358 case ADDR_EXPR:
1359 oprnd0 = TREE_OPERAND (expr, 0);
1360 next_ref = oprnd0;
1361 break;
1363 case INDIRECT_REF:
1364 oprnd0 = TREE_OPERAND (expr, 0);
1365 next_ref = oprnd0;
1366 break;
1368 case ARRAY_REF:
1369 if (DR_REF (dr) != expr)
1370 /* Build array data_reference struct if the existing DR_REF
1371 doesn't match EXPR. This happens, for example, when the
1372 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1373 contains information on the access of T, not of arr. In order
1374 to continue the analysis, we create a new DR struct that
1375 describes the access of arr.
1377 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1378 else
1379 array_dr = dr;
1381 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1382 vectype, &this_offset);
1383 if (!next_ref)
1384 return NULL_TREE;
1386 if (vectype &&
1387 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1389 *offset = this_offset;
1390 *base_aligned_p = true;
1391 return next_ref;
1393 break;
1395 case PLUS_EXPR:
1396 case MINUS_EXPR:
1397 /* In case we have a PLUS_EXPR of the form
1398 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1399 This is verified in vect_get_symbl_and_dr. */
1400 oprnd0 = TREE_OPERAND (expr, 0);
1401 oprnd1 = TREE_OPERAND (expr, 1);
1403 base = vect_get_base_and_bit_offset
1404 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1405 if (vectype && !base)
1406 return NULL_TREE;
1408 next_ref = oprnd0;
1409 break;
1411 default:
1412 return NULL_TREE;
1415 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1416 loop_vinfo, offset, base_aligned_p);
1418 if (vectype && base)
1420 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1421 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1422 return NULL_TREE;
1424 if (vect_debug_details (NULL))
1426 print_generic_expr (dump_file, expr, TDF_SLIM);
1427 fprintf (dump_file, " --> total offset for ref: ");
1428 print_generic_expr (dump_file, *offset, TDF_SLIM);
1431 return base;
1435 /* Function vect_force_dr_alignment_p.
1437 Returns whether the alignment of a DECL can be forced to be aligned
1438 on ALIGNMENT bit boundary. */
1440 static bool
1441 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1443 if (TREE_CODE (decl) != VAR_DECL)
1444 return false;
1446 if (DECL_EXTERNAL (decl))
1447 return false;
1449 if (TREE_STATIC (decl))
1450 return (alignment <= MAX_OFILE_ALIGNMENT);
1451 else
1452 /* This is not 100% correct. The absolute correct stack alignment
1453 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1454 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1455 However, until someone implements forced stack alignment, SSE
1456 isn't really usable without this. */
1457 return (alignment <= PREFERRED_STACK_BOUNDARY);
1461 /* Function vect_get_new_vect_var.
1463 Returns a name for a new variable. The current naming scheme appends the
1464 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1465 the name of vectorizer generated variables, and appends that to NAME if
1466 provided. */
1468 static tree
1469 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1471 const char *prefix;
1472 int prefix_len;
1473 tree new_vect_var;
1475 if (var_kind == vect_simple_var)
1476 prefix = "vect_";
1477 else
1478 prefix = "vect_p";
1480 prefix_len = strlen (prefix);
1482 if (name)
1483 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1484 else
1485 new_vect_var = create_tmp_var (type, prefix);
1487 return new_vect_var;
1491 /* Function vect_create_index_for_vector_ref.
1493 Create (and return) an index variable, along with it's update chain in the
1494 loop. This variable will be used to access a memory location in a vector
1495 operation.
1497 Input:
1498 LOOP: The loop being vectorized.
1499 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1500 function can be added here, or in the loop pre-header.
1502 Output:
1503 Return an index that will be used to index a vector array. It is expected
1504 that a pointer to the first vector will be used as the base address for the
1505 indexed reference.
1507 FORNOW: we are not trying to be efficient, just creating a new index each
1508 time from scratch. At this time all vector references could use the same
1509 index.
1511 TODO: create only one index to be used by all vector references. Record
1512 the index in the LOOP_VINFO the first time this procedure is called and
1513 return it on subsequent calls. The increment of this index must be placed
1514 just before the conditional expression that ends the single block loop. */
1516 static tree
1517 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1519 tree init, step;
1520 tree indx_before_incr, indx_after_incr;
1522 /* It is assumed that the base pointer used for vectorized access contains
1523 the address of the first vector. Therefore the index used for vectorized
1524 access must be initialized to zero and incremented by 1. */
1526 init = integer_zero_node;
1527 step = integer_one_node;
1529 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1530 create_iv (init, step, NULL_TREE, loop, bsi, false,
1531 &indx_before_incr, &indx_after_incr);
1533 return indx_before_incr;
1537 /* Function vect_create_addr_base_for_vector_ref.
1539 Create an expression that computes the address of the first memory location
1540 that will be accessed for a data reference.
1542 Input:
1543 STMT: The statement containing the data reference.
1544 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1545 OFFSET: Optional. If supplied, it is be added to the initial address.
1547 Output:
1548 1. Return an SSA_NAME whose value is the address of the memory location of
1549 the first vector of the data reference.
1550 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1551 these statement(s) which define the returned SSA_NAME.
1553 FORNOW: We are only handling array accesses with step 1. */
1555 static tree
1556 vect_create_addr_base_for_vector_ref (tree stmt,
1557 tree *new_stmt_list,
1558 tree offset)
1560 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1561 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1562 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1563 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1564 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1565 tree ref = DR_REF (dr);
1566 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1567 tree scalar_type = TREE_TYPE (ref);
1568 tree scalar_ptr_type = build_pointer_type (scalar_type);
1569 tree access_fn;
1570 tree init_val, step, init_oval;
1571 bool ok;
1572 bool is_ptr_ref, is_array_ref, is_addr_expr;
1573 tree array_base;
1574 tree vec_stmt;
1575 tree new_temp;
1576 tree array_ref;
1577 tree addr_base, addr_expr;
1578 tree dest, new_stmt;
1580 /* Only the access function of the last index is relevant (i_n in
1581 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1582 access_fn = DR_ACCESS_FN (dr, 0);
1583 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1584 true);
1585 if (!ok)
1586 init_oval = integer_zero_node;
1588 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1589 && TREE_CODE (data_ref_base) == SSA_NAME;
1590 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1591 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1592 || TREE_CODE (data_ref_base) == PLUS_EXPR
1593 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1594 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1596 /** Create: &(base[init_val])
1598 if data_ref_base is an ARRAY_TYPE:
1599 base = data_ref_base
1601 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1602 base = *((scalar_array *) data_ref_base)
1605 if (is_array_ref)
1606 array_base = data_ref_base;
1607 else /* is_ptr_ref or is_addr_expr */
1609 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1610 tree scalar_array_type = build_array_type (scalar_type, 0);
1611 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1612 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1613 add_referenced_tmp_var (array_ptr);
1615 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1616 add_referenced_tmp_var (dest);
1617 data_ref_base =
1618 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1619 append_to_statement_list_force (new_stmt, new_stmt_list);
1621 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1622 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1623 new_temp = make_ssa_name (array_ptr, vec_stmt);
1624 TREE_OPERAND (vec_stmt, 0) = new_temp;
1625 append_to_statement_list_force (vec_stmt, new_stmt_list);
1627 /* (*array_ptr) */
1628 array_base = build_fold_indirect_ref (new_temp);
1631 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1632 add_referenced_tmp_var (dest);
1633 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1634 append_to_statement_list_force (new_stmt, new_stmt_list);
1636 if (offset)
1638 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1639 add_referenced_tmp_var (tmp);
1640 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1641 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1642 init_val = make_ssa_name (tmp, vec_stmt);
1643 TREE_OPERAND (vec_stmt, 0) = init_val;
1644 append_to_statement_list_force (vec_stmt, new_stmt_list);
1647 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1648 NULL_TREE, NULL_TREE);
1649 addr_base = build_fold_addr_expr (array_ref);
1651 /* addr_expr = addr_base */
1652 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1653 get_name (base_name));
1654 add_referenced_tmp_var (addr_expr);
1655 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1656 new_temp = make_ssa_name (addr_expr, vec_stmt);
1657 TREE_OPERAND (vec_stmt, 0) = new_temp;
1658 append_to_statement_list_force (vec_stmt, new_stmt_list);
1660 return new_temp;
1664 /* Function get_vectype_for_scalar_type.
1666 Returns the vector type corresponding to SCALAR_TYPE as supported
1667 by the target. */
1669 static tree
1670 get_vectype_for_scalar_type (tree scalar_type)
1672 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1673 int nbytes = GET_MODE_SIZE (inner_mode);
1674 int nunits;
1675 tree vectype;
1677 if (nbytes == 0)
1678 return NULL_TREE;
1680 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1681 is expected. */
1682 nunits = UNITS_PER_SIMD_WORD / nbytes;
1684 vectype = build_vector_type (scalar_type, nunits);
1685 if (vect_debug_details (NULL))
1687 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1688 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1691 if (!vectype)
1692 return NULL_TREE;
1694 if (vect_debug_details (NULL))
1696 fprintf (dump_file, "vectype: ");
1697 print_generic_expr (dump_file, vectype, TDF_SLIM);
1700 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1702 /* TODO: tree-complex.c sometimes can parallelize operations
1703 on generic vectors. We can vectorize the loop in that case,
1704 but then we should re-run the lowering pass. */
1705 if (vect_debug_details (NULL))
1706 fprintf (dump_file, "mode not supported by target.");
1707 return NULL_TREE;
1710 return vectype;
1714 /* Function vect_align_data_ref.
1716 Handle mislignment of a memory accesses.
1718 FORNOW: Can't handle misaligned accesses.
1719 Make sure that the dataref is aligned. */
1721 static void
1722 vect_align_data_ref (tree stmt)
1724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1725 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1727 /* FORNOW: can't handle misaligned accesses;
1728 all accesses expected to be aligned. */
1729 gcc_assert (aligned_access_p (dr));
1733 /* Function vect_create_data_ref_ptr.
1735 Create a memory reference expression for vector access, to be used in a
1736 vector load/store stmt. The reference is based on a new pointer to vector
1737 type (vp).
1739 Input:
1740 1. STMT: a stmt that references memory. Expected to be of the form
1741 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1742 2. BSI: block_stmt_iterator where new stmts can be added.
1743 3. OFFSET (optional): an offset to be added to the initial address accessed
1744 by the data-ref in STMT.
1745 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1746 pointing to the initial address.
1748 Output:
1749 1. Declare a new ptr to vector_type, and have it point to the base of the
1750 data reference (initial addressed accessed by the data reference).
1751 For example, for vector of type V8HI, the following code is generated:
1753 v8hi *vp;
1754 vp = (v8hi *)initial_address;
1756 if OFFSET is not supplied:
1757 initial_address = &a[init];
1758 if OFFSET is supplied:
1759 initial_address = &a[init + OFFSET];
1761 Return the initial_address in INITIAL_ADDRESS.
1763 2. Create a data-reference in the loop based on the new vector pointer vp,
1764 and using a new index variable 'idx' as follows:
1766 vp' = vp + update
1768 where if ONLY_INIT is true:
1769 update = zero
1770 and otherwise
1771 update = idx + vector_type_size
1773 Return the pointer vp'.
1776 FORNOW: handle only aligned and consecutive accesses. */
1778 static tree
1779 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1780 tree *initial_address, bool only_init)
1782 tree base_name;
1783 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1784 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1785 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1786 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1787 tree vect_ptr_type;
1788 tree vect_ptr;
1789 tree tag;
1790 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1791 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1792 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1793 int nvuses, nv_may_defs, nv_must_defs;
1794 int i;
1795 tree new_temp;
1796 tree vec_stmt;
1797 tree new_stmt_list = NULL_TREE;
1798 tree idx;
1799 edge pe = loop_preheader_edge (loop);
1800 basic_block new_bb;
1801 tree vect_ptr_init;
1802 tree vectype_size;
1803 tree ptr_update;
1804 tree data_ref_ptr;
1806 base_name = unshare_expr (DR_BASE_NAME (dr));
1807 if (vect_debug_details (NULL))
1809 tree data_ref_base = base_name;
1810 fprintf (dump_file, "create array_ref of type: ");
1811 print_generic_expr (dump_file, vectype, TDF_SLIM);
1812 if (TREE_CODE (data_ref_base) == VAR_DECL)
1813 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1814 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1815 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1816 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1817 fprintf (dump_file, "vectorizing a record based array ref: ");
1818 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1819 fprintf (dump_file, "vectorizing a pointer ref: ");
1820 print_generic_expr (dump_file, base_name, TDF_SLIM);
1823 /** (1) Create the new vector-pointer variable: **/
1825 vect_ptr_type = build_pointer_type (vectype);
1826 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1827 get_name (base_name));
1828 add_referenced_tmp_var (vect_ptr);
1831 /** (2) Handle aliasing information of the new vector-pointer: **/
1833 tag = STMT_VINFO_MEMTAG (stmt_info);
1834 gcc_assert (tag);
1835 get_var_ann (vect_ptr)->type_mem_tag = tag;
1837 /* Mark for renaming all aliased variables
1838 (i.e, the may-aliases of the type-mem-tag). */
1839 nvuses = NUM_VUSES (vuses);
1840 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1841 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1842 for (i = 0; i < nvuses; i++)
1844 tree use = VUSE_OP (vuses, i);
1845 if (TREE_CODE (use) == SSA_NAME)
1846 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1848 for (i = 0; i < nv_may_defs; i++)
1850 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1851 if (TREE_CODE (def) == SSA_NAME)
1852 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1854 for (i = 0; i < nv_must_defs; i++)
1856 tree def = V_MUST_DEF_OP (v_must_defs, i);
1857 if (TREE_CODE (def) == SSA_NAME)
1858 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1862 /** (3) Calculate the initial address the vector-pointer, and set
1863 the vector-pointer to point to it before the loop: **/
1865 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1866 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1867 offset);
1868 pe = loop_preheader_edge (loop);
1869 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1870 gcc_assert (!new_bb);
1871 *initial_address = new_temp;
1873 /* Create: p = (vectype *) initial_base */
1874 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1875 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1876 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1877 TREE_OPERAND (vec_stmt, 0) = new_temp;
1878 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1879 gcc_assert (!new_bb);
1880 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1883 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1885 if (only_init) /* No update in loop is required. */
1886 return vect_ptr_init;
1888 idx = vect_create_index_for_vector_ref (loop, bsi);
1890 /* Create: update = idx * vectype_size */
1891 ptr_update = create_tmp_var (integer_type_node, "update");
1892 add_referenced_tmp_var (ptr_update);
1893 vectype_size = build_int_cst (integer_type_node,
1894 GET_MODE_SIZE (TYPE_MODE (vectype)));
1895 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1896 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1897 new_temp = make_ssa_name (ptr_update, vec_stmt);
1898 TREE_OPERAND (vec_stmt, 0) = new_temp;
1899 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1901 /* Create: data_ref_ptr = vect_ptr_init + update */
1902 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1903 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1904 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1905 TREE_OPERAND (vec_stmt, 0) = new_temp;
1906 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1907 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1909 return data_ref_ptr;
1913 /* Function vect_create_destination_var.
1915 Create a new temporary of type VECTYPE. */
1917 static tree
1918 vect_create_destination_var (tree scalar_dest, tree vectype)
1920 tree vec_dest;
1921 const char *new_name;
1923 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1925 new_name = get_name (scalar_dest);
1926 if (!new_name)
1927 new_name = "var_";
1928 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1929 add_referenced_tmp_var (vec_dest);
1931 return vec_dest;
1935 /* Function vect_init_vector.
1937 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1938 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1939 used in the vectorization of STMT. */
1941 static tree
1942 vect_init_vector (tree stmt, tree vector_var)
1944 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1945 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1946 tree new_var;
1947 tree init_stmt;
1948 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1949 tree vec_oprnd;
1950 edge pe;
1951 tree new_temp;
1952 basic_block new_bb;
1954 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1955 add_referenced_tmp_var (new_var);
1957 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1958 new_temp = make_ssa_name (new_var, init_stmt);
1959 TREE_OPERAND (init_stmt, 0) = new_temp;
1961 pe = loop_preheader_edge (loop);
1962 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1963 gcc_assert (!new_bb);
1965 if (vect_debug_details (NULL))
1967 fprintf (dump_file, "created new init_stmt: ");
1968 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1971 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1972 return vec_oprnd;
1976 /* Function vect_get_vec_def_for_operand.
1978 OP is an operand in STMT. This function returns a (vector) def that will be
1979 used in the vectorized stmt for STMT.
1981 In the case that OP is an SSA_NAME which is defined in the loop, then
1982 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1984 In case OP is an invariant or constant, a new stmt that creates a vector def
1985 needs to be introduced. */
1987 static tree
1988 vect_get_vec_def_for_operand (tree op, tree stmt)
1990 tree vec_oprnd;
1991 tree vec_stmt;
1992 tree def_stmt;
1993 stmt_vec_info def_stmt_info = NULL;
1994 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1995 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1996 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1997 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1998 basic_block bb;
1999 tree vec_inv;
2000 tree t = NULL_TREE;
2001 tree def;
2002 int i;
2004 if (vect_debug_details (NULL))
2006 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2007 print_generic_expr (dump_file, op, TDF_SLIM);
2010 /** ===> Case 1: operand is a constant. **/
2012 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2014 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2016 tree vec_cst;
2018 /* Build a tree with vector elements. */
2019 if (vect_debug_details (NULL))
2020 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2022 for (i = nunits - 1; i >= 0; --i)
2024 t = tree_cons (NULL_TREE, op, t);
2026 vec_cst = build_vector (vectype, t);
2027 return vect_init_vector (stmt, vec_cst);
2030 gcc_assert (TREE_CODE (op) == SSA_NAME);
2032 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2034 def_stmt = SSA_NAME_DEF_STMT (op);
2035 def_stmt_info = vinfo_for_stmt (def_stmt);
2037 if (vect_debug_details (NULL))
2039 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2040 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2044 /** ==> Case 2.1: operand is defined inside the loop. **/
2046 if (def_stmt_info)
2048 /* Get the def from the vectorized stmt. */
2050 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2051 gcc_assert (vec_stmt);
2052 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2053 return vec_oprnd;
2057 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2058 it is a reduction/induction. **/
2060 bb = bb_for_stmt (def_stmt);
2061 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2063 if (vect_debug_details (NULL))
2064 fprintf (dump_file, "reduction/induction - unsupported.");
2065 internal_error ("no support for reduction/induction"); /* FORNOW */
2069 /** ==> Case 2.3: operand is defined outside the loop -
2070 it is a loop invariant. */
2072 switch (TREE_CODE (def_stmt))
2074 case PHI_NODE:
2075 def = PHI_RESULT (def_stmt);
2076 break;
2077 case MODIFY_EXPR:
2078 def = TREE_OPERAND (def_stmt, 0);
2079 break;
2080 case NOP_EXPR:
2081 def = TREE_OPERAND (def_stmt, 0);
2082 gcc_assert (IS_EMPTY_STMT (def_stmt));
2083 def = op;
2084 break;
2085 default:
2086 if (vect_debug_details (NULL))
2088 fprintf (dump_file, "unsupported defining stmt: ");
2089 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2091 internal_error ("unsupported defining stmt");
2094 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2096 if (vect_debug_details (NULL))
2097 fprintf (dump_file, "Create vector_inv.");
2099 for (i = nunits - 1; i >= 0; --i)
2101 t = tree_cons (NULL_TREE, def, t);
2104 vec_inv = build_constructor (vectype, t);
2105 return vect_init_vector (stmt, vec_inv);
2109 /* Function vect_finish_stmt_generation.
2111 Insert a new stmt. */
2113 static void
2114 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2116 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2118 if (vect_debug_details (NULL))
2120 fprintf (dump_file, "add new stmt: ");
2121 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2124 /* Make sure bsi points to the stmt that is being vectorized. */
2126 /* Assumption: any stmts created for the vectorization of stmt S were
2127 inserted before S. BSI is expected to point to S or some new stmt before S. */
2129 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2130 bsi_next (bsi);
2131 gcc_assert (stmt == bsi_stmt (*bsi));
2135 /* Function vectorizable_assignment.
2137 Check if STMT performs an assignment (copy) that can be vectorized.
2138 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2139 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2140 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2142 static bool
2143 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2145 tree vec_dest;
2146 tree scalar_dest;
2147 tree op;
2148 tree vec_oprnd;
2149 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2150 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2151 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2152 tree new_temp;
2154 /* Is vectorizable assignment? */
2156 if (TREE_CODE (stmt) != MODIFY_EXPR)
2157 return false;
2159 scalar_dest = TREE_OPERAND (stmt, 0);
2160 if (TREE_CODE (scalar_dest) != SSA_NAME)
2161 return false;
2163 op = TREE_OPERAND (stmt, 1);
2164 if (!vect_is_simple_use (op, loop, NULL))
2166 if (vect_debug_details (NULL))
2167 fprintf (dump_file, "use not simple.");
2168 return false;
2171 if (!vec_stmt) /* transformation not required. */
2173 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2174 return true;
2177 /** Trasform. **/
2178 if (vect_debug_details (NULL))
2179 fprintf (dump_file, "transform assignment.");
2181 /* Handle def. */
2182 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2184 /* Handle use. */
2185 op = TREE_OPERAND (stmt, 1);
2186 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2188 /* Arguments are ready. create the new vector stmt. */
2189 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2190 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2191 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2192 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2194 return true;
2198 /* Function vectorizable_operation.
2200 Check if STMT performs a binary or unary operation that can be vectorized.
2201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2205 static bool
2206 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2208 tree vec_dest;
2209 tree scalar_dest;
2210 tree operation;
2211 tree op0, op1 = NULL;
2212 tree vec_oprnd0, vec_oprnd1=NULL;
2213 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2214 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2215 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2216 int i;
2217 enum tree_code code;
2218 enum machine_mode vec_mode;
2219 tree new_temp;
2220 int op_type;
2221 tree op;
2222 optab optab;
2224 /* Is STMT a vectorizable binary/unary operation? */
2225 if (TREE_CODE (stmt) != MODIFY_EXPR)
2226 return false;
2228 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2229 return false;
2231 operation = TREE_OPERAND (stmt, 1);
2232 code = TREE_CODE (operation);
2233 optab = optab_for_tree_code (code, vectype);
2235 /* Support only unary or binary operations. */
2236 op_type = TREE_CODE_LENGTH (code);
2237 if (op_type != unary_op && op_type != binary_op)
2239 if (vect_debug_details (NULL))
2240 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2241 return false;
2244 for (i = 0; i < op_type; i++)
2246 op = TREE_OPERAND (operation, i);
2247 if (!vect_is_simple_use (op, loop, NULL))
2249 if (vect_debug_details (NULL))
2250 fprintf (dump_file, "use not simple.");
2251 return false;
2255 /* Supportable by target? */
2256 if (!optab)
2258 if (vect_debug_details (NULL))
2259 fprintf (dump_file, "no optab.");
2260 return false;
2262 vec_mode = TYPE_MODE (vectype);
2263 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2265 if (vect_debug_details (NULL))
2266 fprintf (dump_file, "op not supported by target.");
2267 return false;
2270 if (!vec_stmt) /* transformation not required. */
2272 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2273 return true;
2276 /** Transform. **/
2278 if (vect_debug_details (NULL))
2279 fprintf (dump_file, "transform binary/unary operation.");
2281 /* Handle def. */
2282 scalar_dest = TREE_OPERAND (stmt, 0);
2283 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2285 /* Handle uses. */
2286 op0 = TREE_OPERAND (operation, 0);
2287 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2289 if (op_type == binary_op)
2291 op1 = TREE_OPERAND (operation, 1);
2292 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2295 /* Arguments are ready. create the new vector stmt. */
2297 if (op_type == binary_op)
2298 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2299 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2300 else
2301 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2302 build1 (code, vectype, vec_oprnd0));
2303 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2304 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2305 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2307 return true;
2311 /* Function vectorizable_store.
2313 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2314 can be vectorized.
2315 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2316 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2317 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2319 static bool
2320 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2322 tree scalar_dest;
2323 tree data_ref;
2324 tree op;
2325 tree vec_oprnd1;
2326 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2327 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2328 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2329 enum machine_mode vec_mode;
2330 tree dummy;
2332 /* Is vectorizable store? */
2334 if (TREE_CODE (stmt) != MODIFY_EXPR)
2335 return false;
2337 scalar_dest = TREE_OPERAND (stmt, 0);
2338 if (TREE_CODE (scalar_dest) != ARRAY_REF
2339 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2340 return false;
2342 op = TREE_OPERAND (stmt, 1);
2343 if (!vect_is_simple_use (op, loop, NULL))
2345 if (vect_debug_details (NULL))
2346 fprintf (dump_file, "use not simple.");
2347 return false;
2350 vec_mode = TYPE_MODE (vectype);
2351 /* FORNOW. In some cases can vectorize even if data-type not supported
2352 (e.g. - array initialization with 0). */
2353 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2354 return false;
2356 if (!STMT_VINFO_DATA_REF (stmt_info))
2357 return false;
2360 if (!vec_stmt) /* transformation not required. */
2362 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2363 return true;
2366 /** Trasform. **/
2368 if (vect_debug_details (NULL))
2369 fprintf (dump_file, "transform store");
2371 /* Handle use - get the vectorized def from the defining stmt. */
2372 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2374 /* Handle def. */
2375 /* FORNOW: make sure the data reference is aligned. */
2376 vect_align_data_ref (stmt);
2377 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2378 data_ref = build_fold_indirect_ref (data_ref);
2380 /* Arguments are ready. create the new vector stmt. */
2381 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2382 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2384 return true;
2388 /* vectorizable_load.
2390 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2391 can be vectorized.
2392 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2393 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2394 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2396 static bool
2397 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2399 tree scalar_dest;
2400 tree vec_dest = NULL;
2401 tree data_ref = NULL;
2402 tree op;
2403 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2404 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2405 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2406 tree new_temp;
2407 int mode;
2408 tree init_addr;
2409 tree new_stmt;
2410 tree dummy;
2411 basic_block new_bb;
2412 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2413 edge pe = loop_preheader_edge (loop);
2414 bool software_pipeline_loads_p = false;
2416 /* Is vectorizable load? */
2418 if (TREE_CODE (stmt) != MODIFY_EXPR)
2419 return false;
2421 scalar_dest = TREE_OPERAND (stmt, 0);
2422 if (TREE_CODE (scalar_dest) != SSA_NAME)
2423 return false;
2425 op = TREE_OPERAND (stmt, 1);
2426 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2427 return false;
2429 if (!STMT_VINFO_DATA_REF (stmt_info))
2430 return false;
2432 mode = (int) TYPE_MODE (vectype);
2434 /* FORNOW. In some cases can vectorize even if data-type not supported
2435 (e.g. - data copies). */
2436 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2438 if (vect_debug_details (loop))
2439 fprintf (dump_file, "Aligned load, but unsupported type.");
2440 return false;
2443 if (!aligned_access_p (dr))
2445 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2446 && (!targetm.vectorize.builtin_mask_for_load
2447 || targetm.vectorize.builtin_mask_for_load ()))
2448 software_pipeline_loads_p = true;
2449 else if (!targetm.vectorize.misaligned_mem_ok (mode))
2451 /* Possibly unaligned access, and can't software pipeline the loads.
2453 if (vect_debug_details (loop))
2454 fprintf (dump_file, "Arbitrary load not supported.");
2455 return false;
2459 if (!vec_stmt) /* transformation not required. */
2461 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2462 return true;
2465 /** Trasform. **/
2467 if (vect_debug_details (NULL))
2468 fprintf (dump_file, "transform load.");
2470 if (!software_pipeline_loads_p)
2472 /* Create:
2473 p = initial_addr;
2474 indx = 0;
2475 loop {
2476 vec_dest = *(p);
2477 indx = indx + 1;
2481 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2482 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2483 if (aligned_access_p (dr))
2484 data_ref = build_fold_indirect_ref (data_ref);
2485 else
2487 int mis = DR_MISALIGNMENT (dr);
2488 tree tmis = (mis == -1 ?
2489 integer_zero_node :
2490 build_int_cst (integer_type_node, mis));
2491 tmis = int_const_binop (MULT_EXPR, tmis,
2492 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2493 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2495 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2496 new_temp = make_ssa_name (vec_dest, new_stmt);
2497 TREE_OPERAND (new_stmt, 0) = new_temp;
2498 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2500 else /* software-pipeline the loads */
2502 /* Create:
2503 p1 = initial_addr;
2504 msq_init = *(floor(p1))
2505 p2 = initial_addr + VS - 1;
2506 magic = have_builtin ? builtin_result : initial_address;
2507 indx = 0;
2508 loop {
2509 p2' = p2 + indx * vectype_size
2510 lsq = *(floor(p2'))
2511 vec_dest = realign_load (msq, lsq, magic)
2512 indx = indx + 1;
2513 msq = lsq;
2517 tree offset;
2518 tree magic;
2519 tree phi_stmt;
2520 tree msq_init;
2521 tree msq, lsq;
2522 tree dataref_ptr;
2523 tree params;
2525 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2526 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2527 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2528 &init_addr, true);
2529 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2530 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2531 new_temp = make_ssa_name (vec_dest, new_stmt);
2532 TREE_OPERAND (new_stmt, 0) = new_temp;
2533 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2534 gcc_assert (!new_bb);
2535 msq_init = TREE_OPERAND (new_stmt, 0);
2538 /* <2> Create lsq = *(floor(p2')) in the loop */
2539 offset = build_int_cst (integer_type_node,
2540 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2541 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2542 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2543 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2544 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2545 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2546 new_temp = make_ssa_name (vec_dest, new_stmt);
2547 TREE_OPERAND (new_stmt, 0) = new_temp;
2548 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2549 lsq = TREE_OPERAND (new_stmt, 0);
2552 /* <3> */
2553 if (targetm.vectorize.builtin_mask_for_load)
2555 /* Create permutation mask, if required, in loop preheader. */
2556 tree builtin_decl;
2557 params = build_tree_list (NULL_TREE, init_addr);
2558 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2559 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2560 new_stmt = build_function_call_expr (builtin_decl, params);
2561 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2562 new_temp = make_ssa_name (vec_dest, new_stmt);
2563 TREE_OPERAND (new_stmt, 0) = new_temp;
2564 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2565 gcc_assert (!new_bb);
2566 magic = TREE_OPERAND (new_stmt, 0);
2568 else
2570 /* Use current address instead of init_addr for reduced reg pressure.
2572 magic = dataref_ptr;
2576 /* <4> Create msq = phi <msq_init, lsq> in loop */
2577 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2578 msq = make_ssa_name (vec_dest, NULL_TREE);
2579 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2580 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2581 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2582 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2585 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2587 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2588 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2589 new_temp = make_ssa_name (vec_dest, new_stmt);
2590 TREE_OPERAND (new_stmt, 0) = new_temp;
2591 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2594 *vec_stmt = new_stmt;
2595 return true;
2599 /* Function vect_transform_stmt.
2601 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2603 static bool
2604 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2606 bool is_store = false;
2607 tree vec_stmt = NULL_TREE;
2608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2609 bool done;
2611 switch (STMT_VINFO_TYPE (stmt_info))
2613 case op_vec_info_type:
2614 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2615 gcc_assert (done);
2616 break;
2618 case assignment_vec_info_type:
2619 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2620 gcc_assert (done);
2621 break;
2623 case load_vec_info_type:
2624 done = vectorizable_load (stmt, bsi, &vec_stmt);
2625 gcc_assert (done);
2626 break;
2628 case store_vec_info_type:
2629 done = vectorizable_store (stmt, bsi, &vec_stmt);
2630 gcc_assert (done);
2631 is_store = true;
2632 break;
2633 default:
2634 if (vect_debug_details (NULL))
2635 fprintf (dump_file, "stmt not supported.");
2636 gcc_unreachable ();
2639 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2641 return is_store;
2645 /* This function builds ni_name = number of iterations loop executes
2646 on the loop preheader. */
2648 static tree
2649 vect_build_loop_niters (loop_vec_info loop_vinfo)
2651 tree ni_name, stmt, var;
2652 edge pe;
2653 basic_block new_bb;
2654 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2655 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2657 var = create_tmp_var (TREE_TYPE (ni), "niters");
2658 add_referenced_tmp_var (var);
2659 if (TREE_CODE (ni) == INTEGER_CST)
2661 /* This case is generated when treating a known loop bound
2662 indivisible by VF. Here we cannot use force_gimple_operand. */
2663 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2664 ni_name = make_ssa_name (var, stmt);
2665 TREE_OPERAND (stmt, 0) = ni_name;
2667 else
2668 ni_name = force_gimple_operand (ni, &stmt, false, var);
2670 pe = loop_preheader_edge (loop);
2671 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2672 if (new_bb)
2673 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2675 return ni_name;
2679 /* This function generates the following statements:
2681 ni_name = number of iterations loop executes
2682 ratio = ni_name / vf
2683 ratio_mult_vf_name = ratio * vf
2685 and places them at the loop preheader edge. */
2687 static void
2688 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2689 tree *ratio_mult_vf_name_p, tree *ratio_p)
2692 edge pe;
2693 basic_block new_bb;
2694 tree stmt, ni_name;
2695 tree ratio;
2696 tree ratio_mult_vf_name, ratio_mult_vf;
2697 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2698 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2700 int vf, i;
2702 /* Generate temporary variable that contains
2703 number of iterations loop executes. */
2705 ni_name = vect_build_loop_niters (loop_vinfo);
2707 /* ratio = ni / vf.
2708 vf is power of 2; then if ratio = = n >> log2 (vf). */
2709 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2710 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2712 /* Update initial conditions of loop copy. */
2714 /* ratio_mult_vf = ratio * vf;
2715 then if ratio_mult_vf = ratio << log2 (vf). */
2717 i = exact_log2 (vf);
2718 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2719 add_referenced_tmp_var (ratio_mult_vf);
2721 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2723 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2724 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2725 ratio, build_int_cst (unsigned_type_node,
2726 i)));
2728 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2730 pe = loop_preheader_edge (loop);
2731 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2732 if (new_bb)
2733 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2735 *ni_name_p = ni_name;
2736 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2737 *ratio_p = ratio;
2739 return;
2743 /* This function generates stmt
2745 tmp = n / vf;
2747 and attaches it to preheader of LOOP. */
2749 static tree
2750 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2752 tree var, stmt, var_name;
2753 edge pe;
2754 basic_block new_bb;
2755 int i;
2757 /* create temporary variable */
2758 var = create_tmp_var (TREE_TYPE (n), "bnd");
2759 add_referenced_tmp_var (var);
2761 var_name = make_ssa_name (var, NULL_TREE);
2763 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2765 i = exact_log2 (vf);
2766 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2767 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2768 n, build_int_cst (unsigned_type_node,i)));
2770 SSA_NAME_DEF_STMT (var_name) = stmt;
2772 pe = loop_preheader_edge (loop);
2773 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2774 if (new_bb)
2775 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2776 else
2777 if (vect_debug_details (NULL))
2778 fprintf (dump_file, "New bb on preheader edge was not generated.");
2780 return var_name;
2784 /* Function vect_transform_loop_bound.
2786 Create a new exit condition for the loop. */
2788 static void
2789 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2791 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2792 edge exit_edge = loop->single_exit;
2793 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
2794 tree indx_before_incr, indx_after_incr;
2795 tree orig_cond_expr;
2796 HOST_WIDE_INT old_N = 0;
2797 int vf;
2798 tree cond_stmt;
2799 tree new_loop_bound;
2800 bool symbol_niters;
2801 tree cond;
2802 tree lb_type;
2804 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2806 if (!symbol_niters)
2807 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2809 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2811 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2812 #ifdef ENABLE_CHECKING
2813 gcc_assert (orig_cond_expr);
2814 #endif
2815 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
2817 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
2818 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
2820 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
2821 to point to the exit condition. */
2822 bsi_next (&loop_exit_bsi);
2823 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
2825 /* new loop exit test: */
2826 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
2827 if (!symbol_niters)
2828 new_loop_bound = fold_convert (lb_type,
2829 build_int_cst (unsigned_type_node,
2830 old_N/vf));
2831 else
2832 new_loop_bound = niters;
2834 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
2835 cond = build2 (GE_EXPR, boolean_type_node,
2836 indx_after_incr, new_loop_bound);
2837 else /* 'then' edge loops back. */
2838 cond = build2 (LT_EXPR, boolean_type_node,
2839 indx_after_incr, new_loop_bound);
2841 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
2842 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
2844 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
2846 /* remove old loop exit test: */
2847 bsi_remove (&loop_exit_bsi);
2849 if (vect_debug_details (NULL))
2850 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
2854 /* Advance IVs of the loop (to be vectorized later) to correct position.
2856 When loop is vectorized, its IVs are not always advanced
2857 correctly since vectorization changes the loop count. It's ok
2858 in case epilog loop was not produced after original one before
2859 vectorization process (the vectorizer checks that there is no uses
2860 of IVs after the loop). However, in case the epilog loop was peeled,
2861 IVs from original loop are used in epilog loop and should be
2862 advanced correctly.
2864 Here we use access functions of IVs and number of
2865 iteration loop executes in order to bring IVs to correct position.
2867 Function also update phis of basic block at the exit
2868 from the loop. */
2870 static void
2871 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
2873 edge exit = loop->exit_edges[0];
2874 tree phi;
2875 edge latch = loop_latch_edge (loop);
2877 /* Generate basic block at the exit from the loop. */
2878 basic_block new_bb = split_edge (exit);
2879 add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
2881 loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
2883 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
2885 tree access_fn = NULL;
2886 tree evolution_part;
2887 tree init_expr;
2888 tree step_expr;
2889 tree var, stmt, ni, ni_name;
2890 int i, j, num_elem1, num_elem2;
2891 tree phi1;
2892 block_stmt_iterator last_bsi;
2894 /* Skip virtual phi's. The data dependences that are associated with
2895 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2897 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2899 if (vect_debug_details (NULL))
2900 fprintf (dump_file, "virtual phi. skip.");
2901 continue;
2904 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2906 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2908 /* FORNOW: We do not transform initial conditions of IVs
2909 which evolution functions are a polynomial of degree >= 2 or
2910 exponential. */
2912 step_expr = evolution_part;
2913 init_expr = initial_condition (access_fn);
2915 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2916 build2 (MULT_EXPR, TREE_TYPE (niters),
2917 niters, step_expr), init_expr);
2919 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2920 add_referenced_tmp_var (var);
2922 ni_name = force_gimple_operand (ni, &stmt, false, var);
2924 /* Insert stmt into new_bb. */
2925 last_bsi = bsi_last (new_bb);
2926 bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
2928 /* Fix phi expressions in duplicated loop. */
2929 num_elem1 = PHI_NUM_ARGS (phi);
2930 for (i = 0; i < num_elem1; i++)
2931 if (PHI_ARG_EDGE (phi, i) == latch)
2933 tree def = PHI_ARG_DEF (phi, i);
2935 for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1;
2936 phi1 = TREE_CHAIN (phi1))
2938 num_elem2 = PHI_NUM_ARGS (phi1);
2939 for (j = 0; j < num_elem2; j++)
2940 if (PHI_ARG_DEF (phi1, j) == def)
2942 SET_PHI_ARG_DEF (phi1, j, ni_name);
2943 PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0);
2944 break;
2947 break;
2954 /* This function is the main driver of transformation
2955 to be done for loop before vectorizing it in case of
2956 unknown loop bound. */
2958 static void
2959 vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
2960 struct loops *loops)
2963 tree ni_name, ratio_mult_vf_name;
2964 #ifdef ENABLE_CHECKING
2965 int loop_num;
2966 #endif
2967 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2968 struct loop *new_loop;
2970 if (vect_debug_details (NULL))
2971 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2973 /* Generate the following variables on the preheader of original loop:
2975 ni_name = number of iteration the original loop executes
2976 ratio = ni_name / vf
2977 ratio_mult_vf_name = ratio * vf */
2978 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2979 &ratio_mult_vf_name, ratio);
2981 /* Update loop info. */
2982 loop->pre_header = loop_preheader_edge (loop)->src;
2983 loop->pre_header_edges[0] = loop_preheader_edge (loop);
2985 #ifdef ENABLE_CHECKING
2986 loop_num = loop->num;
2987 #endif
2988 new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
2989 ratio_mult_vf_name, ni_name, true);
2990 #ifdef ENABLE_CHECKING
2991 gcc_assert (new_loop);
2992 gcc_assert (loop_num == loop->num);
2993 #endif
2995 /* Update IVs of original loop as if they were advanced
2996 by ratio_mult_vf_name steps. */
2998 #ifdef ENABLE_CHECKING
2999 /* Check existence of intermediate bb. */
3000 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3001 #endif
3002 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3004 return;
3009 /* Function vect_gen_niters_for_prolog_loop
3011 Set the number of iterations for the loop represented by LOOP_VINFO
3012 to the minimum between NITERS (the original iteration count of the loop)
3013 and the misalignment DR - the first data reference in the list
3014 LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this
3015 loop, the data reference DR will refer to an aligned location. */
3017 static tree
3018 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3020 struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
3021 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3022 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3023 tree var, stmt;
3024 tree iters, iters_name;
3025 edge pe;
3026 basic_block new_bb;
3027 tree dr_stmt = DR_STMT (dr);
3028 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3029 tree start_addr, byte_miss_align, elem_miss_align;
3030 int vec_type_align =
3031 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3032 / BITS_PER_UNIT;
3033 tree tmp1, tmp2;
3034 tree new_stmt_list = NULL_TREE;
3036 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3037 &new_stmt_list, NULL_TREE);
3039 pe = loop_preheader_edge (loop);
3040 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3041 if (new_bb)
3042 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3044 byte_miss_align =
3045 build (BIT_AND_EXPR, integer_type_node, start_addr,
3046 build (MINUS_EXPR, integer_type_node,
3047 build_int_cst (unsigned_type_node,
3048 vec_type_align), integer_one_node));
3049 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3050 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3051 byte_miss_align, tmp1);
3053 tmp2 =
3054 build (BIT_AND_EXPR, integer_type_node,
3055 build (MINUS_EXPR, integer_type_node,
3056 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3057 build (MINUS_EXPR, integer_type_node,
3058 build_int_cst (unsigned_type_node, vf), integer_one_node));
3060 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3061 var = create_tmp_var (TREE_TYPE (iters), "iters");
3062 add_referenced_tmp_var (var);
3063 iters_name = force_gimple_operand (iters, &stmt, false, var);
3065 /* Insert stmt on loop preheader edge. */
3066 pe = loop_preheader_edge (loop);
3067 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3068 if (new_bb)
3069 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3071 return iters_name;
3075 /* Function vect_update_niters_after_peeling
3077 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3078 The new number of iterations is therefore original_niters - NITERS.
3079 Record the new number of iterations in LOOP_VINFO. */
3081 static void
3082 vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3084 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3085 LOOP_VINFO_NITERS (loop_vinfo) =
3086 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3090 /* Function vect_update_inits_of_dr
3092 NITERS iterations were peeled from LOOP. DR represents a data reference
3093 in LOOP. This function updates the information recorded in DR to
3094 account for the fact that the first NITERS iterations had already been
3095 executed. Specifically, it updates the initial_condition of the
3096 access_function of DR. */
3098 static void
3099 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3100 tree niters)
3102 tree access_fn = DR_ACCESS_FN (dr, 0);
3103 tree init, init_new, step;
3105 step = evolution_part_in_loop_num (access_fn, loop->num);
3106 init = initial_condition (access_fn);
3108 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3109 build (MULT_EXPR, TREE_TYPE (niters),
3110 niters, step), init);
3111 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3113 return;
3117 /* Function vect_update_inits_of_drs
3119 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3120 This function updates the information recorded for the data references in
3121 the loop to account for the fact that the first NITERS iterations had
3122 already been executed. Specifically, it updates the initial_condition of the
3123 access_function of all the data_references in the loop. */
3125 static void
3126 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3128 unsigned int i;
3129 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3130 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3131 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3133 if (dump_file && (dump_flags & TDF_DETAILS))
3134 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3136 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3138 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3139 vect_update_inits_of_dr (dr, loop, niters);
3142 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3144 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3145 vect_update_inits_of_dr (dr, loop, niters);
3146 DR_MISALIGNMENT (dr) = -1;
3151 /* Function vect_do_peeling_for_alignment
3153 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3154 'niters' is set to the misalignment of one of the data references in the
3155 loop, thereby forcing it to refer to an aligned location at the beginning
3156 of the execution of this loop. The data reference for which we are
3157 peeling is chosen from LOOP_UNALIGNED_DR. */
3159 static void
3160 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3162 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3163 tree niters_of_prolog_loop, ni_name;
3164 struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
3166 if (vect_debug_details (NULL))
3167 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3169 ni_name = vect_build_loop_niters (loop_vinfo);
3170 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3173 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3174 tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3175 niters_of_prolog_loop, ni_name, false);
3178 /* Update stmt info of dr according to which we peeled. */
3179 DR_MISALIGNMENT (dr) = 0;
3181 /* Update number of times loop executes. */
3182 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3184 /* Update all inits of access functions of all data refs. */
3185 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3187 /* After peeling we have to reset scalar evolution analyzer. */
3188 scev_reset ();
3190 return;
3194 /* Function vect_transform_loop.
3196 The analysis phase has determined that the loop is vectorizable.
3197 Vectorize the loop - created vectorized stmts to replace the scalar
3198 stmts in the loop, and update the loop exit condition. */
static void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vec_transform_loop>>\n");

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n'
     (i.e. it's not a compile time constant),
     then an epilog loop needs to be created.  We therefore duplicate
     the initial loop.  The original loop will be vectorized, and will compute
     the first (n/VF) iterations.  The second copy of the loop will remain
     serial and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);

  /* FORNOW: we'll treat the case where niters is constant and

                niters % vf != 0

     in a way similar to the one with symbolic niters.
     For this we'll generate a variable whose value is equal to niters.  */

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);

  /* FORNOW: the vectorizer supports only loops whose body consists
     of one basic block (header + empty latch).  When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* NOTE: the iterator is advanced manually (not in the for-header)
	 because vect_transform_stmt/bsi_remove may already move SI past
	 the statement being replaced or removed.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "------>vectorizing statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Statements not marked relevant (e.g. loop control) are kept
	     as-is.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
#ifdef ENABLE_CHECKING
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  gcc_assert (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
		      == vectorization_factor);
#endif
	  /* -------- vectorize statement ------------ */
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* free the attached stmt_vec_info and remove the stmt.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      /* bsi_remove advances SI, so do not call bsi_next here.  */
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	}			/* stmts in BB */
    }				/* BBs in loop */

  /* Update the exit condition to iterate RATIO (= niters/VF) times.  */
  vect_transform_loop_bound (loop_vinfo, ratio);

  if (vect_debug_details (loop))
    fprintf (dump_file,"Success! loop vectorized.");
  if (vect_debug_stats (loop))
    fprintf (dump_file, "LOOP VECTORIZED.");
}
3315 /* Function vect_is_simple_use.
3317 Input:
3318 LOOP - the loop that is being vectorized.
3319 OPERAND - operand of a stmt in LOOP.
3320 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3322 Returns whether a stmt with OPERAND can be vectorized.
3323 Supportable operands are constants, loop invariants, and operands that are
3324 defined by the current iteration of the loop. Unsupportable operands are
3325 those that are defined by a previous iteration of the loop (as is the case
3326 in reduction/induction computations). */
static bool
vect_is_simple_use (tree operand, struct loop *loop, tree *def)
{
  tree def_stmt;
  basic_block bb;

  if (def)
    *def = NULL_TREE;

  /* Constants are always fine as operands.  */
  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    return true;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  def_stmt = SSA_NAME_DEF_STMT (operand);
  if (def_stmt == NULL_TREE )
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "no def_stmt.");
      return false;
    }

  /* empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a modify_expr).  */
  if (IS_EMPTY_STMT (def_stmt))
    {
      tree arg = TREE_OPERAND (def_stmt, 0);
      if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
	return true;
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "Unexpected empty stmt: ");
	  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
	}
      return false;
    }

  /* phi_node inside the loop indicates an induction/reduction pattern.
     This is not supported yet.  */
  bb = bb_for_stmt (def_stmt);
  if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "reduction/induction - unsupported.");
      return false; /* FORNOW: not supported yet.  */
    }

  /* Expecting a modify_expr or a phi_node (a PHI at this point can only
     be outside LOOP, i.e. a loop invariant).  */
  if (TREE_CODE (def_stmt) == MODIFY_EXPR
      || TREE_CODE (def_stmt) == PHI_NODE)
    {
      if (def)
	*def = def_stmt;
      return true;
    }

  return false;
}
3389 /* Function vect_analyze_operations.
3391 Scan the loop stmts and make sure they are all vectorizable. */
static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int vectorization_factor = 0;
  int i;
  bool ok;
  tree scalar_type;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_operations>>\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  tree stmt = bsi_stmt (si);
	  int nunits;
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  tree vectype;

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "==> examining statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }

	  gcc_assert (stmt_info);

	  /* skip stmts which do not need to be vectorized.
	     this is expected to include:
	     - the COND_EXPR which is the loop exit condition
	     - any LABEL_EXPRs in the loop
	     - computations that are used only for array indexing or loop
	       control  */

	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      if (vect_debug_details (NULL))
	        fprintf (dump_file, "irrelevant.");
	      continue;
	    }

	  /* A statement that is already in vector form cannot be handled.  */
	  if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
	        {
	          fprintf (dump_file, "not vectorized: vector stmt in loop:");
	          print_generic_expr (dump_file, stmt, TDF_SLIM);
	        }
	      return false;
	    }

	  /* Determine the scalar type to vectorize: the type of the data
	     reference if there is one, otherwise the type of the lhs (for
	     a MODIFY_EXPR) or of the stmt itself.  */
	  if (STMT_VINFO_DATA_REF (stmt_info))
	    scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
	  else if (TREE_CODE (stmt) == MODIFY_EXPR)
	    scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
	  else
	    scalar_type = TREE_TYPE (stmt);

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "get vectype for scalar type: ");
	      print_generic_expr (dump_file, scalar_type, TDF_SLIM);
	    }

	  vectype = get_vectype_for_scalar_type (scalar_type);
	  if (!vectype)
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
	        {
	          fprintf (dump_file, "not vectorized: unsupported data-type ");
	          print_generic_expr (dump_file, scalar_type, TDF_SLIM);
	        }
	      return false;
	    }

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "vectype: ");
	      print_generic_expr (dump_file, vectype, TDF_SLIM);
	    }
	  STMT_VINFO_VECTYPE (stmt_info) = vectype;

	  /* The stmt must be supported by exactly one of the vectorizable_*
	     checks.  */
	  ok = (vectorizable_operation (stmt, NULL, NULL)
		|| vectorizable_assignment (stmt, NULL, NULL)
		|| vectorizable_load (stmt, NULL, NULL)
		|| vectorizable_store (stmt, NULL, NULL));

	  if (!ok)
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
		{
		  fprintf (dump_file, "not vectorized: stmt not supported: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "nunits = %d", nunits);

	  if (vectorization_factor)
	    {
	      /* FORNOW: don't allow mixed units.
	         This restriction will be relaxed in the future.  */
	      if (nunits != vectorization_factor)
		{
		  if (vect_debug_stats (loop) || vect_debug_details (loop))
		    fprintf (dump_file, "not vectorized: mixed data-types");
		  return false;
		}
	    }
	  else
	    vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
	  gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
		      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
	}
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  if (vectorization_factor <= 1)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_debug_details (NULL))
    fprintf (dump_file,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
      /* In this case we have to generate epilog loop, that
         can be done only for loops with one entry edge.  */
      if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
	  || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
	{
	  if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: more than one entry.");
	  return false;
	}
    }

  return true;
}
3557 /* Function exist_non_indexing_operands_for_use_p
3559 USE is one of the uses attached to STMT. Check if USE is
3560 used in STMT for anything other than indexing an array. */
3562 static bool
3563 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3565 tree operand;
3566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3568 /* USE corresponds to some operand in STMT. If there is no data
3569 reference in STMT, then any operand that corresponds to USE
3570 is not indexing an array. */
3571 if (!STMT_VINFO_DATA_REF (stmt_info))
3572 return true;
3574 /* STMT has a data_ref. FORNOW this means that its of one of
3575 the following forms:
3576 -1- ARRAY_REF = var
3577 -2- var = ARRAY_REF
3578 (This should have been verified in analyze_data_refs).
3580 'var' in the second case corresponds to a def, not a use,
3581 so USE cannot correspond to any operands that are not used
3582 for array indexing.
3584 Therefore, all we need to check is if STMT falls into the
3585 first case, and whether var corresponds to USE. */
3587 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3588 return false;
3590 operand = TREE_OPERAND (stmt, 1);
3592 if (TREE_CODE (operand) != SSA_NAME)
3593 return false;
3595 if (operand == use)
3596 return true;
3598 return false;
3602 /* Function vect_is_simple_iv_evolution.
3604 FORNOW: A simple evolution of an induction variables in the loop is
3605 considered a polynomial evolution with constant step. */
3607 static bool
3608 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3609 tree * step, bool strict)
3611 tree init_expr;
3612 tree step_expr;
3614 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3616 /* When there is no evolution in this loop, the evolution function
3617 is not "simple". */
3618 if (evolution_part == NULL_TREE)
3619 return false;
3621 /* When the evolution is a polynomial of degree >= 2
3622 the evolution function is not "simple". */
3623 if (tree_is_chrec (evolution_part))
3624 return false;
3626 step_expr = evolution_part;
3627 init_expr = unshare_expr (initial_condition (access_fn));
3629 if (vect_debug_details (NULL))
3631 fprintf (dump_file, "step: ");
3632 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3633 fprintf (dump_file, ", init: ");
3634 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3637 *init = init_expr;
3638 *step = step_expr;
3640 if (TREE_CODE (step_expr) != INTEGER_CST)
3642 if (vect_debug_details (NULL))
3643 fprintf (dump_file, "step unknown.");
3644 return false;
3647 if (strict)
3648 if (!integer_onep (step_expr))
3650 if (vect_debug_details (NULL))
3651 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3652 return false;
3655 return true;
3659 /* Function vect_analyze_scalar_cycles.
3661 Examine the cross iteration def-use cycles of scalar variables, by
3662 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3663 cycles that they represent do not impede vectorization.
3665 FORNOW: Reduction as in the following loop, is not supported yet:
3666 loop1:
3667 for (i=0; i<N; i++)
3668 sum += a[i];
3669 The cross-iteration cycle corresponding to variable 'sum' will be
3670 considered too complicated and will impede vectorization.
3672 FORNOW: Induction as in the following loop, is not supported yet:
3673 loop2:
3674 for (i=0; i<N; i++)
3675 a[i] = i;
3677 However, the following loop *is* vectorizable:
3678 loop3:
3679 for (i=0; i<N; i++)
3680 a[i] = b[i];
3682 In both loops there exists a def-use cycle for the variable i:
3683 loop: i_2 = PHI (i_0, i_1)
3684 a[i_2] = ...;
3685 i_1 = i_2 + 1;
3686 GOTO loop;
3688 The evolution of the above cycle is considered simple enough,
3689 however, we also check that the cycle does not need to be
3690 vectorized, i.e - we check that the variable that this cycle
3691 defines is only used for array indexing or in stmts that do not
3692 need to be vectorized. This is not the case in loop2, but it
3693 *is* the case in loop3. */
static bool
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  tree phi;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree dummy;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");

  /* Only PHIs in the loop header can represent cross-iteration cycles.  */
  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
    {
      tree access_fn = NULL;

      if (vect_debug_details (NULL))
	{
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
	}

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      /* Analyze the evolution function.  */

      /* FORNOW: The only scalar cross-iteration cycles that we allow are
         those of loop induction variables; This property is verified here.

         Furthermore, if that induction variable is used in an operation
         that needs to be vectorized (i.e, is not solely used to index
         arrays and check the exit condition) - we do not support its
         vectorization yet. This property is verified in vect_is_simple_use,
         during vect_analyze_operations.  */

      access_fn = /* instantiate_parameters
		     (loop,*/
	analyze_scalar_evolution (loop, PHI_RESULT (phi));

      if (!access_fn)
	{
	  if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
	  return false;
	}

      if (vect_debug_details (NULL))
        {
           fprintf (dump_file, "Access function of PHI: ");
           print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      /* strict=false: any constant step is acceptable; init/step results
	 are discarded (DUMMY).  */
      if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
					&dummy, false))
	{
	  if (vect_debug_stats (loop) || vect_debug_details (loop))
	    fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
	  return false;
	}
    }

  return true;
}
3767 /* Function vect_analyze_data_ref_dependence.
3769 Return TRUE if there (might) exist a dependence between a memory-reference
3770 DRA and a memory-reference DRB. */
3772 static bool
3773 vect_analyze_data_ref_dependence (struct data_reference *dra,
3774 struct data_reference *drb,
3775 struct loop *loop)
3777 bool differ_p;
3778 struct data_dependence_relation *ddr;
3780 if (!array_base_name_differ_p (dra, drb, &differ_p))
3782 if (vect_debug_stats (loop) || vect_debug_details (loop))
3784 fprintf (dump_file,
3785 "not vectorized: can't determine dependence between: ");
3786 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3787 fprintf (dump_file, " and ");
3788 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3790 return true;
3793 if (differ_p)
3794 return false;
3796 ddr = initialize_data_dependence_relation (dra, drb);
3797 compute_affine_dependence (ddr);
3799 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3800 return false;
3802 if (vect_debug_stats (loop) || vect_debug_details (loop))
3804 fprintf (dump_file,
3805 "not vectorized: possible dependence between data-refs ");
3806 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3807 fprintf (dump_file, " and ");
3808 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3811 return true;
3815 /* Function vect_analyze_data_ref_dependences.
3817 Examine all the data references in the loop, and make sure there do not
3818 exist any data dependences between them.
3820 TODO: dependences which distance is greater than the vectorization factor
3821 can be ignored. */
3823 static bool
3824 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3826 unsigned int i, j;
3827 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3828 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3829 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3831 /* Examine store-store (output) dependences. */
3833 if (vect_debug_details (NULL))
3834 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3836 if (vect_debug_details (NULL))
3837 fprintf (dump_file, "compare all store-store pairs.");
3839 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3841 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3843 struct data_reference *dra =
3844 VARRAY_GENERIC_PTR (loop_write_refs, i);
3845 struct data_reference *drb =
3846 VARRAY_GENERIC_PTR (loop_write_refs, j);
3847 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3848 return false;
3852 /* Examine load-store (true/anti) dependences. */
3854 if (vect_debug_details (NULL))
3855 fprintf (dump_file, "compare all load-store pairs.");
3857 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3859 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3861 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3862 struct data_reference *drb =
3863 VARRAY_GENERIC_PTR (loop_write_refs, j);
3864 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3865 return false;
3869 return true;
3873 /* Function vect_get_first_index.
3875 REF is a data reference.
3876 If it is an ARRAY_REF: if its lower bound is simple enough,
3877 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3878 If it is not an ARRAY_REF: REF has no "first index";
   ARRAY_FIRST_INDEX is zero, and the function returns TRUE.  */
3881 static bool
3882 vect_get_first_index (tree ref, tree *array_first_index)
3884 tree array_start;
3886 if (TREE_CODE (ref) != ARRAY_REF)
3887 *array_first_index = size_zero_node;
3888 else
3890 array_start = array_ref_low_bound (ref);
3891 if (!host_integerp (array_start,0))
3893 if (vect_debug_details (NULL))
3895 fprintf (dump_file, "array min val not simple integer cst.");
3896 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3898 return false;
3900 *array_first_index = array_start;
3903 return true;
3907 /* Function vect_compute_array_base_alignment.
3908 A utility function of vect_compute_array_ref_alignment.
3910 Compute the misalignment of ARRAY in bits.
3912 Input:
3913 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3914 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3915 if NULL: don't compute misalignment, just return the base of ARRAY.
3916 PREV_DIMENSIONS - initialized to one.
3917 MISALIGNMENT - the computed misalignment in bits.
3919 Output:
3920 If VECTYPE is not NULL:
3921 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3922 the base of the array, and put the computed misalignment in MISALIGNMENT.
3923 If VECTYPE is NULL:
3924 Return the base of the array.
3926 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3927 a[idx_N]...[idx_2][idx_1] is
3928 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3929 ... + idx_N * dim_0 * ... * dim_N-1}.
3930 (The misalignment of &a is not checked here).
3931 Note, that every term contains dim_0, therefore, if dim_0 is a
3932 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3933 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
   NUNITS, we can say that the misalignment of the sum is equal to
3935 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3936 we can't determine this array misalignment, and we return
3937 false.
3938 We proceed recursively in this manner, accumulating total misalignment
3939 and the multiplication of previous dimensions for correct misalignment
3940 calculation. */
static tree
vect_compute_array_base_alignment (tree array,
				   tree vectype,
				   tree *prev_dimensions,
				   tree *misalignment)
{
  tree index;
  tree domain;
  tree dimension_size;
  tree mis;
  tree bits_per_vectype;
  tree bits_per_vectype_unit;

  /* The 'stop condition' of the recursion.  */
  if (TREE_CODE (array) != ARRAY_REF)
    return array;

  if (!vectype)
    /* Just get the base decl.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  /* Give up if the accumulated values are not (non-overflowed)
     host-representable constants.  */
  if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
      !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
    return NULL_TREE;

  domain = TYPE_DOMAIN (TREE_TYPE (array));
  /* dimension_size = max - min + 1 for this array dimension.  */
  dimension_size =
    int_const_binop (PLUS_EXPR,
		int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
				 TYPE_MIN_VALUE (domain), 1),
		size_one_node, 1);

  /* Check if the dimension size is a multiple of NUNITS, the remaining sum
     is a multiple of NUNITS:

     dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?  */
  mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
	 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
  if (integer_zerop (mis))
    /* This array is aligned. Continue just in order to get the base decl.  */
    return vect_compute_array_base_alignment
      (TREE_OPERAND (array, 0), NULL, NULL, NULL);

  index = TREE_OPERAND (array, 1);
  if (!host_integerp (index, 1))
    /* The current index is not constant.  */
    return NULL_TREE;

  /* Normalize the index relative to the dimension's lower bound.  */
  index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);

  bits_per_vectype = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
		   GET_MODE_SIZE (TYPE_MODE (vectype))));
  bits_per_vectype_unit = fold_convert (unsigned_type_node,
    build_int_cst (NULL_TREE, BITS_PER_UNIT *
		   GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));

  /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
     earlier:

     *misalignment =
       (*misalignment + index_val * dimension_size * *prev_dimensions)
       % vectype_nunits;  */

  mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
  mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
  mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
  mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
  *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);

  /* Accumulate the product of the dimensions processed so far, for the
     next (outer) recursion level.  */
  *prev_dimensions = int_const_binop (MULT_EXPR,
				      *prev_dimensions, dimension_size, 1);

  return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
					    prev_dimensions,
					    misalignment);
}
4025 /* Function vect_compute_data_ref_alignment
4027 Compute the misalignment of the data reference DR.
4029 Output:
4030 1. If during the misalignment computation it is found that the data reference
4031 cannot be vectorized then false is returned.
4032 2. DR_MISALIGNMENT (DR) is defined.
4034 FOR NOW: No analysis is actually performed. Misalignment is calculated
4035 only for trivial cases. TODO. */
static bool
vect_compute_data_ref_alignment (struct data_reference *dr,
				 loop_vec_info loop_vinfo)
{
  tree stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree ref = DR_REF (dr);
  tree vectype;
  tree scalar_type;
  tree offset = size_zero_node;
  tree base, bit_offset, alignment;
  tree unit_bits = fold_convert (unsigned_type_node,
				 build_int_cst (NULL_TREE, BITS_PER_UNIT));
  tree dr_base;
  bool base_aligned_p;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  DR_MISALIGNMENT (dr) = -1;

  scalar_type = TREE_TYPE (ref);
  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    {
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "no vectype for stmt: ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	  fprintf (dump_file, " scalar_type: ");
	  print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
	}
      /* It is not possible to vectorize this data reference.  */
      return false;
    }
  gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);

  /* For an ARRAY_REF analyze the ref itself; for an INDIRECT_REF use the
     base recorded during data-ref analysis.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    dr_base = ref;
  else
    dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);

  base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
			  loop_vinfo, &bit_offset, &base_aligned_p);
  if (!base)
    {
      /* Unknown alignment is not a vectorization failure; the misalignment
	 simply stays recorded as unknown (-1).  */
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "Unknown alignment for access: ");
	  print_generic_expr (dump_file,
			      STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
	}
      return true;
    }

  if (!base_aligned_p)
    {
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
	{
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "can't force alignment of ref: ");
	      print_generic_expr (dump_file, ref, TDF_SLIM);
	    }
	  return true;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (vect_debug_details (NULL))
	fprintf (dump_file, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
    }

  /* At this point we assume that the base is aligned, and the offset from it
     (including index, if relevant) has been computed and is in BIT_OFFSET.  */
  gcc_assert (base_aligned_p
	      || (TREE_CODE (base) == VAR_DECL
		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Convert into bytes.  */
  offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
  /* Check that there is no remainder in bits.  */
  bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
  if (!integer_zerop (bit_offset))
    {
      /* A sub-byte offset (e.g. a bit-field) cannot be handled.  */
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "bit offset alignment: ");
	  print_generic_expr (dump_file, bit_offset, TDF_SLIM);
	}
      return false;
    }

  /* Alignment required, in bytes: */
  alignment = fold_convert (unsigned_type_node,
	    build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));

  /* Modulo alignment.  */
  offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
  if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "unexpected misalign value");
      return false;
    }

  /* Record the misalignment in bytes, modulo the vector alignment.  */
  DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);

  if (vect_debug_details (NULL))
    fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));

  return true;
}
/* Function vect_compute_array_ref_alignment

   Compute the alignment of an array-ref.  The alignment computed here is
   relative to a TYPE_ALIGN (VECTYPE) boundary.

   Input:
   DR - the data reference; DR_REF (DR) is the array-ref being analyzed
   LOOP_VINFO - information about the loop being vectorized
   VECTYPE - the vector type; if NULL_TREE, alignment is not requested and
   only the base is computed

   Output:
   OFFSET - the misalignment in bits
   Return value - the base of the array-ref.  E.g., if the array-ref is
   a.b[k].c[i][j] the returned base is a.b[k].c.  NULL_TREE is returned
   when the misalignment cannot be expressed as a non-negative host
   integer (unknown base, non-constant index, or overflow).  */

4169 static tree
4170 vect_compute_array_ref_alignment (struct data_reference *dr,
4171 loop_vec_info loop_vinfo,
4172 tree vectype,
4173 tree *offset)
4175 tree array_first_index = size_zero_node;
4176 tree init;
4177 tree ref = DR_REF (dr);
4178 tree scalar_type = TREE_TYPE (ref);
4179 tree oprnd0 = TREE_OPERAND (ref, 0);
4180 tree dims = size_one_node;
4181 tree misalign = size_zero_node;
4182 tree next_ref, this_offset = size_zero_node;
4183 tree nunits;
4184 tree nbits;
4186 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4187 /* The reference is an array without its last index. */
4188 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4189 &misalign);
4190 else
4191 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4192 &misalign);
4193 if (!vectype)
4194 /* Alignment is not requested. Just return the base. */
4195 return next_ref;
4197 /* Compute alignment.  Give up unless the base misalignment is a
   representable non-negative constant.  */
4198 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4199 return NULL_TREE;
/* Misalignment (in bits) contributed by the base computation above.  */
4200 this_offset = misalign;
4202 /* Check the first index accessed. */
4203 if (!vect_get_first_index (ref, &array_first_index))
4205 if (vect_debug_details (NULL))
4206 fprintf (dump_file, "no first_index for array.");
4207 return NULL_TREE;
4210 /* Check the index of the array_ref. */
4211 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4212 LOOP_VINFO_LOOP (loop_vinfo)->num);
4214 /* FORNOW: In order to simplify the handling of alignment, we make sure
4215 that the first location at which the array is accessed ('init') is on an
4216 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4217 This is too conservative, since we require that
4218 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4219 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4220 This should be relaxed in the future. */
4222 if (!init || !host_integerp (init, 0))
4224 if (vect_debug_details (NULL))
4225 fprintf (dump_file, "non constant init. ");
4226 return NULL_TREE;
4229 /* bytes per scalar element: */
4230 nunits = fold_convert (unsigned_type_node,
4231 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4232 nbits = int_const_binop (MULT_EXPR, nunits,
4233 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4235 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4236 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4237 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4238 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4240 /* TODO: allow negative misalign values. */
4241 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4243 if (vect_debug_details (NULL))
4244 fprintf (dump_file, "unexpected misalign value");
4245 return NULL_TREE;
4247 *offset = misalign;
4248 return next_ref;
4252 /* Function vect_compute_data_refs_alignment
4254 Compute the misalignment of data references in the loop.
4255 This pass may take place at function granularity instead of at loop
4256 granularity.
4258 FOR NOW: No analysis is actually performed. Misalignment is calculated
4259 only for trivial cases. TODO. */
4261 static void
4262 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4264 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4265 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4266 unsigned int i;
4268 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4270 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4271 vect_compute_data_ref_alignment (dr, loop_vinfo);
4274 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4276 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4277 vect_compute_data_ref_alignment (dr, loop_vinfo);
4282 /* Function vect_enhance_data_refs_alignment
4284 This pass will use loop versioning and loop peeling in order to enhance
4285 the alignment of data references in the loop.
4287 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4288 original loop is to be vectorized; Any other loops that are created by
4289 the transformations performed in this pass - are not supposed to be
4290 vectorized. This restriction will be relaxed.
4292 FOR NOW: No transformation is actually performed. TODO. */
4294 static void
4295 vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
4298 This pass will require a cost model to guide it whether to apply peeling
4299 or versioning or a combination of the two. For example, the scheme that
4300 intel uses when given a loop with several memory accesses, is as follows:
4301 choose one memory access ('p') which alignment you want to force by doing
4302 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4303 other accesses are not necessarily aligned, or (2) use loop versioning to
4304 generate one loop in which all accesses are aligned, and another loop in
4305 which only 'p' is necessarily aligned.
4307 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4308 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4309 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4311 Devising a cost model is the most critical aspect of this work. It will
4312 guide us on which access to peel for, whether to use loop versioning, how
4313 many versions to create, etc. The cost model will probably consist of
4314 generic considerations as well as target specific considerations (on
4315 powerpc for example, misaligned stores are more painful than misaligned
4316 loads).
4318 Here is the general steps involved in alignment enhancements:
4320 -- original loop, before alignment analysis:
4321 for (i=0; i<N; i++){
4322 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4323 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4326 -- After vect_compute_data_refs_alignment:
4327 for (i=0; i<N; i++){
4328 x = q[i]; # DR_MISALIGNMENT(q) = 3
4329 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4332 -- Possibility 1: we do loop versioning:
4333 if (p is aligned) {
4334 for (i=0; i<N; i++){ # loop 1A
4335 x = q[i]; # DR_MISALIGNMENT(q) = 3
4336 p[i] = y; # DR_MISALIGNMENT(p) = 0
4339 else {
4340 for (i=0; i<N; i++){ # loop 1B
4341 x = q[i]; # DR_MISALIGNMENT(q) = 3
4342 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4346 -- Possibility 2: we do loop peeling:
4347 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4348 x = q[i];
4349 p[i] = y;
4351 for (i = 3; i < N; i++){ # loop 2A
4352 x = q[i]; # DR_MISALIGNMENT(q) = 0
4353 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4356 -- Possibility 3: combination of loop peeling and versioning:
4357 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4358 x = q[i];
4359 p[i] = y;
4361 if (p is aligned) {
4362 for (i = 3; i<N; i++){ # loop 3A
4363 x = q[i]; # DR_MISALIGNMENT(q) = 0
4364 p[i] = y; # DR_MISALIGNMENT(p) = 0
4367 else {
4368 for (i = 3; i<N; i++){ # loop 3B
4369 x = q[i]; # DR_MISALIGNMENT(q) = 0
4370 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4374 These loops are later passed to loop_transform to be vectorized. The
4375 vectorizer will use the alignment information to guide the transformation
4376 (whether to generate regular loads/stores, or with special handling for
4377 misalignment).
4382 /* Function vect_analyze_data_refs_alignment
4384 Analyze the alignment of the data-references in the loop.
4385 FOR NOW: Until support for misliagned accesses is in place, only if all
4386 accesses are aligned can the loop be vectorized. This restriction will be
4387 relaxed. */
4389 static bool
4390 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4392 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4393 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4394 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
4396 unsigned int i;
4397 unsigned int decide_peeling_count = 0;
4399 if (vect_debug_details (NULL))
4400 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4403 /* This pass may take place at function granularity instead of at loop
4404 granularity. */
4406 vect_compute_data_refs_alignment (loop_vinfo);
4409 /* This pass will use loop versioning and loop peeling in order to enhance
4410 the alignment of data references in the loop.
4411 FOR NOW: we assume that whatever versioning/peeling took place, the
4412 original loop is to be vectorized. Any other loops that were created by
4413 the transformations performed in this pass - are not supposed to be
4414 vectorized. This restriction will be relaxed. */
4416 vect_enhance_data_refs_alignment (loop_vinfo);
4419 /* Finally, check that loop can be vectorized.
4420 FOR NOW: Until support for misaligned stores is in place, only if all
4421 stores are aligned can the loop be vectorized. This restriction will be
4422 relaxed. In the meantime, we can force the alignment of on of the
4423 data-references in the loop using peeling. We currently use a heuristic
4424 that peels the first misaligned store, but we plan to develop a
4425 better cost model to guide the decision on which data-access to peel for.
4428 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4430 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4431 if (!aligned_access_p (dr))
4433 /* Decide here whether we need peeling for alignment. */
4434 decide_peeling_count++;
4435 if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS)
4437 if (vect_debug_stats (loop) || vect_debug_details (loop))
4438 fprintf (dump_file,
4439 "not vectorized: multiple misaligned stores.");
4440 return false;
4442 else
4444 LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr;
4445 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4450 /* The vectorizer now supports misaligned loads, so we don't fail anymore
4451 in the presence of a misaligned read dataref. For some targets however
4452 it may be preferable not to vectorize in such a case as misaligned
4453 accesses are very costly. This should be considered in the future. */
4455 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4457 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4458 if (!aligned_access_p (dr))
4460 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4461 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4462 fprintf (dump_file, "not vectorized: unaligned load.");
4463 return false;
4468 return true;
4472 /* Function vect_analyze_data_ref_access.
4474 Analyze the access pattern of the data-reference DR. For now, a data access
4475 has to consecutive and aligned to be considered vectorizable. */
4477 static bool
4478 vect_analyze_data_ref_access (struct data_reference *dr)
4480 varray_type access_fns = DR_ACCESS_FNS (dr);
4481 tree access_fn;
4482 tree init, step;
4483 unsigned int dimensions, i;
4485 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4486 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4487 access is contiguous). */
4488 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4490 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4492 access_fn = DR_ACCESS_FN (dr, i);
4494 if (evolution_part_in_loop_num (access_fn,
4495 loop_containing_stmt (DR_STMT (dr))->num))
4497 /* Evolution part is not NULL in this loop (it is neither constant
4498 nor invariant). */
4499 if (vect_debug_details (NULL))
4501 fprintf (dump_file,
4502 "not vectorized: complicated multidim. array access.");
4503 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4505 return false;
4509 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4510 if (!evolution_function_is_constant_p (access_fn)
4511 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4512 access_fn, &init, &step, true))
4514 if (vect_debug_details (NULL))
4516 fprintf (dump_file, "not vectorized: complicated access function.");
4517 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4519 return false;
4522 return true;
4526 /* Function vect_analyze_data_ref_accesses.
4528 Analyze the access pattern of all the data references in the loop.
4530 FORNOW: the only access pattern that is considered vectorizable is a
4531 simple step 1 (consecutive) access.
4533 FORNOW: handle only arrays and pointer accesses. */
4535 static bool
4536 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4538 unsigned int i;
4539 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4540 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4542 if (vect_debug_details (NULL))
4543 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4545 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4547 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4548 bool ok = vect_analyze_data_ref_access (dr);
4549 if (!ok)
4551 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4552 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4553 fprintf (dump_file, "not vectorized: complicated access pattern.");
4554 return false;
4558 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4560 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4561 bool ok = vect_analyze_data_ref_access (dr);
4562 if (!ok)
4564 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4565 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4566 fprintf (dump_file, "not vectorized: complicated access pattern.");
4567 return false;
4571 return true;
4575 /* Function vect_analyze_pointer_ref_access.
4577 Input:
4578 STMT - a stmt that contains a data-ref
4579 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4581 If the data-ref access is vectorizable, return a data_reference structure
4582 that represents it (DR). Otherwise - return NULL. */
4584 static struct data_reference *
4585 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4587 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4588 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4589 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4590 tree init, step;
4591 int step_val;
4592 tree reftype, innertype;
4593 enum machine_mode innermode;
4594 tree indx_access_fn;
4595 int loopnum = loop->num;
4596 struct data_reference *dr;
4598 if (!access_fn)
4600 if (vect_debug_stats (loop) || vect_debug_details (loop))
4601 fprintf (dump_file, "not vectorized: complicated pointer access.");
4602 return NULL;
4605 if (vect_debug_details (NULL))
4607 fprintf (dump_file, "Access function of ptr: ");
4608 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4611 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4613 if (vect_debug_stats (loop) || vect_debug_details (loop))
4614 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4615 return NULL;
4618 STRIP_NOPS (init);
4620 if (!host_integerp (step,0))
4622 if (vect_debug_stats (loop) || vect_debug_details (loop))
4623 fprintf (dump_file,
4624 "not vectorized: non constant step for pointer access.");
4625 return NULL;
4628 step_val = TREE_INT_CST_LOW (step);
4630 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4631 if (TREE_CODE (reftype) != POINTER_TYPE)
4633 if (vect_debug_stats (loop) || vect_debug_details (loop))
4634 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4635 return NULL;
4638 reftype = TREE_TYPE (init);
4639 if (TREE_CODE (reftype) != POINTER_TYPE)
4641 if (vect_debug_stats (loop) || vect_debug_details (loop))
4642 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4643 return NULL;
4646 innertype = TREE_TYPE (reftype);
4647 innermode = TYPE_MODE (innertype);
4648 if (GET_MODE_SIZE (innermode) != step_val)
4650 /* FORNOW: support only consecutive access */
4651 if (vect_debug_stats (loop) || vect_debug_details (loop))
4652 fprintf (dump_file, "not vectorized: non consecutive access.");
4653 return NULL;
4656 indx_access_fn =
4657 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4658 if (vect_debug_details (NULL))
4660 fprintf (dump_file, "Access function of ptr indx: ");
4661 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4663 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4664 return dr;
/* Function vect_get_symbl_and_dr.

   Analyze MEMREF, a data reference appearing in STMT, and build a
   data_reference struct for it.

   Input:
   MEMREF - a data reference in STMT (an INDIRECT_REF or an ARRAY_REF)
   STMT - the stmt containing MEMREF
   IS_READ - TRUE if STMT reads from MEMREF, FALSE if it writes to MEMREF
   LOOP_VINFO - info on the loop being analyzed

   Output:
   DR - data_reference struct for MEMREF (NULL if the ref is unhandled)
   return value - the relevant variable for the memory tag (for aliasing
   purposes), or NULL_TREE on failure.  */

4684 static tree
4685 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4686 loop_vec_info loop_vinfo, struct data_reference **dr)
4688 tree symbl, oprnd0, oprnd1;
4689 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4690 tree offset;
4691 tree array_base, base;
4692 struct data_reference *new_dr;
4693 bool base_aligned_p;
4695 *dr = NULL;
4696 switch (TREE_CODE (memref))
4698 case INDIRECT_REF:
/* A pointer dereference: analyze the pointer's evolution and record
   the data-ref it induces.  */
4699 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4700 if (! new_dr)
4701 return NULL_TREE;
4702 *dr = new_dr;
4703 symbl = DR_BASE_NAME (new_dr);
4704 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4706 switch (TREE_CODE (symbl))
4708 case PLUS_EXPR:
4709 case MINUS_EXPR:
4710 oprnd0 = TREE_OPERAND (symbl, 0);
4711 oprnd1 = TREE_OPERAND (symbl, 1);
4713 STRIP_NOPS(oprnd1);
4714 /* Only {address_base + offset} expressions are supported,
4715 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4716 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4717 TODO: swap operands if {offset + address_base}. */
4718 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4719 && TREE_CODE (oprnd1) != INTEGER_CST)
4720 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4721 return NULL_TREE;
4723 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4724 symbl = oprnd0;
4725 else
4726 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4727 loop_vinfo, &new_dr);
/* NOTE(review): no 'break' here - control falls through into the
   SSA_NAME/ADDR_EXPR case below, which leaves SYMBL unchanged.
   Confirm the fall-through is intended.  */
4729 case SSA_NAME:
4730 case ADDR_EXPR:
4731 /* symbl remains unchanged. */
4732 break;
4734 default:
4735 if (vect_debug_details (NULL))
4737 fprintf (dump_file, "unhandled data ref: ");
4738 print_generic_expr (dump_file, memref, TDF_SLIM);
4739 fprintf (dump_file, " (symbl ");
4740 print_generic_expr (dump_file, symbl, TDF_SLIM);
4741 fprintf (dump_file, ") in stmt ");
4742 print_generic_expr (dump_file, stmt, TDF_SLIM);
4744 return NULL_TREE;
4746 break;
4748 case ARRAY_REF:
4749 offset = size_zero_node;
4751 /* Store the array base in the stmt info.
4752 For one dimensional array ref a[i], the base is a,
4753 for multidimensional a[i1][i2]..[iN], the base is
4754 a[i1][i2]..[iN-1]. */
4755 array_base = TREE_OPERAND (memref, 0);
4756 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4758 new_dr = analyze_array (stmt, memref, is_read);
4759 *dr = new_dr;
4761 /* Find the relevant symbol for aliasing purposes. */
4762 base = DR_BASE_NAME (new_dr);
4763 switch (TREE_CODE (base))
4765 case VAR_DECL:
4766 symbl = base;
4767 break;
4769 case INDIRECT_REF:
4770 symbl = TREE_OPERAND (base, 0);
4771 break;
4773 case COMPONENT_REF:
4774 /* Could have recorded more accurate information -
4775 i.e, the actual FIELD_DECL that is being referenced -
4776 but later passes expect VAR_DECL as the nmt. */
4777 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4778 loop_vinfo, &offset, &base_aligned_p);
4779 if (symbl)
4780 break;
4781 /* fall through */
4782 default:
4783 if (vect_debug_details (NULL))
4785 fprintf (dump_file, "unhandled struct/class field access ");
4786 print_generic_expr (dump_file, stmt, TDF_SLIM);
4788 return NULL_TREE;
4790 break;
4792 default:
4793 if (vect_debug_details (NULL))
4795 fprintf (dump_file, "unhandled data ref: ");
4796 print_generic_expr (dump_file, memref, TDF_SLIM);
4797 fprintf (dump_file, " in stmt ");
4798 print_generic_expr (dump_file, stmt, TDF_SLIM);
4800 return NULL_TREE;
4802 return symbl;
/* Function vect_analyze_data_refs.

   Find all the data references in the loop and record them in the
   read/write data-ref lists of LOOP_VINFO, together with the memory tag
   used for aliasing.  Returns false (loop not vectorizable) when any
   data-ref has an unsupported form.

   FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs whose base is
   really an array (not a pointer) and whose alignment can be forced.
   This restriction will be relaxed.  */

4814 static bool
4815 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4817 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4818 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4819 int nbbs = loop->num_nodes;
4820 block_stmt_iterator si;
4821 int j;
4822 struct data_reference *dr;
4823 tree tag;
4824 tree address_base;
4825 bool base_aligned_p;
4826 tree offset;
4828 if (vect_debug_details (NULL))
4829 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4831 for (j = 0; j < nbbs; j++)
4833 basic_block bb = bbs[j];
4834 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4836 bool is_read = false;
4837 tree stmt = bsi_stmt (si);
4838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4839 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4840 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4841 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4842 varray_type *datarefs = NULL;
4843 int nvuses, nv_may_defs, nv_must_defs;
4844 tree memref = NULL;
4845 tree symbl;
4847 /* Assumption: there exists a data-ref in stmt, if and only if
4848 it has vuses/vdefs. */
4850 if (!vuses && !v_may_defs && !v_must_defs)
4851 continue;
4853 nvuses = NUM_VUSES (vuses);
4854 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4855 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt with both vuses and vdefs (e.g. a read-modify-write of
   memory) is not handled.  */
4857 if (nvuses && (nv_may_defs || nv_must_defs))
4859 if (vect_debug_details (NULL))
4861 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4862 print_generic_expr (dump_file, stmt, TDF_SLIM);
4864 return false;
4867 if (TREE_CODE (stmt) != MODIFY_EXPR)
4869 if (vect_debug_details (NULL))
4871 fprintf (dump_file, "unexpected vops in stmt: ");
4872 print_generic_expr (dump_file, stmt, TDF_SLIM);
4874 return false;
/* vuses => the memory reference is on the RHS (a load); vdefs => it
   is on the LHS (a store).  */
4877 if (vuses)
4879 memref = TREE_OPERAND (stmt, 1);
4880 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4881 is_read = true;
4883 else /* vdefs */
4885 memref = TREE_OPERAND (stmt, 0);
4886 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4887 is_read = false;
4890 /* Analyze MEMREF. If it is of a supported form, build data_reference
4891 struct for it (DR) and find the relevant symbol for aliasing
4892 purposes. */
4893 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4894 &dr);
4895 if (!symbl)
4897 if (vect_debug_stats (loop) || vect_debug_details (loop))
4899 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4900 print_generic_expr (dump_file, stmt, TDF_SLIM);
4902 return false;
4905 /* Find and record the memtag assigned to this data-ref. */
4906 switch (TREE_CODE (symbl))
4908 case VAR_DECL:
4909 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4910 break;
4912 case SSA_NAME:
/* Look up the type memory tag of the base variable; if it has
   none, fall back to the tag of the dereferenced pointer
   (assumes MEMREF is an INDIRECT_REF here - TODO confirm).  */
4913 symbl = SSA_NAME_VAR (symbl);
4914 tag = get_var_ann (symbl)->type_mem_tag;
4915 if (!tag)
4917 tree ptr = TREE_OPERAND (memref, 0);
4918 if (TREE_CODE (ptr) == SSA_NAME)
4919 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4921 if (!tag)
4923 if (vect_debug_stats (loop) || vect_debug_details (loop))
4924 fprintf (dump_file, "not vectorized: no memtag for ref.");
4925 return false;
4927 STMT_VINFO_MEMTAG (stmt_info) = tag;
4928 break;
4930 case ADDR_EXPR:
/* The symbol is the address of an array or of a variable.  */
4931 address_base = TREE_OPERAND (symbl, 0);
4933 switch (TREE_CODE (address_base))
4935 case ARRAY_REF:
4936 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
4937 DR_IS_READ(dr));
4938 STMT_VINFO_MEMTAG (stmt_info) =
4939 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
4940 loop_vinfo, &offset,
4941 &base_aligned_p);
4942 break;
4944 case VAR_DECL:
4945 STMT_VINFO_MEMTAG (stmt_info) = address_base;
4946 break;
4948 default:
4949 if (vect_debug_stats (loop) || vect_debug_details (loop))
4951 fprintf (dump_file,
4952 "not vectorized: unhandled address expr: ");
4953 print_generic_expr (dump_file, stmt, TDF_SLIM);
4955 return false;
4957 break;
4959 default:
4960 if (vect_debug_stats (loop) || vect_debug_details (loop))
4962 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
4963 print_generic_expr (dump_file, memref, TDF_SLIM);
4965 return false;
4968 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4969 STMT_VINFO_DATA_REF (stmt_info) = dr;
4973 return true;
4977 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
4979 /* Function vect_mark_relevant.
4981 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
4983 static void
4984 vect_mark_relevant (varray_type worklist, tree stmt)
4986 stmt_vec_info stmt_info;
4988 if (vect_debug_details (NULL))
4989 fprintf (dump_file, "mark relevant.");
4991 if (TREE_CODE (stmt) == PHI_NODE)
4993 VARRAY_PUSH_TREE (worklist, stmt);
4994 return;
4997 stmt_info = vinfo_for_stmt (stmt);
4999 if (!stmt_info)
5001 if (vect_debug_details (NULL))
5003 fprintf (dump_file, "mark relevant: no stmt info!!.");
5004 print_generic_expr (dump_file, stmt, TDF_SLIM);
5006 return;
5009 if (STMT_VINFO_RELEVANT_P (stmt_info))
5011 if (vect_debug_details (NULL))
5012 fprintf (dump_file, "already marked relevant.");
5013 return;
5016 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5017 VARRAY_PUSH_TREE (worklist, stmt);
5021 /* Function vect_stmt_relevant_p.
5023 Return true if STMT in loop that is represented by LOOP_VINFO is
5024 "relevant for vectorization".
5026 A stmt is considered "relevant for vectorization" if:
5027 - it has uses outside the loop.
5028 - it has vdefs (it alters memory).
5029 - control stmts in the loop (except for the exit condition).
5031 CHECKME: what other side effects would the vectorizer allow? */
5033 static bool
5034 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5036 v_may_def_optype v_may_defs;
5037 v_must_def_optype v_must_defs;
5038 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5039 int i;
5040 dataflow_t df;
5041 int num_uses;
5043 /* cond stmt other than loop exit cond. */
5044 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5045 return true;
5047 /* changing memory. */
5048 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5049 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5050 if (v_may_defs || v_must_defs)
5052 if (vect_debug_details (NULL))
5053 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5054 return true;
5057 /* uses outside the loop. */
5058 df = get_immediate_uses (stmt);
5059 num_uses = num_immediate_uses (df);
5060 for (i = 0; i < num_uses; i++)
5062 tree use = immediate_use (df, i);
5063 basic_block bb = bb_for_stmt (use);
5064 if (!flow_bb_inside_loop_p (loop, bb))
5066 if (vect_debug_details (NULL))
5067 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5068 return true;
5072 return false;
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts: it seeds a worklist with the stmts that
   are relevant by themselves (see vect_stmt_relevant_p) and then walks
   the def-use chains backwards, marking every in-loop def that feeds a
   relevant stmt.  Returns false when an unsupported use is found.  */

5092 static bool
5093 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5095 varray_type worklist;
5096 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5097 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5098 unsigned int nbbs = loop->num_nodes;
5099 block_stmt_iterator si;
5100 tree stmt;
5101 stmt_ann_t ann;
5102 unsigned int i;
5103 int j;
5104 use_optype use_ops;
5105 stmt_vec_info stmt_info;
5107 if (vect_debug_details (NULL))
5108 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5110 VARRAY_TREE_INIT (worklist, 64, "work list");
5112 /* 1. Init worklist. */
5114 for (i = 0; i < nbbs; i++)
5116 basic_block bb = bbs[i];
5117 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5119 stmt = bsi_stmt (si);
5121 if (vect_debug_details (NULL))
5123 fprintf (dump_file, "init: stmt relevant? ");
5124 print_generic_expr (dump_file, stmt, TDF_SLIM);
5127 stmt_info = vinfo_for_stmt (stmt);
5128 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5130 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5131 vect_mark_relevant (worklist, stmt);
5136 /* 2. Process_worklist */
5138 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5140 stmt = VARRAY_TOP_TREE (worklist);
5141 VARRAY_POP (worklist);
5143 if (vect_debug_details (NULL))
5145 fprintf (dump_file, "worklist: examine stmt: ");
5146 print_generic_expr (dump_file, stmt, TDF_SLIM);
5149 /* Examine the USES in this statement. Mark all the statements which
5150 feed this statement's uses as "relevant", unless the USE is used as
5151 an array index. */
5153 if (TREE_CODE (stmt) == PHI_NODE)
5155 /* follow the def-use chain inside the loop. */
5156 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5158 tree arg = PHI_ARG_DEF (stmt, j);
5159 tree def_stmt = NULL_TREE;
5160 basic_block bb;
5161 if (!vect_is_simple_use (arg, loop, &def_stmt))
5163 if (vect_debug_details (NULL))
5164 fprintf (dump_file, "worklist: unsupported use.");
5165 varray_clear (worklist);
5166 return false;
5168 if (!def_stmt)
5169 continue;
5171 if (vect_debug_details (NULL))
5173 fprintf (dump_file, "worklist: def_stmt: ");
5174 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
/* Only defs inside the loop need to be (and are) vectorized.  */
5177 bb = bb_for_stmt (def_stmt);
5178 if (flow_bb_inside_loop_p (loop, bb))
5179 vect_mark_relevant (worklist, def_stmt);
5183 ann = stmt_ann (stmt);
5184 use_ops = USE_OPS (ann);
5186 for (i = 0; i < NUM_USES (use_ops); i++)
5188 tree use = USE_OP (use_ops, i);
5190 /* We are only interested in uses that need to be vectorized. Uses
5191 that are used for address computation are not considered relevant.
*/
5193 if (exist_non_indexing_operands_for_use_p (use, stmt))
5195 tree def_stmt = NULL_TREE;
5196 basic_block bb;
5197 if (!vect_is_simple_use (use, loop, &def_stmt))
5199 if (vect_debug_details (NULL))
5200 fprintf (dump_file, "worklist: unsupported use.");
5201 varray_clear (worklist);
5202 return false;
5205 if (!def_stmt)
5206 continue;
5208 if (vect_debug_details (NULL))
5210 fprintf (dump_file, "worklist: examine use %d: ", i);
5211 print_generic_expr (dump_file, use, TDF_SLIM);
5214 bb = bb_for_stmt (def_stmt);
5215 if (flow_bb_inside_loop_p (loop, bb))
5216 vect_mark_relevant (worklist, def_stmt);
5219 } /* while worklist */
5221 varray_clear (worklist);
5222 return true;
5226 /* Function vect_analyze_loop_with_symbolic_num_of_iters.
5228 In case the number of iterations that LOOP iterates in unknown at compile
5229 time, an epilog loop will be generated, and the loop induction variables
5230 (IVs) will be "advanced" to the value they are supposed to take just before
5231 the epilog loop. Here we check that the access function of the loop IVs
5232 and the expression that represents the loop bound are simple enough.
5233 These restrictions will be relaxed in the future. */
5235 static bool
5236 vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
5237 struct loop *loop)
5239 basic_block bb = loop->header;
5240 tree phi;
5242 if (vect_debug_details (NULL))
5243 fprintf (dump_file,
5244 "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
5246 if (chrec_contains_undetermined (niters))
5248 if (vect_debug_details (NULL))
5249 fprintf (dump_file, "Infinite number of iterations.");
5250 return false;
5253 if (!niters)
5255 if (vect_debug_details (NULL))
5256 fprintf (dump_file, "niters is NULL pointer.");
5257 return false;
5260 if (vect_debug_details (NULL))
5262 fprintf (dump_file, "Symbolic number of iterations is ");
5263 print_generic_expr (dump_file, niters, TDF_DETAILS);
5266 /* Analyze phi functions of the loop header. */
5268 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
5270 tree access_fn = NULL;
5271 tree evolution_part;
5273 if (vect_debug_details (NULL))
5275 fprintf (dump_file, "Analyze phi: ");
5276 print_generic_expr (dump_file, phi, TDF_SLIM);
5279 /* Skip virtual phi's. The data dependences that are associated with
5280 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5282 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5284 if (vect_debug_details (NULL))
5285 fprintf (dump_file, "virtual phi. skip.");
5286 continue;
5289 /* Analyze the evolution function. */
5291 access_fn = instantiate_parameters
5292 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5294 if (!access_fn)
5296 if (vect_debug_details (NULL))
5297 fprintf (dump_file, "No Access function.");
5298 return false;
5301 if (vect_debug_details (NULL))
5303 fprintf (dump_file, "Access function of PHI: ");
5304 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5307 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5309 if (evolution_part == NULL_TREE)
5310 return false;
5312 /* FORNOW: We do not transform initial conditions of IVs
5313 which evolution functions are a polynomial of degree >= 2. */
5315 if (tree_is_chrec (evolution_part))
5316 return false;
5319 return true;
5323 /* Function vect_get_loop_niters.
5325 Determine how many iterations the loop is executed. */
5327 static tree
5328 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5330 tree niters;
5332 if (vect_debug_details (NULL))
5333 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5335 niters = number_of_iterations_in_loop (loop);
5337 if (niters != NULL_TREE
5338 && niters != chrec_dont_know)
5340 *number_of_iterations = niters;
5342 if (vect_debug_details (NULL))
5344 fprintf (dump_file, "==> get_loop_niters:" );
5345 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5349 return get_loop_exit_condition (loop);
5353 /* Function vect_analyze_loop_form.
5355 Verify the following restrictions (some may be relaxed in the future):
5356 - it's an inner-most loop
5357 - number of BBs = 2 (which are the loop header and the latch)
5358 - the loop has a pre-header
5359 - the loop has a single entry and exit
5360 - the loop exit condition is simple enough, and the number of iterations
5361 can be analyzed (a countable loop). */
5363 static loop_vec_info
5364 vect_analyze_loop_form (struct loop *loop)
5366 loop_vec_info loop_vinfo;
5367 tree loop_cond;
5368 tree number_of_iterations = NULL;
5370 if (vect_debug_details (loop))
5371 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5373 if (loop->inner
5374 || !loop->single_exit
5375 || loop->num_nodes != 2)
5377 if (vect_debug_stats (loop) || vect_debug_details (loop))
5379 fprintf (dump_file, "not vectorized: bad loop form. ");
5380 if (loop->inner)
5381 fprintf (dump_file, "nested loop.");
5382 else if (!loop->single_exit)
5383 fprintf (dump_file, "multiple exits.");
5384 else if (loop->num_nodes != 2)
5385 fprintf (dump_file, "too many BBs in loop.");
5388 return NULL;
5391 /* We assume that the loop exit condition is at the end of the loop. i.e,
5392 that the loop is represented as a do-while (with a proper if-guard
5393 before the loop if needed), where the loop header contains all the
5394 executable statements, and the latch is empty. */
5395 if (!empty_block_p (loop->latch))
5397 if (vect_debug_stats (loop) || vect_debug_details (loop))
5398 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5399 return NULL;
5402 if (empty_block_p (loop->header))
5404 if (vect_debug_stats (loop) || vect_debug_details (loop))
5405 fprintf (dump_file, "not vectorized: empty loop.");
5406 return NULL;
5409 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5410 if (!loop_cond)
5412 if (vect_debug_stats (loop) || vect_debug_details (loop))
5413 fprintf (dump_file, "not vectorized: complicated exit condition.");
5414 return NULL;
5417 if (!number_of_iterations)
5419 if (vect_debug_stats (loop) || vect_debug_details (loop))
5420 fprintf (dump_file,
5421 "not vectorized: number of iterations cannot be computed.");
5422 return NULL;
5425 loop_vinfo = new_loop_vec_info (loop);
5426 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5427 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5429 if (vect_debug_stats (loop) || vect_debug_details (loop))
5430 fprintf (dump_file, "loop bound unknown.");
5432 /* Unknown loop bound. */
5433 if (!vect_analyze_loop_with_symbolic_num_of_iters
5434 (number_of_iterations, loop))
5436 if (vect_debug_stats (loop) || vect_debug_details (loop))
5437 fprintf (dump_file,
5438 "not vectorized: can't determine loop bound.");
5439 return NULL;
5441 else
5443 /* We need only one loop entry for unknown loop bound support. */
5444 if (loop->num_entries != 1 || !loop->pre_header)
5446 if (vect_debug_stats (loop) || vect_debug_details (loop))
5447 fprintf (dump_file,
5448 "not vectorized: more than one loop entry.");
5449 return NULL;
5453 else
5454 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5456 if (vect_debug_stats (loop) || vect_debug_details (loop))
5457 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5458 return NULL;
5461 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5463 return loop_vinfo;
5467 /* Function vect_analyze_loop.
5469 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5470 for it. The different analyses will record information in the
5471 loop_vec_info struct. */
5473 static loop_vec_info
5474 vect_analyze_loop (struct loop *loop)
5476 bool ok;
5477 loop_vec_info loop_vinfo;
5479 if (vect_debug_details (NULL))
5480 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5482 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5484 loop_vinfo = vect_analyze_loop_form (loop);
5485 if (!loop_vinfo)
5487 if (vect_debug_details (loop))
5488 fprintf (dump_file, "bad loop form.");
5489 return NULL;
5492 /* Find all data references in the loop (which correspond to vdefs/vuses)
5493 and analyze their evolution in the loop.
5495 FORNOW: Handle only simple, array references, which
5496 alignment can be forced, and aligned pointer-references. */
5498 ok = vect_analyze_data_refs (loop_vinfo);
5499 if (!ok)
5501 if (vect_debug_details (loop))
5502 fprintf (dump_file, "bad data references.");
5503 destroy_loop_vec_info (loop_vinfo);
5504 return NULL;
5507 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5509 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5510 if (!ok)
5512 if (vect_debug_details (loop))
5513 fprintf (dump_file, "unexpected pattern.");
5514 if (vect_debug_details (loop))
5515 fprintf (dump_file, "not vectorized: unexpected pattern.");
5516 destroy_loop_vec_info (loop_vinfo);
5517 return NULL;
5520 /* Check that all cross-iteration scalar data-flow cycles are OK.
5521 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5523 ok = vect_analyze_scalar_cycles (loop_vinfo);
5524 if (!ok)
5526 if (vect_debug_details (loop))
5527 fprintf (dump_file, "bad scalar cycle.");
5528 destroy_loop_vec_info (loop_vinfo);
5529 return NULL;
5532 /* Analyze data dependences between the data-refs in the loop.
5533 FORNOW: fail at the first data dependence that we encounter. */
5535 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5536 if (!ok)
5538 if (vect_debug_details (loop))
5539 fprintf (dump_file, "bad data dependence.");
5540 destroy_loop_vec_info (loop_vinfo);
5541 return NULL;
5544 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5545 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5547 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5548 if (!ok)
5550 if (vect_debug_details (loop))
5551 fprintf (dump_file, "bad data access.");
5552 destroy_loop_vec_info (loop_vinfo);
5553 return NULL;
5556 /* Analyze the alignment of the data-refs in the loop.
5557 FORNOW: Only aligned accesses are handled. */
5559 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5560 if (!ok)
5562 if (vect_debug_details (loop))
5563 fprintf (dump_file, "bad data alignment.");
5564 destroy_loop_vec_info (loop_vinfo);
5565 return NULL;
5568 /* Scan all the operations in the loop and make sure they are
5569 vectorizable. */
5571 ok = vect_analyze_operations (loop_vinfo);
5572 if (!ok)
5574 if (vect_debug_details (loop))
5575 fprintf (dump_file, "bad operation or unsupported loop bound.");
5576 destroy_loop_vec_info (loop_vinfo);
5577 return NULL;
5580 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5582 return loop_vinfo;
5586 /* Function need_imm_uses_for.
5588 Return whether we ought to include information for 'var'
5589 when calculating immediate uses. For this pass we only want use
5590 information for non-virtual variables. */
5592 static bool
5593 need_imm_uses_for (tree var)
5595 return is_gimple_reg (var);
5599 /* Function vectorize_loops.
5601 Entry Point to loop vectorization phase. */
5603 void
5604 vectorize_loops (struct loops *loops)
5606 unsigned int i, loops_num;
5607 unsigned int num_vectorized_loops = 0;
5609 /* Does the target support SIMD? */
5610 /* FORNOW: until more sophisticated machine modelling is in place. */
5611 if (!UNITS_PER_SIMD_WORD)
5613 if (vect_debug_details (NULL))
5614 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5615 return;
5618 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5620 /* ----------- Analyze loops. ----------- */
5622 /* If some loop was duplicated, it gets bigger number
5623 than all previously defined loops. This fact allows us to run
5624 only over initial loops skipping newly generated ones. */
5625 loops_num = loops->num;
5626 for (i = 1; i < loops_num; i++)
5628 loop_vec_info loop_vinfo;
5629 struct loop *loop = loops->parray[i];
5631 if (!loop)
5632 continue;
5634 loop_vinfo = vect_analyze_loop (loop);
5635 loop->aux = loop_vinfo;
5637 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5638 continue;
5640 vect_transform_loop (loop_vinfo, loops);
5641 num_vectorized_loops++;
5644 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5645 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5646 num_vectorized_loops);
5648 /* ----------- Finalize. ----------- */
5650 free_df ();
5651 for (i = 1; i < loops_num; i++)
5653 struct loop *loop = loops->parray[i];
5654 loop_vec_info loop_vinfo;
5656 if (!loop)
5657 continue;
5658 loop_vinfo = loop->aux;
5659 destroy_loop_vec_info (loop_vinfo);
5660 loop->aux = NULL;
5663 rewrite_into_ssa (false);
5664 if (bitmap_first_set_bit (vars_to_rename) >= 0)
5666 /* The rewrite of ssa names may cause violation of loop closed ssa
5667 form invariants. TODO -- avoid these rewrites completely.
5668 Information in virtual phi nodes is sufficient for it. */
5669 rewrite_into_loop_closed_ssa ();
5671 rewrite_into_loop_closed_ssa ();
5672 bitmap_clear (vars_to_rename);